From b3f4ac8acc1336c91957835d3423caf809d46940 Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm"
Date: Fri, 29 Jul 2022 10:28:50 +0800
Subject: [PATCH] [to #43115042] add trainer usage doc

1. add trainer doc
2. support local configuration file for trainer
3. update nlp trainer test

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9541239
---
 docs/source/quick_start.md                 |  8 +--
 docs/source/tutorials/index.rst            |  1 +
 docs/source/tutorials/trainer.md           | 66 ++++++++++++++++++++++
 modelscope/trainers/trainer.py             |  4 +-
 tests/pipelines/test_csanmt_translation.py |  3 +-
 tests/pipelines/test_image_matting.py      |  3 +-
 tests/trainers/test_trainer_with_nlp.py    | 32 +++++++++++
 7 files changed, 106 insertions(+), 11 deletions(-)
 create mode 100644 docs/source/tutorials/trainer.md

diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md
index 6a188594..dea6f054 100644
--- a/docs/source/quick_start.md
+++ b/docs/source/quick_start.md
@@ -65,10 +65,6 @@ python -c "from modelscope.pipelines import pipeline;print(pipeline('word-segmen
 
 The pipeline function provides a concise inference interface; for an introduction and examples, see the [pipeline tutorial](tutorials/pipeline.md).
 
-## Training
+## Training & Evaluation
 
-to be done
-
-## Evaluation
-
-to be done
+The Trainer class provides a concise interface for finetuning and evaluation; for an introduction and examples, see the [Trainer tutorial](tutorials/trainer.md).
diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst
index 1de2244b..9d8528c2 100644
--- a/docs/source/tutorials/index.rst
+++ b/docs/source/tutorials/index.rst
@@ -3,3 +3,4 @@
    :caption: Tutorials
 
    pipeline.md
+   trainer.md
diff --git a/docs/source/tutorials/trainer.md b/docs/source/tutorials/trainer.md
new file mode 100644
index 00000000..f97aa327
--- /dev/null
+++ b/docs/source/tutorials/trainer.md
@@ -0,0 +1,66 @@
+# Trainer Tutorial
+ModelScope provides a large number of pretrained models. You can take any of them and train it for a specific task on a public or private dataset. This tutorial describes how to use ModelScope's `Trainer` module for finetuning and evaluation.
+
+## Environment setup
+For detailed steps, see the [quick start guide](../quick_start.md).
+
+### Preparing a dataset
+
+Before finetuning, you need to prepare a dataset for training and evaluation; see the dataset tutorial for details.
+
+As a temporary approach, we create a dummy dataset through the dataset interface:
+```python
+from datasets import Dataset
+
+from modelscope.msdatasets import MsDataset
+
+dataset_dict = {
+    'sentence1': [
+        'This is test sentence1-1', 'This is test sentence2-1',
+        'This is test sentence3-1'
+    ],
+    'sentence2': [
+        'This is test sentence1-2', 'This is test sentence2-2',
+        'This is test sentence3-2'
+    ],
+    'label': [0, 1, 1]
+}
+train_dataset = MsDataset.from_hf_dataset(Dataset.from_dict(dataset_dict))
+eval_dataset = MsDataset.from_hf_dataset(Dataset.from_dict(dataset_dict))
+```
+### Training
+ModelScope keeps all training-related configuration in the `configuration.json` file of the model repository, so completing a training run only requires creating a Trainer, loading the configuration file, and passing in the datasets.
+
+First, create a Trainer through the factory method, passing the model repository path, the training and evaluation dataset objects, and the working directory:
+```python
+from modelscope.trainers import build_trainer
+
+kwargs = dict(
+    model='damo/nlp_structbert_sentiment-classification_chinese-base',
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    work_dir='work_dir')
+
+trainer = build_trainer(default_args=kwargs)
+```
+
+Start training:
+```python
+trainer.train()
+```
+
+If you need to adjust the training parameters, download the `configuration.json` file from the model repository page, modify the parameters, and create the trainer with the path to your local configuration file:
+```python
+kwargs = dict(
+    model='damo/nlp_structbert_sentiment-classification_chinese-base',
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    cfg_file='path/to/your/configuration.json',
+    work_dir='work_dir')
+
+trainer = build_trainer(default_args=kwargs)
+trainer.train()
+```
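+
+Alternatively, the configuration can be fetched and edited programmatically, then written to a local file and passed via `cfg_file`. The following is a minimal sketch that mirrors the `read_config` and `cfg.dump` usage in the tests accompanying this change; the exact fields you modify depend on the model's configuration:
+```python
+import os
+
+from modelscope.utils.hub import read_config
+
+model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base'
+# read configuration.json from the model repository
+cfg = read_config(model_id)
+# adjust training parameters
+cfg.train.max_epochs = 20
+cfg.train.work_dir = 'work_dir'
+# dump the modified configuration to a local file
+# (the target directory must already exist)
+cfg_file = os.path.join('work_dir', 'config.json')
+cfg.dump(cfg_file)
+```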
+
+
+### Evaluation
+During training, the Trainer periodically evaluates on the validation set. The Trainer module also supports a one-off evaluation against a checkpoint saved at a specific epoch:
+```python
+eval_results = trainer.evaluate('work_dir/epoch_10.pth')
+print(eval_results)
+```
diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py
index 21999845..10a2f189 100644
--- a/modelscope/trainers/trainer.py
+++ b/modelscope/trainers/trainer.py
@@ -93,7 +93,9 @@ class EpochBasedTrainer(BaseTrainer):
             else:
                 self.model_dir = snapshot_download(
                     model, revision=model_revision)
-            cfg_file = os.path.join(self.model_dir, ModelFile.CONFIGURATION)
+            if cfg_file is None:
+                cfg_file = os.path.join(self.model_dir,
+                                        ModelFile.CONFIGURATION)
             self.model = self.build_model()
         else:
             assert cfg_file is not None, 'Config file should not be None if model is an nn.Module class'
diff --git a/tests/pipelines/test_csanmt_translation.py b/tests/pipelines/test_csanmt_translation.py
index f1f99b2e..449b0cb7 100644
--- a/tests/pipelines/test_csanmt_translation.py
+++ b/tests/pipelines/test_csanmt_translation.py
@@ -14,8 +14,7 @@ class TranslationTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_with_model_name(self):
-        pipeline_ins = pipeline(
-            task=Tasks.translation, model=self.model_id, model_revision='beta')
+        pipeline_ins = pipeline(task=Tasks.translation, model=self.model_id)
         print(pipeline_ins(input=self.inputs))
 
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index 6f13dce7..af8ace50 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -46,8 +46,7 @@ class ImageMattingTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_modelhub(self):
-        img_matting = pipeline(
-            Tasks.image_matting, model=self.model_id, model_revision='beta')
+        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         result = img_matting('data/test/images/image_matting.png')
 
         cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
diff --git a/tests/trainers/test_trainer_with_nlp.py b/tests/trainers/test_trainer_with_nlp.py
index cf2ef6d2..93d13065 100644
--- a/tests/trainers/test_trainer_with_nlp.py
+++ b/tests/trainers/test_trainer_with_nlp.py
@@ -5,11 +5,13 @@ import tempfile
 import unittest
 
 from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.metainfo import Metrics
 from modelscope.models.nlp.sbert_for_sequence_classification import \
     SbertTextClassfier
 from modelscope.msdatasets import MsDataset
 from modelscope.trainers import build_trainer
 from modelscope.utils.constant import ModelFile
+from modelscope.utils.hub import read_config
 from modelscope.utils.test_utils import test_level
 
 
@@ -73,6 +75,36 @@ class TestTrainerWithNlp(unittest.TestCase):
         for i in range(10):
             self.assertIn(f'epoch_{i+1}.pth', results_files)
 
+        eval_results = trainer.evaluate(
+            checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth'))
+        self.assertTrue(Metrics.accuracy in eval_results)
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_trainer_with_user_defined_config(self):
+        model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base'
+        cfg = read_config(model_id, revision='beta')
+        cfg.train.max_epochs = 20
+        cfg.train.work_dir = self.tmp_dir
+        cfg_file = os.path.join(self.tmp_dir, 'config.json')
+        cfg.dump(cfg_file)
+        kwargs = dict(
+            model=model_id,
+            train_dataset=self.dataset,
+            eval_dataset=self.dataset,
+            cfg_file=cfg_file,
+            model_revision='beta')
+
+        trainer = build_trainer(default_args=kwargs)
+        trainer.train()
+        results_files = os.listdir(self.tmp_dir)
+        self.assertIn(f'{trainer.timestamp}.log.json', results_files)
+        for i in range(20):
+            self.assertIn(f'epoch_{i+1}.pth', results_files)
+
+        eval_results = trainer.evaluate(
+            checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth'))
+        self.assertTrue(Metrics.accuracy in eval_results)
+
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_trainer_with_model_and_args(self):
         tmp_dir = tempfile.TemporaryDirectory().name