|
|
|
@@ -8,78 +8,23 @@ import json |
|
|
|
from modelscope.metainfo import Metrics, Trainers |
|
|
|
from modelscope.msdatasets import MsDataset |
|
|
|
from modelscope.trainers import build_trainer |
|
|
|
from modelscope.utils.constant import ModelFile |
|
|
|
from modelscope.utils.constant import DownloadMode, ModelFile |
|
|
|
from modelscope.utils.test_utils import test_level |
|
|
|
|
|
|
|
|
|
|
|
class TestOfaTrainer(unittest.TestCase): |
|
|
|
|
|
|
|
def setUp(self) -> None: |
|
|
|
# self.finetune_cfg = \ |
|
|
|
# {'framework': 'pytorch', |
|
|
|
# 'task': 'image-captioning', |
|
|
|
# 'model': {'type': 'ofa', |
|
|
|
# 'beam_search': {'beam_size': 5, |
|
|
|
# 'max_len_b': 16, |
|
|
|
# 'min_len': 1, |
|
|
|
# 'no_repeat_ngram_size': 0}, |
|
|
|
# 'seed': 7, |
|
|
|
# 'max_src_length': 256, |
|
|
|
# 'language': 'en', |
|
|
|
# 'gen_type': 'generation', |
|
|
|
# 'patch_image_size': 480, |
|
|
|
# 'max_image_size': 480, |
|
|
|
# 'imagenet_default_mean_and_std': False}, |
|
|
|
# 'pipeline': {'type': 'image-captioning'}, |
|
|
|
# 'dataset': {'column_map': {'text': 'caption'}}, |
|
|
|
# 'train': {'work_dir': 'work/ckpts/caption', |
|
|
|
# # 'launcher': 'pytorch', |
|
|
|
# 'max_epochs': 1, |
|
|
|
# 'use_fp16': True, |
|
|
|
# 'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, |
|
|
|
# 'lr_scheduler': {'name': 'polynomial_decay', |
|
|
|
# 'warmup_proportion': 0.01, |
|
|
|
# 'lr_end': 1e-07}, |
|
|
|
# 'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False}, |
|
|
|
# 'optimizer': {'type': 'AdamW', 'lr': 5e-05, 'weight_decay': 0.01}, |
|
|
|
# 'optimizer_hook': {'type': 'TorchAMPOptimizerHook', |
|
|
|
# 'cumulative_iters': 1, |
|
|
|
# 'grad_clip': {'max_norm': 1.0, 'norm_type': 2}, |
|
|
|
# 'loss_keys': 'loss'}, |
|
|
|
# 'criterion': {'name': 'AdjustLabelSmoothedCrossEntropyCriterion', |
|
|
|
# 'constraint_range': None, |
|
|
|
# 'drop_worst_after': 0, |
|
|
|
# 'drop_worst_ratio': 0.0, |
|
|
|
# 'ignore_eos': False, |
|
|
|
# 'ignore_prefix_size': 0, |
|
|
|
# 'label_smoothing': 0.1, |
|
|
|
# 'reg_alpha': 1.0, |
|
|
|
# 'report_accuracy': False, |
|
|
|
# 'sample_patch_num': 196, |
|
|
|
# 'sentence_avg': False, |
|
|
|
# 'use_rdrop': True}, |
|
|
|
# 'hooks': [{'type': 'BestCkptSaverHook', |
|
|
|
# 'metric_key': 'bleu-4', |
|
|
|
# 'interval': 100}, |
|
|
|
# {'type': 'TextLoggerHook', 'interval': 1}, |
|
|
|
# {'type': 'IterTimerHook'}, |
|
|
|
# {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]}, |
|
|
|
# 'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, |
|
|
|
# 'metrics': [{'type': 'bleu', |
|
|
|
# 'eval_tokenized_bleu': False, |
|
|
|
# 'ref_name': 'labels', |
|
|
|
# 'hyp_name': 'caption'}]}, |
|
|
|
# 'preprocessor': []} |
|
|
|
self.finetune_cfg = \ |
|
|
|
{'framework': 'pytorch', |
|
|
|
'task': 'ocr-recognition', |
|
|
|
'model': {'type': 'ofa', |
|
|
|
'beam_search': {'beam_size': 5, |
|
|
|
'max_len_b': 16, |
|
|
|
'max_len_b': 64, |
|
|
|
'min_len': 1, |
|
|
|
'no_repeat_ngram_size': 0}, |
|
|
|
'seed': 7, |
|
|
|
'max_src_length': 256, |
|
|
|
'max_src_length': 128, |
|
|
|
'language': 'zh', |
|
|
|
'gen_type': 'generation', |
|
|
|
'patch_image_size': 480, |
|
|
|
@@ -115,13 +60,13 @@ class TestOfaTrainer(unittest.TestCase): |
|
|
|
'sentence_avg': False, |
|
|
|
'use_rdrop': True}, |
|
|
|
'hooks': [{'type': 'BestCkptSaverHook', |
|
|
|
'metric_key': 'ned', |
|
|
|
'metric_key': 'accuracy', |
|
|
|
'interval': 100}, |
|
|
|
{'type': 'TextLoggerHook', 'interval': 1}, |
|
|
|
{'type': 'IterTimerHook'}, |
|
|
|
{'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]}, |
|
|
|
'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, |
|
|
|
'metrics': [{'type': 'ned'}]}, |
|
|
|
'metrics': [{'type': 'accuracy'}]}, |
|
|
|
'preprocessor': []} |
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
|
@@ -140,12 +85,14 @@ class TestOfaTrainer(unittest.TestCase): |
|
|
|
'ocr_fudanvi_zh', |
|
|
|
subset_name='scene', |
|
|
|
namespace='modelscope', |
|
|
|
split='train[:12]'), |
|
|
|
split='train[:1000]', |
|
|
|
download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS), |
|
|
|
eval_dataset=MsDataset.load( |
|
|
|
'ocr_fudanvi_zh', |
|
|
|
subset_name='scene', |
|
|
|
namespace='modelscope', |
|
|
|
split='validation[:4]'), |
|
|
|
split='test[:100]', |
|
|
|
download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS), |
|
|
|
cfg_file=config_file) |
|
|
|
trainer = build_trainer(name=Trainers.ofa, default_args=args) |
|
|
|
trainer.train() |
|
|
|
|