| @@ -377,7 +377,7 @@ class Metrics(object): | |||
| audio_noise_metric = 'audio-noise-metric' | |||
| # text gen | |||
| bleu = 'bleu' | |||
| BLEU = 'bleu' | |||
| # metrics for image denoise task | |||
| image_denoise_metric = 'image-denoise-metric' | |||
| @@ -399,6 +399,8 @@ class Metrics(object): | |||
| movie_scene_segmentation_metric = 'movie-scene-segmentation-metric' | |||
| # metric for inpainting task | |||
| image_inpainting_metric = 'image-inpainting-metric' | |||
| # metric for ocr | |||
| NED = 'ned' | |||
| class Optimizers(object): | |||
| @@ -11,7 +11,7 @@ from .builder import METRICS, MetricKeys | |||
| EVAL_BLEU_ORDER = 4 | |||
| @METRICS.register_module(group_key=default_group, module_name=Metrics.bleu) | |||
| @METRICS.register_module(group_key=default_group, module_name=Metrics.BLEU) | |||
| class BleuMetric(Metric): | |||
| """The metric computation bleu for text generation classes. | |||
| @@ -23,6 +23,7 @@ class MetricKeys(object): | |||
| BLEU_4 = 'bleu-4' | |||
| ROUGE_1 = 'rouge-1' | |||
| ROUGE_L = 'rouge-l' | |||
| NED = 'ned' # ocr metric | |||
| task_default_metrics = { | |||
| @@ -16,6 +16,7 @@ from .base import Preprocessor | |||
| from .builder import PREPROCESSORS | |||
| from .ofa import * # noqa | |||
| from .ofa.utils.collate import collate_fn | |||
| from .ofa.utils.constant import OFA_TASK_KEY_MAPPING | |||
| __all__ = [ | |||
| 'OfaPreprocessor', | |||
| @@ -51,24 +52,13 @@ class OfaPreprocessor(Preprocessor): | |||
| Tasks.text_summarization: OfaSummarizationPreprocessor, | |||
| Tasks.text_to_image_synthesis: OfaTextToImageSynthesisPreprocessor | |||
| } | |||
| input_key_mapping = { | |||
| Tasks.ocr_recognition: ['image'], | |||
| Tasks.image_captioning: ['image'], | |||
| Tasks.image_classification: ['image'], | |||
| Tasks.text_summarization: ['text'], | |||
| Tasks.text_classification: ['text', 'text2'], | |||
| Tasks.visual_grounding: ['image', 'text'], | |||
| Tasks.visual_question_answering: ['image', 'text'], | |||
| Tasks.visual_entailment: ['image', 'text', 'text2'], | |||
| Tasks.text_to_image_synthesis: ['text'] | |||
| } | |||
| model_dir = model_dir if osp.exists(model_dir) else snapshot_download( | |||
| model_dir) | |||
| self.cfg = Config.from_file( | |||
| osp.join(model_dir, ModelFile.CONFIGURATION)) | |||
| self.preprocess = preprocess_mapping[self.cfg.task]( | |||
| cfg=self.cfg, model_dir=model_dir, mode=mode) | |||
| self.keys = input_key_mapping[self.cfg.task] | |||
| self.keys = OFA_TASK_KEY_MAPPING[self.cfg.task] | |||
| self.tokenizer = self.preprocess.tokenizer | |||
| if kwargs.get('no_collate', None): | |||
| self.no_collate = True | |||
| @@ -6,9 +6,12 @@ from os import path as osp | |||
| import json | |||
| import numpy as np | |||
| import torch | |||
| from PIL import Image | |||
| from modelscope.models.multi_modal.ofa import OFATokenizer, OFATokenizerZH | |||
| from modelscope.preprocessors.image import load_image | |||
| from modelscope.utils.trie import Trie | |||
| from .utils.constant import OFA_TASK_KEY_MAPPING | |||
| from .utils.random_help import set_torch_seed | |||
| @@ -59,6 +62,14 @@ class OfaBasePreprocessor: | |||
| self.mean = [0.5, 0.5, 0.5] | |||
| self.std = [0.5, 0.5, 0.5] | |||
| self.patch_image_size = self.cfg.model.get('patch_image_size', 480) | |||
| self.column_map = { | |||
| key: key | |||
| for key in OFA_TASK_KEY_MAPPING[self.cfg.task] | |||
| } | |||
| if hasattr(self.cfg, | |||
| 'dataset') and self.cfg.dataset.column_map is not None: | |||
| for k, v in self.cfg.dataset.column_map.items(): | |||
| self.column_map[k] = v | |||
| self.transtab = str.maketrans( | |||
| {key: None | |||
| for key in string.punctuation}) | |||
| @@ -147,3 +158,8 @@ class OfaBasePreprocessor: | |||
| constraint_prefix_token) | |||
| constraint_mask[i][constraint_nodes] = True | |||
| sample['constraint_mask'] = constraint_mask | |||
def get_img_pil(self, path_or_url_or_pil):
    """Return the input as a PIL image.

    Args:
        path_or_url_or_pil: Either an existing ``PIL.Image.Image`` or a value
            that ``load_image`` accepts.

    Returns:
        The input itself when it is already a PIL image; otherwise the
        result of ``load_image`` on the input.
    """
    # Already decoded: hand it back untouched instead of reloading.
    if isinstance(path_or_url_or_pil, Image.Image):
        return path_or_url_or_pil
    return load_image(path_or_url_or_pil)
| @@ -1,12 +1,9 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| from typing import Any, Dict, Union | |||
| from typing import Any, Dict | |||
| import torch | |||
| from PIL import Image | |||
| from torchvision import transforms | |||
| from modelscope.preprocessors.image import load_image | |||
| from modelscope.utils.constant import ModeKeys | |||
| from .base import OfaBasePreprocessor | |||
| @@ -46,7 +43,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor): | |||
| def _build_train_sample(self, data: Dict[str, Any]) -> Dict[str, Any]: | |||
| sample = self._build_infer_sample(data) | |||
| target = data['text'] | |||
| target = data[self.column_map['text']] | |||
| target = target.translate(self.transtab).strip() | |||
| target_token_list = target.strip().split() | |||
| target = ' '.join(target_token_list[:self.max_tgt_length]) | |||
| @@ -56,8 +53,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor): | |||
| return sample | |||
| def _build_infer_sample(self, data: Dict[str, Any]) -> Dict[str, Any]: | |||
| image = data['image'] if isinstance( | |||
| data['image'], Image.Image) else load_image(data['image']) | |||
| image = self.get_img_pil(data[self.column_map['image']]) | |||
| patch_image = self.patch_resize_transform(image) | |||
| prompt = self.cfg.model.get('prompt', ' what does the image describe?') | |||
| inputs = self.tokenize_text(prompt) | |||
| @@ -66,6 +62,6 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor): | |||
| 'patch_image': patch_image, | |||
| 'patch_mask': torch.tensor([True]) | |||
| } | |||
| if 'text' in data: | |||
| sample['label'] = data['text'] | |||
| if self.column_map['text'] in data: | |||
| sample['label'] = data[self.column_map['text']] | |||
| return sample | |||
| @@ -1,7 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import random | |||
| import unicodedata | |||
| from typing import Any, Dict, Union | |||
| from typing import Any, Dict | |||
| import torch | |||
| from PIL import Image | |||
| @@ -0,0 +1,13 @@ | |||
| from modelscope.utils.constant import Tasks | |||
# Names of the input fields for each supported OFA task, keyed by task.
# The preprocessors look this table up with ``cfg.task`` to obtain their input
# keys and to seed the default (identity) column map.
| OFA_TASK_KEY_MAPPING = { | |||
| Tasks.ocr_recognition: ['image'], | |||
| Tasks.image_captioning: ['image'], | |||
| Tasks.image_classification: ['image'], | |||
| Tasks.text_summarization: ['text'], | |||
| Tasks.text_classification: ['text', 'text2'], | |||
| Tasks.visual_grounding: ['image', 'text'], | |||
| Tasks.visual_question_answering: ['image', 'text'], | |||
| Tasks.visual_entailment: ['image', 'text', 'text2'], | |||
| Tasks.text_to_image_synthesis: ['text'] | |||
| } | |||
| @@ -2,21 +2,27 @@ | |||
| import math | |||
| import os | |||
| import shutil | |||
| from functools import partial | |||
| from typing import Callable, Dict, Optional, Tuple, Union | |||
| from datasets import load_dataset | |||
| import torch | |||
| from torch import distributed as dist | |||
| from torch import nn | |||
| from torch.utils.data import Dataset | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.models.base import Model | |||
| from modelscope.models.base import Model, TorchModel | |||
| from modelscope.msdatasets.ms_dataset import MsDataset | |||
| from modelscope.preprocessors.base import Preprocessor | |||
| from modelscope.preprocessors.multi_modal import OfaPreprocessor | |||
| from modelscope.preprocessors.ofa.utils.collate import collate_fn | |||
| from modelscope.trainers import EpochBasedTrainer | |||
| from modelscope.trainers.builder import TRAINERS | |||
| from modelscope.trainers.optimizer.builder import build_optimizer | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import ConfigKeys, ModeKeys, ModelFile | |||
| from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys, | |||
| ModeKeys) | |||
| from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion, | |||
| get_schedule) | |||
| @@ -24,56 +30,100 @@ from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion, | |||
| @TRAINERS.register_module(module_name=Trainers.ofa_tasks) | |||
| class OFATrainer(EpochBasedTrainer): | |||
def __init__(
        self,
        model: Optional[Union[TorchModel, nn.Module, str]] = None,
        cfg_file: Optional[str] = None,
        arg_parse_fn: Optional[Callable] = None,
        data_collator: Optional[Union[Callable, Dict[str,
                                                     Callable]]] = None,
        train_dataset: Optional[Union[MsDataset, Dataset]] = None,
        eval_dataset: Optional[Union[MsDataset, Dataset]] = None,
        preprocessor: Optional[Union[Preprocessor,
                                     Dict[str, Preprocessor]]] = None,
        optimizers: Tuple[torch.optim.Optimizer,
                          torch.optim.lr_scheduler._LRScheduler] = (None,
                                                                    None),
        model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
        seed: int = 42,
        **kwargs):
    """Build an OFA fine-tuning trainer.

    Args:
        model: Model id / local path passed to ``Model.from_pretrained``, or
            an already constructed model (it must expose ``model_dir`` and
            ``tokenizer`` -- NOTE(review): confirm for plain ``nn.Module``).
        cfg_file: Path of the configuration file. When omitted, the
            configuration file inside the model directory is used.
        arg_parse_fn: Forwarded unchanged to the base trainer.
        data_collator: Batch collator. Defaults to ``collate_fn`` bound to
            the tokenizer's pad and eos token ids.
        train_dataset: Training dataset; its length is used to derive the
            total number of scheduler steps.
        eval_dataset: Evaluation dataset, forwarded to the base trainer.
        preprocessor: Preprocessor(s). Defaults to one ``OfaPreprocessor``
            per train/val mode, both created from the work directory.
        optimizers: ``(optimizer, lr_scheduler)``; each ``None`` entry is
            built from ``cfg.train``.
        model_revision: Revision used when ``model`` is loaded by name.
        seed: Forwarded unchanged to the base trainer.
        **kwargs: Extra base-trainer options. ``work_dir``, ``launcher`` and
            ``use_fp16`` fall back to ``cfg.train`` when not supplied.
    """
    # Only resolve through the hub when given an id/path; an already built
    # model is used as is, matching the declared parameter type.
    if isinstance(model, str):
        model = Model.from_pretrained(model, revision=model_revision)
    model_dir = model.model_dir

    # Keep ``OFATrainer(model=...)`` working without an explicit cfg_file by
    # falling back to the configuration shipped with the model.
    if cfg_file is None:
        from modelscope.utils.constant import ModelFile
        cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
    cfg = Config.from_file(cfg_file)

    # A missing, empty or None work_dir falls back to the configured one.
    work_dir = kwargs.get('work_dir') or cfg.train.work_dir
    # Hand the resolved directory to the base trainer so it works in the
    # same place the tokenizer files are copied to.
    kwargs['work_dir'] = work_dir
    # The copies below fail if the target directory does not exist yet.
    os.makedirs(work_dir, exist_ok=True)

    tokenizer_files = {
        'zh': [
            'tokenizer.json', 'tokenizer_config.json', 'vocab.txt',
            'config.json'
        ],
        'en':
        ['tokenizer.json', 'vocab.json', 'merges.txt', 'config.json'],
    }
    # Copy the pretrained tokenizer files next to the fine-tuning outputs so
    # a preprocessor can be created from work_dir; existing files are kept.
    for filename in tokenizer_files[cfg.model.get('language', 'en')]:
        finetune_file = os.path.join(work_dir, filename)
        pretrain_file = os.path.join(model_dir, filename)
        if os.path.exists(finetune_file):
            continue
        if os.path.exists(pretrain_file):
            shutil.copy(pretrain_file, finetune_file)

    if preprocessor is None:
        preprocessor = {
            ConfigKeys.train:
            OfaPreprocessor(
                model_dir=work_dir, mode=ModeKeys.TRAIN, no_collate=True),
            ConfigKeys.val:
            OfaPreprocessor(
                model_dir=work_dir, mode=ModeKeys.EVAL, no_collate=True),
        }

    # use torchrun launch
    world_size = int(os.environ.get('WORLD_SIZE', 1))
    epoch_steps = math.ceil(
        len(train_dataset) /  # noqa
        (cfg.train.dataloader.batch_size_per_gpu * world_size))  # noqa
    cfg.train.lr_scheduler.num_train_steps = epoch_steps * cfg.train.max_epochs
    cfg.train.criterion.tokenizer = model.tokenizer
    self.criterion = AdjustLabelSmoothedCrossEntropyCriterion(
        cfg.train.criterion)

    # Build whichever half of (optimizer, lr_scheduler) the caller left out.
    optimizer, lr_scheduler = optimizers
    if optimizer is None:
        optimizer = build_optimizer(model, cfg=cfg.train.optimizer)
    if lr_scheduler is None:
        scheduler_class, scheduler_args = get_schedule(
            cfg.train.lr_scheduler)
        if scheduler_class is not None:
            lr_scheduler = scheduler_class(
                optimizer=optimizer, **scheduler_args)
    optimizers = (optimizer, lr_scheduler)

    if data_collator is None:
        data_collator = partial(
            collate_fn,
            pad_idx=model.tokenizer.pad_token_id,
            eos_idx=model.tokenizer.eos_token_id,
        )

    # Explicit keyword arguments win over the values in the configuration.
    if 'launcher' not in kwargs and cfg.train.get('launcher', None):
        kwargs['launcher'] = cfg.train.launcher
    if 'use_fp16' not in kwargs and cfg.train.get('use_fp16', False):
        kwargs['use_fp16'] = cfg.train.use_fp16
    # Always forced off before delegating to the base trainer.
    kwargs['to_tensor'] = False

    super().__init__(
        model=model,
        cfg_file=cfg_file,
        arg_parse_fn=arg_parse_fn,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        preprocessor=preprocessor,
        optimizers=optimizers,
        seed=seed,
        **kwargs,
    )
| @@ -102,24 +152,3 @@ class OFATrainer(EpochBasedTrainer): | |||
| else: | |||
| self.log_buffer.update(train_outputs['log_vars']) | |||
| self.train_outputs = train_outputs | |||
def _build_dataset_with_config(self, cfg):
    """Create the dataset(s) described by ``cfg.dataset``.

    Args:
        cfg: Configuration whose ``dataset`` section has either an
            ``hf_dataset`` entry (together with ``script`` and ``sep``) or an
            ``ms_dataset`` entry, plus a ``column_map`` used for renaming.

    Returns:
        For ``hf_dataset``: the ``MsDataset`` wrapping the loaded and renamed
        dataset. For ``ms_dataset``: a dict with one renamed ``MsDataset``
        per key of ``cfg.dataset.ms_dataset``.

    Raises:
        NotImplementedError: If neither source is configured.
    """
    ds_cfg = cfg.dataset

    if hasattr(ds_cfg, 'hf_dataset'):
        raw = load_dataset(
            ds_cfg.script,
            data_files=ds_cfg.hf_dataset,
            sep=ds_cfg.sep,
        )
        return MsDataset.from_hf_dataset(
            raw.rename_columns(ds_cfg.column_map))

    if not hasattr(ds_cfg, 'ms_dataset'):
        raise NotImplementedError

    # One entry per configured key, each loaded and then renamed.
    splits = {}
    for split_name in ds_cfg.ms_dataset.keys():
        loaded = MsDataset.load(**ds_cfg.ms_dataset[split_name])
        splits[split_name] = MsDataset.from_hf_dataset(
            loaded._hf_ds.rename_columns(ds_cfg.column_map))
    return splits
| @@ -282,6 +282,7 @@ class ConfigKeys(object): | |||
| """Fixed keywords in configuration file""" | |||
| train = 'train' | |||
| val = 'val' | |||
| test = 'test' | |||
| class Requirements(object): | |||
| @@ -5,27 +5,102 @@ import os.path as osp | |||
| import shutil | |||
| import unittest | |||
| from modelscope.metainfo import Trainers | |||
| import json | |||
| from modelscope.metainfo import Metrics, Trainers | |||
| from modelscope.msdatasets import MsDataset | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.constant import ModelFile | |||
| from modelscope.utils.test_utils import test_level | |||
class TestOfaTrainer(unittest.TestCase):
    """End-to-end fine-tuning test for the OFA trainer (image captioning)."""

    def setUp(self) -> None:
        # Fine-tuning configuration; the test serialises it to
        # ``configuration.json`` in the workspace and passes it as cfg_file.
        self.finetune_cfg = {
            'framework': 'pytorch',
            'task': 'image-captioning',
            'model': {
                'type': 'ofa',
                'beam_search': {
                    'beam_size': 5,
                    'max_len_b': 16,
                    'min_len': 1,
                    'no_repeat_ngram_size': 0
                },
                'seed': 7,
                'max_src_length': 256,
                'language': 'en',
                'gen_type': 'generation',
                'patch_image_size': 480,
                'max_image_size': 480,
                'imagenet_default_mean_and_std': False
            },
            'pipeline': {'type': 'image-captioning'},
            # The dataset stores the caption under 'caption', not 'text'.
            'dataset': {'column_map': {'text': 'caption'}},
            'train': {
                'work_dir': 'work/ckpts/caption',
                # 'launcher': 'pytorch',
                'max_epochs': 1,
                'use_fp16': True,
                'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
                'lr_scheduler': {
                    'name': 'polynomial_decay',
                    'warmup_proportion': 0.01,
                    'lr_end': 1e-07
                },
                'lr_scheduler_hook': {
                    'type': 'LrSchedulerHook',
                    'by_epoch': False
                },
                'optimizer': {
                    'type': 'AdamW',
                    'lr': 5e-05,
                    'weight_decay': 0.01
                },
                'optimizer_hook': {
                    'type': 'TorchAMPOptimizerHook',
                    'cumulative_iters': 1,
                    'grad_clip': {'max_norm': 1.0, 'norm_type': 2},
                    'loss_keys': 'loss'
                },
                'criterion': {
                    'name': 'AdjustLabelSmoothedCrossEntropyCriterion',
                    'constraint_range': None,
                    'drop_worst_after': 0,
                    'drop_worst_ratio': 0.0,
                    'ignore_eos': False,
                    'ignore_prefix_size': 0,
                    'label_smoothing': 0.0,
                    'reg_alpha': 1.0,
                    'report_accuracy': False,
                    'sample_patch_num': 196,
                    'sentence_avg': False,
                    'use_rdrop': False
                },
                'hooks': [
                    {
                        'type': 'BestCkptSaverHook',
                        'metric_key': 'bleu-4',
                        'interval': 100
                    },
                    {'type': 'TextLoggerHook', 'interval': 1},
                    {'type': 'IterTimerHook'},
                    {
                        'type': 'EvaluationHook',
                        'by_epoch': True,
                        'interval': 1
                    },
                ]
            },
            'evaluation': {
                'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
                'metrics': [{
                    'type': 'bleu',
                    'eval_tokenized_bleu': False,
                    'ref_name': 'labels',
                    'hyp_name': 'caption'
                }]
            },
            'preprocessor': []
        }

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_trainer_std(self):
        """Train on a small COCO caption subset and check a model is saved."""
        WORKSPACE = './workspace/ckpts/caption'
        os.makedirs(WORKSPACE, exist_ok=True)
        # Registered up front so the workspace is removed even when training
        # or an assertion fails.
        self.addCleanup(shutil.rmtree, WORKSPACE, ignore_errors=True)

        config_file = os.path.join(WORKSPACE, 'configuration.json')
        with open(config_file, 'w') as writer:
            json.dump(self.finetune_cfg, writer)

        # Hub model id rather than a developer-local absolute path, so the
        # test can run on any machine.
        # NOTE(review): id inferred from the previous local directory name --
        # confirm it is the intended hub model.
        pretrained_model = 'damo/ofa_image-caption_coco_large_en'
        args = dict(
            model=pretrained_model,
            work_dir=WORKSPACE,
            train_dataset=MsDataset.load(
                'coco_2014_caption',
                namespace='modelscope',
                split='train[:100]'),
            eval_dataset=MsDataset.load(
                'coco_2014_caption',
                namespace='modelscope',
                split='validation[:20]'),
            metrics=[Metrics.BLEU],
            cfg_file=config_file)
        trainer = build_trainer(name=Trainers.ofa_tasks, default_args=args)
        trainer.train()

        # Check the directory listing; asserting against the joined path
        # string would only test for a substring of the path itself.
        self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE,
                      os.listdir(os.path.join(WORKSPACE, 'output')))
| if __name__ == '__main__': | |||