Format finetune code and unit test cases

master
行嗔 3 years ago
commit 55fb3b05a9
11 changed files with 215 additions and 94 deletions
  1. modelscope/metainfo.py (+3, -1)
  2. modelscope/metrics/bleu_metric.py (+1, -1)
  3. modelscope/metrics/builder.py (+1, -0)
  4. modelscope/preprocessors/multi_modal.py (+2, -12)
  5. modelscope/preprocessors/ofa/base.py (+16, -0)
  6. modelscope/preprocessors/ofa/image_captioning.py (+5, -9)
  7. modelscope/preprocessors/ofa/ocr_recognition.py (+1, -3)
  8. modelscope/preprocessors/ofa/utils/constant.py (+13, -0)
  9. modelscope/trainers/multi_modal/ofa/ofa_trainer.py (+83, -54)
  10. modelscope/utils/constant.py (+1, -0)
  11. tests/trainers/test_ofa_trainer.py (+89, -14)

modelscope/metainfo.py (+3, -1)

@@ -377,7 +377,7 @@ class Metrics(object):
     audio_noise_metric = 'audio-noise-metric'

     # text gen
-    bleu = 'bleu'
+    BLEU = 'bleu'

     # metrics for image denoise task
     image_denoise_metric = 'image-denoise-metric'
@@ -399,6 +399,8 @@ class Metrics(object):
     movie_scene_segmentation_metric = 'movie-scene-segmentation-metric'
     # metric for inpainting task
     image_inpainting_metric = 'image-inpainting-metric'
+    # metric for ocr
+    NED = 'ned'


 class Optimizers(object):


modelscope/metrics/bleu_metric.py (+1, -1)

@@ -11,7 +11,7 @@ from .builder import METRICS, MetricKeys

 EVAL_BLEU_ORDER = 4


-@METRICS.register_module(group_key=default_group, module_name=Metrics.bleu)
+@METRICS.register_module(group_key=default_group, module_name=Metrics.BLEU)
 class BleuMetric(Metric):
     """The metric computation bleu for text generation classes.




modelscope/metrics/builder.py (+1, -0)

@@ -23,6 +23,7 @@ class MetricKeys(object):
     BLEU_4 = 'bleu-4'
     ROUGE_1 = 'rouge-1'
     ROUGE_L = 'rouge-l'
+    NED = 'ned'  # ocr metric


 task_default_metrics = {
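
For context: 'ned' is normalized edit distance, the standard accuracy
measure for OCR recognition. The commit only registers the metric key;
the metric implementation itself is not part of this diff. A minimal
sketch of the usual definition, under the common convention that 1.0
means an exact match:

# Illustrative sketch only; the repo's actual NED metric is not shown in
# this commit, and the helper name below is hypothetical.
def ned_score(hyp: str, ref: str) -> float:
    # Single-row dynamic-programming Levenshtein distance.
    m, n = len(hyp), len(ref)
    row = list(range(n + 1))
    for i in range(1, m + 1):
        prev, row[0] = row[0], i
        for j in range(1, n + 1):
            cur = row[j]
            row[j] = min(
                row[j] + 1,  # deletion
                row[j - 1] + 1,  # insertion
                prev + (hyp[i - 1] != ref[j - 1]))  # substitution
            prev = cur
    # Normalize by the longer string so the score is length-invariant.
    return 1.0 - row[n] / max(m, n, 1)

assert ned_score('kitten', 'kitten') == 1.0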


modelscope/preprocessors/multi_modal.py (+2, -12)

@@ -16,6 +16,7 @@ from .base import Preprocessor
 from .builder import PREPROCESSORS
 from .ofa import *  # noqa
 from .ofa.utils.collate import collate_fn
+from .ofa.utils.constant import OFA_TASK_KEY_MAPPING

 __all__ = [
     'OfaPreprocessor',
@@ -51,24 +52,13 @@ class OfaPreprocessor(Preprocessor):
             Tasks.text_summarization: OfaSummarizationPreprocessor,
             Tasks.text_to_image_synthesis: OfaTextToImageSynthesisPreprocessor
         }
-        input_key_mapping = {
-            Tasks.ocr_recognition: ['image'],
-            Tasks.image_captioning: ['image'],
-            Tasks.image_classification: ['image'],
-            Tasks.text_summarization: ['text'],
-            Tasks.text_classification: ['text', 'text2'],
-            Tasks.visual_grounding: ['image', 'text'],
-            Tasks.visual_question_answering: ['image', 'text'],
-            Tasks.visual_entailment: ['image', 'text', 'text2'],
-            Tasks.text_to_image_synthesis: ['text']
-        }
         model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
             model_dir)
         self.cfg = Config.from_file(
             osp.join(model_dir, ModelFile.CONFIGURATION))
         self.preprocess = preprocess_mapping[self.cfg.task](
             cfg=self.cfg, model_dir=model_dir, mode=mode)
-        self.keys = input_key_mapping[self.cfg.task]
+        self.keys = OFA_TASK_KEY_MAPPING[self.cfg.task]
         self.tokenizer = self.preprocess.tokenizer
         if kwargs.get('no_collate', None):
             self.no_collate = True


modelscope/preprocessors/ofa/base.py (+16, -0)

@@ -6,9 +6,12 @@ from os import path as osp
 import json
 import numpy as np
 import torch
+from PIL import Image

 from modelscope.models.multi_modal.ofa import OFATokenizer, OFATokenizerZH
+from modelscope.preprocessors.image import load_image
 from modelscope.utils.trie import Trie
+from .utils.constant import OFA_TASK_KEY_MAPPING
 from .utils.random_help import set_torch_seed


@@ -59,6 +62,14 @@ class OfaBasePreprocessor:
         self.mean = [0.5, 0.5, 0.5]
         self.std = [0.5, 0.5, 0.5]
         self.patch_image_size = self.cfg.model.get('patch_image_size', 480)
+        self.column_map = {
+            key: key
+            for key in OFA_TASK_KEY_MAPPING[self.cfg.task]
+        }
+        if hasattr(self.cfg,
+                   'dataset') and self.cfg.dataset.column_map is not None:
+            for k, v in self.cfg.dataset.column_map.items():
+                self.column_map[k] = v
         self.transtab = str.maketrans(
             {key: None
              for key in string.punctuation})
@@ -147,3 +158,8 @@ class OfaBasePreprocessor:
                     constraint_prefix_token)
                 constraint_mask[i][constraint_nodes] = True
             sample['constraint_mask'] = constraint_mask
+
+    def get_img_pil(self, path_or_url_or_pil):
+        image = path_or_url_or_pil if isinstance(path_or_url_or_pil, Image.Image) \
+            else load_image(path_or_url_or_pil)
+        return image
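
Taken together, the base-class additions mean every OFA preprocessor now
resolves dataset column names through self.column_map (identity for the
task's default keys, overridden by cfg.dataset.column_map) and loads
images uniformly via get_img_pil, which accepts a file path, a URL, or
an already-decoded PIL.Image. A standalone sketch of the merge logic,
with an illustrative override matching the test config further down:

# Sketch of the column_map merge performed in OfaBasePreprocessor above;
# the task key and override dict are illustrative stand-ins for
# self.cfg.task and cfg.dataset.column_map.
OFA_TASK_KEY_MAPPING = {'image-captioning': ['image']}

task = 'image-captioning'
dataset_column_map = {'text': 'caption'}

column_map = {key: key for key in OFA_TASK_KEY_MAPPING[task]}
if dataset_column_map is not None:
    for k, v in dataset_column_map.items():
        column_map[k] = v

# 'image' keeps its name; caption text is read from the 'caption' column.
assert column_map == {'image': 'image', 'text': 'caption'}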

modelscope/preprocessors/ofa/image_captioning.py (+5, -9)

@@ -1,12 +1,9 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-import os
-from typing import Any, Dict, Union
+from typing import Any, Dict

 import torch
-from PIL import Image
 from torchvision import transforms

-from modelscope.preprocessors.image import load_image
 from modelscope.utils.constant import ModeKeys
 from .base import OfaBasePreprocessor

@@ -46,7 +43,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):

     def _build_train_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
         sample = self._build_infer_sample(data)
-        target = data['text']
+        target = data[self.column_map['text']]
         target = target.translate(self.transtab).strip()
         target_token_list = target.strip().split()
         target = ' '.join(target_token_list[:self.max_tgt_length])
@@ -56,8 +53,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):
         return sample

     def _build_infer_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        image = data['image'] if isinstance(
-            data['image'], Image.Image) else load_image(data['image'])
+        image = self.get_img_pil(data[self.column_map['image']])
         patch_image = self.patch_resize_transform(image)
         prompt = self.cfg.model.get('prompt', ' what does the image describe?')
         inputs = self.tokenize_text(prompt)
@@ -66,6 +62,6 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):
             'patch_image': patch_image,
             'patch_mask': torch.tensor([True])
         }
-        if 'text' in data:
-            sample['label'] = data['text']
+        if self.column_map['text'] in data:
+            sample['label'] = data[self.column_map['text']]
         return sample

modelscope/preprocessors/ofa/ocr_recognition.py (+1, -3)

@@ -1,7 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-import random
-import unicodedata
-from typing import Any, Dict, Union
+from typing import Any, Dict

 import torch
 from PIL import Image


modelscope/preprocessors/ofa/utils/constant.py (+13, -0)

@@ -0,0 +1,13 @@
+from modelscope.utils.constant import Tasks
+
+OFA_TASK_KEY_MAPPING = {
+    Tasks.ocr_recognition: ['image'],
+    Tasks.image_captioning: ['image'],
+    Tasks.image_classification: ['image'],
+    Tasks.text_summarization: ['text'],
+    Tasks.text_classification: ['text', 'text2'],
+    Tasks.visual_grounding: ['image', 'text'],
+    Tasks.visual_question_answering: ['image', 'text'],
+    Tasks.visual_entailment: ['image', 'text', 'text2'],
+    Tasks.text_to_image_synthesis: ['text']
+}
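
This new module is the single source of truth for which logical input
keys each OFA task consumes: OfaPreprocessor indexes it by self.cfg.task
(multi_modal.py above), and OfaBasePreprocessor seeds column_map from it
(base.py above). Assuming the usual string values of the Tasks
constants, a lookup behaves like this:

# Illustrative lookup; Tasks.visual_entailment resolves to the string
# 'visual-entailment' in modelscope.utils.constant.
from modelscope.preprocessors.ofa.utils.constant import OFA_TASK_KEY_MAPPING
from modelscope.utils.constant import Tasks

assert OFA_TASK_KEY_MAPPING[Tasks.visual_entailment] == [
    'image', 'text', 'text2'
]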

modelscope/trainers/multi_modal/ofa/ofa_trainer.py (+83, -54)

@@ -2,21 +2,27 @@

 import math
 import os
+import shutil
 from functools import partial
+from typing import Callable, Dict, Optional, Tuple, Union

-from datasets import load_dataset
+import torch
 from torch import distributed as dist
+from torch import nn
+from torch.utils.data import Dataset

 from modelscope.metainfo import Trainers
-from modelscope.models.base import Model
+from modelscope.models.base import Model, TorchModel
 from modelscope.msdatasets.ms_dataset import MsDataset
+from modelscope.preprocessors.base import Preprocessor
 from modelscope.preprocessors.multi_modal import OfaPreprocessor
 from modelscope.preprocessors.ofa.utils.collate import collate_fn
 from modelscope.trainers import EpochBasedTrainer
 from modelscope.trainers.builder import TRAINERS
 from modelscope.trainers.optimizer.builder import build_optimizer
 from modelscope.utils.config import Config
-from modelscope.utils.constant import ConfigKeys, ModeKeys, ModelFile
+from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys,
+                                       ModeKeys)
 from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion,
                                 get_schedule)

@@ -24,56 +30,100 @@ from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion,
 @TRAINERS.register_module(module_name=Trainers.ofa_tasks)
 class OFATrainer(EpochBasedTrainer):

-    def __init__(self, model: str, *args, **kwargs):
-        model = Model.from_pretrained(model)
+    def __init__(
+            self,
+            model: Optional[Union[TorchModel, nn.Module, str]] = None,
+            cfg_file: Optional[str] = None,
+            arg_parse_fn: Optional[Callable] = None,
+            data_collator: Optional[Union[Callable, Dict[str,
+                                                         Callable]]] = None,
+            train_dataset: Optional[Union[MsDataset, Dataset]] = None,
+            eval_dataset: Optional[Union[MsDataset, Dataset]] = None,
+            preprocessor: Optional[Union[Preprocessor,
+                                         Dict[str, Preprocessor]]] = None,
+            optimizers: Tuple[torch.optim.Optimizer,
+                              torch.optim.lr_scheduler._LRScheduler] = (None,
+                                                                        None),
+            model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
+            seed: int = 42,
+            **kwargs):
+        model = Model.from_pretrained(model, revision=model_revision)
         model_dir = model.model_dir
-        cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
         cfg = Config.from_file(cfg_file)
-        dataset = self._build_dataset_with_config(cfg)
-        preprocessor = {
-            ConfigKeys.train:
-            OfaPreprocessor(
-                model_dir=model_dir, mode=ModeKeys.TRAIN, no_collate=True),
-            ConfigKeys.val:
-            OfaPreprocessor(
-                model_dir=model_dir, mode=ModeKeys.EVAL, no_collate=True),
-        }
+        if 'work_dir' not in kwargs or len(kwargs['work_dir']) == 0:
+            work_dir = cfg.train.work_dir
+        else:
+            work_dir = kwargs['work_dir']
+        tokenizer_files = {
+            'zh': [
+                'tokenizer.json', 'tokenizer_config.json', 'vocab.txt',
+                'config.json'
+            ],
+            'en':
+            ['tokenizer.json', 'vocab.json', 'merges.txt', 'config.json'],
+        }
+        for filename in tokenizer_files[cfg.model.get('language', 'en')]:
+            finetune_file = os.path.join(work_dir, filename)
+            pretrain_file = os.path.join(model_dir, filename)
+            if os.path.exists(finetune_file):
+                continue
+            if os.path.exists(pretrain_file):
+                shutil.copy(pretrain_file, finetune_file)
+
+        if preprocessor is None:
+            preprocessor = {
+                ConfigKeys.train:
+                OfaPreprocessor(
+                    model_dir=work_dir, mode=ModeKeys.TRAIN, no_collate=True),
+                ConfigKeys.val:
+                OfaPreprocessor(
+                    model_dir=work_dir, mode=ModeKeys.EVAL, no_collate=True),
+            }
         # use torchrun launch
         world_size = int(os.environ.get('WORLD_SIZE', 1))
         epoch_steps = math.ceil(
-            len(dataset['train']) /  # noqa
+            len(train_dataset) /  # noqa
             (cfg.train.dataloader.batch_size_per_gpu * world_size))  # noqa
         cfg.train.lr_scheduler.num_train_steps = epoch_steps * cfg.train.max_epochs
         cfg.train.criterion.tokenizer = model.tokenizer
         self.criterion = AdjustLabelSmoothedCrossEntropyCriterion(
             cfg.train.criterion)
-        optimizer = build_optimizer(model, cfg=cfg.train.optimizer)
-        scheduler_class, scheduler_args = get_schedule(cfg.train.lr_scheduler)
-        if scheduler_class is not None:
-            lr_scheduler = scheduler_class(**{'optimizer': optimizer},
-                                           **scheduler_args)
+        if optimizers[0] is None:
+            optimizer = build_optimizer(model, cfg=cfg.train.optimizer)
         else:
-            lr_scheduler = None
-        collator = partial(
-            collate_fn,
-            pad_idx=model.tokenizer.pad_token_id,
-            eos_idx=model.tokenizer.eos_token_id,
-        )
+            optimizer = optimizers[0]
+        if optimizers[1] is None:
+            scheduler_class, scheduler_args = get_schedule(
+                cfg.train.lr_scheduler)
+            if scheduler_class is not None:
+                lr_scheduler = scheduler_class(**{'optimizer': optimizer},
+                                               **scheduler_args)
+            else:
+                lr_scheduler = None
+        else:
+            lr_scheduler = optimizers[1]
+        optimizers = (optimizer, lr_scheduler)
+        if data_collator is None:
+            data_collator = partial(
+                collate_fn,
+                pad_idx=model.tokenizer.pad_token_id,
+                eos_idx=model.tokenizer.eos_token_id,
+            )
         if 'launcher' not in kwargs and cfg.train.get('launcher', None):
             kwargs['launcher'] = cfg.train.launcher
         if 'use_fp16' not in kwargs and cfg.train.get('use_fp16', False):
             kwargs['use_fp16'] = cfg.train.use_fp16
         kwargs['to_tensor'] = False
         super().__init__(
+            cfg_file=cfg_file,
             model=model,
-            data_collator=collator,
-            train_dataset=dataset['train'],
-            eval_dataset=dataset['valid'],
-            cfg_file=cfg_file,
+            arg_parse_fn=arg_parse_fn,
+            data_collator=data_collator,
+            train_dataset=train_dataset,
+            eval_dataset=eval_dataset,
             preprocessor=preprocessor,
-            optimizers=(optimizer, lr_scheduler),
-            work_dir=cfg.train.work_dir,
-            *args,
+            optimizers=optimizers,
+            seed=seed,
             **kwargs,
         )

@@ -102,24 +152,3 @@ class OFATrainer(EpochBasedTrainer):
         else:
             self.log_buffer.update(train_outputs['log_vars'])
         self.train_outputs = train_outputs
-
-    def _build_dataset_with_config(self, cfg):
-        if hasattr(cfg.dataset, 'hf_dataset'):
-            dataset = load_dataset(
-                cfg.dataset.script,
-                data_files=cfg.dataset.hf_dataset,
-                sep=cfg.dataset.sep,
-            )
-            dataset = MsDataset.from_hf_dataset(
-                dataset.rename_columns(cfg.dataset.column_map))
-            return dataset
-        elif hasattr(cfg.dataset, 'ms_dataset'):
-            dataset_d = dict()
-            for key in cfg.dataset.ms_dataset.keys():
-                dataset_d[key] = MsDataset.load(**cfg.dataset.ms_dataset[key])
-                dataset_d[key] = MsDataset.from_hf_dataset(
-                    dataset_d[key]._hf_ds.rename_columns(
-                        cfg.dataset.column_map))
-            return dataset_d
-        else:
-            raise NotImplementedError
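
After this rewrite the trainer no longer builds datasets from
cfg.dataset: datasets, data collator, preprocessors, and optimizers are
injected by the caller (with defaults built from the config when
omitted), and any tokenizer files missing from work_dir are copied over
from the pretrained model directory. A usage sketch mirroring the
updated test below; the model ID and split sizes are illustrative, not
prescriptive:

# Hedged sketch of the new calling convention, based on the test case in
# this commit; 'damo/ofa_image-caption_coco_large_en' stands in for any
# OFA checkpoint, and the configuration.json is assumed to exist.
from modelscope.metainfo import Trainers
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer

args = dict(
    model='damo/ofa_image-caption_coco_large_en',
    work_dir='./work/ckpts/caption',
    train_dataset=MsDataset.load(
        'coco_2014_caption', namespace='modelscope', split='train[:100]'),
    eval_dataset=MsDataset.load(
        'coco_2014_caption', namespace='modelscope', split='validation[:20]'),
    cfg_file='./work/ckpts/caption/configuration.json',
)
trainer = build_trainer(name=Trainers.ofa_tasks, default_args=args)
trainer.train()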

modelscope/utils/constant.py (+1, -0)

@@ -282,6 +282,7 @@ class ConfigKeys(object):
     """Fixed keywords in configuration file"""
     train = 'train'
     val = 'val'
+    test = 'test'


 class Requirements(object):


tests/trainers/test_ofa_trainer.py (+89, -14)

@@ -5,27 +5,102 @@ import os.path as osp
 import shutil
 import unittest

-from modelscope.metainfo import Trainers
+import json
+
+from modelscope.metainfo import Metrics, Trainers
+from modelscope.msdatasets import MsDataset
 from modelscope.trainers import build_trainer
+from modelscope.utils.constant import ModelFile
 from modelscope.utils.test_utils import test_level


 class TestOfaTrainer(unittest.TestCase):

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
-    def test_trainer(self):
-        os.environ['LOCAL_RANK'] = '0'
-        model_id = 'damo/ofa_text-classification_mnli_large_en'
-        default_args = {'model': model_id}
-        trainer = build_trainer(
-            name=Trainers.ofa_tasks, default_args=default_args)
-        os.makedirs(trainer.work_dir, exist_ok=True)
+    def setUp(self) -> None:
+        self.finetune_cfg = \
+            {'framework': 'pytorch',
+             'task': 'image-captioning',
+             'model': {'type': 'ofa',
+                       'beam_search': {'beam_size': 5,
+                                       'max_len_b': 16,
+                                       'min_len': 1,
+                                       'no_repeat_ngram_size': 0},
+                       'seed': 7,
+                       'max_src_length': 256,
+                       'language': 'en',
+                       'gen_type': 'generation',
+                       'patch_image_size': 480,
+                       'max_image_size': 480,
+                       'imagenet_default_mean_and_std': False},
+             'pipeline': {'type': 'image-captioning'},
+             'dataset': {'column_map': {'text': 'caption'}},
+             'train': {'work_dir': 'work/ckpts/caption',
+                       # 'launcher': 'pytorch',
+                       'max_epochs': 1,
+                       'use_fp16': True,
+                       'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
+                       'lr_scheduler': {'name': 'polynomial_decay',
+                                        'warmup_proportion': 0.01,
+                                        'lr_end': 1e-07},
+                       'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False},
+                       'optimizer': {'type': 'AdamW', 'lr': 5e-05, 'weight_decay': 0.01},
+                       'optimizer_hook': {'type': 'TorchAMPOptimizerHook',
+                                          'cumulative_iters': 1,
+                                          'grad_clip': {'max_norm': 1.0, 'norm_type': 2},
+                                          'loss_keys': 'loss'},
+                       'criterion': {'name': 'AdjustLabelSmoothedCrossEntropyCriterion',
+                                     'constraint_range': None,
+                                     'drop_worst_after': 0,
+                                     'drop_worst_ratio': 0.0,
+                                     'ignore_eos': False,
+                                     'ignore_prefix_size': 0,
+                                     'label_smoothing': 0.0,
+                                     'reg_alpha': 1.0,
+                                     'report_accuracy': False,
+                                     'sample_patch_num': 196,
+                                     'sentence_avg': False,
+                                     'use_rdrop': False},
+                       'hooks': [{'type': 'BestCkptSaverHook',
+                                  'metric_key': 'bleu-4',
+                                  'interval': 100},
+                                 {'type': 'TextLoggerHook', 'interval': 1},
+                                 {'type': 'IterTimerHook'},
+                                 {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]},
+             'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
+                            'metrics': [{'type': 'bleu',
+                                         'eval_tokenized_bleu': False,
+                                         'ref_name': 'labels',
+                                         'hyp_name': 'caption'}]},
+             'preprocessor': []}
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_trainer_std(self):
+        WORKSPACE = './workspace/ckpts/caption'
+        os.makedirs(WORKSPACE, exist_ok=True)
+        config_file = os.path.join(WORKSPACE, 'configuration.json')
+        with open(config_file, 'w') as writer:
+            json.dump(self.finetune_cfg, writer)
+
+        pretrained_model = '/apsarapangu/disk2/yichang.zyc/ckpt/MaaS/ofa_image-caption_coco_large_en'
+        args = dict(
+            model=pretrained_model,
+            work_dir=WORKSPACE,
+            train_dataset=MsDataset.load(
+                'coco_2014_caption',
+                namespace='modelscope',
+                split='train[:100]'),
+            eval_dataset=MsDataset.load(
+                'coco_2014_caption',
+                namespace='modelscope',
+                split='validation[:20]'),
+            metrics=[Metrics.BLEU],
+            cfg_file=config_file)
+        trainer = build_trainer(name=Trainers.ofa_tasks, default_args=args)
         trainer.train()
-        assert len(
-            glob.glob(osp.join(trainer.work_dir,
-                               'best_epoch*_accuracy*.pth'))) == 2
-        if os.path.exists(self.trainer.work_dir):
-            shutil.rmtree(self.trainer.work_dir)
+
+        self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE,
+                      os.path.join(WORKSPACE, 'output'))
+        shutil.rmtree(WORKSPACE)


 if __name__ == '__main__':

