diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index d889f5cf..a3b0bd67 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -145,11 +145,20 @@ class Trainers(object): For a model specific Trainer, you can use ${ModelName}-${Task}-trainer. """ - default = 'Trainer' + default = 'trainer' - # multi-modal tasks + # multi-modal trainers clip_multi_modal_embedding = 'clip-multi-modal-embedding' + # cv trainers + image_instance_segmentation = 'image-instance-segmentation' + image_portrait_enhancement = 'image-portrait-enhancement' + + # nlp trainers + bert_sentiment_analysis = 'bert-sentiment-analysis' + nlp_base_trainer = 'nlp-base-trainer' + nlp_veco_trainer = 'nlp-veco-trainer' + class Preprocessors(object): """ Names for different preprocessor. @@ -219,3 +228,52 @@ class Metrics(object): image_color_enhance_metric = 'image-color-enhance-metric' # metrics for image-portrait-enhancement task image_portrait_enhancement_metric = 'image-portrait-enhancement-metric' + + +class Optimizers(object): + """ Names for different OPTIMIZER. + + Holds the standard optimizer name to use for identifying different optimizer. + This should be used to register optimizer. + """ + + default = 'optimizer' + + SGD = 'SGD' + + +class Hooks(object): + """ Names for different hooks. + + All kinds of hooks are defined here + """ + # lr + LrSchedulerHook = 'LrSchedulerHook' + PlateauLrSchedulerHook = 'PlateauLrSchedulerHook' + NoneLrSchedulerHook = 'NoneLrSchedulerHook' + + # optimizer + OptimizerHook = 'OptimizerHook' + TorchAMPOptimizerHook = 'TorchAMPOptimizerHook' + ApexAMPOptimizerHook = 'ApexAMPOptimizerHook' + NoneOptimizerHook = 'NoneOptimizerHook' + + # checkpoint + CheckpointHook = 'CheckpointHook' + BestCkptSaverHook = 'BestCkptSaverHook' + + # logger + TextLoggerHook = 'TextLoggerHook' + TensorboardHook = 'TensorboardHook' + + IterTimerHook = 'IterTimerHook' + EvaluationHook = 'EvaluationHook' + + +class LR_Schedulers(object): + """learning rate scheduler is defined here + + """ + LinearWarmup = 'LinearWarmup' + ConstantWarmup = 'ConstantWarmup' + ExponentialWarmup = 'ExponentialWarmup' diff --git a/modelscope/trainers/__init__.py b/modelscope/trainers/__init__.py index d802fd8b..17ed7f3c 100644 --- a/modelscope/trainers/__init__.py +++ b/modelscope/trainers/__init__.py @@ -1,8 +1,38 @@ -from .base import DummyTrainer -from .builder import build_trainer -from .cv import (ImageInstanceSegmentationTrainer, - ImagePortraitEnhancementTrainer) -from .multi_modal import CLIPTrainer -from .nlp import SequenceClassificationTrainer -from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer -from .trainer import EpochBasedTrainer +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .base import DummyTrainer + from .builder import build_trainer + from .cv import (ImageInstanceSegmentationTrainer, + ImagePortraitEnhancementTrainer) + from .multi_modal import CLIPTrainer + from .nlp import SequenceClassificationTrainer + from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer + from .trainer import EpochBasedTrainer + +else: + _import_structure = { + 'base': ['DummyTrainer'], + 'builder': ['build_trainer'], + 'cv': [ + 'ImageInstanceSegmentationTrainer', + 'ImagePortraitEnhancementTrainer' + ], + 'multi_modal': ['CLIPTrainer'], + 'nlp': ['SequenceClassificationTrainer'], + 'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'], + 'trainer': ['EpochBasedTrainer'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/builder.py b/modelscope/trainers/builder.py index 7f787011..87e99b30 100644 --- a/modelscope/trainers/builder.py +++ b/modelscope/trainers/builder.py @@ -1,5 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. - +from modelscope.metainfo import Trainers from modelscope.utils.config import ConfigDict from modelscope.utils.constant import Tasks from modelscope.utils.registry import Registry, build_from_cfg @@ -8,7 +8,7 @@ TRAINERS = Registry('trainers') HOOKS = Registry('hooks') -def build_trainer(name: str = 'EpochBasedTrainer', default_args: dict = None): +def build_trainer(name: str = Trainers.default, default_args: dict = None): """ build trainer given a trainer name Args: diff --git a/modelscope/trainers/cv/__init__.py b/modelscope/trainers/cv/__init__.py index 36d64af7..99c2aea5 100644 --- a/modelscope/trainers/cv/__init__.py +++ b/modelscope/trainers/cv/__init__.py @@ -1,3 +1,27 @@ -from .image_instance_segmentation_trainer import \ - ImageInstanceSegmentationTrainer -from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .image_instance_segmentation_trainer import \ + ImageInstanceSegmentationTrainer + from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer + +else: + _import_structure = { + 'image_instance_segmentation_trainer': + ['ImageInstanceSegmentationTrainer'], + 'image_portrait_enhancement_trainer': + ['ImagePortraitEnhancementTrainer'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/cv/image_instance_segmentation_trainer.py b/modelscope/trainers/cv/image_instance_segmentation_trainer.py index aa8cc9e3..e7632147 100644 --- a/modelscope/trainers/cv/image_instance_segmentation_trainer.py +++ b/modelscope/trainers/cv/image_instance_segmentation_trainer.py @@ -1,8 +1,9 @@ +from modelscope.metainfo import Trainers from modelscope.trainers.builder import TRAINERS from modelscope.trainers.trainer import EpochBasedTrainer -@TRAINERS.register_module(module_name='image-instance-segmentation') +@TRAINERS.register_module(module_name=Trainers.image_instance_segmentation) class ImageInstanceSegmentationTrainer(EpochBasedTrainer): def __init__(self, *args, **kwargs): diff --git a/modelscope/trainers/cv/image_portrait_enhancement_trainer.py b/modelscope/trainers/cv/image_portrait_enhancement_trainer.py index 67c94213..7ef0de79 100644 --- a/modelscope/trainers/cv/image_portrait_enhancement_trainer.py +++ b/modelscope/trainers/cv/image_portrait_enhancement_trainer.py @@ -4,6 +4,7 @@ from collections.abc import Mapping import torch from torch import distributed as dist +from modelscope.metainfo import Trainers from modelscope.trainers.builder import TRAINERS from modelscope.trainers.optimizer.builder import build_optimizer from modelscope.trainers.trainer import EpochBasedTrainer @@ -11,7 +12,7 @@ from modelscope.utils.constant import ModeKeys from modelscope.utils.logger import get_logger -@TRAINERS.register_module(module_name='gpen') +@TRAINERS.register_module(module_name=Trainers.image_portrait_enhancement) class ImagePortraitEnhancementTrainer(EpochBasedTrainer): def train_step(self, model, inputs): diff --git a/modelscope/trainers/hooks/__init__.py b/modelscope/trainers/hooks/__init__.py index ff55da09..f133041b 100644 --- a/modelscope/trainers/hooks/__init__.py +++ b/modelscope/trainers/hooks/__init__.py @@ -1,18 +1,42 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
-from .builder import HOOKS, build_hook -from .checkpoint_hook import BestCkptSaverHook, CheckpointHook -from .evaluation_hook import EvaluationHook -from .hook import Hook -from .iter_timer_hook import IterTimerHook -from .logger.text_logger_hook import TextLoggerHook -from .lr_scheduler_hook import LrSchedulerHook -from .optimizer_hook import (ApexAMPOptimizerHook, OptimizerHook, - TorchAMPOptimizerHook) -from .priority import Priority +from typing import TYPE_CHECKING -__all__ = [ - 'Hook', 'HOOKS', 'CheckpointHook', 'EvaluationHook', 'LrSchedulerHook', - 'OptimizerHook', 'Priority', 'build_hook', 'TextLoggerHook', - 'IterTimerHook', 'TorchAMPOptimizerHook', 'ApexAMPOptimizerHook', - 'BestCkptSaverHook', 'NoneOptimizerHook', 'NoneLrSchedulerHook' -] +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .builder import HOOKS, build_hook + from .checkpoint_hook import BestCkptSaverHook, CheckpointHook + from .evaluation_hook import EvaluationHook + from .hook import Hook + from .iter_timer_hook import IterTimerHook + from .logger import TextLoggerHook, TensorboardHook + from .lr_scheduler_hook import LrSchedulerHook + from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook, + OptimizerHook, TorchAMPOptimizerHook) + from .priority import Priority, get_priority + +else: + _import_structure = { + 'builder': ['HOOKS', 'build_hook'], + 'checkpoint_hook': ['BestCkptSaverHook', 'CheckpointHook'], + 'evaluation_hook': ['EvaluationHook'], + 'hook': ['Hook'], + 'iter_timer_hook': ['IterTimerHook'], + 'logger': ['TensorboardHook', 'TextLoggerHook'], + 'lr_scheduler_hook': ['LrSchedulerHook'], + 'optimizer_hook': [ + 'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook', + 'TorchAMPOptimizerHook' + ], + 'priority': ['Priority', 'get'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/hooks/checkpoint_hook.py b/modelscope/trainers/hooks/checkpoint_hook.py index 4cb08130..fc0281a1 100644 --- a/modelscope/trainers/hooks/checkpoint_hook.py +++ b/modelscope/trainers/hooks/checkpoint_hook.py @@ -2,6 +2,7 @@ import os from modelscope import __version__ +from modelscope.metainfo import Hooks from modelscope.utils.checkpoint import save_checkpoint from modelscope.utils.constant import LogKeys from modelscope.utils.logger import get_logger @@ -11,7 +12,7 @@ from .hook import Hook from .priority import Priority -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.CheckpointHook) class CheckpointHook(Hook): """Save checkpoints periodically. @@ -98,7 +99,7 @@ class CheckpointHook(Hook): return False -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) class BestCkptSaverHook(CheckpointHook): """Save best checkpoints hook. Args: diff --git a/modelscope/trainers/hooks/evaluation_hook.py b/modelscope/trainers/hooks/evaluation_hook.py index aea27f2f..4479fa23 100644 --- a/modelscope/trainers/hooks/evaluation_hook.py +++ b/modelscope/trainers/hooks/evaluation_hook.py @@ -1,9 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.metainfo import Hooks from .builder import HOOKS from .hook import Hook -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.EvaluationHook) class EvaluationHook(Hook): """Evaluation hook. 
Args: diff --git a/modelscope/trainers/hooks/iter_timer_hook.py b/modelscope/trainers/hooks/iter_timer_hook.py index 70d8508b..6af78235 100644 --- a/modelscope/trainers/hooks/iter_timer_hook.py +++ b/modelscope/trainers/hooks/iter_timer_hook.py @@ -1,13 +1,14 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import time +from modelscope.metainfo import Hooks from modelscope.utils.constant import LogKeys from .builder import HOOKS from .hook import Hook from .priority import Priority -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.IterTimerHook) class IterTimerHook(Hook): PRIORITY = Priority.LOW diff --git a/modelscope/trainers/hooks/logger/__init__.py b/modelscope/trainers/hooks/logger/__init__.py index f5cd544b..583cd32b 100644 --- a/modelscope/trainers/hooks/logger/__init__.py +++ b/modelscope/trainers/hooks/logger/__init__.py @@ -1,7 +1,27 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + from modelscope.trainers.utils.log_buffer import LogBuffer -from .base import LoggerHook -from .tensorboard_hook import TensorboardHook -from .text_logger_hook import TextLoggerHook +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .base import LoggerHook + from .tensorboard_hook import TensorboardHook + from .text_logger_hook import TextLoggerHook + +else: + _import_structure = { + 'base': ['LoggerHook'], + 'tensorboard_hook': ['TensorboardHook'], + 'text_logger_hook': ['TextLoggerHook'] + } + + import sys -__all__ = ['TextLoggerHook', 'LoggerHook', 'LogBuffer', 'TensorboardHook'] + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/hooks/logger/tensorboard_hook.py b/modelscope/trainers/hooks/logger/tensorboard_hook.py index a6a68768..a12f7ae7 100644 --- a/modelscope/trainers/hooks/logger/tensorboard_hook.py +++ b/modelscope/trainers/hooks/logger/tensorboard_hook.py @@ -1,13 +1,14 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os +from modelscope.metainfo import Hooks from modelscope.trainers.hooks.builder import HOOKS from modelscope.utils.constant import LogKeys from modelscope.utils.torch_utils import master_only from .base import LoggerHook -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.TensorboardHook) class TensorboardHook(LoggerHook): """TensorBoard hook for visualization. Args: diff --git a/modelscope/trainers/hooks/logger/text_logger_hook.py b/modelscope/trainers/hooks/logger/text_logger_hook.py index 99c9749d..168792d9 100644 --- a/modelscope/trainers/hooks/logger/text_logger_hook.py +++ b/modelscope/trainers/hooks/logger/text_logger_hook.py @@ -8,13 +8,14 @@ import json import torch from torch import distributed as dist +from modelscope.metainfo import Hooks from modelscope.trainers.hooks.builder import HOOKS from modelscope.trainers.hooks.logger.base import LoggerHook from modelscope.utils.constant import LogKeys, ModeKeys from modelscope.utils.torch_utils import get_dist_info, is_master -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.TextLoggerHook) class TextLoggerHook(LoggerHook): """Logger hook in text, Output log to both console and local json file. 
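The __init__.py rewrites above all apply the same two conventions: real imports are executed only under TYPE_CHECKING, with an _import_structure map handed to LazyImportModule at runtime, and every hook now registers under an explicit string constant from metainfo.Hooks (e.g. @HOOKS.register_module(module_name=Hooks.TextLoggerHook)) instead of relying on the implicit class name. A minimal sketch of a package __init__.py following this convention, assuming LazyImportModule behaves like the usual lazy-module shim and defers the submodule import until first attribute access; the submodule and symbol names below are hypothetical:

# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    # Type checkers and IDEs resolve the real symbol here without runtime cost.
    from .my_custom_hook import MyCustomHook  # hypothetical submodule
else:
    # Maps submodule name -> exported symbols; the submodule is only imported
    # when one of these attributes is first accessed.
    _import_structure = {'my_custom_hook': ['MyCustomHook']}

    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )

Registering under the Hooks constants keeps the strings used by configuration files and the AST indexer in a single place (metainfo.py), which is presumably also why 'trainers' is added to SCAN_SUB_FOLDERS in ast_utils.py further down.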
diff --git a/modelscope/trainers/hooks/lr_scheduler_hook.py b/modelscope/trainers/hooks/lr_scheduler_hook.py index 9a5de392..ca0ec01b 100644 --- a/modelscope/trainers/hooks/lr_scheduler_hook.py +++ b/modelscope/trainers/hooks/lr_scheduler_hook.py @@ -1,4 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.metainfo import Hooks from modelscope.trainers.lrscheduler.builder import build_lr_scheduler from modelscope.utils.constant import LogKeys from modelscope.utils.logger import get_logger @@ -8,7 +9,7 @@ from .hook import Hook from .priority import Priority -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.LrSchedulerHook) class LrSchedulerHook(Hook): """Lr scheduler. @@ -78,7 +79,7 @@ class LrSchedulerHook(Hook): return lr -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook) class PlateauLrSchedulerHook(LrSchedulerHook): """Lr scheduler hook for `ReduceLROnPlateau`. @@ -119,7 +120,7 @@ class PlateauLrSchedulerHook(LrSchedulerHook): trainer.lr_scheduler.step(metrics=metrics) -@HOOKS.register_module() +@HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook) class NoneLrSchedulerHook(LrSchedulerHook): PRIORITY = Priority.LOW # should be after EvaluationHook diff --git a/modelscope/trainers/hooks/optimizer/__init__.py b/modelscope/trainers/hooks/optimizer/__init__.py new file mode 100644 index 00000000..d7c8c862 --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .apex_optimizer_hook import ApexAMPOptimizerHook + from .base import OptimizerHook, NoneOptimizerHook + from .torch_optimizer_hook import TorchAMPOptimizerHook + +else: + _import_structure = { + 'apex_optimizer_hook': ['ApexAMPOptimizerHook'], + 'base': ['OptimizerHook', 'NoneOptimizerHook'], + 'torch_optimizer_hook': ['TorchAMPOptimizerHook'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py new file mode 100644 index 00000000..f87ae849 --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py @@ -0,0 +1,75 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from .base import OptimizerHook + + +@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) +class ApexAMPOptimizerHook(OptimizerHook): + """Fp16 optimizer, if torch version is less than 1.6.0, + you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default + Args: + cumulative_iters (int): interval of gradients accumulation. Default: 1 + grad_clip (dict): Default None. Containing keys: + max_norm (float or int): max norm of the gradients + norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. + More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` + loss_keys (str | list): keys list of loss + opt_level (str): "O0" and "O3" are not true mixed precision, + but they are useful for establishing accuracy and speed baselines, respectively. + "O1" and "O2" are different implementations of mixed precision. 
+ Try both, and see what gives the best speedup and accuracy for your model. + """ + + def __init__(self, + cumulative_iters=1, + grad_clip=None, + loss_keys='loss', + opt_level='O1'): + + super(ApexAMPOptimizerHook, self).__init__( + grad_clip=grad_clip, loss_keys=loss_keys) + self.cumulative_iters = cumulative_iters + self.opt_level = opt_level + + try: + from apex import amp + except ImportError: + raise ValueError( + 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' + ) + + def before_run(self, trainer): + from apex import amp + + logging.info('open fp16') + # TODO: fix it should initialze amp with model not wrapper by DDP or DP + if hasattr(trainer.model, 'module'): + trainer.model, trainer.optimizer = amp.initialize( + trainer.model.module, + trainer.optimizer, + opt_level=self.opt_level) + else: + trainer.model, trainer.optimizer = amp.initialize( + trainer.model, trainer.optimizer, opt_level=self.opt_level) + + trainer.optimizer.zero_grad() + + def after_train_iter(self, trainer): + for k in self.loss_keys: + trainer.train_outputs[k] /= self.cumulative_iters + + from apex import amp + for k in self.loss_keys: + with amp.scale_loss(trainer.train_outputs[k], + trainer.optimizer) as scaled_loss: + scaled_loss.backward() + + if self.every_n_iters(trainer, self.cumulative_iters): + if self.grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **self.grad_clip) + + trainer.optimizer.step() + trainer.optimizer.zero_grad() diff --git a/modelscope/trainers/hooks/optimizer/base.py b/modelscope/trainers/hooks/optimizer/base.py new file mode 100644 index 00000000..dffad6ea --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/base.py @@ -0,0 +1,73 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +from torch.nn.utils import clip_grad + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from modelscope.trainers.hooks.hook import Hook +from modelscope.trainers.hooks.priority import Priority + + +@HOOKS.register_module(module_name=Hooks.OptimizerHook) +class OptimizerHook(Hook): + """Optimizer hook + + Args: + cumulative_iters (int): interval of gradients accumulation. Default: 1 + grad_clip (dict): Default None. Containing keys: + max_norm (float or int): max norm of the gradients + norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. 
+ More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` + loss_keys (str | list): keys list of loss + """ + + PRIORITY = Priority.ABOVE_NORMAL + + def __init__(self, + cumulative_iters=1, + grad_clip=None, + loss_keys='loss') -> None: + if isinstance(loss_keys, str): + loss_keys = [loss_keys] + assert isinstance(loss_keys, (tuple, list)) + self.loss_keys = loss_keys + self.cumulative_iters = cumulative_iters + self.grad_clip = grad_clip + + def clip_grads(self, params, **clip_args): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **clip_args) + + def before_run(self, trainer): + trainer.optimizer.zero_grad() + + def after_train_iter(self, trainer): + for k in self.loss_keys: + trainer.train_outputs[k] /= self.cumulative_iters + trainer.train_outputs[k].backward() + + if self.every_n_iters(trainer, self.cumulative_iters): + if self.grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **self.grad_clip) + + trainer.optimizer.step() + trainer.optimizer.zero_grad() + + +@HOOKS.register_module(module_name=Hooks.NoneOptimizerHook) +class NoneOptimizerHook(OptimizerHook): + + def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'): + + super(NoneOptimizerHook, self).__init__( + grad_clip=grad_clip, loss_keys=loss_keys) + self.cumulative_iters = cumulative_iters + + def before_run(self, trainer): + return + + def after_train_iter(self, trainer): + return diff --git a/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py new file mode 100644 index 00000000..30ea88a2 --- /dev/null +++ b/modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py @@ -0,0 +1,83 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +from modelscope.metainfo import Hooks +from modelscope.trainers.hooks.builder import HOOKS +from .base import OptimizerHook + + +@HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook) +class TorchAMPOptimizerHook(OptimizerHook): + """Fp16 optimizer, if torch version is less than 1.6.0, + you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default + Args: + cumulative_iters (int): interval of gradients accumulation. Default: 1 + grad_clip (dict): Default None. Containing keys: + max_norm (float or int): max norm of the gradients + norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. + More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` + loss_keys (str | list): keys list of loss + loss_scale (float | dict): grade scale config. If loss_scale is a float, + static loss scaling will be used with the specified scale. + It can also be a dict containing arguments of GradScalar. For Pytorch >= 1.6, + we use official torch.cuda.amp.GradScaler. + please refer to: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters. 
+ """ + + def __init__(self, + cumulative_iters=1, + grad_clip=None, + loss_keys='loss', + loss_scale={}): + + super(TorchAMPOptimizerHook, self).__init__( + grad_clip=grad_clip, loss_keys=loss_keys) + self.cumulative_iters = cumulative_iters + self._scale_update_param = None + + from torch.cuda import amp + + if isinstance(loss_scale, float): + self._scale_update_param = loss_scale + self.scaler = amp.GradScaler(init_scale=loss_scale) + elif isinstance(loss_scale, dict): + self.scaler = amp.GradScaler(**loss_scale) + else: + raise ValueError( + '`loss_scale` type must be in [float, dict], but got {loss_scale}' + ) + + def before_run(self, trainer): + logging.info('open fp16') + trainer.optimizer.zero_grad() + + if hasattr(trainer.model, 'module'): + self._ori_model_forward = trainer.model.module.forward + self._model = trainer.model.module + else: + self._ori_model_forward = trainer.model.forward + self._model = trainer.model + + self.ori_model_forward = trainer.model.forward + + def before_train_iter(self, trainer): + from torch.cuda import amp + setattr(self._model, 'forward', amp.autocast()(self._model.forward)) + + def after_train_iter(self, trainer): + for k in self.loss_keys: + trainer.train_outputs[k] /= self.cumulative_iters + + for k in self.loss_keys: + self.scaler.scale(trainer.train_outputs[k]).backward() + + if self.every_n_iters(trainer, self.cumulative_iters): + self.scaler.unscale_(trainer.optimizer) + if self.grad_clip is not None: + self.clip_grads(trainer.model.parameters(), **self.grad_clip) + + self.scaler.step(trainer.optimizer) + self.scaler.update(self._scale_update_param) + trainer.optimizer.zero_grad() + + setattr(self._model, 'forward', self._ori_model_forward) diff --git a/modelscope/trainers/hooks/optimizer_hook.py b/modelscope/trainers/hooks/optimizer_hook.py deleted file mode 100644 index 294a06a6..00000000 --- a/modelscope/trainers/hooks/optimizer_hook.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import logging - -from torch.nn.utils import clip_grad - -from .builder import HOOKS -from .hook import Hook -from .priority import Priority - - -@HOOKS.register_module() -class OptimizerHook(Hook): - """Optimizer hook - - Args: - cumulative_iters (int): interval of gradients accumulation. Default: 1 - grad_clip (dict): Default None. Containing keys: - max_norm (float or int): max norm of the gradients - norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. 
- More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` - loss_keys (str | list): keys list of loss - """ - - PRIORITY = Priority.ABOVE_NORMAL - - def __init__(self, - cumulative_iters=1, - grad_clip=None, - loss_keys='loss') -> None: - if isinstance(loss_keys, str): - loss_keys = [loss_keys] - assert isinstance(loss_keys, (tuple, list)) - self.loss_keys = loss_keys - self.cumulative_iters = cumulative_iters - self.grad_clip = grad_clip - - def clip_grads(self, params, **clip_args): - params = list( - filter(lambda p: p.requires_grad and p.grad is not None, params)) - if len(params) > 0: - return clip_grad.clip_grad_norm_(params, **clip_args) - - def before_run(self, trainer): - trainer.optimizer.zero_grad() - - def after_train_iter(self, trainer): - for k in self.loss_keys: - trainer.train_outputs[k] /= self.cumulative_iters - trainer.train_outputs[k].backward() - - if self.every_n_iters(trainer, self.cumulative_iters): - if self.grad_clip is not None: - self.clip_grads(trainer.model.parameters(), **self.grad_clip) - - trainer.optimizer.step() - trainer.optimizer.zero_grad() - - -@HOOKS.register_module() -class TorchAMPOptimizerHook(OptimizerHook): - """Fp16 optimizer, if torch version is less than 1.6.0, - you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default - Args: - cumulative_iters (int): interval of gradients accumulation. Default: 1 - grad_clip (dict): Default None. Containing keys: - max_norm (float or int): max norm of the gradients - norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. - More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` - loss_keys (str | list): keys list of loss - loss_scale (float | dict): grade scale config. If loss_scale is a float, - static loss scaling will be used with the specified scale. - It can also be a dict containing arguments of GradScalar. For Pytorch >= 1.6, - we use official torch.cuda.amp.GradScaler. - please refer to: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters. 
- """ - - def __init__(self, - cumulative_iters=1, - grad_clip=None, - loss_keys='loss', - loss_scale={}): - - super(TorchAMPOptimizerHook, self).__init__( - grad_clip=grad_clip, loss_keys=loss_keys) - self.cumulative_iters = cumulative_iters - self._scale_update_param = None - - from torch.cuda import amp - - if isinstance(loss_scale, float): - self._scale_update_param = loss_scale - self.scaler = amp.GradScaler(init_scale=loss_scale) - elif isinstance(loss_scale, dict): - self.scaler = amp.GradScaler(**loss_scale) - else: - raise ValueError( - '`loss_scale` type must be in [float, dict], but got {loss_scale}' - ) - - def before_run(self, trainer): - logging.info('open fp16') - trainer.optimizer.zero_grad() - - if hasattr(trainer.model, 'module'): - self._ori_model_forward = trainer.model.module.forward - self._model = trainer.model.module - else: - self._ori_model_forward = trainer.model.forward - self._model = trainer.model - - self.ori_model_forward = trainer.model.forward - - def before_train_iter(self, trainer): - from torch.cuda import amp - setattr(self._model, 'forward', amp.autocast()(self._model.forward)) - - def after_train_iter(self, trainer): - for k in self.loss_keys: - trainer.train_outputs[k] /= self.cumulative_iters - - for k in self.loss_keys: - self.scaler.scale(trainer.train_outputs[k]).backward() - - if self.every_n_iters(trainer, self.cumulative_iters): - self.scaler.unscale_(trainer.optimizer) - if self.grad_clip is not None: - self.clip_grads(trainer.model.parameters(), **self.grad_clip) - - self.scaler.step(trainer.optimizer) - self.scaler.update(self._scale_update_param) - trainer.optimizer.zero_grad() - - setattr(self._model, 'forward', self._ori_model_forward) - - -@HOOKS.register_module() -class ApexAMPOptimizerHook(OptimizerHook): - """Fp16 optimizer, if torch version is less than 1.6.0, - you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default - Args: - cumulative_iters (int): interval of gradients accumulation. Default: 1 - grad_clip (dict): Default None. Containing keys: - max_norm (float or int): max norm of the gradients - norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. - More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` - loss_keys (str | list): keys list of loss - opt_level (str): "O0" and "O3" are not true mixed precision, - but they are useful for establishing accuracy and speed baselines, respectively. - "O1" and "O2" are different implementations of mixed precision. - Try both, and see what gives the best speedup and accuracy for your model. - """ - - def __init__(self, - cumulative_iters=1, - grad_clip=None, - loss_keys='loss', - opt_level='O1'): - - super(ApexAMPOptimizerHook, self).__init__( - grad_clip=grad_clip, loss_keys=loss_keys) - self.cumulative_iters = cumulative_iters - self.opt_level = opt_level - - try: - from apex import amp - except ImportError: - raise ValueError( - 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' 
- ) - - def before_run(self, trainer): - from apex import amp - - logging.info('open fp16') - # TODO: fix it should initialze amp with model not wrapper by DDP or DP - if hasattr(trainer.model, 'module'): - trainer.model, trainer.optimizer = amp.initialize( - trainer.model.module, - trainer.optimizer, - opt_level=self.opt_level) - else: - trainer.model, trainer.optimizer = amp.initialize( - trainer.model, trainer.optimizer, opt_level=self.opt_level) - - trainer.optimizer.zero_grad() - - def after_train_iter(self, trainer): - for k in self.loss_keys: - trainer.train_outputs[k] /= self.cumulative_iters - - from apex import amp - for k in self.loss_keys: - with amp.scale_loss(trainer.train_outputs[k], - trainer.optimizer) as scaled_loss: - scaled_loss.backward() - - if self.every_n_iters(trainer, self.cumulative_iters): - if self.grad_clip is not None: - self.clip_grads(trainer.model.parameters(), **self.grad_clip) - - trainer.optimizer.step() - trainer.optimizer.zero_grad() - - -@HOOKS.register_module() -class NoneOptimizerHook(OptimizerHook): - - def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'): - - super(NoneOptimizerHook, self).__init__( - grad_clip=grad_clip, loss_keys=loss_keys) - self.cumulative_iters = cumulative_iters - - def before_run(self, trainer): - return - - def after_train_iter(self, trainer): - return diff --git a/modelscope/trainers/lrscheduler/__init__.py b/modelscope/trainers/lrscheduler/__init__.py index 336a45f0..54576353 100644 --- a/modelscope/trainers/lrscheduler/__init__.py +++ b/modelscope/trainers/lrscheduler/__init__.py @@ -1,8 +1,25 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from .builder import LR_SCHEDULER, build_lr_scheduler -from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup +from typing import TYPE_CHECKING -__all__ = [ - 'LR_SCHEDULER', 'build_lr_scheduler', 'BaseWarmup', 'ConstantWarmup', - 'LinearWarmup', 'ExponentialWarmup' -] +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .builder import LR_SCHEDULER, build_lr_scheduler + from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup + +else: + _import_structure = { + 'builder': ['LR_SCHEDULER', 'build_lr_scheduler'], + 'warmup': + ['BaseWarmup', 'ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/lrscheduler/builder.py b/modelscope/trainers/lrscheduler/builder.py index dc458d56..3a892001 100644 --- a/modelscope/trainers/lrscheduler/builder.py +++ b/modelscope/trainers/lrscheduler/builder.py @@ -4,7 +4,7 @@ import inspect from modelscope.utils.config import ConfigDict from modelscope.utils.registry import Registry, build_from_cfg, default_group -LR_SCHEDULER = Registry('lr scheduler') +LR_SCHEDULER = Registry('lr_scheduler') def build_lr_scheduler(cfg: ConfigDict, default_args: dict = None): diff --git a/modelscope/trainers/lrscheduler/warmup/__init__.py b/modelscope/trainers/lrscheduler/warmup/__init__.py index ad8e11c0..5263f2ff 100644 --- a/modelscope/trainers/lrscheduler/warmup/__init__.py +++ b/modelscope/trainers/lrscheduler/warmup/__init__.py @@ -1,5 +1,25 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
-from .base import BaseWarmup -from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup -__all__ = ['BaseWarmup', 'ConstantWarmup', 'LinearWarmup', 'ExponentialWarmup'] +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .base import BaseWarmup + from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup + +else: + _import_structure = { + 'base': ['BaseWarmup'], + 'warmup': ['ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/lrscheduler/warmup/warmup.py b/modelscope/trainers/lrscheduler/warmup/warmup.py index d00c83b0..777796ef 100644 --- a/modelscope/trainers/lrscheduler/warmup/warmup.py +++ b/modelscope/trainers/lrscheduler/warmup/warmup.py @@ -1,9 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +from modelscope.metainfo import LR_Schedulers from modelscope.trainers.lrscheduler.builder import LR_SCHEDULER from .base import BaseWarmup -@LR_SCHEDULER.register_module() +@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ConstantWarmup) class ConstantWarmup(BaseWarmup): """Linear warmup scheduler. @@ -29,7 +30,7 @@ class ConstantWarmup(BaseWarmup): return self.warmup_ratio -@LR_SCHEDULER.register_module() +@LR_SCHEDULER.register_module(module_name=LR_Schedulers.LinearWarmup) class LinearWarmup(BaseWarmup): """Linear warmup scheduler. @@ -54,7 +55,7 @@ class LinearWarmup(BaseWarmup): return 1 - k -@LR_SCHEDULER.register_module() +@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ExponentialWarmup) class ExponentialWarmup(BaseWarmup): """Exponential warmup scheduler. diff --git a/modelscope/trainers/multi_modal/__init__.py b/modelscope/trainers/multi_modal/__init__.py index 7d386349..89b7e1bc 100644 --- a/modelscope/trainers/multi_modal/__init__.py +++ b/modelscope/trainers/multi_modal/__init__.py @@ -1 +1,20 @@ -from .clip import CLIPTrainer +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .clip import CLIPTrainer + +else: + _import_structure = {'clip': ['CLIPTrainer']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/nlp/__init__.py b/modelscope/trainers/nlp/__init__.py index 6d61da43..888f9941 100644 --- a/modelscope/trainers/nlp/__init__.py +++ b/modelscope/trainers/nlp/__init__.py @@ -1 +1,22 @@ -from .sequence_classification_trainer import SequenceClassificationTrainer +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .sequence_classification_trainer import SequenceClassificationTrainer + +else: + _import_structure = { + 'sequence_classification_trainer': ['SequenceClassificationTrainer'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/trainers/nlp/sequence_classification_trainer.py b/modelscope/trainers/nlp/sequence_classification_trainer.py index 86c8df58..64fd59b4 100644 --- a/modelscope/trainers/nlp/sequence_classification_trainer.py +++ b/modelscope/trainers/nlp/sequence_classification_trainer.py @@ -3,6 +3,7 @@ from typing import Dict, Optional, Tuple, Union import numpy as np +from modelscope.metainfo import Trainers from modelscope.trainers.base import BaseTrainer from modelscope.trainers.builder import TRAINERS from modelscope.utils.logger import get_logger @@ -11,7 +12,7 @@ PATH = None logger = get_logger(PATH) -@TRAINERS.register_module(module_name=r'bert-sentiment-analysis') +@TRAINERS.register_module(module_name=Trainers.bert_sentiment_analysis) class SequenceClassificationTrainer(BaseTrainer): def __init__(self, cfg_file: str, *args, **kwargs): diff --git a/modelscope/trainers/nlp_trainer.py b/modelscope/trainers/nlp_trainer.py index c8121db6..527f4087 100644 --- a/modelscope/trainers/nlp_trainer.py +++ b/modelscope/trainers/nlp_trainer.py @@ -6,6 +6,7 @@ from torch import nn from torch.utils.data import Dataset from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers from modelscope.metrics.builder import build_metric from modelscope.models.base import Model, TorchModel from modelscope.msdatasets import MsDataset @@ -17,7 +18,7 @@ from .base import TRAINERS from .trainer import EpochBasedTrainer -@TRAINERS.register_module(module_name='NlpEpochBasedTrainer') +@TRAINERS.register_module(module_name=Trainers.nlp_base_trainer) class NlpEpochBasedTrainer(EpochBasedTrainer): def __init__( @@ -142,7 +143,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer): return build_preprocessor(cfg, Tasks.find_field_by_task(self.cfg.task)) -@TRAINERS.register_module(module_name='VecoTrainer') +@TRAINERS.register_module(module_name=Trainers.nlp_veco_trainer) class VecoTrainer(NlpEpochBasedTrainer): def evaluate(self, checkpoint_path=None): diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index c5574f32..a96c186c 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -17,6 +17,7 @@ from torch.utils.data import DataLoader, Dataset from torch.utils.data.distributed import DistributedSampler from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers from modelscope.metrics import build_metric, task_default_metrics from modelscope.models.base import Model, TorchModel from modelscope.msdatasets.ms_dataset import MsDataset @@ -45,7 +46,7 @@ from .parallel.builder import build_parallel from .parallel.utils import is_parallel -@TRAINERS.register_module() +@TRAINERS.register_module(module_name=Trainers.default) class EpochBasedTrainer(BaseTrainer): """Epoch based Trainer, a training helper for PyTorch. 
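With EpochBasedTrainer registered as Trainers.default ('trainer') and the NLP/CV trainers registered under their own Trainers.* constants, callers now resolve a trainer through the TRAINERS registry by constant rather than by a hand-written string, which is exactly what the test updates further down switch to. A minimal usage sketch; the configuration path, model and datasets here are placeholders that would be prepared elsewhere:

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer

# Placeholder inputs: a trainer configuration plus a model and datasets
# prepared elsewhere (e.g. a TorchModel instance or a hub model id).
kwargs = dict(
    cfg_file='configuration.json',   # placeholder path
    model=model,                     # placeholder model
    train_dataset=train_dataset,     # placeholder training dataset
    eval_dataset=eval_dataset,       # placeholder evaluation dataset
    work_dir='./work_dir')

# Trainers.default ('trainer') resolves to EpochBasedTrainer via the registry.
trainer = build_trainer(name=Trainers.default, default_args=kwargs)
trainer.train()

# Task-specific trainers are looked up the same way, e.g.
#   build_trainer(name=Trainers.nlp_base_trainer, default_args=kwargs)
#   build_trainer(name=Trainers.image_instance_segmentation, default_args=kwargs)

Because the registration strings now live in metainfo.Trainers, a rename such as 'gpen' -> 'image-portrait-enhancement' only has to happen in one place.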
diff --git a/modelscope/utils/ast_utils.py b/modelscope/utils/ast_utils.py index b8ee1258..fe382c54 100644 --- a/modelscope/utils/ast_utils.py +++ b/modelscope/utils/ast_utils.py @@ -5,6 +5,7 @@ import importlib import os import os.path as osp import time +import traceback from functools import reduce from typing import Generator, Union @@ -13,8 +14,9 @@ import json from modelscope import __version__ from modelscope.fileio.file import LocalStorage -from modelscope.metainfo import (Heads, Metrics, Models, Pipelines, - Preprocessors, TaskModels, Trainers) +from modelscope.metainfo import (Heads, Hooks, LR_Schedulers, Metrics, Models, + Optimizers, Pipelines, Preprocessors, + TaskModels, Trainers) from modelscope.utils.constant import Fields, Tasks from modelscope.utils.file_utils import get_default_cache_dir from modelscope.utils.logger import get_logger @@ -28,7 +30,8 @@ MODELSCOPE_PATH = '/'.join(os.path.dirname(__file__).split('/')[:-1]) REGISTER_MODULE = 'register_module' IGNORED_PACKAGES = ['modelscope', '.'] SCAN_SUB_FOLDERS = [ - 'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets' + 'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets', + 'trainers' ] INDEXER_FILE = 'ast_indexer' DECORATOR_KEY = 'decorators' @@ -305,9 +308,11 @@ class AstScaning(object): output = [functions[0]] if len(args_list) == 0 and len(keyword_list) == 0: - args_list.append(None) + args_list.append(default_group) if len(keyword_list) == 0 and len(args_list) == 1: args_list.append(None) + if len(keyword_list) == 1 and len(args_list) == 0: + args_list.append(default_group) args_list.extend(keyword_list) @@ -318,6 +323,8 @@ class AstScaning(object): # the case (default_group) elif item[1] is None: output.append(item[0]) + elif isinstance(item, str): + output.append(item) else: output.append('.'.join(item)) return (output[0], self._get_registry_value(output[1]), @@ -443,9 +450,11 @@ class FilesAstScaning(object): try: output = self.astScaner.generate_ast(file) except Exception as e: + detail = traceback.extract_tb(e.__traceback__) raise Exception( - 'During ast indexing, there are index errors in the ' - f'file {file} : {type(e).__name__}.{e}') + f'During ast indexing, error is in the file {detail[-1].filename}' + f' line: {detail[-1].lineno}: "{detail[-1].line}" with error msg: ' + f'"{type(e).__name__}: {e}"') import_list = self.parse_import(output) return output[DECORATOR_KEY], import_list @@ -523,14 +532,14 @@ class FilesAstScaning(object): return md5.hexdigest() -fileScaner = FilesAstScaning() +file_scanner = FilesAstScaning() def _save_index(index, file_path): # convert tuple key to str key index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()} index[VERSION_KEY] = __version__ - index[MD5_KEY] = fileScaner.files_mtime_md5() + index[MD5_KEY] = file_scanner.files_mtime_md5() json_index = json.dumps(index) storage.write(json_index.encode(), file_path) index[INDEX_KEY] = { @@ -579,7 +588,7 @@ def load_index(force_rebuild=False): index = None if not force_rebuild and os.path.exists(file_path): wrapped_index = _load_index(file_path) - md5 = fileScaner.files_mtime_md5() + md5 = file_scanner.files_mtime_md5() if (wrapped_index[VERSION_KEY] == __version__ and wrapped_index[MD5_KEY] == md5): index = wrapped_index @@ -591,7 +600,7 @@ def load_index(force_rebuild=False): logger.info( f'No valid ast index found from {file_path}, rebuilding ast index!' 
) - index = fileScaner.get_files_scan_results() + index = file_scanner.get_files_scan_results() _save_index(index, file_path) return index diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt index d16f2f26..5bc7abd5 100644 --- a/requirements/multi-modal.txt +++ b/requirements/multi-modal.txt @@ -7,4 +7,6 @@ pycocotools>=2.0.4 # which introduced compatability issues that are being investigated rouge_score<=0.0.4 timm +tokenizers torchvision +transformers>=4.12.0 diff --git a/requirements/nlp.txt b/requirements/nlp.txt index c69174fe..9bc543d7 100644 --- a/requirements/nlp.txt +++ b/requirements/nlp.txt @@ -6,3 +6,5 @@ pai-easynlp rouge_score<=0.0.4 seqeval spacy>=2.3.5 +tokenizers +transformers>=4.12.0 diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 5675f031..e2b78f06 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -13,7 +13,5 @@ requests scipy setuptools tensorboard -tokenizers tqdm>=4.64.0 -transformers>=4.12.0 yapf diff --git a/tests/trainers/hooks/logger/test_tensorboard_hook.py b/tests/trainers/hooks/logger/test_tensorboard_hook.py index 6c08f160..54c31056 100644 --- a/tests/trainers/hooks/logger/test_tensorboard_hook.py +++ b/tests/trainers/hooks/logger/test_tensorboard_hook.py @@ -10,6 +10,7 @@ import numpy as np import torch from torch import nn +from modelscope.metainfo import Trainers from modelscope.trainers import build_trainer from modelscope.utils.constant import LogKeys, ModelFile from modelscope.utils.test_utils import create_dummy_test_dataset @@ -73,7 +74,7 @@ class TensorboardHookTest(unittest.TestCase): with open(config_path, 'w') as f: json.dump(json_cfg, f) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=DummyModel(), diff --git a/tests/trainers/hooks/test_checkpoint_hook.py b/tests/trainers/hooks/test_checkpoint_hook.py index 8dbd0130..1c81d057 100644 --- a/tests/trainers/hooks/test_checkpoint_hook.py +++ b/tests/trainers/hooks/test_checkpoint_hook.py @@ -9,6 +9,7 @@ import numpy as np import torch from torch import nn +from modelscope.metainfo import Trainers from modelscope.metrics.builder import METRICS, MetricKeys from modelscope.trainers import build_trainer from modelscope.utils.constant import LogKeys, ModelFile @@ -108,7 +109,7 @@ class CheckpointHookTest(unittest.TestCase): with open(config_path, 'w') as f: json.dump(json_cfg, f) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=DummyModel(), @@ -179,7 +180,7 @@ class BestCkptSaverHookTest(unittest.TestCase): with open(config_path, 'w') as f: json.dump(json_cfg, f) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=DummyModel(), diff --git a/tests/trainers/hooks/test_evaluation_hook.py b/tests/trainers/hooks/test_evaluation_hook.py index 2927db47..9e65f127 100644 --- a/tests/trainers/hooks/test_evaluation_hook.py +++ b/tests/trainers/hooks/test_evaluation_hook.py @@ -9,6 +9,7 @@ import numpy as np import torch from torch import nn +from modelscope.metainfo import Trainers from modelscope.metrics.builder import METRICS, MetricKeys from modelscope.trainers import build_trainer from modelscope.utils.constant import LogKeys, ModelFile @@ -97,7 +98,7 @@ class EvaluationHookTest(unittest.TestCase): with open(config_path, 'w') as f: json.dump(json_cfg, f) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( 
cfg_file=config_path, model=DummyModel(), diff --git a/tests/trainers/hooks/test_lr_scheduler_hook.py b/tests/trainers/hooks/test_lr_scheduler_hook.py index 7e057ff0..eb30fb52 100644 --- a/tests/trainers/hooks/test_lr_scheduler_hook.py +++ b/tests/trainers/hooks/test_lr_scheduler_hook.py @@ -11,6 +11,7 @@ from torch import nn from torch.optim import SGD from torch.optim.lr_scheduler import MultiStepLR, ReduceLROnPlateau +from modelscope.metainfo import Trainers from modelscope.metrics.builder import METRICS, MetricKeys from modelscope.trainers import build_trainer from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages @@ -89,7 +90,7 @@ class LrSchedulerHookTest(unittest.TestCase): model = DummyModel() optimizer = SGD(model.parameters(), lr=0.01) lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model, @@ -161,7 +162,7 @@ class LrSchedulerHookTest(unittest.TestCase): model = DummyModel() # optimmizer = SGD(model.parameters(), lr=0.01) # lr_scheduler = MultiStepLR(optimmizer, milestones=[2, 4]) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model, @@ -258,7 +259,7 @@ class PlateauLrSchedulerHookTest(unittest.TestCase): model = DummyModel() optimizer = SGD(model.parameters(), lr=0.01) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model, diff --git a/tests/trainers/hooks/test_optimizer_hook.py b/tests/trainers/hooks/test_optimizer_hook.py index 42d45619..62c70632 100644 --- a/tests/trainers/hooks/test_optimizer_hook.py +++ b/tests/trainers/hooks/test_optimizer_hook.py @@ -11,6 +11,7 @@ from torch import nn from torch.optim import SGD from torch.optim.lr_scheduler import MultiStepLR +from modelscope.metainfo import Trainers from modelscope.trainers import build_trainer from modelscope.utils.constant import ModelFile, TrainerStages from modelscope.utils.test_utils import create_dummy_test_dataset @@ -64,7 +65,7 @@ class OptimizerHookTest(unittest.TestCase): model = DummyModel() optimizer = SGD(model.parameters(), lr=0.01) lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model, @@ -130,7 +131,7 @@ class TorchAMPOptimizerHookTest(unittest.TestCase): model = DummyModel().cuda() optimizer = SGD(model.parameters(), lr=0.01) lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model, diff --git a/tests/trainers/hooks/test_timer_hook.py b/tests/trainers/hooks/test_timer_hook.py index d92b5f89..6f24809b 100644 --- a/tests/trainers/hooks/test_timer_hook.py +++ b/tests/trainers/hooks/test_timer_hook.py @@ -11,6 +11,7 @@ from torch import nn from torch.optim import SGD from torch.optim.lr_scheduler import MultiStepLR +from modelscope.metainfo import Trainers from modelscope.trainers import build_trainer from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages from modelscope.utils.test_utils import create_dummy_test_dataset @@ -68,7 +69,7 @@ class IterTimerHookTest(unittest.TestCase): model = DummyModel() optimizer = SGD(model.parameters(), lr=0.01) lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = 
dict( cfg_file=config_path, model=model, diff --git a/tests/trainers/test_finetune_sequence_classification.py b/tests/trainers/test_finetune_sequence_classification.py index 8e147f92..12c7da77 100644 --- a/tests/trainers/test_finetune_sequence_classification.py +++ b/tests/trainers/test_finetune_sequence_classification.py @@ -4,6 +4,7 @@ import shutil import tempfile import unittest +from modelscope.metainfo import Trainers from modelscope.trainers import build_trainer @@ -23,7 +24,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): model_id, train_dataset, eval_dataset, - name='NlpEpochBasedTrainer', + name=Trainers.nlp_base_trainer, cfg_modify_fn=None, **kwargs): kwargs = dict( @@ -236,7 +237,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): 'damo/nlp_veco_fill-mask-large', train_datasets, eval_datasets, - name='VecoTrainer', + name=Trainers.nlp_veco_trainer, cfg_modify_fn=cfg_modify_fn) diff --git a/tests/trainers/test_finetune_token_classificatin.py b/tests/trainers/test_finetune_token_classificatin.py index 7449bc69..0348bef5 100644 --- a/tests/trainers/test_finetune_token_classificatin.py +++ b/tests/trainers/test_finetune_token_classificatin.py @@ -5,6 +5,7 @@ import tempfile import unittest from functools import reduce +from modelscope.metainfo import Trainers from modelscope.trainers import build_trainer from modelscope.utils.test_utils import test_level @@ -25,7 +26,7 @@ class TestFinetuneTokenClassification(unittest.TestCase): model_id, train_dataset, eval_dataset, - name='NlpEpochBasedTrainer', + name=Trainers.nlp_base_trainer, cfg_modify_fn=None, **kwargs): kwargs = dict( diff --git a/tests/trainers/test_image_instance_segmentation_trainer.py b/tests/trainers/test_image_instance_segmentation_trainer.py index 6c9f031f..35d0378f 100644 --- a/tests/trainers/test_image_instance_segmentation_trainer.py +++ b/tests/trainers/test_image_instance_segmentation_trainer.py @@ -7,6 +7,7 @@ import zipfile from functools import partial from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers from modelscope.models.cv.image_instance_segmentation import ( CascadeMaskRCNNSwinModel, ImageInstanceSegmentationCocoDataset) from modelscope.trainers import build_trainer @@ -79,7 +80,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): work_dir=self.tmp_dir) trainer = build_trainer( - name='image-instance-segmentation', default_args=kwargs) + name=Trainers.image_instance_segmentation, default_args=kwargs) trainer.train() results_files = os.listdir(self.tmp_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) @@ -103,7 +104,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): work_dir=self.tmp_dir) trainer = build_trainer( - name='image-instance-segmentation', default_args=kwargs) + name=Trainers.image_instance_segmentation, default_args=kwargs) trainer.train() results_files = os.listdir(self.tmp_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) diff --git a/tests/trainers/test_image_portrait_enhancement_trainer.py b/tests/trainers/test_image_portrait_enhancement_trainer.py index 3de78347..dc450ff0 100644 --- a/tests/trainers/test_image_portrait_enhancement_trainer.py +++ b/tests/trainers/test_image_portrait_enhancement_trainer.py @@ -11,6 +11,7 @@ import torch from torch.utils import data as data from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers from modelscope.models.cv.image_portrait_enhancement import \ 
ImagePortraitEnhancement from modelscope.trainers import build_trainer @@ -91,7 +92,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): device='gpu', work_dir=self.tmp_dir) - trainer = build_trainer(name='gpen', default_args=kwargs) + trainer = build_trainer( + name=Trainers.image_portrait_enhancement, default_args=kwargs) trainer.train() @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @@ -111,7 +113,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): max_epochs=2, work_dir=self.tmp_dir) - trainer = build_trainer(name='gpen', default_args=kwargs) + trainer = build_trainer( + name=Trainers.image_portrait_enhancement, default_args=kwargs) trainer.train() diff --git a/tests/trainers/test_text_generation_trainer.py b/tests/trainers/test_text_generation_trainer.py index 8921ecfa..a60bc903 100644 --- a/tests/trainers/test_text_generation_trainer.py +++ b/tests/trainers/test_text_generation_trainer.py @@ -5,6 +5,7 @@ import tempfile import unittest from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Trainers from modelscope.models.nlp.palm_v2 import PalmForTextGeneration from modelscope.msdatasets import MsDataset from modelscope.trainers import build_trainer @@ -57,7 +58,7 @@ class TestTextGenerationTrainer(unittest.TestCase): work_dir=self.tmp_dir) trainer = build_trainer( - name='NlpEpochBasedTrainer', default_args=kwargs) + name=Trainers.nlp_base_trainer, default_args=kwargs) trainer.train() results_files = os.listdir(self.tmp_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) @@ -122,7 +123,7 @@ class TestTextGenerationTrainer(unittest.TestCase): cfg_modify_fn=cfg_modify_fn, model_revision='beta') trainer = build_trainer( - name='NlpEpochBasedTrainer', default_args=kwargs) + name=Trainers.nlp_base_trainer, default_args=kwargs) trainer.train() diff --git a/tests/trainers/test_trainer.py b/tests/trainers/test_trainer.py index 9d4e79df..03b13674 100644 --- a/tests/trainers/test_trainer.py +++ b/tests/trainers/test_trainer.py @@ -13,6 +13,7 @@ from torch import nn from torch.optim import SGD from torch.optim.lr_scheduler import StepLR +from modelscope.metainfo import Trainers from modelscope.metrics.builder import MetricKeys from modelscope.msdatasets import MsDataset from modelscope.trainers import build_trainer @@ -101,14 +102,14 @@ class TrainerTest(unittest.TestCase): 'workers_per_gpu': 1, 'shuffle': False }, - 'metrics': ['seq_cls_metric'] + 'metrics': ['seq-cls-metric'] } } config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) with open(config_path, 'w') as f: json.dump(json_cfg, f) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=DummyModel(), @@ -155,7 +156,7 @@ class TrainerTest(unittest.TestCase): 'workers_per_gpu': 1, 'shuffle': False }, - 'metrics': ['seq_cls_metric'] + 'metrics': ['seq-cls-metric'] } } @@ -166,7 +167,7 @@ class TrainerTest(unittest.TestCase): model = DummyModel() optimmizer = SGD(model.parameters(), lr=0.01) lr_scheduler = StepLR(optimmizer, 2) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model, @@ -205,7 +206,7 @@ class TrainerTest(unittest.TestCase): 'workers_per_gpu': 1, 'shuffle': False }, - 'metrics': ['seq_cls_metric'] + 'metrics': ['seq-cls-metric'] } } @@ -216,7 +217,7 @@ class TrainerTest(unittest.TestCase): model = DummyModel() optimmizer = SGD(model.parameters(), lr=0.01) lr_scheduler = 
StepLR(optimmizer, 2) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model, diff --git a/tests/trainers/test_trainer_gpu.py b/tests/trainers/test_trainer_gpu.py index 3a36dc02..6502a68d 100644 --- a/tests/trainers/test_trainer_gpu.py +++ b/tests/trainers/test_trainer_gpu.py @@ -12,8 +12,9 @@ from torch import nn from torch.optim import SGD from torch.optim.lr_scheduler import StepLR +from modelscope.metainfo import Trainers from modelscope.metrics.builder import MetricKeys -from modelscope.trainers import build_trainer +from modelscope.trainers import EpochBasedTrainer, build_trainer from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile from modelscope.utils.test_utils import (DistributedTestCase, create_dummy_test_dataset, test_level) @@ -70,7 +71,7 @@ def train_func(work_dir, dist=False): model = DummyModel() optimmizer = SGD(model.parameters(), lr=0.01) lr_scheduler = StepLR(optimmizer, 2) - trainer_name = 'EpochBasedTrainer' + trainer_name = Trainers.default kwargs = dict( cfg_file=config_path, model=model,