| @@ -145,11 +145,20 @@ class Trainers(object): | |||
| For a model-specific Trainer, you can use ${ModelName}-${Task}-trainer. | |||
| """ | |||
| default = 'Trainer' | |||
| default = 'trainer' | |||
| # multi-modal tasks | |||
| # multi-modal trainers | |||
| clip_multi_modal_embedding = 'clip-multi-modal-embedding' | |||
| # cv trainers | |||
| image_instance_segmentation = 'image-instance-segmentation' | |||
| image_portrait_enhancement = 'image-portrait-enhancement' | |||
| # nlp trainers | |||
| bert_sentiment_analysis = 'bert-sentiment-analysis' | |||
| nlp_base_trainer = 'nlp-base-trainer' | |||
| nlp_veco_trainer = 'nlp-veco-trainer' | |||
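To make the naming convention above concrete, here is a minimal sketch of how a registered name flows into build_trainer. The trainer name, class, and keyword arguments below are hypothetical, chosen only to mirror the registration calls and the test usage elsewhere in this change.

from modelscope.trainers.builder import TRAINERS, build_trainer
from modelscope.trainers.trainer import EpochBasedTrainer

# Hypothetical name following the ${ModelName}-${Task}-trainer convention.
MY_TRAINER = 'mymodel-text-classification-trainer'

@TRAINERS.register_module(module_name=MY_TRAINER)
class MyModelTextClassificationTrainer(EpochBasedTrainer):
    pass

# Built by its registered name; cfg_file/work_dir are illustrative kwargs.
trainer = build_trainer(
    name=MY_TRAINER,
    default_args=dict(cfg_file='configuration.json', work_dir='./work_dir'))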
| class Preprocessors(object): | |||
| """ Names for different preprocessor. | |||
| @@ -219,3 +228,52 @@ class Metrics(object): | |||
| image_color_enhance_metric = 'image-color-enhance-metric' | |||
| # metrics for image-portrait-enhancement task | |||
| image_portrait_enhancement_metric = 'image-portrait-enhancement-metric' | |||
| class Optimizers(object): | |||
| """ Names for different OPTIMIZER. | |||
| Holds the standard optimizer name to use for identifying different optimizer. | |||
| This should be used to register optimizer. | |||
| """ | |||
| default = 'optimizer' | |||
| SGD = 'SGD' | |||
| class Hooks(object): | |||
| """ Names for different hooks. | |||
| All kinds of hooks are defined here. | |||
| """ | |||
| # lr | |||
| LrSchedulerHook = 'LrSchedulerHook' | |||
| PlateauLrSchedulerHook = 'PlateauLrSchedulerHook' | |||
| NoneLrSchedulerHook = 'NoneLrSchedulerHook' | |||
| # optimizer | |||
| OptimizerHook = 'OptimizerHook' | |||
| TorchAMPOptimizerHook = 'TorchAMPOptimizerHook' | |||
| ApexAMPOptimizerHook = 'ApexAMPOptimizerHook' | |||
| NoneOptimizerHook = 'NoneOptimizerHook' | |||
| # checkpoint | |||
| CheckpointHook = 'CheckpointHook' | |||
| BestCkptSaverHook = 'BestCkptSaverHook' | |||
| # logger | |||
| TextLoggerHook = 'TextLoggerHook' | |||
| TensorboardHook = 'TensorboardHook' | |||
| IterTimerHook = 'IterTimerHook' | |||
| EvaluationHook = 'EvaluationHook' | |||
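For orientation, these hook names are what hook configurations refer to. A minimal sketch, assuming the registry's usual convention of a `type` key holding the registered name; the `interval` field is illustrative and not part of this change.

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks import build_hook

# Assumed shape: the registry resolves the hook class from the 'type' key.
ckpt_hook = build_hook(dict(type=Hooks.CheckpointHook, interval=1))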
| class LR_Schedulers(object): | |||
| """learning rate scheduler is defined here | |||
| """ | |||
| LinearWarmup = 'LinearWarmup' | |||
| ConstantWarmup = 'ConstantWarmup' | |||
| ExponentialWarmup = 'ExponentialWarmup' | |||
| @@ -1,8 +1,38 @@ | |||
| from .base import DummyTrainer | |||
| from .builder import build_trainer | |||
| from .cv import (ImageInstanceSegmentationTrainer, | |||
| ImagePortraitEnhancementTrainer) | |||
| from .multi_modal import CLIPTrainer | |||
| from .nlp import SequenceClassificationTrainer | |||
| from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer | |||
| from .trainer import EpochBasedTrainer | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import TYPE_CHECKING | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .base import DummyTrainer | |||
| from .builder import build_trainer | |||
| from .cv import (ImageInstanceSegmentationTrainer, | |||
| ImagePortraitEnhancementTrainer) | |||
| from .multi_modal import CLIPTrainer | |||
| from .nlp import SequenceClassificationTrainer | |||
| from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer | |||
| from .trainer import EpochBasedTrainer | |||
| else: | |||
| _import_structure = { | |||
| 'base': ['DummyTrainer'], | |||
| 'builder': ['build_trainer'], | |||
| 'cv': [ | |||
| 'ImageInstanceSegmentationTrainer', | |||
| 'ImagePortraitEnhancementTrainer' | |||
| ], | |||
| 'multi_modal': ['CLIPTrainer'], | |||
| 'nlp': ['SequenceClassificationTrainer'], | |||
| 'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'], | |||
| 'trainer': ['EpochBasedTrainer'] | |||
| } | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
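The block above defers submodule imports until a symbol is first accessed: `_import_structure` maps each submodule to the names it exports, and the package's module object is swapped for a LazyImportModule. A stripped-down, standard-library-only sketch of the same idea (a toy stand-in, not the LazyImportModule implementation):

import importlib
import types

class LazySketchModule(types.ModuleType):
    """Toy stand-in: resolves exported symbols on first attribute access."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # map each exported symbol to the submodule that defines it
        self._symbol_to_module = {
            sym: mod for mod, syms in import_structure.items() for sym in syms
        }

    def __getattr__(self, item):
        module_name = self._symbol_to_module.get(item)
        if module_name is None:
            raise AttributeError(item)
        submodule = importlib.import_module(f'{self.__name__}.{module_name}')
        value = getattr(submodule, item)
        setattr(self, item, value)  # cache so later lookups skip __getattr__
        return value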
| @@ -1,5 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.utils.config import ConfigDict | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.registry import Registry, build_from_cfg | |||
| @@ -8,7 +8,7 @@ TRAINERS = Registry('trainers') | |||
| HOOKS = Registry('hooks') | |||
| def build_trainer(name: str = 'EpochBasedTrainer', default_args: dict = None): | |||
| def build_trainer(name: str = Trainers.default, default_args: dict = None): | |||
| """ build trainer given a trainer name | |||
| Args: | |||
| @@ -1,3 +1,27 @@ | |||
| from .image_instance_segmentation_trainer import \ | |||
| ImageInstanceSegmentationTrainer | |||
| from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import TYPE_CHECKING | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .image_instance_segmentation_trainer import \ | |||
| ImageInstanceSegmentationTrainer | |||
| from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer | |||
| else: | |||
| _import_structure = { | |||
| 'image_instance_segmentation_trainer': | |||
| ['ImageInstanceSegmentationTrainer'], | |||
| 'image_portrait_enhancement_trainer': | |||
| ['ImagePortraitEnhancementTrainer'], | |||
| } | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -1,8 +1,9 @@ | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers.builder import TRAINERS | |||
| from modelscope.trainers.trainer import EpochBasedTrainer | |||
| @TRAINERS.register_module(module_name='image-instance-segmentation') | |||
| @TRAINERS.register_module(module_name=Trainers.image_instance_segmentation) | |||
| class ImageInstanceSegmentationTrainer(EpochBasedTrainer): | |||
| def __init__(self, *args, **kwargs): | |||
| @@ -4,6 +4,7 @@ from collections.abc import Mapping | |||
| import torch | |||
| from torch import distributed as dist | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers.builder import TRAINERS | |||
| from modelscope.trainers.optimizer.builder import build_optimizer | |||
| from modelscope.trainers.trainer import EpochBasedTrainer | |||
| @@ -11,7 +12,7 @@ from modelscope.utils.constant import ModeKeys | |||
| from modelscope.utils.logger import get_logger | |||
| @TRAINERS.register_module(module_name='gpen') | |||
| @TRAINERS.register_module(module_name=Trainers.image_portrait_enhancement) | |||
| class ImagePortraitEnhancementTrainer(EpochBasedTrainer): | |||
| def train_step(self, model, inputs): | |||
| @@ -1,18 +1,42 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from .builder import HOOKS, build_hook | |||
| from .checkpoint_hook import BestCkptSaverHook, CheckpointHook | |||
| from .evaluation_hook import EvaluationHook | |||
| from .hook import Hook | |||
| from .iter_timer_hook import IterTimerHook | |||
| from .logger.text_logger_hook import TextLoggerHook | |||
| from .lr_scheduler_hook import LrSchedulerHook | |||
| from .optimizer_hook import (ApexAMPOptimizerHook, OptimizerHook, | |||
| TorchAMPOptimizerHook) | |||
| from .priority import Priority | |||
| from typing import TYPE_CHECKING | |||
| __all__ = [ | |||
| 'Hook', 'HOOKS', 'CheckpointHook', 'EvaluationHook', 'LrSchedulerHook', | |||
| 'OptimizerHook', 'Priority', 'build_hook', 'TextLoggerHook', | |||
| 'IterTimerHook', 'TorchAMPOptimizerHook', 'ApexAMPOptimizerHook', | |||
| 'BestCkptSaverHook', 'NoneOptimizerHook', 'NoneLrSchedulerHook' | |||
| ] | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .builder import HOOKS, build_hook | |||
| from .checkpoint_hook import BestCkptSaverHook, CheckpointHook | |||
| from .evaluation_hook import EvaluationHook | |||
| from .hook import Hook | |||
| from .iter_timer_hook import IterTimerHook | |||
| from .logger import TextLoggerHook, TensorboardHook | |||
| from .lr_scheduler_hook import LrSchedulerHook | |||
| from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook, | |||
| OptimizerHook, TorchAMPOptimizerHook) | |||
| from .priority import Priority, get_priority | |||
| else: | |||
| _import_structure = { | |||
| 'builder': ['HOOKS', 'build_hook'], | |||
| 'checkpoint_hook': ['BestCkptSaverHook', 'CheckpointHook'], | |||
| 'evaluation_hook': ['EvaluationHook'], | |||
| 'hook': ['Hook'], | |||
| 'iter_timer_hook': ['IterTimerHook'], | |||
| 'logger': ['TensorboardHook', 'TextLoggerHook'], | |||
| 'lr_scheduler_hook': ['LrSchedulerHook'], | |||
| 'optimizer': [ | |||
| 'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook', | |||
| 'TorchAMPOptimizerHook' | |||
| ], | |||
| 'priority': ['Priority', 'get_priority'] | |||
| } | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -2,6 +2,7 @@ | |||
| import os | |||
| from modelscope import __version__ | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.utils.checkpoint import save_checkpoint | |||
| from modelscope.utils.constant import LogKeys | |||
| from modelscope.utils.logger import get_logger | |||
| @@ -11,7 +12,7 @@ from .hook import Hook | |||
| from .priority import Priority | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.CheckpointHook) | |||
| class CheckpointHook(Hook): | |||
| """Save checkpoints periodically. | |||
| @@ -98,7 +99,7 @@ class CheckpointHook(Hook): | |||
| return False | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) | |||
| class BestCkptSaverHook(CheckpointHook): | |||
| """Save best checkpoints hook. | |||
| Args: | |||
| @@ -1,9 +1,10 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from modelscope.metainfo import Hooks | |||
| from .builder import HOOKS | |||
| from .hook import Hook | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.EvaluationHook) | |||
| class EvaluationHook(Hook): | |||
| """Evaluation hook. | |||
| Args: | |||
| @@ -1,13 +1,14 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import time | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.utils.constant import LogKeys | |||
| from .builder import HOOKS | |||
| from .hook import Hook | |||
| from .priority import Priority | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.IterTimerHook) | |||
| class IterTimerHook(Hook): | |||
| PRIORITY = Priority.LOW | |||
| @@ -1,7 +1,27 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import TYPE_CHECKING | |||
| from modelscope.trainers.utils.log_buffer import LogBuffer | |||
| from .base import LoggerHook | |||
| from .tensorboard_hook import TensorboardHook | |||
| from .text_logger_hook import TextLoggerHook | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .base import LoggerHook | |||
| from .tensorboard_hook import TensorboardHook | |||
| from .text_logger_hook import TextLoggerHook | |||
| else: | |||
| _import_structure = { | |||
| 'base': ['LoggerHook'], | |||
| 'tensorboard_hook': ['TensorboardHook'], | |||
| 'text_logger_hook': ['TextLoggerHook'] | |||
| } | |||
| import sys | |||
| __all__ = ['TextLoggerHook', 'LoggerHook', 'LogBuffer', 'TensorboardHook'] | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -1,13 +1,14 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.trainers.hooks.builder import HOOKS | |||
| from modelscope.utils.constant import LogKeys | |||
| from modelscope.utils.torch_utils import master_only | |||
| from .base import LoggerHook | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.TensorboardHook) | |||
| class TensorboardHook(LoggerHook): | |||
| """TensorBoard hook for visualization. | |||
| Args: | |||
| @@ -8,13 +8,14 @@ import json | |||
| import torch | |||
| from torch import distributed as dist | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.trainers.hooks.builder import HOOKS | |||
| from modelscope.trainers.hooks.logger.base import LoggerHook | |||
| from modelscope.utils.constant import LogKeys, ModeKeys | |||
| from modelscope.utils.torch_utils import get_dist_info, is_master | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.TextLoggerHook) | |||
| class TextLoggerHook(LoggerHook): | |||
| """Logger hook in text, Output log to both console and local json file. | |||
| @@ -1,4 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.trainers.lrscheduler.builder import build_lr_scheduler | |||
| from modelscope.utils.constant import LogKeys | |||
| from modelscope.utils.logger import get_logger | |||
| @@ -8,7 +9,7 @@ from .hook import Hook | |||
| from .priority import Priority | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.LrSchedulerHook) | |||
| class LrSchedulerHook(Hook): | |||
| """Lr scheduler. | |||
| @@ -78,7 +79,7 @@ class LrSchedulerHook(Hook): | |||
| return lr | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook) | |||
| class PlateauLrSchedulerHook(LrSchedulerHook): | |||
| """Lr scheduler hook for `ReduceLROnPlateau`. | |||
| @@ -119,7 +120,7 @@ class PlateauLrSchedulerHook(LrSchedulerHook): | |||
| trainer.lr_scheduler.step(metrics=metrics) | |||
| @HOOKS.register_module() | |||
| @HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook) | |||
| class NoneLrSchedulerHook(LrSchedulerHook): | |||
| PRIORITY = Priority.LOW # should be after EvaluationHook | |||
| @@ -0,0 +1,26 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import TYPE_CHECKING | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .apex_optimizer_hook import ApexAMPOptimizerHook | |||
| from .base import OptimizerHook, NoneOptimizerHook | |||
| from .torch_optimizer_hook import TorchAMPOptimizerHook | |||
| else: | |||
| _import_structure = { | |||
| 'apex_optimizer_hook': ['ApexAMPOptimizerHook'], | |||
| 'base': ['OptimizerHook', 'NoneOptimizerHook'], | |||
| 'torch_optimizer_hook': ['TorchAMPOptimizerHook'] | |||
| } | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -0,0 +1,75 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import logging | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.trainers.hooks.builder import HOOKS | |||
| from .base import OptimizerHook | |||
| @HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) | |||
| class ApexAMPOptimizerHook(OptimizerHook): | |||
| """Fp16 optimizer, if torch version is less than 1.6.0, | |||
| you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
| Args: | |||
| cumulative_iters (int): interval of gradient accumulation. Default: 1 | |||
| grad_clip (dict): Default None. Contains keys: | |||
| max_norm (float or int): max norm of the gradients | |||
| norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| For more details, please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
| loss_keys (str | list): key or list of keys of the losses | |||
| opt_level (str): "O0" and "O3" are not true mixed precision, | |||
| but they are useful for establishing accuracy and speed baselines, respectively. | |||
| "O1" and "O2" are different implementations of mixed precision. | |||
| Try both, and see what gives the best speedup and accuracy for your model. | |||
| """ | |||
| def __init__(self, | |||
| cumulative_iters=1, | |||
| grad_clip=None, | |||
| loss_keys='loss', | |||
| opt_level='O1'): | |||
| super(ApexAMPOptimizerHook, self).__init__( | |||
| grad_clip=grad_clip, loss_keys=loss_keys) | |||
| self.cumulative_iters = cumulative_iters | |||
| self.opt_level = opt_level | |||
| try: | |||
| from apex import amp | |||
| except ImportError: | |||
| raise ValueError( | |||
| 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' | |||
| ) | |||
| def before_run(self, trainer): | |||
| from apex import amp | |||
| logging.info('open fp16') | |||
| # TODO: fix it, amp should be initialized with the model, not the DDP/DP wrapper | |||
| if hasattr(trainer.model, 'module'): | |||
| trainer.model, trainer.optimizer = amp.initialize( | |||
| trainer.model.module, | |||
| trainer.optimizer, | |||
| opt_level=self.opt_level) | |||
| else: | |||
| trainer.model, trainer.optimizer = amp.initialize( | |||
| trainer.model, trainer.optimizer, opt_level=self.opt_level) | |||
| trainer.optimizer.zero_grad() | |||
| def after_train_iter(self, trainer): | |||
| for k in self.loss_keys: | |||
| trainer.train_outputs[k] /= self.cumulative_iters | |||
| from apex import amp | |||
| for k in self.loss_keys: | |||
| with amp.scale_loss(trainer.train_outputs[k], | |||
| trainer.optimizer) as scaled_loss: | |||
| scaled_loss.backward() | |||
| if self.every_n_iters(trainer, self.cumulative_iters): | |||
| if self.grad_clip is not None: | |||
| self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
| trainer.optimizer.step() | |||
| trainer.optimizer.zero_grad() | |||
| @@ -0,0 +1,73 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import logging | |||
| from torch.nn.utils import clip_grad | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.trainers.hooks.builder import HOOKS | |||
| from modelscope.trainers.hooks.hook import Hook | |||
| from modelscope.trainers.hooks.priority import Priority | |||
| @HOOKS.register_module(module_name=Hooks.OptimizerHook) | |||
| class OptimizerHook(Hook): | |||
| """Optimizer hook | |||
| Args: | |||
| cumulative_iters (int): interval of gradient accumulation. Default: 1 | |||
| grad_clip (dict): Default None. Contains keys: | |||
| max_norm (float or int): max norm of the gradients | |||
| norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| For more details, please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
| loss_keys (str | list): key or list of keys of the losses | |||
| """ | |||
| PRIORITY = Priority.ABOVE_NORMAL | |||
| def __init__(self, | |||
| cumulative_iters=1, | |||
| grad_clip=None, | |||
| loss_keys='loss') -> None: | |||
| if isinstance(loss_keys, str): | |||
| loss_keys = [loss_keys] | |||
| assert isinstance(loss_keys, (tuple, list)) | |||
| self.loss_keys = loss_keys | |||
| self.cumulative_iters = cumulative_iters | |||
| self.grad_clip = grad_clip | |||
| def clip_grads(self, params, **clip_args): | |||
| params = list( | |||
| filter(lambda p: p.requires_grad and p.grad is not None, params)) | |||
| if len(params) > 0: | |||
| return clip_grad.clip_grad_norm_(params, **clip_args) | |||
| def before_run(self, trainer): | |||
| trainer.optimizer.zero_grad() | |||
| def after_train_iter(self, trainer): | |||
| for k in self.loss_keys: | |||
| trainer.train_outputs[k] /= self.cumulative_iters | |||
| trainer.train_outputs[k].backward() | |||
| if self.every_n_iters(trainer, self.cumulative_iters): | |||
| if self.grad_clip is not None: | |||
| self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
| trainer.optimizer.step() | |||
| trainer.optimizer.zero_grad() | |||
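The accumulation logic above is the standard pattern: each loss is scaled by 1/cumulative_iters and the optimizer only steps (and zeroes gradients) every cumulative_iters iterations. A plain-PyTorch sketch of the same idea, independent of the hook machinery:

import torch

def train_with_accumulation(model, optimizer, data_loader, loss_fn,
                            cumulative_iters=4, max_norm=None):
    optimizer.zero_grad()
    for i, (x, y) in enumerate(data_loader, start=1):
        # same 1/N scaling as the hook, so the accumulated gradient matches a big batch
        loss = loss_fn(model(x), y) / cumulative_iters
        loss.backward()
        if i % cumulative_iters == 0:
            if max_norm is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
            optimizer.step()
            optimizer.zero_grad()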
| @HOOKS.register_module(module_name=Hooks.NoneOptimizerHook) | |||
| class NoneOptimizerHook(OptimizerHook): | |||
| def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'): | |||
| super(NoneOptimizerHook, self).__init__( | |||
| grad_clip=grad_clip, loss_keys=loss_keys) | |||
| self.cumulative_iters = cumulative_iters | |||
| def before_run(self, trainer): | |||
| return | |||
| def after_train_iter(self, trainer): | |||
| return | |||
| @@ -0,0 +1,83 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import logging | |||
| from modelscope.metainfo import Hooks | |||
| from modelscope.trainers.hooks.builder import HOOKS | |||
| from .base import OptimizerHook | |||
| @HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook) | |||
| class TorchAMPOptimizerHook(OptimizerHook): | |||
| """Fp16 optimizer, if torch version is less than 1.6.0, | |||
| you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
| Args: | |||
| cumulative_iters (int): interval of gradient accumulation. Default: 1 | |||
| grad_clip (dict): Default None. Contains keys: | |||
| max_norm (float or int): max norm of the gradients | |||
| norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| For more details, please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
| loss_keys (str | list): key or list of keys of the losses | |||
| loss_scale (float | dict): grad scale config. If loss_scale is a float, | |||
| static loss scaling will be used with the specified scale. | |||
| It can also be a dict containing the arguments of GradScaler. For PyTorch >= 1.6, | |||
| the official torch.cuda.amp.GradScaler is used. | |||
| Please refer to https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters. | |||
| """ | |||
| def __init__(self, | |||
| cumulative_iters=1, | |||
| grad_clip=None, | |||
| loss_keys='loss', | |||
| loss_scale={}): | |||
| super(TorchAMPOptimizerHook, self).__init__( | |||
| grad_clip=grad_clip, loss_keys=loss_keys) | |||
| self.cumulative_iters = cumulative_iters | |||
| self._scale_update_param = None | |||
| from torch.cuda import amp | |||
| if isinstance(loss_scale, float): | |||
| self._scale_update_param = loss_scale | |||
| self.scaler = amp.GradScaler(init_scale=loss_scale) | |||
| elif isinstance(loss_scale, dict): | |||
| self.scaler = amp.GradScaler(**loss_scale) | |||
| else: | |||
| raise ValueError( | |||
| f'`loss_scale` type must be in [float, dict], but got {loss_scale}' | |||
| ) | |||
| def before_run(self, trainer): | |||
| logging.info('open fp16') | |||
| trainer.optimizer.zero_grad() | |||
| if hasattr(trainer.model, 'module'): | |||
| self._ori_model_forward = trainer.model.module.forward | |||
| self._model = trainer.model.module | |||
| else: | |||
| self._ori_model_forward = trainer.model.forward | |||
| self._model = trainer.model | |||
| self.ori_model_forward = trainer.model.forward | |||
| def before_train_iter(self, trainer): | |||
| from torch.cuda import amp | |||
| setattr(self._model, 'forward', amp.autocast()(self._model.forward)) | |||
| def after_train_iter(self, trainer): | |||
| for k in self.loss_keys: | |||
| trainer.train_outputs[k] /= self.cumulative_iters | |||
| for k in self.loss_keys: | |||
| self.scaler.scale(trainer.train_outputs[k]).backward() | |||
| if self.every_n_iters(trainer, self.cumulative_iters): | |||
| self.scaler.unscale_(trainer.optimizer) | |||
| if self.grad_clip is not None: | |||
| self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
| self.scaler.step(trainer.optimizer) | |||
| self.scaler.update(self._scale_update_param) | |||
| trainer.optimizer.zero_grad() | |||
| setattr(self._model, 'forward', self._ori_model_forward) | |||
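For context, the GradScaler calls above follow the standard torch.cuda.amp recipe: scale the loss before backward, unscale before clipping, then step and update. A minimal stand-alone sketch (not the hook itself):

import torch
from torch.cuda import amp

def amp_train_step(model, optimizer, scaler, x, y, loss_fn, max_norm=None):
    with amp.autocast():                # forward in mixed precision
        loss = loss_fn(model(x), y)
    scaler.scale(loss).backward()       # scaled backward avoids fp16 underflow
    if max_norm is not None:
        scaler.unscale_(optimizer)      # unscale before clipping real gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    scaler.step(optimizer)              # skips the step if grads contain inf/nan
    scaler.update()                     # adjust the scale for the next iteration
    optimizer.zero_grad()

# scaler = amp.GradScaler()  # created once up front, as in the hook's __init__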
| @@ -1,218 +0,0 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import logging | |||
| from torch.nn.utils import clip_grad | |||
| from .builder import HOOKS | |||
| from .hook import Hook | |||
| from .priority import Priority | |||
| @HOOKS.register_module() | |||
| class OptimizerHook(Hook): | |||
| """Optimizer hook | |||
| Args: | |||
| cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
| grad_clip (dict): Default None. Containing keys: | |||
| max_norm (float or int): max norm of the gradients | |||
| norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
| loss_keys (str | list): keys list of loss | |||
| """ | |||
| PRIORITY = Priority.ABOVE_NORMAL | |||
| def __init__(self, | |||
| cumulative_iters=1, | |||
| grad_clip=None, | |||
| loss_keys='loss') -> None: | |||
| if isinstance(loss_keys, str): | |||
| loss_keys = [loss_keys] | |||
| assert isinstance(loss_keys, (tuple, list)) | |||
| self.loss_keys = loss_keys | |||
| self.cumulative_iters = cumulative_iters | |||
| self.grad_clip = grad_clip | |||
| def clip_grads(self, params, **clip_args): | |||
| params = list( | |||
| filter(lambda p: p.requires_grad and p.grad is not None, params)) | |||
| if len(params) > 0: | |||
| return clip_grad.clip_grad_norm_(params, **clip_args) | |||
| def before_run(self, trainer): | |||
| trainer.optimizer.zero_grad() | |||
| def after_train_iter(self, trainer): | |||
| for k in self.loss_keys: | |||
| trainer.train_outputs[k] /= self.cumulative_iters | |||
| trainer.train_outputs[k].backward() | |||
| if self.every_n_iters(trainer, self.cumulative_iters): | |||
| if self.grad_clip is not None: | |||
| self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
| trainer.optimizer.step() | |||
| trainer.optimizer.zero_grad() | |||
| @HOOKS.register_module() | |||
| class TorchAMPOptimizerHook(OptimizerHook): | |||
| """Fp16 optimizer, if torch version is less than 1.6.0, | |||
| you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
| Args: | |||
| cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
| grad_clip (dict): Default None. Containing keys: | |||
| max_norm (float or int): max norm of the gradients | |||
| norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
| loss_keys (str | list): keys list of loss | |||
| loss_scale (float | dict): grade scale config. If loss_scale is a float, | |||
| static loss scaling will be used with the specified scale. | |||
| It can also be a dict containing arguments of GradScalar. For Pytorch >= 1.6, | |||
| we use official torch.cuda.amp.GradScaler. | |||
| please refer to: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters. | |||
| """ | |||
| def __init__(self, | |||
| cumulative_iters=1, | |||
| grad_clip=None, | |||
| loss_keys='loss', | |||
| loss_scale={}): | |||
| super(TorchAMPOptimizerHook, self).__init__( | |||
| grad_clip=grad_clip, loss_keys=loss_keys) | |||
| self.cumulative_iters = cumulative_iters | |||
| self._scale_update_param = None | |||
| from torch.cuda import amp | |||
| if isinstance(loss_scale, float): | |||
| self._scale_update_param = loss_scale | |||
| self.scaler = amp.GradScaler(init_scale=loss_scale) | |||
| elif isinstance(loss_scale, dict): | |||
| self.scaler = amp.GradScaler(**loss_scale) | |||
| else: | |||
| raise ValueError( | |||
| '`loss_scale` type must be in [float, dict], but got {loss_scale}' | |||
| ) | |||
| def before_run(self, trainer): | |||
| logging.info('open fp16') | |||
| trainer.optimizer.zero_grad() | |||
| if hasattr(trainer.model, 'module'): | |||
| self._ori_model_forward = trainer.model.module.forward | |||
| self._model = trainer.model.module | |||
| else: | |||
| self._ori_model_forward = trainer.model.forward | |||
| self._model = trainer.model | |||
| self.ori_model_forward = trainer.model.forward | |||
| def before_train_iter(self, trainer): | |||
| from torch.cuda import amp | |||
| setattr(self._model, 'forward', amp.autocast()(self._model.forward)) | |||
| def after_train_iter(self, trainer): | |||
| for k in self.loss_keys: | |||
| trainer.train_outputs[k] /= self.cumulative_iters | |||
| for k in self.loss_keys: | |||
| self.scaler.scale(trainer.train_outputs[k]).backward() | |||
| if self.every_n_iters(trainer, self.cumulative_iters): | |||
| self.scaler.unscale_(trainer.optimizer) | |||
| if self.grad_clip is not None: | |||
| self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
| self.scaler.step(trainer.optimizer) | |||
| self.scaler.update(self._scale_update_param) | |||
| trainer.optimizer.zero_grad() | |||
| setattr(self._model, 'forward', self._ori_model_forward) | |||
| @HOOKS.register_module() | |||
| class ApexAMPOptimizerHook(OptimizerHook): | |||
| """Fp16 optimizer, if torch version is less than 1.6.0, | |||
| you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
| Args: | |||
| cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
| grad_clip (dict): Default None. Containing keys: | |||
| max_norm (float or int): max norm of the gradients | |||
| norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
| More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
| loss_keys (str | list): keys list of loss | |||
| opt_level (str): "O0" and "O3" are not true mixed precision, | |||
| but they are useful for establishing accuracy and speed baselines, respectively. | |||
| "O1" and "O2" are different implementations of mixed precision. | |||
| Try both, and see what gives the best speedup and accuracy for your model. | |||
| """ | |||
| def __init__(self, | |||
| cumulative_iters=1, | |||
| grad_clip=None, | |||
| loss_keys='loss', | |||
| opt_level='O1'): | |||
| super(ApexAMPOptimizerHook, self).__init__( | |||
| grad_clip=grad_clip, loss_keys=loss_keys) | |||
| self.cumulative_iters = cumulative_iters | |||
| self.opt_level = opt_level | |||
| try: | |||
| from apex import amp | |||
| except ImportError: | |||
| raise ValueError( | |||
| 'apex not installed, please install apex from https://www.github.com/nvidia/apex.' | |||
| ) | |||
| def before_run(self, trainer): | |||
| from apex import amp | |||
| logging.info('open fp16') | |||
| # TODO: fix it should initialze amp with model not wrapper by DDP or DP | |||
| if hasattr(trainer.model, 'module'): | |||
| trainer.model, trainer.optimizer = amp.initialize( | |||
| trainer.model.module, | |||
| trainer.optimizer, | |||
| opt_level=self.opt_level) | |||
| else: | |||
| trainer.model, trainer.optimizer = amp.initialize( | |||
| trainer.model, trainer.optimizer, opt_level=self.opt_level) | |||
| trainer.optimizer.zero_grad() | |||
| def after_train_iter(self, trainer): | |||
| for k in self.loss_keys: | |||
| trainer.train_outputs[k] /= self.cumulative_iters | |||
| from apex import amp | |||
| for k in self.loss_keys: | |||
| with amp.scale_loss(trainer.train_outputs[k], | |||
| trainer.optimizer) as scaled_loss: | |||
| scaled_loss.backward() | |||
| if self.every_n_iters(trainer, self.cumulative_iters): | |||
| if self.grad_clip is not None: | |||
| self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
| trainer.optimizer.step() | |||
| trainer.optimizer.zero_grad() | |||
| @HOOKS.register_module() | |||
| class NoneOptimizerHook(OptimizerHook): | |||
| def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'): | |||
| super(NoneOptimizerHook, self).__init__( | |||
| grad_clip=grad_clip, loss_keys=loss_keys) | |||
| self.cumulative_iters = cumulative_iters | |||
| def before_run(self, trainer): | |||
| return | |||
| def after_train_iter(self, trainer): | |||
| return | |||
| @@ -1,8 +1,25 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from .builder import LR_SCHEDULER, build_lr_scheduler | |||
| from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
| from typing import TYPE_CHECKING | |||
| __all__ = [ | |||
| 'LR_SCHEDULER', 'build_lr_scheduler', 'BaseWarmup', 'ConstantWarmup', | |||
| 'LinearWarmup', 'ExponentialWarmup' | |||
| ] | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .builder import LR_SCHEDULER, build_lr_scheduler | |||
| from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
| else: | |||
| _import_structure = { | |||
| 'builder': ['LR_SCHEDULER', 'build_lr_scheduler'], | |||
| 'warmup': | |||
| ['BaseWarmup', 'ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] | |||
| } | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -4,7 +4,7 @@ import inspect | |||
| from modelscope.utils.config import ConfigDict | |||
| from modelscope.utils.registry import Registry, build_from_cfg, default_group | |||
| LR_SCHEDULER = Registry('lr scheduler') | |||
| LR_SCHEDULER = Registry('lr_scheduler') | |||
| def build_lr_scheduler(cfg: ConfigDict, default_args: dict = None): | |||
| @@ -1,5 +1,25 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from .base import BaseWarmup | |||
| from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
| __all__ = ['BaseWarmup', 'ConstantWarmup', 'LinearWarmup', 'ExponentialWarmup'] | |||
| from typing import TYPE_CHECKING | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .base import BaseWarmup | |||
| from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
| else: | |||
| _import_structure = { | |||
| 'base': ['BaseWarmup'], | |||
| 'warmup': ['ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] | |||
| } | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -1,9 +1,10 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from modelscope.metainfo import LR_Schedulers | |||
| from modelscope.trainers.lrscheduler.builder import LR_SCHEDULER | |||
| from .base import BaseWarmup | |||
| @LR_SCHEDULER.register_module() | |||
| @LR_SCHEDULER.register_module(module_name=LR_Schedulers.ConstantWarmup) | |||
| class ConstantWarmup(BaseWarmup): | |||
| """Linear warmup scheduler. | |||
| @@ -29,7 +30,7 @@ class ConstantWarmup(BaseWarmup): | |||
| return self.warmup_ratio | |||
| @LR_SCHEDULER.register_module() | |||
| @LR_SCHEDULER.register_module(module_name=LR_Schedulers.LinearWarmup) | |||
| class LinearWarmup(BaseWarmup): | |||
| """Linear warmup scheduler. | |||
| @@ -54,7 +55,7 @@ class LinearWarmup(BaseWarmup): | |||
| return 1 - k | |||
| @LR_SCHEDULER.register_module() | |||
| @LR_SCHEDULER.register_module(module_name=LR_Schedulers.ExponentialWarmup) | |||
| class ExponentialWarmup(BaseWarmup): | |||
| """Exponential warmup scheduler. | |||
| @@ -1 +1,20 @@ | |||
| from .clip import CLIPTrainer | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import TYPE_CHECKING | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .clip import CLIPTrainer | |||
| else: | |||
| _import_structure = {'clip': ['CLIPTrainer']} | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -1 +1,22 @@ | |||
| from .sequence_classification_trainer import SequenceClassificationTrainer | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import TYPE_CHECKING | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .sequence_classification_trainer import SequenceClassificationTrainer | |||
| else: | |||
| _import_structure = { | |||
| 'sequence_classification_trainer': ['SequenceClassificationTrainer'] | |||
| } | |||
| import sys | |||
| sys.modules[__name__] = LazyImportModule( | |||
| __name__, | |||
| globals()['__file__'], | |||
| _import_structure, | |||
| module_spec=__spec__, | |||
| extra_objects={}, | |||
| ) | |||
| @@ -3,6 +3,7 @@ from typing import Dict, Optional, Tuple, Union | |||
| import numpy as np | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers.base import BaseTrainer | |||
| from modelscope.trainers.builder import TRAINERS | |||
| from modelscope.utils.logger import get_logger | |||
| @@ -11,7 +12,7 @@ PATH = None | |||
| logger = get_logger(PATH) | |||
| @TRAINERS.register_module(module_name=r'bert-sentiment-analysis') | |||
| @TRAINERS.register_module(module_name=Trainers.bert_sentiment_analysis) | |||
| class SequenceClassificationTrainer(BaseTrainer): | |||
| def __init__(self, cfg_file: str, *args, **kwargs): | |||
| @@ -6,6 +6,7 @@ from torch import nn | |||
| from torch.utils.data import Dataset | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics.builder import build_metric | |||
| from modelscope.models.base import Model, TorchModel | |||
| from modelscope.msdatasets import MsDataset | |||
| @@ -17,7 +18,7 @@ from .base import TRAINERS | |||
| from .trainer import EpochBasedTrainer | |||
| @TRAINERS.register_module(module_name='NlpEpochBasedTrainer') | |||
| @TRAINERS.register_module(module_name=Trainers.nlp_base_trainer) | |||
| class NlpEpochBasedTrainer(EpochBasedTrainer): | |||
| def __init__( | |||
| @@ -142,7 +143,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer): | |||
| return build_preprocessor(cfg, Tasks.find_field_by_task(self.cfg.task)) | |||
| @TRAINERS.register_module(module_name='VecoTrainer') | |||
| @TRAINERS.register_module(module_name=Trainers.nlp_veco_trainer) | |||
| class VecoTrainer(NlpEpochBasedTrainer): | |||
| def evaluate(self, checkpoint_path=None): | |||
| @@ -17,6 +17,7 @@ from torch.utils.data import DataLoader, Dataset | |||
| from torch.utils.data.distributed import DistributedSampler | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics import build_metric, task_default_metrics | |||
| from modelscope.models.base import Model, TorchModel | |||
| from modelscope.msdatasets.ms_dataset import MsDataset | |||
| @@ -45,7 +46,7 @@ from .parallel.builder import build_parallel | |||
| from .parallel.utils import is_parallel | |||
| @TRAINERS.register_module() | |||
| @TRAINERS.register_module(module_name=Trainers.default) | |||
| class EpochBasedTrainer(BaseTrainer): | |||
| """Epoch based Trainer, a training helper for PyTorch. | |||
| @@ -5,6 +5,7 @@ import importlib | |||
| import os | |||
| import os.path as osp | |||
| import time | |||
| import traceback | |||
| from functools import reduce | |||
| from typing import Generator, Union | |||
| @@ -13,8 +14,9 @@ import json | |||
| from modelscope import __version__ | |||
| from modelscope.fileio.file import LocalStorage | |||
| from modelscope.metainfo import (Heads, Metrics, Models, Pipelines, | |||
| Preprocessors, TaskModels, Trainers) | |||
| from modelscope.metainfo import (Heads, Hooks, LR_Schedulers, Metrics, Models, | |||
| Optimizers, Pipelines, Preprocessors, | |||
| TaskModels, Trainers) | |||
| from modelscope.utils.constant import Fields, Tasks | |||
| from modelscope.utils.file_utils import get_default_cache_dir | |||
| from modelscope.utils.logger import get_logger | |||
| @@ -28,7 +30,8 @@ MODELSCOPE_PATH = '/'.join(os.path.dirname(__file__).split('/')[:-1]) | |||
| REGISTER_MODULE = 'register_module' | |||
| IGNORED_PACKAGES = ['modelscope', '.'] | |||
| SCAN_SUB_FOLDERS = [ | |||
| 'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets' | |||
| 'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets', | |||
| 'trainers' | |||
| ] | |||
| INDEXER_FILE = 'ast_indexer' | |||
| DECORATOR_KEY = 'decorators' | |||
| @@ -305,9 +308,11 @@ class AstScaning(object): | |||
| output = [functions[0]] | |||
| if len(args_list) == 0 and len(keyword_list) == 0: | |||
| args_list.append(None) | |||
| args_list.append(default_group) | |||
| if len(keyword_list) == 0 and len(args_list) == 1: | |||
| args_list.append(None) | |||
| if len(keyword_list) == 1 and len(args_list) == 0: | |||
| args_list.append(default_group) | |||
| args_list.extend(keyword_list) | |||
| @@ -318,6 +323,8 @@ class AstScaning(object): | |||
| # the case (default_group) | |||
| elif item[1] is None: | |||
| output.append(item[0]) | |||
| elif isinstance(item, str): | |||
| output.append(item) | |||
| else: | |||
| output.append('.'.join(item)) | |||
| return (output[0], self._get_registry_value(output[1]), | |||
| @@ -443,9 +450,11 @@ class FilesAstScaning(object): | |||
| try: | |||
| output = self.astScaner.generate_ast(file) | |||
| except Exception as e: | |||
| detail = traceback.extract_tb(e.__traceback__) | |||
| raise Exception( | |||
| 'During ast indexing, there are index errors in the ' | |||
| f'file {file} : {type(e).__name__}.{e}') | |||
| f'During ast indexing, error is in the file {detail[-1].filename}' | |||
| f' line: {detail[-1].lineno}: "{detail[-1].line}" with error msg: ' | |||
| f'"{type(e).__name__}: {e}"') | |||
| import_list = self.parse_import(output) | |||
| return output[DECORATOR_KEY], import_list | |||
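The improved message above reports the innermost traceback frame; a small self-contained example of what traceback.extract_tb exposes:

import traceback

def _boom():
    raise SyntaxError('bad token')

try:
    _boom()
except Exception as e:
    detail = traceback.extract_tb(e.__traceback__)
    last = detail[-1]               # FrameSummary of the innermost frame
    print(last.filename, last.lineno, last.line)
    # prints something like: /path/to/example.py 4 raise SyntaxError('bad token')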
| @@ -523,14 +532,14 @@ class FilesAstScaning(object): | |||
| return md5.hexdigest() | |||
| fileScaner = FilesAstScaning() | |||
| file_scanner = FilesAstScaning() | |||
| def _save_index(index, file_path): | |||
| # convert tuple key to str key | |||
| index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()} | |||
| index[VERSION_KEY] = __version__ | |||
| index[MD5_KEY] = fileScaner.files_mtime_md5() | |||
| index[MD5_KEY] = file_scanner.files_mtime_md5() | |||
| json_index = json.dumps(index) | |||
| storage.write(json_index.encode(), file_path) | |||
| index[INDEX_KEY] = { | |||
| @@ -579,7 +588,7 @@ def load_index(force_rebuild=False): | |||
| index = None | |||
| if not force_rebuild and os.path.exists(file_path): | |||
| wrapped_index = _load_index(file_path) | |||
| md5 = fileScaner.files_mtime_md5() | |||
| md5 = file_scanner.files_mtime_md5() | |||
| if (wrapped_index[VERSION_KEY] == __version__ | |||
| and wrapped_index[MD5_KEY] == md5): | |||
| index = wrapped_index | |||
| @@ -591,7 +600,7 @@ def load_index(force_rebuild=False): | |||
| logger.info( | |||
| f'No valid ast index found from {file_path}, rebuilding ast index!' | |||
| ) | |||
| index = fileScaner.get_files_scan_results() | |||
| index = file_scanner.get_files_scan_results() | |||
| _save_index(index, file_path) | |||
| return index | |||
| @@ -7,4 +7,6 @@ pycocotools>=2.0.4 | |||
| # which introduced compatibility issues that are being investigated | |||
| rouge_score<=0.0.4 | |||
| timm | |||
| tokenizers | |||
| torchvision | |||
| transformers>=4.12.0 | |||
| @@ -6,3 +6,5 @@ pai-easynlp | |||
| rouge_score<=0.0.4 | |||
| seqeval | |||
| spacy>=2.3.5 | |||
| tokenizers | |||
| transformers>=4.12.0 | |||
| @@ -13,7 +13,5 @@ requests | |||
| scipy | |||
| setuptools | |||
| tensorboard | |||
| tokenizers | |||
| tqdm>=4.64.0 | |||
| transformers>=4.12.0 | |||
| yapf | |||
| @@ -10,6 +10,7 @@ import numpy as np | |||
| import torch | |||
| from torch import nn | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.constant import LogKeys, ModelFile | |||
| from modelscope.utils.test_utils import create_dummy_test_dataset | |||
| @@ -73,7 +74,7 @@ class TensorboardHookTest(unittest.TestCase): | |||
| with open(config_path, 'w') as f: | |||
| json.dump(json_cfg, f) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=DummyModel(), | |||
| @@ -9,6 +9,7 @@ import numpy as np | |||
| import torch | |||
| from torch import nn | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics.builder import METRICS, MetricKeys | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.constant import LogKeys, ModelFile | |||
| @@ -108,7 +109,7 @@ class CheckpointHookTest(unittest.TestCase): | |||
| with open(config_path, 'w') as f: | |||
| json.dump(json_cfg, f) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=DummyModel(), | |||
| @@ -179,7 +180,7 @@ class BestCkptSaverHookTest(unittest.TestCase): | |||
| with open(config_path, 'w') as f: | |||
| json.dump(json_cfg, f) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=DummyModel(), | |||
| @@ -9,6 +9,7 @@ import numpy as np | |||
| import torch | |||
| from torch import nn | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics.builder import METRICS, MetricKeys | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.constant import LogKeys, ModelFile | |||
| @@ -97,7 +98,7 @@ class EvaluationHookTest(unittest.TestCase): | |||
| with open(config_path, 'w') as f: | |||
| json.dump(json_cfg, f) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=DummyModel(), | |||
| @@ -11,6 +11,7 @@ from torch import nn | |||
| from torch.optim import SGD | |||
| from torch.optim.lr_scheduler import MultiStepLR, ReduceLROnPlateau | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics.builder import METRICS, MetricKeys | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages | |||
| @@ -89,7 +90,7 @@ class LrSchedulerHookTest(unittest.TestCase): | |||
| model = DummyModel() | |||
| optimizer = SGD(model.parameters(), lr=0.01) | |||
| lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -161,7 +162,7 @@ class LrSchedulerHookTest(unittest.TestCase): | |||
| model = DummyModel() | |||
| # optimmizer = SGD(model.parameters(), lr=0.01) | |||
| # lr_scheduler = MultiStepLR(optimmizer, milestones=[2, 4]) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -258,7 +259,7 @@ class PlateauLrSchedulerHookTest(unittest.TestCase): | |||
| model = DummyModel() | |||
| optimizer = SGD(model.parameters(), lr=0.01) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -11,6 +11,7 @@ from torch import nn | |||
| from torch.optim import SGD | |||
| from torch.optim.lr_scheduler import MultiStepLR | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.constant import ModelFile, TrainerStages | |||
| from modelscope.utils.test_utils import create_dummy_test_dataset | |||
| @@ -64,7 +65,7 @@ class OptimizerHookTest(unittest.TestCase): | |||
| model = DummyModel() | |||
| optimizer = SGD(model.parameters(), lr=0.01) | |||
| lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -130,7 +131,7 @@ class TorchAMPOptimizerHookTest(unittest.TestCase): | |||
| model = DummyModel().cuda() | |||
| optimizer = SGD(model.parameters(), lr=0.01) | |||
| lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -11,6 +11,7 @@ from torch import nn | |||
| from torch.optim import SGD | |||
| from torch.optim.lr_scheduler import MultiStepLR | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages | |||
| from modelscope.utils.test_utils import create_dummy_test_dataset | |||
| @@ -68,7 +69,7 @@ class IterTimerHookTest(unittest.TestCase): | |||
| model = DummyModel() | |||
| optimizer = SGD(model.parameters(), lr=0.01) | |||
| lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -4,6 +4,7 @@ import shutil | |||
| import tempfile | |||
| import unittest | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers import build_trainer | |||
| @@ -23,7 +24,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): | |||
| model_id, | |||
| train_dataset, | |||
| eval_dataset, | |||
| name='NlpEpochBasedTrainer', | |||
| name=Trainers.nlp_base_trainer, | |||
| cfg_modify_fn=None, | |||
| **kwargs): | |||
| kwargs = dict( | |||
| @@ -236,7 +237,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): | |||
| 'damo/nlp_veco_fill-mask-large', | |||
| train_datasets, | |||
| eval_datasets, | |||
| name='VecoTrainer', | |||
| name=Trainers.nlp_veco_trainer, | |||
| cfg_modify_fn=cfg_modify_fn) | |||
| @@ -5,6 +5,7 @@ import tempfile | |||
| import unittest | |||
| from functools import reduce | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.utils.test_utils import test_level | |||
| @@ -25,7 +26,7 @@ class TestFinetuneTokenClassification(unittest.TestCase): | |||
| model_id, | |||
| train_dataset, | |||
| eval_dataset, | |||
| name='NlpEpochBasedTrainer', | |||
| name=Trainers.nlp_base_trainer, | |||
| cfg_modify_fn=None, | |||
| **kwargs): | |||
| kwargs = dict( | |||
| @@ -7,6 +7,7 @@ import zipfile | |||
| from functools import partial | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.models.cv.image_instance_segmentation import ( | |||
| CascadeMaskRCNNSwinModel, ImageInstanceSegmentationCocoDataset) | |||
| from modelscope.trainers import build_trainer | |||
| @@ -79,7 +80,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): | |||
| work_dir=self.tmp_dir) | |||
| trainer = build_trainer( | |||
| name='image-instance-segmentation', default_args=kwargs) | |||
| name=Trainers.image_instance_segmentation, default_args=kwargs) | |||
| trainer.train() | |||
| results_files = os.listdir(self.tmp_dir) | |||
| self.assertIn(f'{trainer.timestamp}.log.json', results_files) | |||
| @@ -103,7 +104,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): | |||
| work_dir=self.tmp_dir) | |||
| trainer = build_trainer( | |||
| name='image-instance-segmentation', default_args=kwargs) | |||
| name=Trainers.image_instance_segmentation, default_args=kwargs) | |||
| trainer.train() | |||
| results_files = os.listdir(self.tmp_dir) | |||
| self.assertIn(f'{trainer.timestamp}.log.json', results_files) | |||
| @@ -11,6 +11,7 @@ import torch | |||
| from torch.utils import data as data | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.models.cv.image_portrait_enhancement import \ | |||
| ImagePortraitEnhancement | |||
| from modelscope.trainers import build_trainer | |||
| @@ -91,7 +92,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): | |||
| device='gpu', | |||
| work_dir=self.tmp_dir) | |||
| trainer = build_trainer(name='gpen', default_args=kwargs) | |||
| trainer = build_trainer( | |||
| name=Trainers.image_portrait_enhancement, default_args=kwargs) | |||
| trainer.train() | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| @@ -111,7 +113,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): | |||
| max_epochs=2, | |||
| work_dir=self.tmp_dir) | |||
| trainer = build_trainer(name='gpen', default_args=kwargs) | |||
| trainer = build_trainer( | |||
| name=Trainers.image_portrait_enhancement, default_args=kwargs) | |||
| trainer.train() | |||
| @@ -5,6 +5,7 @@ import tempfile | |||
| import unittest | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.models.nlp.palm_v2 import PalmForTextGeneration | |||
| from modelscope.msdatasets import MsDataset | |||
| from modelscope.trainers import build_trainer | |||
| @@ -57,7 +58,7 @@ class TestTextGenerationTrainer(unittest.TestCase): | |||
| work_dir=self.tmp_dir) | |||
| trainer = build_trainer( | |||
| name='NlpEpochBasedTrainer', default_args=kwargs) | |||
| name=Trainers.nlp_base_trainer, default_args=kwargs) | |||
| trainer.train() | |||
| results_files = os.listdir(self.tmp_dir) | |||
| self.assertIn(f'{trainer.timestamp}.log.json', results_files) | |||
| @@ -122,7 +123,7 @@ class TestTextGenerationTrainer(unittest.TestCase): | |||
| cfg_modify_fn=cfg_modify_fn, | |||
| model_revision='beta') | |||
| trainer = build_trainer( | |||
| name='NlpEpochBasedTrainer', default_args=kwargs) | |||
| name=Trainers.nlp_base_trainer, default_args=kwargs) | |||
| trainer.train() | |||
| @@ -13,6 +13,7 @@ from torch import nn | |||
| from torch.optim import SGD | |||
| from torch.optim.lr_scheduler import StepLR | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics.builder import MetricKeys | |||
| from modelscope.msdatasets import MsDataset | |||
| from modelscope.trainers import build_trainer | |||
| @@ -101,14 +102,14 @@ class TrainerTest(unittest.TestCase): | |||
| 'workers_per_gpu': 1, | |||
| 'shuffle': False | |||
| }, | |||
| 'metrics': ['seq_cls_metric'] | |||
| 'metrics': ['seq-cls-metric'] | |||
| } | |||
| } | |||
| config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) | |||
| with open(config_path, 'w') as f: | |||
| json.dump(json_cfg, f) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=DummyModel(), | |||
| @@ -155,7 +156,7 @@ class TrainerTest(unittest.TestCase): | |||
| 'workers_per_gpu': 1, | |||
| 'shuffle': False | |||
| }, | |||
| 'metrics': ['seq_cls_metric'] | |||
| 'metrics': ['seq-cls-metric'] | |||
| } | |||
| } | |||
| @@ -166,7 +167,7 @@ class TrainerTest(unittest.TestCase): | |||
| model = DummyModel() | |||
| optimmizer = SGD(model.parameters(), lr=0.01) | |||
| lr_scheduler = StepLR(optimmizer, 2) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -205,7 +206,7 @@ class TrainerTest(unittest.TestCase): | |||
| 'workers_per_gpu': 1, | |||
| 'shuffle': False | |||
| }, | |||
| 'metrics': ['seq_cls_metric'] | |||
| 'metrics': ['seq-cls-metric'] | |||
| } | |||
| } | |||
| @@ -216,7 +217,7 @@ class TrainerTest(unittest.TestCase): | |||
| model = DummyModel() | |||
| optimmizer = SGD(model.parameters(), lr=0.01) | |||
| lr_scheduler = StepLR(optimmizer, 2) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||
| @@ -12,8 +12,9 @@ from torch import nn | |||
| from torch.optim import SGD | |||
| from torch.optim.lr_scheduler import StepLR | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics.builder import MetricKeys | |||
| from modelscope.trainers import build_trainer | |||
| from modelscope.trainers import EpochBasedTrainer, build_trainer | |||
| from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile | |||
| from modelscope.utils.test_utils import (DistributedTestCase, | |||
| create_dummy_test_dataset, test_level) | |||
| @@ -70,7 +71,7 @@ def train_func(work_dir, dist=False): | |||
| model = DummyModel() | |||
| optimmizer = SGD(model.parameters(), lr=0.01) | |||
| lr_scheduler = StepLR(optimmizer, 2) | |||
| trainer_name = 'EpochBasedTrainer' | |||
| trainer_name = Trainers.default | |||
| kwargs = dict( | |||
| cfg_file=config_path, | |||
| model=model, | |||