Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9935664 * adapt to msdataset for EasyCV (master)
@@ -0,0 +1,59 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path as osp
+
+
+class EasyCVBaseDataset(object):
+    """Adapt to MSDataset.
+
+    Subclasses need to implement ``DATA_STRUCTURE``; the format is as follows, e.g.:
+        {
+            '${data source name}': {
+                'train': {
+                    '${image root arg}': 'images',  # directory name of images relative to the root path
+                    '${label root arg}': 'labels',  # directory name of labels relative to the root path
+                    ...
+                },
+                'validation': {
+                    '${image root arg}': 'images',
+                    '${label root arg}': 'labels',
+                    ...
+                }
+            }
+        }
+
+    Args:
+        split_config (dict): Dataset root path from MSDataset, e.g.
+            {"train": "local cache path"} or {"evaluation": "local cache path"}.
+        preprocessor (Preprocessor): An optional preprocessor instance; please make sure the
+            preprocessor fits the model if supplied. Not supported yet.
+        mode: Training or Evaluation.
+    """
+    DATA_STRUCTURE = None
+
+    def __init__(self,
+                 split_config=None,
+                 preprocessor=None,
+                 mode=None,
+                 args=(),
+                 kwargs={}) -> None:
+        self.split_config = split_config
+        self.preprocessor = preprocessor
+        self.mode = mode
+
+        if self.split_config is not None:
+            self._update_data_source(kwargs['data_source'])
+
+    def _update_data_source(self, data_source):
+        data_root = next(iter(self.split_config.values()))
+        split = next(iter(self.split_config.keys()))
+        # TODO: msdataset should support these keys to be configured in the dataset's json file and passed in
+        if data_source['type'] not in list(self.DATA_STRUCTURE.keys()):
+            raise ValueError(
+                'Only support %s now, but got %s.' %
+                (list(self.DATA_STRUCTURE.keys()), data_source['type']))
+        # join the msdataset data root path and the default relative name
+        update_args = self.DATA_STRUCTURE[data_source['type']][split]
+        for k, v in update_args.items():
+            data_source.update({k: osp.join(data_root, v)})
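Review note: to make the ``DATA_STRUCTURE`` contract concrete, here is a minimal sketch; the ``ToyClsDataset`` class and ``ClsSourceToy`` key are hypothetical, not part of this change. It shows how `_update_data_source` rewrites the relative names into absolute paths under the MSDataset cache root:

```python
# Hypothetical subclass, assuming EasyCVBaseDataset above is in scope.
class ToyClsDataset(EasyCVBaseDataset):
    DATA_STRUCTURE = {
        'ClsSourceToy': {  # hypothetical data source name
            'train': {'img_root': 'images', 'label_root': 'labels'},
            'validation': {'img_root': 'images', 'label_root': 'labels'},
        }
    }


data_source = {'type': 'ClsSourceToy'}
ToyClsDataset(
    split_config={'train': '/cache/toy_dataset'},
    kwargs={'data_source': data_source})
# data_source was updated in place:
assert data_source['img_root'] == '/cache/toy_dataset/images'
assert data_source['label_root'] == '/cache/toy_dataset/labels'
```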
@@ -1,21 +1,65 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path as osp
+
 from easycv.datasets.segmentation import SegDataset as _SegDataset

 from modelscope.metainfo import Datasets
+from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset
 from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS
 from modelscope.utils.constant import Tasks


+class EasyCVSegBaseDataset(EasyCVBaseDataset):
+    DATA_STRUCTURE = {
+        # data source name
+        'SegSourceRaw': {
+            'train': {
+                'img_root': 'images',  # directory name of images relative to the root path
+                'label_root': 'annotations',  # directory name of annotations relative to the root path
+                'split': 'train.txt'  # split file name relative to the root path
+            },
+            'validation': {
+                'img_root': 'images',
+                'label_root': 'annotations',
+                'split': 'val.txt'
+            }
+        }
+    }
+
+
 @TASK_DATASETS.register_module(
     group_key=Tasks.image_segmentation, module_name=Datasets.SegDataset)
-class SegDataset(_SegDataset):
+class SegDataset(EasyCVSegBaseDataset, _SegDataset):
     """EasyCV dataset for semantic segmentation.

     For more details, please refer to:
     https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/segmentation/raw.py .

     Args:
+        split_config (dict): Dataset root path from MSDataset, e.g.
+            {"train": "local cache path"} or {"evaluation": "local cache path"}.
+        preprocessor (Preprocessor): An optional preprocessor instance; please make sure the
+            preprocessor fits the model if supplied. Not supported yet.
+        mode: Training or Evaluation.
         data_source: Data source config to parse input data.
         pipeline: Sequence of transform objects or config dicts to be composed.
         ignore_index (int): Label index to be ignored.
         profiling: If set True, will print transform time.
     """

+    def __init__(self,
+                 split_config=None,
+                 preprocessor=None,
+                 mode=None,
+                 *args,
+                 **kwargs) -> None:
+        EasyCVSegBaseDataset.__init__(
+            self,
+            split_config=split_config,
+            preprocessor=preprocessor,
+            mode=mode,
+            args=args,
+            kwargs=kwargs)
+        _SegDataset.__init__(self, *args, **kwargs)
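Review note: with this wiring a caller only supplies the MSDataset cache root; the seg-specific keys are filled in before `_SegDataset.__init__` consumes `data_source`. A hedged sketch of the call shape (paths and the empty pipeline are placeholders, not working values):

```python
# Illustrative only; real pipelines and cache paths come from the config.
data_source = dict(type='SegSourceRaw')
SegDataset(
    split_config={'train': '/path/to/msdataset/cache'},
    mode='train',
    data_source=data_source,
    pipeline=[])
# Before _SegDataset.__init__ runs, data_source has been expanded to:
#   img_root   -> /path/to/msdataset/cache/images
#   label_root -> /path/to/msdataset/cache/annotations
#   split      -> /path/to/msdataset/cache/train.txt
```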
@@ -1,31 +1,71 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path as osp
+
 from easycv.datasets.detection import DetDataset as _DetDataset
 from easycv.datasets.detection import \
     DetImagesMixDataset as _DetImagesMixDataset

 from modelscope.metainfo import Datasets
+from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset
 from modelscope.msdatasets.task_datasets import TASK_DATASETS
 from modelscope.utils.constant import Tasks


+class EasyCVDetBaseDataset(EasyCVBaseDataset):
+    DATA_STRUCTURE = {
+        'DetSourceCoco': {
+            'train': {
+                'ann_file': 'train.json',  # file name of annotations relative to the root path
+                'img_prefix': 'images',  # directory name of images relative to the root path
+            },
+            'validation': {
+                'ann_file': 'val.json',
+                'img_prefix': 'images',
+            }
+        }
+    }
+
+
 @TASK_DATASETS.register_module(
     group_key=Tasks.image_object_detection, module_name=Datasets.DetDataset)
-class DetDataset(_DetDataset):
+class DetDataset(EasyCVDetBaseDataset, _DetDataset):
     """EasyCV dataset for object detection.

     For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/raw.py .

     Args:
+        split_config (dict): Dataset root path from MSDataset, e.g.
+            {"train": "local cache path"} or {"evaluation": "local cache path"}.
+        preprocessor (Preprocessor): An optional preprocessor instance; please make sure the
+            preprocessor fits the model if supplied. Not supported yet.
+        mode: Training or Evaluation.
         data_source: Data source config to parse input data.
         pipeline: Transform config list.
         profiling: If set True, will print pipeline time.
         classes: A list of class names, used in evaluation for result and ground truth visualization.
     """

+    def __init__(self,
+                 split_config=None,
+                 preprocessor=None,
+                 mode=None,
+                 *args,
+                 **kwargs) -> None:
+        EasyCVDetBaseDataset.__init__(
+            self,
+            split_config=split_config,
+            preprocessor=preprocessor,
+            mode=mode,
+            args=args,
+            kwargs=kwargs)
+        _DetDataset.__init__(self, *args, **kwargs)


 @TASK_DATASETS.register_module(
     group_key=Tasks.image_object_detection,
     module_name=Datasets.DetImagesMixDataset)
-class DetImagesMixDataset(_DetImagesMixDataset):
+class DetImagesMixDataset(EasyCVDetBaseDataset, _DetImagesMixDataset):
     """EasyCV dataset for object detection, a wrapper of multiple images mixed dataset.

     Suitable for training on multiple images mixed data augmentation like
     mosaic and mixup. For the augmentation pipeline of mixed image data,

@@ -38,6 +78,11 @@ class DetImagesMixDataset(_DetImagesMixDataset):
     For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/mix.py .

     Args:
+        split_config (dict): Dataset root path from MSDataset, e.g.
+            {"train": "local cache path"} or {"evaluation": "local cache path"}.
+        preprocessor (Preprocessor): An optional preprocessor instance; please make sure the
+            preprocessor fits the model if supplied. Not supported yet.
+        mode: Training or Evaluation.
         data_source (:obj:`DetSourceCoco`): Data source config to parse input data.
         pipeline (Sequence[dict]): Sequence of transform objects or
             config dicts to be composed.

@@ -47,3 +92,18 @@ class DetImagesMixDataset(_DetImagesMixDataset):
             be skipped by the pipeline. Default to None.
         label_padding: output label padding [N, 120, 5]
     """

+    def __init__(self,
+                 split_config=None,
+                 preprocessor=None,
+                 mode=None,
+                 *args,
+                 **kwargs) -> None:
+        EasyCVDetBaseDataset.__init__(
+            self,
+            split_config=split_config,
+            preprocessor=preprocessor,
+            mode=mode,
+            args=args,
+            kwargs=kwargs)
+        _DetImagesMixDataset.__init__(self, *args, **kwargs)
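Review note: once registered, these classes are constructed from config via the task-dataset registry rather than imported directly. A sketch of that lookup, assuming `build_task_dataset` is exported by the same builder module as `TASK_DATASETS` (the cache path and empty pipeline are illustrative, not working values):

```python
from modelscope.msdatasets.task_datasets.builder import build_task_dataset
from modelscope.utils.config import ConfigDict
from modelscope.utils.constant import Tasks

# 'DetDataset' is the module_name registered above via Datasets.DetDataset.
cfg = ConfigDict(
    type='DetDataset',
    split_config={'train': '/cache/coco_like'},  # illustrative path
    mode='train',
    data_source=dict(type='DetSourceCoco'),
    pipeline=[])
dataset = build_task_dataset(cfg, Tasks.image_object_detection)
```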
@@ -27,7 +27,6 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):
     """Epoch based Trainer for EasyCV.

     Args:
-        task: Task name.
         cfg_file(str): The config file of EasyCV.
         model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir
             or a model id. If model is None, build_model method will be called.

@@ -51,7 +50,6 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):
     def __init__(
             self,
-            task: str,
             cfg_file: Optional[str] = None,
             model: Optional[Union[TorchModel, nn.Module, str]] = None,
             arg_parse_fn: Optional[Callable] = None,

@@ -64,7 +62,6 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):
             model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
             **kwargs):
-        self.task = task
         register_util.register_parallel()
         register_util.register_part_mmcv_hooks_to_ms()

@@ -168,8 +165,3 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):
                 device_ids=[torch.cuda.current_device()])
         return build_parallel(dp_cfg)
-
-    def rebuild_config(self, cfg: Config):
-        cfg.task = self.task
-        return cfg
@@ -4,16 +4,49 @@ import logging

 from modelscope.trainers.hooks import HOOKS
 from modelscope.trainers.parallel.builder import PARALLEL
+from modelscope.utils.registry import default_group
+
+
+class _RegisterManager:
+
+    def __init__(self):
+        self.registries = {}
+
+    def add(self, module, name, group_key=default_group):
+        if module.name not in self.registries:
+            self.registries[module.name] = {}
+        if group_key not in self.registries[module.name]:
+            self.registries[module.name][group_key] = []
+        self.registries[module.name][group_key].append(name)
+
+    def exists(self, module, name, group_key=default_group):
+        if self.registries.get(module.name, None) is None:
+            return False
+        if self.registries[module.name].get(group_key, None) is None:
+            return False
+        if name in self.registries[module.name][group_key]:
+            return True
+        return False
+
+
+_dynamic_register = _RegisterManager()


 def register_parallel():
     from mmcv.parallel import MMDistributedDataParallel, MMDataParallel

-    PARALLEL.register_module(
-        module_name='MMDistributedDataParallel',
-        module_cls=MMDistributedDataParallel)
-    PARALLEL.register_module(
-        module_name='MMDataParallel', module_cls=MMDataParallel)
+    mmddp = 'MMDistributedDataParallel'
+    mmdp = 'MMDataParallel'
+    if not _dynamic_register.exists(PARALLEL, mmddp):
+        _dynamic_register.add(PARALLEL, mmddp)
+        PARALLEL.register_module(
+            module_name=mmddp, module_cls=MMDistributedDataParallel)
+    if not _dynamic_register.exists(PARALLEL, mmdp):
+        _dynamic_register.add(PARALLEL, mmdp)
+        PARALLEL.register_module(module_name=mmdp, module_cls=MMDataParallel)


 def register_hook_to_ms(hook_name, logger=None):

@@ -24,6 +57,10 @@ def register_hook_to_ms(hook_name, logger=None):
         raise ValueError(
             f'Not found hook "{hook_name}" in EasyCV hook registries!')

+    if _dynamic_register.exists(HOOKS, hook_name):
+        return
+    _dynamic_register.add(HOOKS, hook_name)
+
     obj = _EV_HOOKS._module_dict[hook_name]
     HOOKS.register_module(module_name=hook_name, module_cls=obj)

@@ -41,18 +78,19 @@ def register_part_mmcv_hooks_to_ms():
     from mmcv.runner.hooks import lr_updater
     from mmcv.runner.hooks import HOOKS as _MMCV_HOOKS
     from easycv.hooks import StepFixCosineAnnealingLrUpdaterHook, YOLOXLrUpdaterHook
-    from easycv.hooks.logger import PreLoggerHook

     mmcv_hooks_in_easycv = [('StepFixCosineAnnealingLrUpdaterHook',
                              StepFixCosineAnnealingLrUpdaterHook),
-                            ('YOLOXLrUpdaterHook', YOLOXLrUpdaterHook),
-                            ('PreLoggerHook', PreLoggerHook)]
+                            ('YOLOXLrUpdaterHook', YOLOXLrUpdaterHook)]

     members = inspect.getmembers(lr_updater)
     members.extend(mmcv_hooks_in_easycv)

     for name, obj in members:
         if name in _MMCV_HOOKS._module_dict:
+            if _dynamic_register.exists(HOOKS, name):
+                continue
+            _dynamic_register.add(HOOKS, name)
             HOOKS.register_module(
                 module_name=name,
                 module_cls=obj,
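Review note: the point of `_RegisterManager` is to turn re-registration into a silent no-op instead of a duplicate-key error, which is what previously forced the single-GPU EasyCV test to be skipped when several tests shared one process. A quick sketch of the intended behavior (the import path is assumed, not confirmed by this diff):

```python
# Assumed module path for the helpers shown above.
from modelscope.trainers.easycv.utils import register_util

register_util.register_parallel()
register_util.register_parallel()  # no-op: names already tracked by _dynamic_register
register_util.register_part_mmcv_hooks_to_ms()
register_util.register_part_mmcv_hooks_to_ms()  # hooks are skipped on the second pass
```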
@@ -164,10 +164,14 @@ class EpochBasedTrainer(BaseTrainer):
         self.train_dataset = self.to_task_dataset(
             train_dataset,
             mode=ModeKeys.TRAIN,
+            task_data_config=self.cfg.dataset.get('train', None) if hasattr(
+                self.cfg, 'dataset') else None,
             preprocessor=self.train_preprocessor)
         self.eval_dataset = self.to_task_dataset(
             eval_dataset,
             mode=ModeKeys.EVAL,
+            task_data_config=self.cfg.dataset.get('val', None) if hasattr(
+                self.cfg, 'dataset') else None,
             preprocessor=self.eval_preprocessor)

         self.train_data_collator, self.eval_default_collate = None, None

@@ -298,6 +302,7 @@ class EpochBasedTrainer(BaseTrainer):
     def to_task_dataset(self,
                         datasets: Union[Dataset, List[Dataset]],
                         mode: str,
+                        task_data_config: Config = None,
                         preprocessor: Optional[Preprocessor] = None):
         """Build the task specific dataset processor for this trainer.

@@ -310,20 +315,29 @@ class EpochBasedTrainer(BaseTrainer):
             if isinstance(datasets, TorchTaskDataset):
                 return datasets
             elif isinstance(datasets, MsDataset):
-                cfg = ConfigDict(type=self.cfg.model.type, mode=mode) if hasattr(self.cfg, ConfigFields.model) \
-                    else ConfigDict(type=None, mode=mode)
+                if task_data_config is None:
+                    # adapt to some special models
+                    task_data_config = ConfigDict(
+                        type=self.cfg.model.type) if hasattr(
+                            self.cfg, ConfigFields.model) else ConfigDict(
+                                type=None)
+                task_data_config.update(dict(mode=mode))
                 return datasets.to_torch_dataset(
-                    task_data_config=cfg,
-                    task_name=self.cfg.task
-                    if hasattr(self.cfg, ConfigFields.task) else None,
+                    task_data_config=task_data_config,
+                    task_name=self.cfg.task,
                     preprocessors=preprocessor)
             elif isinstance(datasets, List) and isinstance(
                     datasets[0], MsDataset):
-                cfg = ConfigDict(type=self.cfg.model.type, mode=mode) if hasattr(self.cfg, ConfigFields.model) \
-                    else ConfigDict(type=None, mode=mode)
+                if task_data_config is None:
+                    # adapt to some special models
+                    task_data_config = ConfigDict(
+                        type=self.cfg.model.type) if hasattr(
+                            self.cfg, ConfigFields.model) else ConfigDict(
+                                type=None)
+                task_data_config.update(dict(mode=mode))
                 datasets = [
                     d.to_torch_dataset(
-                        task_data_config=cfg,
+                        task_data_config=task_data_config,
                        task_name=self.cfg.task,
                        preprocessors=preprocessor) for d in datasets
                 ]

@@ -331,12 +345,12 @@ class EpochBasedTrainer(BaseTrainer):
                     type=self.cfg.task, mode=mode, datasets=datasets)
                 return build_task_dataset(cfg, self.cfg.task)
             else:
-                cfg = ConfigDict(
-                    type=self.cfg.model.type,
-                    mode=mode,
-                    datasets=datasets,
-                    preprocessor=preprocessor)
-                return build_task_dataset(cfg, self.cfg.task)
+                task_data_config.update(
+                    dict(
+                        mode=mode,
+                        datasets=datasets,
+                        preprocessor=preprocessor))
+                return build_task_dataset(task_data_config, self.cfg.task)
         except Exception:
             if isinstance(datasets, (List, Tuple)) or preprocessor is not None:
                 return TorchTaskDataset(
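Review note: the net effect is that an explicit `dataset.train` / `dataset.val` section in the trainer config now takes precedence, and the model-type fallback only applies when no such section exists. A standalone sketch of just that precedence rule (a hypothetical helper, not the trainer method itself):

```python
from modelscope.utils.config import ConfigDict


def resolve_task_data_config(task_data_config, model_type, mode):
    # Mirrors the branch above: explicit config wins, model type is the fallback.
    if task_data_config is None:
        task_data_config = ConfigDict(type=model_type)
    task_data_config.update(dict(mode=mode))
    return task_data_config


fallback = resolve_task_data_config(None, 'yolox', 'train')
assert fallback['type'] == 'yolox'

explicit = resolve_task_data_config(ConfigDict(type='DetDataset'), 'yolox', 'train')
assert explicit['type'] == 'DetDataset'  # dataset config overrides the model type
```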
@@ -14,10 +14,10 @@ import unittest
 from typing import OrderedDict

 import requests
-from datasets import Dataset
+import torch
 from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE
+from torch.utils.data import Dataset

-from modelscope.msdatasets import MsDataset
 from .torch_utils import _find_free_port

 TEST_LEVEL = 2

@@ -49,9 +49,25 @@ def set_test_level(level: int):
     TEST_LEVEL = level


+class DummyTorchDataset(Dataset):
+
+    def __init__(self, feat, label, num) -> None:
+        self.feat = feat
+        self.label = label
+        self.num = num
+
+    def __getitem__(self, index):
+        return {
+            'feat': torch.Tensor(self.feat),
+            'labels': torch.Tensor(self.label)
+        }
+
+    def __len__(self):
+        return self.num
+
+
 def create_dummy_test_dataset(feat, label, num):
-    return MsDataset.from_hf_dataset(
-        Dataset.from_dict(dict(feat=[feat] * num, labels=[label] * num)))
+    return DummyTorchDataset(feat, label, num)


 def download_and_untar(fpath, furl, dst) -> str:
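Review note: swapping the MsDataset-backed dummy for a plain `torch.utils.data.Dataset` removes the hub round-trip from unit tests, and callers of `create_dummy_test_dataset` are unaffected. Usage sketch:

```python
dataset = create_dummy_test_dataset(feat=[1.0, 2.0, 3.0], label=[0.0], num=10)
assert len(dataset) == 10
sample = dataset[0]
assert sample['feat'].shape == (3,)    # torch.Size([3])
assert sample['labels'].shape == (1,)  # every index returns the same tensors
```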
@@ -6,10 +6,10 @@ import tempfile
 import unittest

 import json
-import requests
 import torch

 from modelscope.metainfo import Models, Pipelines, Trainers
+from modelscope.msdatasets import MsDataset
 from modelscope.trainers import build_trainer
 from modelscope.utils.config import Config
 from modelscope.utils.constant import LogKeys, ModeKeys, Tasks

@@ -18,55 +18,19 @@ from modelscope.utils.test_utils import DistributedTestCase, test_level
 from modelscope.utils.torch_utils import is_master


-def _download_data(url, save_dir):
-    r = requests.get(url, verify=True)
-    if not os.path.exists(save_dir):
-        os.makedirs(save_dir)
-    zip_name = os.path.split(url)[-1]
-    save_path = os.path.join(save_dir, zip_name)
-    with open(save_path, 'wb') as f:
-        f.write(r.content)
-    unpack_dir = os.path.join(save_dir, os.path.splitext(zip_name)[0])
-    shutil.unpack_archive(save_path, unpack_dir)
-
-
-def train_func(work_dir, dist=False, log_config=3, imgs_per_gpu=4):
+def train_func(work_dir, dist=False, log_interval=3, imgs_per_gpu=4):
     import easycv
     config_path = os.path.join(
         os.path.dirname(easycv.__file__),
         'configs/detection/yolox/yolox_s_8xb16_300e_coco.py')

-    data_dir = os.path.join(work_dir, 'small_coco_test')
-    url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/datasets/small_coco.zip'
-    if is_master():
-        _download_data(url, data_dir)
-
-    import time
-    time.sleep(1)
-
     cfg = Config.from_file(config_path)
-    cfg.work_dir = work_dir
-    cfg.total_epochs = 2
-    cfg.checkpoint_config.interval = 1
-    cfg.eval_config.interval = 1
-    cfg.log_config = dict(
-        interval=log_config,
-        hooks=[
+    cfg.log_config.update(
+        dict(hooks=[
             dict(type='TextLoggerHook'),
             dict(type='TensorboardLoggerHook')
-        ])
-    cfg.data.train.data_source.ann_file = os.path.join(
-        data_dir, 'small_coco/small_coco/instances_train2017_20.json')
-    cfg.data.train.data_source.img_prefix = os.path.join(
-        data_dir, 'small_coco/small_coco/train2017')
-    cfg.data.val.data_source.ann_file = os.path.join(
-        data_dir, 'small_coco/small_coco/instances_val2017_20.json')
-    cfg.data.val.data_source.img_prefix = os.path.join(
-        data_dir, 'small_coco/small_coco/val2017')
-    cfg.data.imgs_per_gpu = imgs_per_gpu
-    cfg.data.workers_per_gpu = 2
-    cfg.data.val.imgs_per_gpu = 2
+        ]))  # not support TensorboardLoggerHookV2

     ms_cfg_file = os.path.join(work_dir, 'ms_yolox_s_8xb16_300e_coco.json')
     from easycv.utils.ms_utils import to_ms_config
@@ -81,9 +45,41 @@ def train_func(work_dir, dist=False, log_interval=3, imgs_per_gpu=4):
         save_path=ms_cfg_file)

     trainer_name = Trainers.easycv
+    train_dataset = MsDataset.load(
+        dataset_name='small_coco_for_test', namespace='EasyCV', split='train')
+    eval_dataset = MsDataset.load(
+        dataset_name='small_coco_for_test',
+        namespace='EasyCV',
+        split='validation')
+
+    cfg_options = {
+        'train.max_epochs': 2,
+        'train.dataloader.batch_size_per_gpu': imgs_per_gpu,
+        'evaluation.dataloader.batch_size_per_gpu': 2,
+        'train.hooks': [
+            {
+                'type': 'CheckpointHook',
+                'interval': 1
+            },
+            {
+                'type': 'EvaluationHook',
+                'interval': 1
+            },
+            {
+                'type': 'TextLoggerHook',
+                'interval': log_interval
+            },
+        ]
+    }
     kwargs = dict(
-        task=Tasks.image_object_detection,
         cfg_file=ms_cfg_file,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
+        work_dir=work_dir,
+        cfg_options=cfg_options,
         launcher='pytorch' if dist else None)

     trainer = build_trainer(trainer_name, kwargs)
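Review note: `cfg_options` keys are dot-separated paths into the nested trainer config, so `'train.dataloader.batch_size_per_gpu'` overrides `cfg.train.dataloader.batch_size_per_gpu`. A hypothetical helper showing the merge semantics (modelscope applies these through its own Config machinery, not this exact function):

```python
def apply_cfg_options(cfg: dict, cfg_options: dict) -> None:
    """Merge dot-path overrides into a nested config dict (illustration only)."""
    for dotted_key, value in cfg_options.items():
        node = cfg
        *parents, leaf = dotted_key.split('.')
        for key in parents:
            node = node.setdefault(key, {})
        node[leaf] = value


cfg = {'train': {'dataloader': {'batch_size_per_gpu': 16}}}
apply_cfg_options(cfg, {'train.dataloader.batch_size_per_gpu': 4,
                        'train.max_epochs': 2})
assert cfg['train']['dataloader']['batch_size_per_gpu'] == 4
assert cfg['train']['max_epochs'] == 2
```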
@@ -105,11 +101,8 @@ class EasyCVTrainerTestSingleGpu(unittest.TestCase):
         super().tearDown()
         shutil.rmtree(self.tmp_dir, ignore_errors=True)

-    @unittest.skipIf(
-        True, 'The test cases are all run in the master process, '
-        'cause registry conflicts, and it should run in the subprocess.')
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_single_gpu(self):
+        # TODO: run in subprocess
         train_func(self.tmp_dir)

         results_files = os.listdir(self.tmp_dir)

@@ -185,7 +178,7 @@ class EasyCVTrainerTestMultiGpus(DistributedTestCase):
             num_gpus=2,
             work_dir=self.tmp_dir,
             dist=True,
-            log_config=2,
+            log_interval=2,
             imgs_per_gpu=5)

         results_files = os.listdir(self.tmp_dir)
@@ -5,28 +5,14 @@ import shutil
 import tempfile
 import unittest

-import requests
 import torch

 from modelscope.metainfo import Trainers
+from modelscope.msdatasets import MsDataset
 from modelscope.trainers import build_trainer
 from modelscope.utils.constant import LogKeys, Tasks
 from modelscope.utils.logger import get_logger
 from modelscope.utils.test_utils import test_level
-from modelscope.utils.torch_utils import is_master
-
-
-def _download_data(url, save_dir):
-    r = requests.get(url, verify=True)
-    if not os.path.exists(save_dir):
-        os.makedirs(save_dir)
-    zip_name = os.path.split(url)[-1]
-    save_path = os.path.join(save_dir, zip_name)
-    with open(save_path, 'wb') as f:
-        f.write(r.content)
-    unpack_dir = os.path.join(save_dir, os.path.splitext(zip_name)[0])
-    shutil.unpack_archive(save_path, unpack_dir)


 @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')

@@ -45,46 +31,32 @@ class EasyCVTrainerTestSegformer(unittest.TestCase):
         shutil.rmtree(self.tmp_dir, ignore_errors=True)

     def _train(self):
-        from modelscope.trainers.easycv.trainer import EasyCVEpochBasedTrainer
-
-        url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/datasets/small_coco_stuff164k.zip'
-        data_dir = os.path.join(self.tmp_dir, 'data')
-        if is_master():
-            _download_data(url, data_dir)
-
-        # adapt to ditributed mode
+        # adapt to distributed mode
         from easycv.utils.test_util import pseudo_dist_init
         pseudo_dist_init()

-        root_path = os.path.join(data_dir, 'small_coco_stuff164k')
-        cfg_options = {
-            'train.max_epochs': 2,
-            'dataset.train.data_source.img_root':
-                os.path.join(root_path, 'train2017'),
-            'dataset.train.data_source.label_root':
-                os.path.join(root_path, 'annotations/train2017'),
-            'dataset.train.data_source.split':
-                os.path.join(root_path, 'train.txt'),
-            'dataset.val.data_source.img_root':
-                os.path.join(root_path, 'val2017'),
-            'dataset.val.data_source.label_root':
-                os.path.join(root_path, 'annotations/val2017'),
-            'dataset.val.data_source.split':
-                os.path.join(root_path, 'val.txt'),
-        }
+        cfg_options = {'train.max_epochs': 2}

         trainer_name = Trainers.easycv
+        train_dataset = MsDataset.load(
+            dataset_name='small_coco_stuff164k',
+            namespace='EasyCV',
+            split='train')
+        eval_dataset = MsDataset.load(
+            dataset_name='small_coco_stuff164k',
+            namespace='EasyCV',
+            split='validation')
         kwargs = dict(
-            task=Tasks.image_segmentation,
             model='EasyCV/EasyCV-Segformer-b0',
+            train_dataset=train_dataset,
+            eval_dataset=eval_dataset,
             work_dir=self.tmp_dir,
             cfg_options=cfg_options)

         trainer = build_trainer(trainer_name, kwargs)
         trainer.train()

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_single_gpu_segformer(self):
         self._train()
@@ -64,7 +64,7 @@ class TrainerTest(unittest.TestCase):
         super().tearDown()
         shutil.rmtree(self.tmp_dir)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_train_0(self):
         json_cfg = {
             'task': Tasks.image_classification,

@@ -139,7 +139,7 @@ class TrainerTest(unittest.TestCase):
         self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
         self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_train_1(self):
         json_cfg = {
             'task': Tasks.image_classification,

@@ -200,7 +200,7 @@ class TrainerTest(unittest.TestCase):
         self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
         self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_train_with_default_config(self):
         json_cfg = {
             'task': Tasks.image_classification,

@@ -319,7 +319,7 @@ class TrainerTest(unittest.TestCase):
         for i in [2, 5, 8]:
             self.assertIn(MetricKeys.ACCURACY, lines[i])

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_train_with_iters_per_epoch(self):
         json_cfg = {
             'task': Tasks.image_classification,

@@ -441,7 +441,7 @@ class TrainerTest(unittest.TestCase):

 class DummyTrainerTest(unittest.TestCase):

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_dummy(self):
         default_args = dict(cfg_file='configs/examples/train.json')
         trainer = build_trainer('dummy', default_args)
@@ -17,7 +17,7 @@ from modelscope.metainfo import Metrics, Trainers
 from modelscope.metrics.builder import MetricKeys
 from modelscope.models.base import Model
 from modelscope.trainers import EpochBasedTrainer, build_trainer
-from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile
+from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile, Tasks
 from modelscope.utils.test_utils import (DistributedTestCase,
                                          create_dummy_test_dataset, test_level)

@@ -55,6 +55,7 @@ class DummyModel(nn.Module, Model):

 def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs):
     json_cfg = {
+        'task': Tasks.image_classification,
         'train': {
             'work_dir': work_dir,
             'dataloader': {

@@ -119,7 +120,7 @@ class TrainerTestSingleGpu(unittest.TestCase):
         super().tearDown()
         shutil.rmtree(self.tmp_dir)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_single_gpu(self):
         train_func(self.tmp_dir)