
adapt to msdataset for EasyCV

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9935664

    * adapt to msdataset for EasyCV
jiangnana.jnn committed 3 years ago · branch master · commit 5e176da3a1
11 changed files with 322 additions and 133 deletions
  1. modelscope/msdatasets/cv/easycv_base.py (+59, -0)
  2. modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py (+45, -1)
  3. modelscope/msdatasets/cv/object_detection/detection_dataset.py (+62, -2)
  4. modelscope/trainers/easycv/trainer.py (+0, -8)
  5. modelscope/trainers/easycv/utils/register_util.py (+46, -8)
  6. modelscope/trainers/trainer.py (+28, -14)
  7. modelscope/utils/test_utils.py (+20, -4)
  8. tests/trainers/easycv/test_easycv_trainer.py (+40, -47)
  9. tests/trainers/easycv/test_segformer.py (+14, -42)
  10. tests/trainers/test_trainer.py (+5, -5)
  11. tests/trainers/test_trainer_gpu.py (+3, -2)

modelscope/msdatasets/cv/easycv_base.py (+59, -0)

@@ -0,0 +1,59 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os.path as osp


class EasyCVBaseDataset(object):
    """Adapt to MSDataset.
    Subclasses need to define ``DATA_STRUCTURE``; the format is as follows, e.g.:

        {
            '${data source name}': {
                'train': {
                    '${image root arg}': 'images',  # directory name of images relative to the root path
                    '${label root arg}': 'labels',  # directory name of labels relative to the root path
                    ...
                },
                'validation': {
                    '${image root arg}': 'images',
                    '${label root arg}': 'labels',
                    ...
                }
            }
        }

    Args:
        split_config (dict): Dataset root path from MSDataset, e.g.
            {"train": "local cache path"} or {"evaluation": "local cache path"}.
        preprocessor (Preprocessor): An optional preprocessor instance; please make sure the preprocessor
            fits the model if supplied. Not supported yet.
        mode: Training or Evaluation.
    """
    DATA_STRUCTURE = None

    def __init__(self,
                 split_config=None,
                 preprocessor=None,
                 mode=None,
                 args=(),
                 kwargs={}) -> None:
        self.split_config = split_config
        self.preprocessor = preprocessor
        self.mode = mode
        if self.split_config is not None:
            self._update_data_source(kwargs['data_source'])

    def _update_data_source(self, data_source):
        data_root = next(iter(self.split_config.values()))
        split = next(iter(self.split_config.keys()))

        # TODO: msdataset should support these keys to be configured in the dataset's json file and passed in
        if data_source['type'] not in list(self.DATA_STRUCTURE.keys()):
            raise ValueError(
                'Only support %s now, but got %s.' %
                (list(self.DATA_STRUCTURE.keys()), data_source['type']))

        # join the msdataset root path and the default relative name
        update_args = self.DATA_STRUCTURE[data_source['type']][split]
        for k, v in update_args.items():
            data_source.update({k: osp.join(data_root, v)})
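
For reference, a minimal standalone sketch of what _update_data_source does, using the SegSourceRaw entry defined in the segmentation dataset below (the cache path is illustrative):

import os.path as osp

# DATA_STRUCTURE entry for the 'train' split, as defined by EasyCVSegBaseDataset
structure = {'img_root': 'images', 'label_root': 'annotations', 'split': 'train.txt'}

split_config = {'train': '/cache/small_coco_stuff164k'}  # from MsDataset
data_source = {'type': 'SegSourceRaw'}

data_root = next(iter(split_config.values()))
for key, rel_path in structure.items():
    data_source[key] = osp.join(data_root, rel_path)

# data_source is now:
# {'type': 'SegSourceRaw',
#  'img_root': '/cache/small_coco_stuff164k/images',
#  'label_root': '/cache/small_coco_stuff164k/annotations',
#  'split': '/cache/small_coco_stuff164k/train.txt'}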

modelscope/msdatasets/cv/image_semantic_segmentation/segmentation_dataset.py (+45, -1)

@@ -1,21 +1,65 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp

from easycv.datasets.segmentation import SegDataset as _SegDataset

from modelscope.metainfo import Datasets
from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset
from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS
from modelscope.utils.constant import Tasks


class EasyCVSegBaseDataset(EasyCVBaseDataset):
    DATA_STRUCTURE = {
        # data source name
        'SegSourceRaw': {
            'train': {
                'img_root': 'images',  # directory name of images relative to the root path
                'label_root': 'annotations',  # directory name of annotations relative to the root path
                'split': 'train.txt'  # split file name relative to the root path
            },
            'validation': {
                'img_root': 'images',
                'label_root': 'annotations',
                'split': 'val.txt'
            }
        }
    }


@TASK_DATASETS.register_module(
    group_key=Tasks.image_segmentation, module_name=Datasets.SegDataset)
class SegDataset(_SegDataset):
class SegDataset(EasyCVSegBaseDataset, _SegDataset):
"""EasyCV dataset for Sementic segmentation.
For more details, please refer to :
https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/segmentation/raw.py .

Args:
split_config (dict): Dataset root path from MSDataset, e.g.
{"train":"local cache path"} or {"evaluation":"local cache path"}.
preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
the model if supplied. Not support yet.
mode: Training or Evaluation.
data_source: Data source config to parse input data.
pipeline: Sequence of transform object or config dict to be composed.
ignore_index (int): Label index to be ignored.
profiling: If set True, will print transform time.
"""

    def __init__(self,
                 split_config=None,
                 preprocessor=None,
                 mode=None,
                 *args,
                 **kwargs) -> None:
        EasyCVSegBaseDataset.__init__(
            self,
            split_config=split_config,
            preprocessor=preprocessor,
            mode=mode,
            args=args,
            kwargs=kwargs)
        _SegDataset.__init__(self, *args, **kwargs)
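
A hedged sketch of how the dataset registered above might be built through the task-dataset registry; the config values and pipeline are illustrative, not taken from this change:

from modelscope.msdatasets.task_datasets.builder import build_task_dataset
from modelscope.utils.constant import Tasks

# 'SegDataset' resolves to the class registered above under
# Tasks.image_segmentation; the remaining keys are forwarded to __init__.
cfg = dict(
    type='SegDataset',
    split_config={'train': '/cache/seg_data'},
    mode='train',
    data_source=dict(type='SegSourceRaw', classes=['person']),
    pipeline=[dict(type='MMResize', img_scale=(512, 512))])
dataset = build_task_dataset(cfg, Tasks.image_segmentation)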

modelscope/msdatasets/cv/object_detection/detection_dataset.py (+62, -2)

@@ -1,31 +1,71 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp

from easycv.datasets.detection import DetDataset as _DetDataset
from easycv.datasets.detection import \
    DetImagesMixDataset as _DetImagesMixDataset

from modelscope.metainfo import Datasets
from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset
from modelscope.msdatasets.task_datasets import TASK_DATASETS
from modelscope.utils.constant import Tasks


class EasyCVDetBaseDataset(EasyCVBaseDataset):
    DATA_STRUCTURE = {
        'DetSourceCoco': {
            'train': {
                'ann_file': 'train.json',  # annotation file name relative to the root path
                'img_prefix': 'images',  # directory name of images relative to the root path
            },
            'validation': {
                'ann_file': 'val.json',
                'img_prefix': 'images',
            }
        }
    }


@TASK_DATASETS.register_module(
    group_key=Tasks.image_object_detection, module_name=Datasets.DetDataset)
class DetDataset(_DetDataset):
class DetDataset(EasyCVDetBaseDataset, _DetDataset):
"""EasyCV dataset for object detection.
For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/raw.py .

Args:
split_config (dict): Dataset root path from MSDataset, e.g.
{"train":"local cache path"} or {"evaluation":"local cache path"}.
preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
the model if supplied. Not support yet.
mode: Training or Evaluation.
data_source: Data source config to parse input data.
pipeline: Transform config list
profiling: If set True, will print pipeline time
classes: A list of class names, used in evaluation for result and groundtruth visualization
"""

    def __init__(self,
                 split_config=None,
                 preprocessor=None,
                 mode=None,
                 *args,
                 **kwargs) -> None:
        EasyCVDetBaseDataset.__init__(
            self,
            split_config=split_config,
            preprocessor=preprocessor,
            mode=mode,
            args=args,
            kwargs=kwargs)
        _DetDataset.__init__(self, *args, **kwargs)


@TASK_DATASETS.register_module(
    group_key=Tasks.image_object_detection,
    module_name=Datasets.DetImagesMixDataset)
class DetImagesMixDataset(_DetImagesMixDataset):
class DetImagesMixDataset(EasyCVDetBaseDataset, _DetImagesMixDataset):
"""EasyCV dataset for object detection, a wrapper of multiple images mixed dataset.
Suitable for training on multiple images mixed data augmentation like
mosaic and mixup. For the augmentation pipeline of mixed image data,
@@ -38,6 +78,11 @@ class DetImagesMixDataset(_DetImagesMixDataset):
    For more details, please refer to https://github.com/alibaba/EasyCV/blob/master/easycv/datasets/detection/mix.py .

    Args:
        split_config (dict): Dataset root path from MSDataset, e.g.
            {"train": "local cache path"} or {"evaluation": "local cache path"}.
        preprocessor (Preprocessor): An optional preprocessor instance; please make sure the preprocessor
            fits the model if supplied. Not supported yet.
        mode: Training or Evaluation.
        data_source (:obj:`DetSourceCoco`): Data source config to parse input data.
        pipeline (Sequence[dict]): Sequence of transform objects or
            config dicts to be composed.
@@ -47,3 +92,18 @@ class DetImagesMixDataset(_DetImagesMixDataset):
            be skipped by the pipeline. Defaults to None.
        label_padding: pad output labels to shape [N, 120, 5].
    """

    def __init__(self,
                 split_config=None,
                 preprocessor=None,
                 mode=None,
                 *args,
                 **kwargs) -> None:
        EasyCVDetBaseDataset.__init__(
            self,
            split_config=split_config,
            preprocessor=preprocessor,
            mode=mode,
            args=args,
            kwargs=kwargs)
        _DetImagesMixDataset.__init__(self, *args, **kwargs)
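
With split_config supplied, the COCO-style paths no longer need to be spelled out by the caller. A hedged usage sketch (paths, classes, and pipelines are illustrative):

from modelscope.msdatasets.cv.object_detection.detection_dataset import DetDataset

train_ds = DetDataset(
    split_config={'train': '/cache/small_coco'},
    mode='train',
    data_source=dict(type='DetSourceCoco', classes=['person'], pipeline=[]),
    pipeline=[dict(type='MMToTensor')])
# Before _DetDataset.__init__ runs, EasyCVDetBaseDataset fills in:
#   data_source['ann_file']   = '/cache/small_coco/train.json'
#   data_source['img_prefix'] = '/cache/small_coco/images'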

modelscope/trainers/easycv/trainer.py (+0, -8)

@@ -27,7 +27,6 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):
"""Epoch based Trainer for EasyCV.

Args:
task: Task name.
cfg_file(str): The config file of EasyCV.
model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir
or a model id. If model is None, build_model method will be called.
@@ -51,7 +50,6 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):

    def __init__(
            self,
            task: str,
            cfg_file: Optional[str] = None,
            model: Optional[Union[TorchModel, nn.Module, str]] = None,
            arg_parse_fn: Optional[Callable] = None,
@@ -64,7 +62,6 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):
            model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
            **kwargs):

        self.task = task
        register_util.register_parallel()
        register_util.register_part_mmcv_hooks_to_ms()

@@ -168,8 +165,3 @@ class EasyCVEpochBasedTrainer(EpochBasedTrainer):
                device_ids=[torch.cuda.current_device()])

        return build_parallel(dp_cfg)

    def rebuild_config(self, cfg: Config):
        cfg.task = self.task

        return cfg
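
With rebuild_config removed, the task now comes from the converted config file rather than the constructor. A hedged before/after sketch (the file name is illustrative):

from modelscope.trainers.easycv.trainer import EasyCVEpochBasedTrainer

# before: EasyCVEpochBasedTrainer(task=Tasks.image_object_detection,
#                                 cfg_file='ms_yolox.json')
# after: the 'task' key inside ms_yolox.json supplies what rebuild_config
# previously injected, so only the config file is passed.
trainer = EasyCVEpochBasedTrainer(cfg_file='ms_yolox.json')
trainer.train()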

modelscope/trainers/easycv/utils/register_util.py (+46, -8)

@@ -4,16 +4,49 @@ import logging

from modelscope.trainers.hooks import HOOKS
from modelscope.trainers.parallel.builder import PARALLEL
from modelscope.utils.registry import default_group


class _RegisterManager:

    def __init__(self):
        self.registries = {}

    def add(self, module, name, group_key=default_group):
        if module.name not in self.registries:
            self.registries[module.name] = {}
        if group_key not in self.registries[module.name]:
            self.registries[module.name][group_key] = []

        self.registries[module.name][group_key].append(name)

    def exists(self, module, name, group_key=default_group):
        if self.registries.get(module.name, None) is None:
            return False
        if self.registries[module.name].get(group_key, None) is None:
            return False
        if name in self.registries[module.name][group_key]:
            return True

        return False


_dynamic_register = _RegisterManager()


def register_parallel():
    from mmcv.parallel import MMDistributedDataParallel, MMDataParallel

    PARALLEL.register_module(
        module_name='MMDistributedDataParallel',
        module_cls=MMDistributedDataParallel)
    PARALLEL.register_module(
        module_name='MMDataParallel', module_cls=MMDataParallel)
    mmddp = 'MMDistributedDataParallel'
    mmdp = 'MMDataParallel'

    if not _dynamic_register.exists(PARALLEL, mmddp):
        _dynamic_register.add(PARALLEL, mmddp)
        PARALLEL.register_module(
            module_name=mmddp, module_cls=MMDistributedDataParallel)
    if not _dynamic_register.exists(PARALLEL, mmdp):
        _dynamic_register.add(PARALLEL, mmdp)
        PARALLEL.register_module(module_name=mmdp, module_cls=MMDataParallel)
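
A standalone sketch of the dedup guard: _RegisterManager records what has been registered, so calling register_parallel() twice becomes a no-op instead of tripping the registry's duplicate-name error. The stub registry below is hypothetical, for illustration only:

class _StubRegistry:
    """Hypothetical stand-in for a modelscope Registry (only .name is needed)."""
    name = 'parallel'

manager = _RegisterManager()
registry = _StubRegistry()

assert not manager.exists(registry, 'MMDataParallel')
manager.add(registry, 'MMDataParallel')
# A repeated registration attempt is detected and skipped:
assert manager.exists(registry, 'MMDataParallel')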


def register_hook_to_ms(hook_name, logger=None):
@@ -24,6 +57,10 @@ def register_hook_to_ms(hook_name, logger=None):
        raise ValueError(
            f'Hook "{hook_name}" not found in EasyCV hook registries!')

    if _dynamic_register.exists(HOOKS, hook_name):
        return
    _dynamic_register.add(HOOKS, hook_name)

    obj = _EV_HOOKS._module_dict[hook_name]
    HOOKS.register_module(module_name=hook_name, module_cls=obj)

@@ -41,18 +78,19 @@ def register_part_mmcv_hooks_to_ms():
    from mmcv.runner.hooks import lr_updater
    from mmcv.runner.hooks import HOOKS as _MMCV_HOOKS
    from easycv.hooks import StepFixCosineAnnealingLrUpdaterHook, YOLOXLrUpdaterHook
    from easycv.hooks.logger import PreLoggerHook

    mmcv_hooks_in_easycv = [('StepFixCosineAnnealingLrUpdaterHook',
                             StepFixCosineAnnealingLrUpdaterHook),
                            ('YOLOXLrUpdaterHook', YOLOXLrUpdaterHook),
                            ('PreLoggerHook', PreLoggerHook)]
                            ('YOLOXLrUpdaterHook', YOLOXLrUpdaterHook)]

    members = inspect.getmembers(lr_updater)
    members.extend(mmcv_hooks_in_easycv)

    for name, obj in members:
        if name in _MMCV_HOOKS._module_dict:
            if _dynamic_register.exists(HOOKS, name):
                continue
            _dynamic_register.add(HOOKS, name)
            HOOKS.register_module(
                module_name=name,
                module_cls=obj,


modelscope/trainers/trainer.py (+28, -14)

@@ -164,10 +164,14 @@ class EpochBasedTrainer(BaseTrainer):
        self.train_dataset = self.to_task_dataset(
            train_dataset,
            mode=ModeKeys.TRAIN,
            task_data_config=self.cfg.dataset.get('train', None) if hasattr(
                self.cfg, 'dataset') else None,
            preprocessor=self.train_preprocessor)
        self.eval_dataset = self.to_task_dataset(
            eval_dataset,
            mode=ModeKeys.EVAL,
            task_data_config=self.cfg.dataset.get('val', None) if hasattr(
                self.cfg, 'dataset') else None,
            preprocessor=self.eval_preprocessor)

        self.train_data_collator, self.eval_default_collate = None, None
@@ -298,6 +302,7 @@ class EpochBasedTrainer(BaseTrainer):
    def to_task_dataset(self,
                        datasets: Union[Dataset, List[Dataset]],
                        mode: str,
                        task_data_config: Config = None,
                        preprocessor: Optional[Preprocessor] = None):
        """Build the task specific dataset processor for this trainer.

@@ -310,20 +315,29 @@ class EpochBasedTrainer(BaseTrainer):
        if isinstance(datasets, TorchTaskDataset):
            return datasets
        elif isinstance(datasets, MsDataset):
            cfg = ConfigDict(type=self.cfg.model.type, mode=mode) if hasattr(self.cfg, ConfigFields.model) \
                else ConfigDict(type=None, mode=mode)
            if task_data_config is None:
                # adapt to some special models
                task_data_config = ConfigDict(
                    type=self.cfg.model.type) if hasattr(
                        self.cfg, ConfigFields.model) else ConfigDict(
                            type=None)
            task_data_config.update(dict(mode=mode))
            return datasets.to_torch_dataset(
                task_data_config=cfg,
                task_name=self.cfg.task
                if hasattr(self.cfg, ConfigFields.task) else None,
                task_data_config=task_data_config,
                task_name=self.cfg.task,
                preprocessors=preprocessor)
        elif isinstance(datasets, List) and isinstance(
                datasets[0], MsDataset):
            cfg = ConfigDict(type=self.cfg.model.type, mode=mode) if hasattr(self.cfg, ConfigFields.model) \
                else ConfigDict(type=None, mode=mode)
            if task_data_config is None:
                # adapt to some special models
                task_data_config = ConfigDict(
                    type=self.cfg.model.type) if hasattr(
                        self.cfg, ConfigFields.model) else ConfigDict(
                            type=None)
            task_data_config.update(dict(mode=mode))
            datasets = [
                d.to_torch_dataset(
                    task_data_config=cfg,
                    task_data_config=task_data_config,
                    task_name=self.cfg.task,
                    preprocessors=preprocessor) for d in datasets
            ]
@@ -331,12 +345,12 @@ class EpochBasedTrainer(BaseTrainer):
                type=self.cfg.task, mode=mode, datasets=datasets)
            return build_task_dataset(cfg, self.cfg.task)
        else:
            cfg = ConfigDict(
                type=self.cfg.model.type,
                mode=mode,
                datasets=datasets,
                preprocessor=preprocessor)
            return build_task_dataset(cfg, self.cfg.task)
            task_data_config.update(
                dict(
                    mode=mode,
                    datasets=datasets,
                    preprocessor=preprocessor))
            return build_task_dataset(task_data_config, self.cfg.task)
        except Exception:
            if isinstance(datasets, (List, Tuple)) or preprocessor is not None:
                return TorchTaskDataset(
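
A hedged sketch of the new precedence: a per-split entry under 'dataset' in the trainer config is passed through as task_data_config, and the model type is used only as a fallback when no such entry exists (keys and values are illustrative):

# Trainer config fragment (illustrative): to_task_dataset forwards
# cfg.dataset.train / cfg.dataset.val as task_data_config, with the
# current mode injected on top.
cfg = {
    'task': 'image_segmentation',
    'model': {'type': 'segformer'},       # fallback only
    'dataset': {
        'train': {'type': 'SegDataset'},  # used as task_data_config
        'val': {'type': 'SegDataset'},
    },
}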


modelscope/utils/test_utils.py (+20, -4)

@@ -14,10 +14,10 @@ import unittest
from typing import OrderedDict

import requests
from datasets import Dataset
import torch
from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE
from torch.utils.data import Dataset

from modelscope.msdatasets import MsDataset
from .torch_utils import _find_free_port

TEST_LEVEL = 2
@@ -49,9 +49,25 @@ def set_test_level(level: int):
    TEST_LEVEL = level


class DummyTorchDataset(Dataset):

    def __init__(self, feat, label, num) -> None:
        self.feat = feat
        self.label = label
        self.num = num

    def __getitem__(self, index):
        return {
            'feat': torch.Tensor(self.feat),
            'labels': torch.Tensor(self.label)
        }

    def __len__(self):
        return self.num


def create_dummy_test_dataset(feat, label, num):
    return MsDataset.from_hf_dataset(
        Dataset.from_dict(dict(feat=[feat] * num, labels=[label] * num)))
    return DummyTorchDataset(feat, label, num)


def download_and_untar(fpath, furl, dst) -> str:
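
A usage sketch of the reworked helper: it now returns a plain torch Dataset instead of an MsDataset, so tests can hand it straight to a DataLoader (shapes are illustrative):

from torch.utils.data import DataLoader

dataset = create_dummy_test_dataset(feat=[1.0] * 5, label=[0.0], num=20)
loader = DataLoader(dataset, batch_size=4)
batch = next(iter(loader))
assert batch['feat'].shape == (4, 5)
assert batch['labels'].shape == (4, 1)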


tests/trainers/easycv/test_easycv_trainer.py (+40, -47)

@@ -6,10 +6,10 @@ import tempfile
import unittest

import json
import requests
import torch

from modelscope.metainfo import Models, Pipelines, Trainers
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.config import Config
from modelscope.utils.constant import LogKeys, ModeKeys, Tasks
@@ -18,55 +18,19 @@ from modelscope.utils.test_utils import DistributedTestCase, test_level
from modelscope.utils.torch_utils import is_master


def _download_data(url, save_dir):
    r = requests.get(url, verify=True)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    zip_name = os.path.split(url)[-1]
    save_path = os.path.join(save_dir, zip_name)
    with open(save_path, 'wb') as f:
        f.write(r.content)

    unpack_dir = os.path.join(save_dir, os.path.splitext(zip_name)[0])
    shutil.unpack_archive(save_path, unpack_dir)


def train_func(work_dir, dist=False, log_config=3, imgs_per_gpu=4):
def train_func(work_dir, dist=False, log_interval=3, imgs_per_gpu=4):
    import easycv
    config_path = os.path.join(
        os.path.dirname(easycv.__file__),
        'configs/detection/yolox/yolox_s_8xb16_300e_coco.py')

    data_dir = os.path.join(work_dir, 'small_coco_test')
    url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/datasets/small_coco.zip'
    if is_master():
        _download_data(url, data_dir)

    import time
    time.sleep(1)
    cfg = Config.from_file(config_path)

    cfg.work_dir = work_dir
    cfg.total_epochs = 2
    cfg.checkpoint_config.interval = 1
    cfg.eval_config.interval = 1
    cfg.log_config = dict(
        interval=log_config,
        hooks=[
    cfg.log_config.update(
        dict(hooks=[
            dict(type='TextLoggerHook'),
            dict(type='TensorboardLoggerHook')
        ])
    cfg.data.train.data_source.ann_file = os.path.join(
        data_dir, 'small_coco/small_coco/instances_train2017_20.json')
    cfg.data.train.data_source.img_prefix = os.path.join(
        data_dir, 'small_coco/small_coco/train2017')
    cfg.data.val.data_source.ann_file = os.path.join(
        data_dir, 'small_coco/small_coco/instances_val2017_20.json')
    cfg.data.val.data_source.img_prefix = os.path.join(
        data_dir, 'small_coco/small_coco/val2017')
    cfg.data.imgs_per_gpu = imgs_per_gpu
    cfg.data.workers_per_gpu = 2
    cfg.data.val.imgs_per_gpu = 2
        ]))  # TensorboardLoggerHookV2 is not supported

    ms_cfg_file = os.path.join(work_dir, 'ms_yolox_s_8xb16_300e_coco.json')
    from easycv.utils.ms_utils import to_ms_config
@@ -81,9 +45,41 @@ def train_func(work_dir, dist=False, log_config=3, imgs_per_gpu=4):
        save_path=ms_cfg_file)

    trainer_name = Trainers.easycv
    train_dataset = MsDataset.load(
        dataset_name='small_coco_for_test', namespace='EasyCV', split='train')
    eval_dataset = MsDataset.load(
        dataset_name='small_coco_for_test',
        namespace='EasyCV',
        split='validation')

    cfg_options = {
        'train.max_epochs':
        2,
        'train.dataloader.batch_size_per_gpu':
        imgs_per_gpu,
        'evaluation.dataloader.batch_size_per_gpu':
        2,
        'train.hooks': [
            {
                'type': 'CheckpointHook',
                'interval': 1
            },
            {
                'type': 'EvaluationHook',
                'interval': 1
            },
            {
                'type': 'TextLoggerHook',
                'interval': log_interval
            },
        ]
    }
    kwargs = dict(
        task=Tasks.image_object_detection,
        cfg_file=ms_cfg_file,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        work_dir=work_dir,
        cfg_options=cfg_options,
        launcher='pytorch' if dist else None)

    trainer = build_trainer(trainer_name, kwargs)
@@ -105,11 +101,8 @@ class EasyCVTrainerTestSingleGpu(unittest.TestCase):
        super().tearDown()
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    @unittest.skipIf(
        True, 'The test cases all run in the master process, which causes '
        'registry conflicts; they should run in a subprocess.')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_single_gpu(self):
        # TODO: run in subprocess
        train_func(self.tmp_dir)

        results_files = os.listdir(self.tmp_dir)
@@ -185,7 +178,7 @@ class EasyCVTrainerTestMultiGpus(DistributedTestCase):
            num_gpus=2,
            work_dir=self.tmp_dir,
            dist=True,
            log_config=2,
            log_interval=2,
            imgs_per_gpu=5)

        results_files = os.listdir(self.tmp_dir)


tests/trainers/easycv/test_segformer.py (+14, -42)

@@ -5,28 +5,14 @@ import shutil
import tempfile
import unittest

import requests
import torch

from modelscope.metainfo import Trainers
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.constant import LogKeys, Tasks
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
from modelscope.utils.torch_utils import is_master


def _download_data(url, save_dir):
    r = requests.get(url, verify=True)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    zip_name = os.path.split(url)[-1]
    save_path = os.path.join(save_dir, zip_name)
    with open(save_path, 'wb') as f:
        f.write(r.content)

    unpack_dir = os.path.join(save_dir, os.path.splitext(zip_name)[0])
    shutil.unpack_archive(save_path, unpack_dir)


@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest')
@@ -45,46 +31,32 @@ class EasyCVTrainerTestSegformer(unittest.TestCase):
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    def _train(self):
        from modelscope.trainers.easycv.trainer import EasyCVEpochBasedTrainer

        url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/datasets/small_coco_stuff164k.zip'
        data_dir = os.path.join(self.tmp_dir, 'data')
        if is_master():
            _download_data(url, data_dir)

        # adapt to ditributed mode
        # adapt to distributed mode
        from easycv.utils.test_util import pseudo_dist_init
        pseudo_dist_init()

        root_path = os.path.join(data_dir, 'small_coco_stuff164k')
        cfg_options = {
            'train.max_epochs':
            2,
            'dataset.train.data_source.img_root':
            os.path.join(root_path, 'train2017'),
            'dataset.train.data_source.label_root':
            os.path.join(root_path, 'annotations/train2017'),
            'dataset.train.data_source.split':
            os.path.join(root_path, 'train.txt'),
            'dataset.val.data_source.img_root':
            os.path.join(root_path, 'val2017'),
            'dataset.val.data_source.label_root':
            os.path.join(root_path, 'annotations/val2017'),
            'dataset.val.data_source.split':
            os.path.join(root_path, 'val.txt'),
        }
        cfg_options = {'train.max_epochs': 2}

        trainer_name = Trainers.easycv
        train_dataset = MsDataset.load(
            dataset_name='small_coco_stuff164k',
            namespace='EasyCV',
            split='train')
        eval_dataset = MsDataset.load(
            dataset_name='small_coco_stuff164k',
            namespace='EasyCV',
            split='validation')
        kwargs = dict(
            task=Tasks.image_segmentation,
            model='EasyCV/EasyCV-Segformer-b0',
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            work_dir=self.tmp_dir,
            cfg_options=cfg_options)

        trainer = build_trainer(trainer_name, kwargs)
        trainer.train()

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_single_gpu_segformer(self):
        self._train()



tests/trainers/test_trainer.py (+5, -5)

@@ -64,7 +64,7 @@ class TrainerTest(unittest.TestCase):
        super().tearDown()
        shutil.rmtree(self.tmp_dir)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_train_0(self):
        json_cfg = {
            'task': Tasks.image_classification,
@@ -139,7 +139,7 @@ class TrainerTest(unittest.TestCase):
        self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
        self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_train_1(self):
        json_cfg = {
            'task': Tasks.image_classification,
@@ -200,7 +200,7 @@ class TrainerTest(unittest.TestCase):
        self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
        self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_train_with_default_config(self):
        json_cfg = {
            'task': Tasks.image_classification,
@@ -319,7 +319,7 @@ class TrainerTest(unittest.TestCase):
        for i in [2, 5, 8]:
            self.assertIn(MetricKeys.ACCURACY, lines[i])

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_train_with_iters_per_epoch(self):
        json_cfg = {
            'task': Tasks.image_classification,
@@ -441,7 +441,7 @@ class TrainerTest(unittest.TestCase):

class DummyTrainerTest(unittest.TestCase):

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_dummy(self):
        default_args = dict(cfg_file='configs/examples/train.json')
        trainer = build_trainer('dummy', default_args)


tests/trainers/test_trainer_gpu.py (+3, -2)

@@ -17,7 +17,7 @@ from modelscope.metainfo import Metrics, Trainers
from modelscope.metrics.builder import MetricKeys
from modelscope.models.base import Model
from modelscope.trainers import EpochBasedTrainer, build_trainer
from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile
from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile, Tasks
from modelscope.utils.test_utils import (DistributedTestCase,
                                         create_dummy_test_dataset, test_level)

@@ -55,6 +55,7 @@ class DummyModel(nn.Module, Model):

def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs):
    json_cfg = {
        'task': Tasks.image_classification,
        'train': {
            'work_dir': work_dir,
            'dataloader': {
@@ -119,7 +120,7 @@ class TrainerTestSingleGpu(unittest.TestCase):
        super().tearDown()
        shutil.rmtree(self.tmp_dir)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_single_gpu(self):
        train_func(self.tmp_dir)


