
Merge remote-tracking branch 'origin/master' into ofa/finetune

# Conflicts:
#	modelscope/models/multi_modal/ofa_for_all_tasks.py
#	modelscope/msdatasets/ms_dataset.py
#	modelscope/trainers/utils/inference.py
Branch: master
Author: 行嗔, 3 years ago
Commit: 537827e5a1
64 changed files with 363 additions and 185 deletions
1. data/test/audios/1ch_nihaomiya.wav (+3, -0)
2. modelscope/metrics/image_instance_segmentation_metric.py (+2, -0)
3. modelscope/metrics/movie_scene_segmentation_metric.py (+2, -0)
4. modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py (+2, -2)
5. modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py (+2, -0)
6. modelscope/models/cv/image_instance_segmentation/datasets/__init__.py (+1, -0)
7. modelscope/models/cv/image_instance_segmentation/datasets/transforms.py (+5, -4)
8. modelscope/models/cv/image_instance_segmentation/model.py (+1, -0)
9. modelscope/models/cv/image_instance_segmentation/postprocess_utils.py (+2, -0)
10. modelscope/models/cv/movie_scene_segmentation/model.py (+3, -0)
11. modelscope/models/cv/movie_scene_segmentation/utils/__init__.py (+1, -0)
12. modelscope/models/cv/movie_scene_segmentation/utils/head.py (+2, -6)
13. modelscope/models/cv/movie_scene_segmentation/utils/save_op.py (+2, -4)
14. modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py (+1, -3)
15. modelscope/models/cv/object_detection/mmdet_model.py (+1, -0)
16. modelscope/models/cv/object_detection/mmdet_ms/__init__.py (+2, -0)
17. modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py (+2, -0)
18. modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py (+2, -0)
19. modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py (+2, -1)
20. modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py (+2, -1)
21. modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py (+2, -0)
22. modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py (+2, -1)
23. modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py (+2, -0)
24. modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py (+2, -0)
25. modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py (+2, -1)
26. modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py (+2, -0)
27. modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py (+2, -1)
28. modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py (+2, -0)
29. modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py (+2, -1)
30. modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py (+2, -2)
31. modelscope/models/cv/salient_detection/models/__init__.py (+2, -0)
32. modelscope/models/cv/salient_detection/models/u2net.py (+2, -1)
33. modelscope/models/cv/salient_detection/salient_model.py (+1, -0)
34. modelscope/models/multi_modal/ofa_for_all_tasks.py (+1, -3)
35. modelscope/msdatasets/ms_dataset.py (+51, -52)
36. modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py (+2, -0)
37. modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py (+1, -0)
38. modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py (+2, -3)
39. modelscope/pipelines/audio/kws_farfield_pipeline.py (+22, -19)
40. modelscope/pipelines/cv/action_detection_pipeline.py (+2, -0)
41. modelscope/pipelines/cv/easycv_pipelines/base.py (+7, -4)
42. modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py (+7, -5)
43. modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py (+3, -2)
44. modelscope/pipelines/cv/image_instance_segmentation_pipeline.py (+1, -0)
45. modelscope/pipelines/cv/image_style_transfer_pipeline.py (+7, -1)
46. modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py (+1, -0)
47. modelscope/preprocessors/movie_scene_segmentation/__init__.py (+1, -0)
48. modelscope/preprocessors/movie_scene_segmentation/transforms.py (+2, -6)
49. modelscope/preprocessors/multi_modal.py (+2, -1)
50. modelscope/trainers/cv/image_instance_segmentation_trainer.py (+1, -0)
51. modelscope/trainers/cv/movie_scene_segmentation_trainer.py (+1, -0)
52. modelscope/trainers/trainer.py (+20, -10)
53. modelscope/trainers/utils/inference.py (+6, -6)
54. modelscope/utils/demo_utils.py (+53, -9)
55. modelscope/utils/torch_utils.py (+4, -0)
56. modelscope/version.py (+1, -1)
57. requirements/cv.txt (+1, -1)
58. tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py (+40, -19)
59. tests/pipelines/test_face_2d_keypoints.py (+1, -1)
60. tests/pipelines/test_key_word_spotting_farfield.py (+11, -0)
61. tests/pipelines/test_mplug_tasks.py (+8, -8)
62. tests/pipelines/test_ofa_tasks.py (+6, -4)
63. tests/run_config.yaml (+1, -0)
64. tests/trainers/test_trainer_gpu.py (+32, -2)

data/test/audios/1ch_nihaomiya.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f7f5a0a4efca1e83463cb44460c66b56fb7cd673eb6da37924637bc05ef758d
size 1440044

modelscope/metrics/image_instance_segmentation_metric.py (+2, -0)

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py
import os.path as osp
import tempfile
from collections import OrderedDict


modelscope/metrics/movie_scene_segmentation_metric.py (+2, -0)

@@ -1,3 +1,5 @@
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
from typing import Dict

import numpy as np


modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py (+2, -2)

@@ -1,5 +1,5 @@
# Modified from: https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
# The implementation is adopted from Swin Transformer, made publicly available under the MIT License at
# https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
import numpy as np
import torch
import torch.nn as nn


modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py (+2, -0)

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py
import os
from collections import OrderedDict



modelscope/models/cv/image_instance_segmentation/datasets/__init__.py (+1, -0)

@@ -1 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .transforms import build_preprocess_transform

modelscope/models/cv/image_instance_segmentation/datasets/transforms.py (+5, -4)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp

import numpy as np
@@ -51,9 +52,9 @@ class LoadImageFromFile:
"""Load an image from file.

Required keys are "img_prefix" and "img_info" (a dict that must contain the
key "filename"). Added or updated keys are "filename", "img", "img_shape",
"ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`),
"scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1).
key "filename", "ann_file", and "classes"). Added or updated keys are
"filename", "ori_filename", "img", "img_shape", "ori_shape" (same as `img_shape`),
"img_fields", "ann_file" (path to annotation file) and "classes".

Args:
to_float32 (bool): Whether to convert the loaded image to a float32
@@ -73,7 +74,7 @@ class LoadImageFromFile:
"""Call functions to load image and get image meta information.

Args:
results (dict): Result dict from :obj:`ImageInstanceSegmentationDataset`.
results (dict): Result dict from :obj:`ImageInstanceSegmentationCocoDataset`.

Returns:
dict: The dict contains loaded image and meta information.


modelscope/models/cv/image_instance_segmentation/model.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict



modelscope/models/cv/image_instance_segmentation/postprocess_utils.py (+2, -0)

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/visualization/image.py
import itertools

import cv2


modelscope/models/cv/movie_scene_segmentation/model.py (+3, -0)

@@ -1,3 +1,6 @@
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl

import os
import os.path as osp
from typing import Any, Dict


modelscope/models/cv/movie_scene_segmentation/utils/__init__.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .save_op import get_pred_boundary, pred2scene, scene2video
from .shot_encoder import resnet50
from .trn import TransformerCRN

modelscope/models/cv/movie_scene_segmentation/utils/head.py (+2, -6)

@@ -1,9 +1,5 @@
# ------------------------------------------------------------------------------------
# BaSSL
# Copyright (c) 2021 KakaoBrain. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# Github: https://github.com/kakaobrain/bassl
# ------------------------------------------------------------------------------------
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl

import torch.nn as nn
import torch.nn.functional as F


modelscope/models/cv/movie_scene_segmentation/utils/save_op.py (+2, -4)

@@ -1,7 +1,5 @@
# ----------------------------------------------------------------------------------
# The codes below partially refer to the SceneSeg LGSS.
# Github: https://github.com/AnyiRao/SceneSeg
# ----------------------------------------------------------------------------------
# The implementation here is modified based on SceneSeg,
# originally Apache 2.0 License and publicly avaialbe at https://github.com/AnyiRao/SceneSeg
import os
import os.path as osp
import subprocess


modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py (+1, -3)

@@ -1,6 +1,4 @@
"""
Modified from original implementation in torchvision
"""
# The implementation is adopted from torchvision

from typing import Any, Callable, List, Optional, Type, Union



modelscope/models/cv/object_detection/mmdet_model.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp

import numpy as np


modelscope/models/cv/object_detection/mmdet_ms/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .backbones import ViT
from .dense_heads import AnchorNHead, RPNNHead
from .necks import FPNF


modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .vit import ViT

__all__ = ['ViT']

modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .anchor_head import AnchorNHead
from .rpn_head import RPNNHead



modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py (+2, -1)

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from mmdet.models.builder import HEADS
from mmdet.models.dense_heads import AnchorHead



modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py (+2, -1)

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
import copy

import torch


modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .fpn import FPNF

__all__ = ['FPNF']

modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py (+2, -1)

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
import torch.nn as nn
import torch.nn.functional as F
from mmcv.runner import BaseModule, auto_fp16


modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .bbox_heads import (ConvFCBBoxNHead, Shared2FCBBoxNHead,
Shared4Conv1FCBBoxNHead)
from .mask_heads import FCNMaskNHead


modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .convfc_bbox_head import (ConvFCBBoxNHead, Shared2FCBBoxNHead,
Shared4Conv1FCBBoxNHead)



modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py (+2, -1)

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
import torch.nn as nn
from mmdet.models.builder import HEADS
from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead


modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .fcn_mask_head import FCNMaskNHead

__all__ = ['FCNMaskNHead']

modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py (+2, -1)

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from warnings import warn

import numpy as np


modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py (+2, -0)

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from .checkpoint import load_checkpoint
from .convModule_norm import ConvModule_Norm



modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py (+2, -1)

@@ -1,5 +1,6 @@
# Copyright (c) Open-MMLab. All rights reserved.
# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
import io
import os
import os.path as osp


modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py (+2, -2)

@@ -1,5 +1,5 @@
# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet
from mmcv.cnn import ConvModule




modelscope/models/cv/salient_detection/models/__init__.py (+2, -0)

@@ -1 +1,3 @@
# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License
# source code avaiable via https://github.com/xuebinqin/U-2-Net
from .u2net import U2NET

modelscope/models/cv/salient_detection/models/u2net.py (+2, -1)

@@ -1,4 +1,5 @@
# Implementation in this file is modifed from source code avaiable via https://github.com/xuebinqin/U-2-Net
# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License
# source code avaiable via https://github.com/xuebinqin/U-2-Net
import torch
import torch.nn as nn
import torch.nn.functional as F


modelscope/models/cv/salient_detection/salient_model.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp

import cv2


modelscope/models/multi_modal/ofa_for_all_tasks.py (+1, -3)

@@ -37,9 +37,7 @@ class OfaForAllTasks(TorchModel):

def __init__(self, model_dir, *args, **kwargs):
super().__init__(model_dir=model_dir, *args, **kwargs)
sd = torch.load(osp.join(model_dir, ModelFile.TORCH_MODEL_BIN_FILE))
sd = sd if 'meta' not in sd else sd['state_dict']
model = OFAModel.from_pretrained(model_dir, state_dict=sd)
model = OFAModel.from_pretrained(model_dir)
self.cfg = Config.from_file(
osp.join(model_dir, ModelFile.CONFIGURATION))
self.model = model.module if hasattr(model, 'module') else model


modelscope/msdatasets/ms_dataset.py (+51, -52)

@@ -44,44 +44,40 @@ def format_list(para) -> List:
return para


class MsIterableDataset(torch.utils.data.IterableDataset):
class MsMapDataset(torch.utils.data.Dataset):

def __init__(self, dataset: Iterable, preprocessor_list, retained_columns,
columns):
super(MsIterableDataset).__init__()
columns, to_tensor):
super(MsDataset).__init__()
self.dataset = dataset
self.preprocessor_list = preprocessor_list
self.to_tensor = to_tensor
self.retained_columns = retained_columns
self.columns = columns

def __len__(self):
return len(self.dataset)

def __iter__(self):
worker_info = torch.utils.data.get_worker_info()
if worker_info is None: # single-process data loading
iter_start = 0
iter_end = len(self.dataset)
else: # in a worker process
per_worker = math.ceil(
len(self.dataset) / float(worker_info.num_workers))
worker_id = worker_info.id
iter_start = worker_id * per_worker
iter_end = min(iter_start + per_worker, len(self.dataset))

for idx in range(iter_start, iter_end):
item_dict = self.dataset[idx]
res = {
k: torch.tensor(item_dict[k])
for k in self.columns if k in self.retained_columns
}
for preprocessor in self.preprocessor_list:
res.update({
k: v # k: torch.tensor(v)
for k, v in preprocessor(item_dict).items()
if k in self.retained_columns
})
yield res
def type_converter(self, x):
if self.to_tensor:
return torch.tensor(x)
else:
return x

def __getitem__(self, index):
item_dict = self.dataset[index]
res = {
k: self.type_converter(item_dict[k])
for k in self.columns
if (not self.to_tensor) or k in self.retained_columns
}
for preprocessor in self.preprocessor_list:
res.update({
k: self.type_converter(v)
for k, v in preprocessor(item_dict).items()
if (not self.to_tensor) or k in self.retained_columns
})
return res


class MsDataset:
@@ -341,6 +337,7 @@ class MsDataset:
self,
preprocessors: Union[Callable, List[Callable]],
columns: Union[str, List[str]] = None,
to_tensor: bool = True,
):
preprocessor_list = preprocessors if isinstance(
preprocessors, list) else [preprocessors]
@@ -350,29 +347,29 @@ class MsDataset:
columns = [
key for key in self._hf_ds.features.keys() if key in columns
]
sample = next(iter(self._hf_ds))
retained_columns = []
if to_tensor:
sample = next(iter(self._hf_ds))

sample_res = {k: np.array(sample[k]) for k in columns}
for processor in preprocessor_list:
sample_res.update(
{k: np.array(v)
for k, v in processor(sample).items()})
sample_res = {k: np.array(sample[k]) for k in columns}
for processor in preprocessor_list:
sample_res.update(
{k: np.array(v)
for k, v in processor(sample).items()})

def is_numpy_number(value):
return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
value.dtype, np.floating) or np.issubdtype(
value.dtype, np.bool)
def is_numpy_number(value):
return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
value.dtype, np.floating)

retained_columns = []
for k in sample_res.keys():
if not is_numpy_number(sample_res[k]):
logger.warning(
f'Data of column {k} is non-numeric, will be removed')
# continue
retained_columns.append(k)
for k in sample_res.keys():
if not is_numpy_number(sample_res[k]):
logger.warning(
f'Data of column {k} is non-numeric, will be removed')
continue
retained_columns.append(k)

return MsIterableDataset(self._hf_ds, preprocessor_list,
retained_columns, columns)
return MsMapDataset(self._hf_ds, preprocessor_list, retained_columns,
columns, to_tensor)

def to_torch_dataset(
self,
@@ -380,6 +377,7 @@ class MsDataset:
preprocessors: Union[Callable, List[Callable]] = None,
task_name: str = None,
task_data_config: ConfigDict = None,
to_tensor: bool = True,
**format_kwargs,
):
"""Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to
@@ -387,13 +385,14 @@ class MsDataset:

Args:
preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process
every sample of the dataset. The output type of processors is dict, and each numeric field of the dict
every sample of the dataset. The output type of processors is dict, and each (numeric) field of the dict
will be used as a field of torch.utils.data.Dataset.
columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only). If the
preprocessor is None, the arg columns must have at least one column. If the `preprocessors` is not None,
the output fields of processors will also be added.
columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only if
`to_tensor` is True). If the preprocessor is None, the arg columns must have at least one column.
If the `preprocessors` is not None, the output fields of processors will also be added.
task_name (str, default None): task name, refer to :obj:`Tasks` for more details
task_data_config (ConfigDict, default None): config dict for model object.
to_tensor (bool, default None): whether convert the data types of dataset column(s) to torch.tensor or not.
format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`.

Returns:
@@ -410,7 +409,7 @@ class MsDataset:
return build_task_dataset(task_data_config, task_name)
if preprocessors is not None:
return self.to_torch_dataset_with_processors(
preprocessors, columns=columns)
preprocessors, columns=columns, to_tensor=to_tensor)
else:
self._hf_ds.reset_format()
self._hf_ds.set_format(

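For context, a minimal usage sketch of the new `to_tensor` flag on `MsDataset.to_torch_dataset` and the map-style `MsMapDataset` it now returns. The dataset id, subset, and column names below are illustrative placeholders, not taken from this commit.

```python
from modelscope.msdatasets import MsDataset


def add_text_length(example):
    # toy preprocessor: the fields it returns become dataset columns
    # ('sentence1' is a placeholder field name)
    return {'text_len': len(example['sentence1'])}


# hypothetical dataset id / subset, for illustration only
ds = MsDataset.load('clue', subset_name='afqmc', split='train')
torch_ds = ds.to_torch_dataset(
    preprocessors=add_text_length,
    columns=['label'],
    to_tensor=False,  # keep raw Python values; the default True converts numeric fields to torch.Tensor
)
print(torch_ds[0])    # map-style indexing, since the result is now an MsMapDataset rather than an IterableDataset
```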

modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py (+2, -0)

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py
import os.path as osp

import numpy as np


modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py (+1, -0)

@@ -1 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .movie_scene_segmentation_dataset import MovieSceneSegmentationDataset

modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py (+2, -3)

@@ -1,6 +1,5 @@
# ---------------------------------------------------------------------------------------------------
# The implementation is built upon BaSSL, publicly available at https://github.com/kakaobrain/bassl
# ---------------------------------------------------------------------------------------------------
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
import copy
import os
import os.path as osp


modelscope/pipelines/audio/kws_farfield_pipeline.py (+22, -19)

@@ -4,6 +4,9 @@ import io
import wave
from typing import Any, Dict

import numpy
import soundfile as sf

from modelscope.fileio import File
from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
@@ -37,7 +40,6 @@ class KWSFarfieldPipeline(Pipeline):
self.model.eval()
frame_size = self.INPUT_CHANNELS * self.SAMPLE_WIDTH
self._nframe = self.model.size_in // frame_size
self.frame_count = 0

def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
if isinstance(inputs, bytes):
@@ -54,35 +56,36 @@ class KWSFarfieldPipeline(Pipeline):
input_file = inputs['input_file']
if isinstance(input_file, str):
input_file = File.read(input_file)
if isinstance(input_file, bytes):
input_file = io.BytesIO(input_file)
self.frame_count = 0
frames, samplerate = sf.read(io.BytesIO(input_file), dtype='int16')
if len(frames.shape) == 1:
frames = numpy.stack((frames, frames, numpy.zeros_like(frames)), 1)

kws_list = []
with wave.open(input_file, 'rb') as fin:
if 'output_file' in inputs:
with wave.open(inputs['output_file'], 'wb') as fout:
fout.setframerate(self.SAMPLE_RATE)
fout.setnchannels(self.OUTPUT_CHANNELS)
fout.setsampwidth(self.SAMPLE_WIDTH)
self._process(fin, kws_list, fout)
else:
self._process(fin, kws_list)
if 'output_file' in inputs:
with wave.open(inputs['output_file'], 'wb') as fout:
fout.setframerate(self.SAMPLE_RATE)
fout.setnchannels(self.OUTPUT_CHANNELS)
fout.setsampwidth(self.SAMPLE_WIDTH)
self._process(frames, kws_list, fout)
else:
self._process(frames, kws_list)
return {OutputKeys.KWS_LIST: kws_list}

def _process(self,
fin: wave.Wave_read,
frames: numpy.ndarray,
kws_list,
fout: wave.Wave_write = None):
data = fin.readframes(self._nframe)
while len(data) >= self.model.size_in:
self.frame_count += self._nframe
for start_index in range(0, frames.shape[0], self._nframe):
end_index = start_index + self._nframe
if end_index > frames.shape[0]:
end_index = frames.shape[0]
data = frames[start_index:end_index, :].tobytes()
result = self.model.forward_decode(data)
if fout:
fout.writeframes(result['pcm'])
if 'kws' in result:
result['kws']['offset'] += self.frame_count / self.SAMPLE_RATE
result['kws']['offset'] += start_index / self.SAMPLE_RATE
kws_list.append(result['kws'])
data = fin.readframes(self._nframe)

def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
return inputs

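A short sketch of exercising the reworked pipeline on a single-channel file, which the soundfile-based reading above now supports by stacking the mono signal into the expected channel layout. The model id and file path are the ones used in this commit's test additions.

```python
import os

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

kws = pipeline(Tasks.keyword_spotting,
               model='damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya')
# mono wav added by this commit; it is expanded internally before decoding
result = kws({'input_file': os.path.join(os.getcwd(), 'data/test/audios/1ch_nihaomiya.wav')})
print(result['kws_list'])
```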
modelscope/pipelines/cv/action_detection_pipeline.py (+2, -0)

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import math
import os.path as osp
from typing import Any, Dict


modelscope/pipelines/cv/easycv_pipelines/base.py (+7, -4)

@@ -10,6 +10,7 @@ from modelscope.hub.snapshot_download import snapshot_download
from modelscope.pipelines.util import is_official_hub_path
from modelscope.utils.config import Config
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
from modelscope.utils.device import create_device


class EasyCVPipeline(object):
@@ -53,16 +54,19 @@ class EasyCVPipeline(object):
), f'Not find "{ModelFile.CONFIGURATION}" in model directory!'

self.cfg = Config.from_file(self.config_file)
self.predict_op = self._build_predict_op()
if 'device' in kwargs:
kwargs['device'] = create_device(kwargs['device'])
self.predict_op = self._build_predict_op(**kwargs)

def _build_predict_op(self):
def _build_predict_op(self, **kwargs):
"""Build EasyCV predictor."""
from easycv.predictors.builder import build_predictor

easycv_config = self._to_easycv_config()
pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
'model_path': self.model_path,
'config_file': easycv_config
'config_file': easycv_config,
**kwargs
})
return pipeline_op

@@ -91,5 +95,4 @@ class EasyCVPipeline(object):
return easycv_config

def __call__(self, inputs) -> Any:
# TODO: support image url
return self.predict_op(inputs)

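A hedged sketch of the new `device` handling: extra keyword arguments now reach `_build_predict_op`, and a `device` string is normalized through `create_device` before being handed to the EasyCV predictor. That `pipeline()` forwards `device` this way for a given EasyCV-backed model is an assumption here; the model id is one used in this commit's tests.

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

seg = pipeline(
    task=Tasks.image_segmentation,
    model='damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k',
    device='cpu',  # normalized via create_device() and passed into build_predictor()
)
result = seg('data/test/images/image_segmentation.jpg')
```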
modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py (+7, -5)

@@ -4,7 +4,6 @@ from typing import Any
from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import LoadImage
from modelscope.utils.constant import ModelFile, Tasks
from .base import EasyCVPipeline

@@ -34,8 +33,11 @@ class Face2DKeypointsPipeline(EasyCVPipeline):
return self.predict_op.show_result(img, points, scale, save_path)

def __call__(self, inputs) -> Any:
output = self.predict_op(inputs)[0][0]
points = output['point']
poses = output['pose']
outputs = self.predict_op(inputs)

return {OutputKeys.KEYPOINTS: points, OutputKeys.POSES: poses}
results = [{
OutputKeys.KEYPOINTS: output['point'],
OutputKeys.POSES: output['pose']
} for output in outputs]

return results

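Because the pipeline now returns one result dict per prediction instead of a single dict, callers index into the returned list, mirroring the `test_face_2d_keypoints.py` update further down. The model id and image path below are placeholder assumptions.

```python
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# assumed model id and image path, for illustration only
face_kps = pipeline(Tasks.face_2d_keypoints,
                    model='damo/cv_mobilenet_face-2d-keypoints_alignment')
outputs = face_kps('face.jpg')
first = outputs[0]  # previously the pipeline returned this dict directly
print(first[OutputKeys.KEYPOINTS], first[OutputKeys.POSES])
```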
modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py (+3, -2)

@@ -28,7 +28,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
*args,
**kwargs)

def _build_predict_op(self):
def _build_predict_op(self, **kwargs):
"""Build EasyCV predictor."""
from easycv.predictors.builder import build_predictor
detection_predictor_type = self.cfg['DETECTION']['type']
@@ -46,6 +46,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
easycv_config = self._to_easycv_config()
pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
'model_path': self.model_path,
'config_file': easycv_config
'config_file': easycv_config,
**kwargs
})
return pipeline_op

modelscope/pipelines/cv/image_instance_segmentation_pipeline.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict, Optional, Union



modelscope/pipelines/cv/image_style_transfer_pipeline.py (+7, -1)

@@ -61,7 +61,13 @@ class ImageStyleTransferPipeline(Pipeline):
def _sanitize_parameters(self, **pipeline_parameters):
return pipeline_parameters, {}, {}

def preprocess(self, content: Input, style: Input) -> Dict[str, Any]:
def preprocess(self,
content: Input,
style: Input = None) -> Dict[str, Any]:
if type(content) is dict: # for demo service
style = content['style']
content = content['content']

content = LoadImage.convert_to_ndarray(content)
if len(content.shape) == 2:
content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR)

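With this preprocess change, demo-service callers can pass a single dict carrying both images. A minimal sketch; the model id is an assumption and the file paths are placeholders.

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

style_transfer = pipeline(Tasks.image_style_transfer,
                          model='damo/cv_aams_style-transfer_damo')  # assumed model id
# a single dict with 'content' and 'style' keys is now accepted by preprocess()
result = style_transfer({'content': 'content.jpg', 'style': 'style.jpg'})
```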

modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict

import torch


modelscope/preprocessors/movie_scene_segmentation/__init__.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule


modelscope/preprocessors/movie_scene_segmentation/transforms.py (+2, -6)

@@ -1,9 +1,5 @@
# ------------------------------------------------------------------------------------
# The codes below partially refer to the BaSSL
# Copyright (c) 2021 KakaoBrain. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# Github: https://github.com/kakaobrain/bassl
# ------------------------------------------------------------------------------------
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl
import numbers
import os.path as osp
import random


modelscope/preprocessors/multi_modal.py (+2, -1)

@@ -186,7 +186,8 @@ class MPlugPreprocessor(Preprocessor):
image = image.convert('RGB')
image = self.patch_resize_transform(image)
question = '' if self.cfg.task == Tasks.image_captioning \
else data[1 if isinstance(data, tuple) else 'question']
else data[1 if isinstance(data, tuple)
else ('text' if 'text' in data else 'question')]
question = self.tokenizer(
question.lower(),
padding='max_length',

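With this change the MPlug preprocessor reads the question from a `text` key when present, falling back to `question`; the matching test updates appear further down. A short sketch using the model id and image from `test_mplug_tasks.py` in this commit:

```python
from PIL import Image

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

pipeline_vqa = pipeline(Tasks.visual_question_answering,
                        model='damo/mplug_visual-question-answering_coco_large_en')
image = Image.open('data/test/images/image_mplug_vqa.jpg')
# the 'text' key is now accepted in place of 'question'
result = pipeline_vqa({'image': image, 'text': 'What is the woman doing?'})
print(result)
```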

modelscope/trainers/cv/image_instance_segmentation_trainer.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Trainers
from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.trainer import EpochBasedTrainer


modelscope/trainers/cv/movie_scene_segmentation_trainer.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Trainers
from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.trainer import EpochBasedTrainer


modelscope/trainers/trainer.py (+20, -10)

@@ -37,8 +37,8 @@ from modelscope.utils.device import create_device, verify_device
from modelscope.utils.file_utils import func_receive_dict_inputs
from modelscope.utils.logger import get_logger
from modelscope.utils.registry import build_from_cfg
from modelscope.utils.torch_utils import (get_dist_info, init_dist,
set_random_seed)
from modelscope.utils.torch_utils import (get_dist_info, get_local_rank,
init_dist, set_random_seed)
from .base import BaseTrainer
from .builder import TRAINERS
from .default_config import DEFAULT_CONFIG
@@ -155,8 +155,17 @@ class EpochBasedTrainer(BaseTrainer):
if self.eval_preprocessor is not None:
self.eval_preprocessor.mode = ModeKeys.EVAL

if kwargs.get('launcher', None) is not None:
init_dist(kwargs['launcher'])

_, world_size = get_dist_info()
self._dist = world_size > 1

device_name = kwargs.get('device', 'gpu')
verify_device(device_name)
if self._dist:
local_rank = get_local_rank()
device_name = f'cuda:{local_rank}'

self.device = create_device(device_name)

self.train_dataset = self.to_task_dataset(
@@ -219,11 +228,6 @@ class EpochBasedTrainer(BaseTrainer):

self.use_fp16 = kwargs.get('use_fp16', False)

if kwargs.get('launcher', None) is not None:
init_dist(kwargs['launcher'])

self._dist = get_dist_info()[1] > 1

# model placement
if self.device.type == 'cuda':
self.model.to(self.device)
@@ -532,8 +536,14 @@ class EpochBasedTrainer(BaseTrainer):
model.train()
self._mode = ModeKeys.TRAIN
# call model forward but not __call__ to skip postprocess
if isinstance(inputs,
Mapping) and not func_receive_dict_inputs(model.forward):

if is_parallel(model):
receive_dict_inputs = func_receive_dict_inputs(
model.module.forward)
else:
receive_dict_inputs = func_receive_dict_inputs(model.forward)

if isinstance(inputs, Mapping) and not receive_dict_inputs:
train_outputs = model.forward(**inputs)
else:
train_outputs = model.forward(inputs)

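The same dispatch rule is applied in `multi_gpu_test` below; condensed into a standalone helper it looks roughly like this. The helper name `call_forward` is mine; the utilities are the ones imported in this diff.

```python
from collections.abc import Mapping

from modelscope.trainers.parallel.utils import is_parallel
from modelscope.utils.file_utils import func_receive_dict_inputs


def call_forward(model, inputs):
    # inspect the wrapped module's forward when the model is parallelized
    # (e.g. DistributedDataParallel); otherwise inspect the model's own forward
    forward = model.module.forward if is_parallel(model) else model.forward
    if isinstance(inputs, Mapping) and not func_receive_dict_inputs(forward):
        return model.forward(**inputs)  # unpack the dict into keyword arguments
    return model.forward(inputs)        # pass the whole mapping/batch as one argument
```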

modelscope/trainers/utils/inference.py (+6, -6)

@@ -9,9 +9,9 @@ from collections.abc import Mapping

import torch
from torch import distributed as dist
from torch.nn.parallel import DistributedDataParallel
from tqdm import tqdm

from modelscope.trainers.parallel.utils import is_parallel
from modelscope.utils.data_utils import to_device
from modelscope.utils.file_utils import func_receive_dict_inputs
from modelscope.utils.torch_utils import (broadcast, get_dist_info, is_master,
@@ -138,7 +138,10 @@ def multi_gpu_test(model,
data_len = data_loader_iters_per_gpu * world_size
desc = 'Total test iterations with multi gpus'

time.sleep(2) # This line can prevent deadlock problem in some cases.
if is_parallel(model):
receive_dict_inputs = func_receive_dict_inputs(model.module.forward)
else:
receive_dict_inputs = func_receive_dict_inputs(model.forward)

count = 0
with tqdm(total=data_len, desc=desc) as pbar:
@@ -146,10 +149,7 @@ def multi_gpu_test(model,
data = to_device(data, device)
data_list.append(data)
with torch.no_grad():
forward_func = model.module.forward if \
isinstance(model, DistributedDataParallel) else model.forward
if isinstance(data, Mapping
) and not func_receive_dict_inputs(forward_func):
if isinstance(data, Mapping) and not receive_dict_inputs:
result = model.forward(**data)
else:
result = model.forward(data)


modelscope/utils/demo_utils.py (+53, -9)

@@ -123,7 +123,7 @@ INPUT_EXAMPLES = {
'urlPaths': {
'outUrls': [{
'outputKey': OutputKeys.OUTPUT_PCM,
'fileType': 'wav'
'fileType': 'pcm'
}]
}
},
@@ -134,7 +134,7 @@ INPUT_EXAMPLES = {
'urlPaths': {
'outUrls': [{
'outputKey': OutputKeys.OUTPUT_PCM,
'fileType': 'wav'
'fileType': 'pcm'
}]
}
},
@@ -147,7 +147,13 @@ INPUT_EXAMPLES = {
'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png',
'a blue turtle-like pokemon with round head'
],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}]
}
},
TasksIODescriptions.visual_question_answering: {
'task':
@@ -156,7 +162,16 @@ INPUT_EXAMPLES = {
'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png',
'what is grown on the plant?'
],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}],
'outUrls': [{
'outputKey': 'text'
}]
}
},
TasksIODescriptions.visual_entailment: {
'task':
@@ -165,7 +180,14 @@ INPUT_EXAMPLES = {
'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg',
'there are two birds.', 'test'
],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}],
'outUrls': [{}]
}
},
TasksIODescriptions.generative_multi_modal_embedding: {
'task':
@@ -174,7 +196,14 @@ INPUT_EXAMPLES = {
'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg',
'dogs playing in the grass'
],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}],
'outUrls': [{}]
}
},
}

@@ -192,7 +221,13 @@ class DemoCompatibilityCheck(object):
print('testing demo: ', self.task, self.model_id)
test_pipline = pipeline(self.task, self.model_id)
req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]]
output = test_pipline(preprocess(req))
inputs = preprocess(req)
params = req.get('parameters', {})
# modelscope inference
if params != {}:
output = test_pipline(inputs, **params)
else:
output = test_pipline(inputs)
json.dumps(output, cls=NumpyEncoder)
result = postprocess(req, output)
print(result)
@@ -215,11 +250,21 @@ class NumpyEncoder(json.JSONEncoder):


def preprocess(req):
in_urls = req.get('urlPaths').get('inUrls')
if len(req['inputs']) == 1:
inputs = req['inputs'][0]
else:
inputs = tuple(req['inputs'])
return inputs
if in_urls is None or len(in_urls) == 0:
return inputs

inputs_dict = {}
for i, in_url in enumerate(in_urls):
input_name = in_url.get('name')
if input_name is None or input_name == '':
return inputs
inputs_dict[input_name] = req['inputs'][i]
return inputs_dict


def postprocess(req, resp):
@@ -242,4 +287,3 @@ def postprocess(req, resp):
out_mem_file = io.BytesIO()
out_mem_file.write(new_resp.get(output_key))
return type(out_mem_file)
# TODO(lingcai.wl): support more file type

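A worked illustration of the updated `preprocess` routing, built from the visual question answering example defined in this file: when the `inUrls` entries carry names, the positional inputs are turned into a dict keyed by those names (assuming `preprocess` is importable as shown).

```python
from modelscope.utils.demo_utils import preprocess

req = {
    'inputs': [
        'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png',
        'what is grown on the plant?'
    ],
    'urlPaths': {'inUrls': [{'name': 'image'}, {'name': 'text'}]},
}
inputs = preprocess(req)
# expected: {'image': '<the image url>', 'text': 'what is grown on the plant?'}
```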
modelscope/utils/torch_utils.py (+4, -0)

@@ -115,6 +115,10 @@ def get_dist_info() -> Tuple[int, int]:
return rank, world_size


def get_local_rank():
return int(os.environ.get('LOCAL_RANK', 0))


def is_master():
rank, _ = get_dist_info()
return rank == 0

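A minimal sketch of how the new helper is consumed: torch's distributed launchers export `LOCAL_RANK` for each process, and the trainer change above uses it to bind every process to its own GPU.

```python
import os

os.environ.setdefault('LOCAL_RANK', '0')  # normally set per process by torchrun / torch.distributed.launch
from modelscope.utils.torch_utils import get_local_rank

device_name = f'cuda:{get_local_rank()}'  # mirrors the EpochBasedTrainer change in this commit
print(device_name)
```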

modelscope/version.py (+1, -1)

@@ -1 +1 @@
__version__ = '0.4.3'
__version__ = '0.4.4'

requirements/cv.txt (+1, -1)

@@ -14,7 +14,7 @@ mmcls>=0.21.0
mmdet>=2.25.0
networkx>=2.5
onnxruntime>=1.10
pai-easycv>=0.6.0
pai-easycv>=0.6.3.4
pandas
psutil
regex


tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py (+40, -19)

@@ -1,10 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
from distutils.version import LooseVersion

import easycv
import numpy as np
from PIL import Image

from modelscope.metainfo import Pipelines
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level
@@ -14,7 +15,7 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase):

img_path = 'data/test/images/image_segmentation.jpg'

def _internal_test__(self, model_id):
def _internal_test_(self, model_id):
img = np.asarray(Image.open(self.img_path))

semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id)
@@ -24,41 +25,61 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase):

results = outputs[0]
self.assertListEqual(
list(img.shape)[:2], list(results['seg_pred'][0].shape))
self.assertListEqual(results['seg_pred'][0][1, 4:10].tolist(),
[161 for i in range(6)])
self.assertListEqual(results['seg_pred'][0][-1, -10:].tolist(),
[133 for i in range(10)])
list(img.shape)[:2], list(results['seg_pred'].shape))

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def _internal_test_batch_(self, model_id, num_samples=2, batch_size=2):
# TODO: support in the future
img = np.asarray(Image.open(self.img_path))
num_samples = num_samples
batch_size = batch_size
semantic_seg = pipeline(
task=Tasks.image_segmentation,
model=model_id,
batch_size=batch_size)
outputs = semantic_seg([self.img_path] * num_samples)

self.assertEqual(semantic_seg.predict_op.batch_size, batch_size)
self.assertEqual(len(outputs), num_samples)

for output in outputs:
self.assertListEqual(
list(img.shape)[:2], list(output['seg_pred'].shape))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b0(self):
model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b1(self):
model_id = 'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b2(self):
model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b3(self):
model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b4(self):
model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b5(self):
model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)


if __name__ == '__main__':


tests/pipelines/test_face_2d_keypoints.py (+1, -1)

@@ -18,7 +18,7 @@ class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase):

face_2d_keypoints_align = pipeline(
task=Tasks.face_2d_keypoints, model=model_id)
output = face_2d_keypoints_align(img_path)
output = face_2d_keypoints_align(img_path)[0]

output_keypoints = output[OutputKeys.KEYPOINTS]
output_pose = output[OutputKeys.POSES]


tests/pipelines/test_key_word_spotting_farfield.py (+11, -0)

@@ -8,6 +8,7 @@ from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level

TEST_SPEECH_FILE = 'data/test/audios/3ch_nihaomiya.wav'
TEST_SPEECH_FILE_MONO = 'data/test/audios/1ch_nihaomiya.wav'
TEST_SPEECH_URL = 'https://modelscope.cn/api/v1/models/damo/' \
'speech_dfsmn_kws_char_farfield_16k_nihaomiya/repo' \
'?Revision=master&FilePath=examples/3ch_nihaomiya.wav'
@@ -26,6 +27,16 @@ class KWSFarfieldTest(unittest.TestCase):
self.assertEqual(len(result['kws_list']), 5)
print(result['kws_list'][-1])

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_mono(self):
kws = pipeline(Tasks.keyword_spotting, model=self.model_id)
inputs = {
'input_file': os.path.join(os.getcwd(), TEST_SPEECH_FILE_MONO)
}
result = kws(inputs)
self.assertEqual(len(result['kws_list']), 5)
print(result['kws_list'][-1])

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_url(self):
kws = pipeline(Tasks.keyword_spotting, model=self.model_id)


tests/pipelines/test_mplug_tasks.py (+8, -8)

@@ -44,8 +44,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
'damo/mplug_visual-question-answering_coco_large_en')
pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model)
image = Image.open('data/test/images/image_mplug_vqa.jpg')
question = 'What is the woman doing?'
input = {'image': image, 'question': question}
text = 'What is the woman doing?'
input = {'image': image, 'text': text}
result = pipeline_vqa(input)
print(result)

@@ -54,8 +54,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
model = 'damo/mplug_visual-question-answering_coco_large_en'
pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model)
image = Image.open('data/test/images/image_mplug_vqa.jpg')
question = 'What is the woman doing?'
input = {'image': image, 'question': question}
text = 'What is the woman doing?'
input = {'image': image, 'text': text}
result = pipeline_vqa(input)
print(result)

@@ -65,8 +65,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
'damo/mplug_image-text-retrieval_flickr30k_large_en')
pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model)
image = Image.open('data/test/images/image-text-retrieval.jpg')
question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'question': question}
text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'text': text}
result = pipeline_retrieval(input)
print(result)

@@ -75,8 +75,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
model = 'damo/mplug_image-text-retrieval_flickr30k_large_en'
pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model)
image = Image.open('data/test/images/image-text-retrieval.jpg')
question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'question': question}
text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'text': text}
result = pipeline_retrieval(input)
print(result)



tests/pipelines/test_ofa_tasks.py (+6, -4)

@@ -147,8 +147,10 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = ofa_pipe(input)
print(result)
image_name = image.split('/')[-2]
self.save_img(image, result[OutputKeys.BOXES],
osp.join('large_en_model_' + image_name + '.png'))
self.save_img(
image,
result[OutputKeys.BOXES][0], # just one box
osp.join('large_en_model_' + image_name + '.png'))

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_visual_grounding_with_name(self):
@@ -161,7 +163,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = ofa_pipe(input)
print(result)
image_name = image.split('/')[-2]
self.save_img(image, result[OutputKeys.BOXES],
self.save_img(image, result[OutputKeys.BOXES][0],
osp.join('large_en_name_' + image_name + '.png'))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@@ -174,7 +176,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = ofa_pipe(input)
print(result)
image_name = image.split('/')[-1]
self.save_img(image, result[OutputKeys.BOXES],
self.save_img(image, result[OutputKeys.BOXES][0],
osp.join('large_zh_name_' + image_name))

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')


tests/run_config.yaml (+1, -0)

@@ -9,6 +9,7 @@ isolated: # test cases that may require excessive anmount of GPU memory, which
- test_image_super_resolution.py
- test_easycv_trainer.py
- test_segformer.py
- test_segmentation_pipeline.py

envs:
default: # default env, case not in other env will in default, pytorch.


tests/trainers/test_trainer_gpu.py (+32, -2)

@@ -53,7 +53,18 @@ class DummyModel(nn.Module, Model):
return dict(logits=x, loss=loss)


def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs):
class DummyModelForwardInputs(DummyModel):

def forward(self, inputs):
feat, labels = inputs['feat'], inputs['labels']
return super().forward(feat, labels)


def train_func(work_dir,
dist=False,
iterable_dataset=False,
forward_inputs=False,
**kwargs):
json_cfg = {
'task': Tasks.image_classification,
'train': {
@@ -81,7 +92,10 @@ def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs):
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

model = DummyModel()
if forward_inputs:
model = DummyModelForwardInputs()
else:
model = DummyModel()
optimmizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimmizer, 2)
trainer_name = Trainers.default
@@ -273,6 +287,22 @@ class TrainerTestMultiGpus(DistributedTestCase):
for i in [1, 3, 5]:
self.assertIn(MetricKeys.ACCURACY, lines[i])

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_multi_gpus_forward_inputs(self):
self.start(
train_func,
num_gpus=2,
work_dir=self.tmp_dir,
dist=True,
forward_inputs=True)

results_files = os.listdir(self.tmp_dir)
json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
self.assertEqual(len(json_files), 1)
self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files)

# TODO: support iters_per_epoch for dist mode
@unittest.skipIf(True, 'need to adapt to DistributedSampler')
def test_multi_gpus_with_iters_per_epoch(self):

