Browse Source

Merge remote-tracking branch 'origin/master' into ofa/finetune

# Conflicts:
#	modelscope/models/multi_modal/ofa_for_all_tasks.py
#	modelscope/msdatasets/ms_dataset.py
#	modelscope/trainers/utils/inference.py
master
行嗔 3 years ago
parent
commit
537827e5a1
64 changed files with 363 additions and 185 deletions
  1. +3
    -0
      data/test/audios/1ch_nihaomiya.wav
  2. +2
    -0
      modelscope/metrics/image_instance_segmentation_metric.py
  3. +2
    -0
      modelscope/metrics/movie_scene_segmentation_metric.py
  4. +2
    -2
      modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py
  5. +2
    -0
      modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py
  6. +1
    -0
      modelscope/models/cv/image_instance_segmentation/datasets/__init__.py
  7. +5
    -4
      modelscope/models/cv/image_instance_segmentation/datasets/transforms.py
  8. +1
    -0
      modelscope/models/cv/image_instance_segmentation/model.py
  9. +2
    -0
      modelscope/models/cv/image_instance_segmentation/postprocess_utils.py
  10. +3
    -0
      modelscope/models/cv/movie_scene_segmentation/model.py
  11. +1
    -0
      modelscope/models/cv/movie_scene_segmentation/utils/__init__.py
  12. +2
    -6
      modelscope/models/cv/movie_scene_segmentation/utils/head.py
  13. +2
    -4
      modelscope/models/cv/movie_scene_segmentation/utils/save_op.py
  14. +1
    -3
      modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py
  15. +1
    -0
      modelscope/models/cv/object_detection/mmdet_model.py
  16. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/__init__.py
  17. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py
  18. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py
  19. +2
    -1
      modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py
  20. +2
    -1
      modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py
  21. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py
  22. +2
    -1
      modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py
  23. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py
  24. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py
  25. +2
    -1
      modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py
  26. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py
  27. +2
    -1
      modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py
  28. +2
    -0
      modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py
  29. +2
    -1
      modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py
  30. +2
    -2
      modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py
  31. +2
    -0
      modelscope/models/cv/salient_detection/models/__init__.py
  32. +2
    -1
      modelscope/models/cv/salient_detection/models/u2net.py
  33. +1
    -0
      modelscope/models/cv/salient_detection/salient_model.py
  34. +1
    -3
      modelscope/models/multi_modal/ofa_for_all_tasks.py
  35. +51
    -52
      modelscope/msdatasets/ms_dataset.py
  36. +2
    -0
      modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py
  37. +1
    -0
      modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py
  38. +2
    -3
      modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py
  39. +22
    -19
      modelscope/pipelines/audio/kws_farfield_pipeline.py
  40. +2
    -0
      modelscope/pipelines/cv/action_detection_pipeline.py
  41. +7
    -4
      modelscope/pipelines/cv/easycv_pipelines/base.py
  42. +7
    -5
      modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py
  43. +3
    -2
      modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py
  44. +1
    -0
      modelscope/pipelines/cv/image_instance_segmentation_pipeline.py
  45. +7
    -1
      modelscope/pipelines/cv/image_style_transfer_pipeline.py
  46. +1
    -0
      modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py
  47. +1
    -0
      modelscope/preprocessors/movie_scene_segmentation/__init__.py
  48. +2
    -6
      modelscope/preprocessors/movie_scene_segmentation/transforms.py
  49. +2
    -1
      modelscope/preprocessors/multi_modal.py
  50. +1
    -0
      modelscope/trainers/cv/image_instance_segmentation_trainer.py
  51. +1
    -0
      modelscope/trainers/cv/movie_scene_segmentation_trainer.py
  52. +20
    -10
      modelscope/trainers/trainer.py
  53. +6
    -6
      modelscope/trainers/utils/inference.py
  54. +53
    -9
      modelscope/utils/demo_utils.py
  55. +4
    -0
      modelscope/utils/torch_utils.py
  56. +1
    -1
      modelscope/version.py
  57. +1
    -1
      requirements/cv.txt
  58. +40
    -19
      tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py
  59. +1
    -1
      tests/pipelines/test_face_2d_keypoints.py
  60. +11
    -0
      tests/pipelines/test_key_word_spotting_farfield.py
  61. +8
    -8
      tests/pipelines/test_mplug_tasks.py
  62. +6
    -4
      tests/pipelines/test_ofa_tasks.py
  63. +1
    -0
      tests/run_config.yaml
  64. +32
    -2
      tests/trainers/test_trainer_gpu.py

+ 3
- 0
data/test/audios/1ch_nihaomiya.wav View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f7f5a0a4efca1e83463cb44460c66b56fb7cd673eb6da37924637bc05ef758d
size 1440044

+ 2
- 0
modelscope/metrics/image_instance_segmentation_metric.py View File

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py
import os.path as osp import os.path as osp
import tempfile import tempfile
from collections import OrderedDict from collections import OrderedDict


+ 2
- 0
modelscope/metrics/movie_scene_segmentation_metric.py View File

@@ -1,3 +1,5 @@
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
from typing import Dict from typing import Dict


import numpy as np import numpy as np


+ 2
- 2
modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py View File

@@ -1,5 +1,5 @@
# Modified from: https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
# The implementation is adopted from Swin Transformer, made publicly available under the MIT License at
# https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn


+ 2
- 0
modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py View File

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py
import os import os
from collections import OrderedDict from collections import OrderedDict




+ 1
- 0
modelscope/models/cv/image_instance_segmentation/datasets/__init__.py View File

@@ -1 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .transforms import build_preprocess_transform from .transforms import build_preprocess_transform

+ 5
- 4
modelscope/models/cv/image_instance_segmentation/datasets/transforms.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp import os.path as osp


import numpy as np import numpy as np
@@ -51,9 +52,9 @@ class LoadImageFromFile:
"""Load an image from file. """Load an image from file.


Required keys are "img_prefix" and "img_info" (a dict that must contain the Required keys are "img_prefix" and "img_info" (a dict that must contain the
key "filename"). Added or updated keys are "filename", "img", "img_shape",
"ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`),
"scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1).
key "filename", "ann_file", and "classes"). Added or updated keys are
"filename", "ori_filename", "img", "img_shape", "ori_shape" (same as `img_shape`),
"img_fields", "ann_file" (path to annotation file) and "classes".


Args: Args:
to_float32 (bool): Whether to convert the loaded image to a float32 to_float32 (bool): Whether to convert the loaded image to a float32
@@ -73,7 +74,7 @@ class LoadImageFromFile:
"""Call functions to load image and get image meta information. """Call functions to load image and get image meta information.


Args: Args:
results (dict): Result dict from :obj:`ImageInstanceSegmentationDataset`.
results (dict): Result dict from :obj:`ImageInstanceSegmentationCocoDataset`.


Returns: Returns:
dict: The dict contains loaded image and meta information. dict: The dict contains loaded image and meta information.


+ 1
- 0
modelscope/models/cv/image_instance_segmentation/model.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os import os
from typing import Any, Dict from typing import Any, Dict




+ 2
- 0
modelscope/models/cv/image_instance_segmentation/postprocess_utils.py View File

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/visualization/image.py
import itertools import itertools


import cv2 import cv2


+ 3
- 0
modelscope/models/cv/movie_scene_segmentation/model.py View File

@@ -1,3 +1,6 @@
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl

import os import os
import os.path as osp import os.path as osp
from typing import Any, Dict from typing import Any, Dict


+ 1
- 0
modelscope/models/cv/movie_scene_segmentation/utils/__init__.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .save_op import get_pred_boundary, pred2scene, scene2video from .save_op import get_pred_boundary, pred2scene, scene2video
from .shot_encoder import resnet50 from .shot_encoder import resnet50
from .trn import TransformerCRN from .trn import TransformerCRN

+ 2
- 6
modelscope/models/cv/movie_scene_segmentation/utils/head.py View File

@@ -1,9 +1,5 @@
# ------------------------------------------------------------------------------------
# BaSSL
# Copyright (c) 2021 KakaoBrain. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# Github: https://github.com/kakaobrain/bassl
# ------------------------------------------------------------------------------------
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl


import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F


+ 2
- 4
modelscope/models/cv/movie_scene_segmentation/utils/save_op.py View File

@@ -1,7 +1,5 @@
# ----------------------------------------------------------------------------------
# The codes below partially refer to the SceneSeg LGSS.
# Github: https://github.com/AnyiRao/SceneSeg
# ----------------------------------------------------------------------------------
# The implementation here is modified based on SceneSeg,
# originally Apache 2.0 License and publicly available at https://github.com/AnyiRao/SceneSeg
import os import os
import os.path as osp import os.path as osp
import subprocess import subprocess


+ 1
- 3
modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py View File

@@ -1,6 +1,4 @@
"""
Modified from original implementation in torchvision
"""
# The implementation is adopted from torchvision


from typing import Any, Callable, List, Optional, Type, Union from typing import Any, Callable, List, Optional, Type, Union




+ 1
- 0
modelscope/models/cv/object_detection/mmdet_model.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp import os.path as osp


import numpy as np import numpy as np


+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .backbones import ViT from .backbones import ViT
from .dense_heads import AnchorNHead, RPNNHead from .dense_heads import AnchorNHead, RPNNHead
from .necks import FPNF from .necks import FPNF


+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .vit import ViT from .vit import ViT


__all__ = ['ViT'] __all__ = ['ViT']

+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .anchor_head import AnchorNHead from .anchor_head import AnchorNHead
from .rpn_head import RPNNHead from .rpn_head import RPNNHead




+ 2
- 1
modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py View File

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from mmdet.models.builder import HEADS from mmdet.models.builder import HEADS
from mmdet.models.dense_heads import AnchorHead from mmdet.models.dense_heads import AnchorHead




+ 2
- 1
modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py View File

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
import copy import copy


import torch import torch


+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .fpn import FPNF from .fpn import FPNF


__all__ = ['FPNF'] __all__ = ['FPNF']

+ 2
- 1
modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py View File

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from mmcv.runner import BaseModule, auto_fp16 from mmcv.runner import BaseModule, auto_fp16


+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .bbox_heads import (ConvFCBBoxNHead, Shared2FCBBoxNHead, from .bbox_heads import (ConvFCBBoxNHead, Shared2FCBBoxNHead,
Shared4Conv1FCBBoxNHead) Shared4Conv1FCBBoxNHead)
from .mask_heads import FCNMaskNHead from .mask_heads import FCNMaskNHead


+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .convfc_bbox_head import (ConvFCBBoxNHead, Shared2FCBBoxNHead, from .convfc_bbox_head import (ConvFCBBoxNHead, Shared2FCBBoxNHead,
Shared4Conv1FCBBoxNHead) Shared4Conv1FCBBoxNHead)




+ 2
- 1
modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py View File

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
import torch.nn as nn import torch.nn as nn
from mmdet.models.builder import HEADS from mmdet.models.builder import HEADS
from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead


+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .fcn_mask_head import FCNMaskNHead from .fcn_mask_head import FCNMaskNHead


__all__ = ['FCNMaskNHead'] __all__ = ['FCNMaskNHead']

+ 2
- 1
modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py View File

@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from warnings import warn from warnings import warn


import numpy as np import numpy as np


+ 2
- 0
modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py View File

@@ -1,3 +1,5 @@
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from .checkpoint import load_checkpoint from .checkpoint import load_checkpoint
from .convModule_norm import ConvModule_Norm from .convModule_norm import ConvModule_Norm




+ 2
- 1
modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py View File

@@ -1,5 +1,6 @@
# Copyright (c) Open-MMLab. All rights reserved. # Copyright (c) Open-MMLab. All rights reserved.
# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
import io import io
import os import os
import os.path as osp import os.path as osp


+ 2
- 2
modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py View File

@@ -1,5 +1,5 @@
# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
# Implementation in this file is modified based on ViTAE-Transformer
# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
from mmcv.cnn import ConvModule from mmcv.cnn import ConvModule






+ 2
- 0
modelscope/models/cv/salient_detection/models/__init__.py View File

@@ -1 +1,3 @@
# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License
# source code available via https://github.com/xuebinqin/U-2-Net
from .u2net import U2NET from .u2net import U2NET

+ 2
- 1
modelscope/models/cv/salient_detection/models/u2net.py View File

@@ -1,4 +1,5 @@
# Implementation in this file is modifed from source code avaiable via https://github.com/xuebinqin/U-2-Net
# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License
# source code available via https://github.com/xuebinqin/U-2-Net
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F


+ 1
- 0
modelscope/models/cv/salient_detection/salient_model.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp import os.path as osp


import cv2 import cv2


+ 1
- 3
modelscope/models/multi_modal/ofa_for_all_tasks.py View File

@@ -37,9 +37,7 @@ class OfaForAllTasks(TorchModel):


def __init__(self, model_dir, *args, **kwargs): def __init__(self, model_dir, *args, **kwargs):
super().__init__(model_dir=model_dir, *args, **kwargs) super().__init__(model_dir=model_dir, *args, **kwargs)
sd = torch.load(osp.join(model_dir, ModelFile.TORCH_MODEL_BIN_FILE))
sd = sd if 'meta' not in sd else sd['state_dict']
model = OFAModel.from_pretrained(model_dir, state_dict=sd)
model = OFAModel.from_pretrained(model_dir)
self.cfg = Config.from_file( self.cfg = Config.from_file(
osp.join(model_dir, ModelFile.CONFIGURATION)) osp.join(model_dir, ModelFile.CONFIGURATION))
self.model = model.module if hasattr(model, 'module') else model self.model = model.module if hasattr(model, 'module') else model


+ 51
- 52
modelscope/msdatasets/ms_dataset.py View File

@@ -44,44 +44,40 @@ def format_list(para) -> List:
return para return para




class MsIterableDataset(torch.utils.data.IterableDataset):
class MsMapDataset(torch.utils.data.Dataset):


def __init__(self, dataset: Iterable, preprocessor_list, retained_columns, def __init__(self, dataset: Iterable, preprocessor_list, retained_columns,
columns):
super(MsIterableDataset).__init__()
columns, to_tensor):
super(MsDataset).__init__()
self.dataset = dataset self.dataset = dataset
self.preprocessor_list = preprocessor_list self.preprocessor_list = preprocessor_list
self.to_tensor = to_tensor
self.retained_columns = retained_columns self.retained_columns = retained_columns
self.columns = columns self.columns = columns


def __len__(self): def __len__(self):
return len(self.dataset) return len(self.dataset)


def __iter__(self):
worker_info = torch.utils.data.get_worker_info()
if worker_info is None: # single-process data loading
iter_start = 0
iter_end = len(self.dataset)
else: # in a worker process
per_worker = math.ceil(
len(self.dataset) / float(worker_info.num_workers))
worker_id = worker_info.id
iter_start = worker_id * per_worker
iter_end = min(iter_start + per_worker, len(self.dataset))

for idx in range(iter_start, iter_end):
item_dict = self.dataset[idx]
res = {
k: torch.tensor(item_dict[k])
for k in self.columns if k in self.retained_columns
}
for preprocessor in self.preprocessor_list:
res.update({
k: v # k: torch.tensor(v)
for k, v in preprocessor(item_dict).items()
if k in self.retained_columns
})
yield res
def type_converter(self, x):
if self.to_tensor:
return torch.tensor(x)
else:
return x

def __getitem__(self, index):
item_dict = self.dataset[index]
res = {
k: self.type_converter(item_dict[k])
for k in self.columns
if (not self.to_tensor) or k in self.retained_columns
}
for preprocessor in self.preprocessor_list:
res.update({
k: self.type_converter(v)
for k, v in preprocessor(item_dict).items()
if (not self.to_tensor) or k in self.retained_columns
})
return res




class MsDataset: class MsDataset:
@@ -341,6 +337,7 @@ class MsDataset:
self, self,
preprocessors: Union[Callable, List[Callable]], preprocessors: Union[Callable, List[Callable]],
columns: Union[str, List[str]] = None, columns: Union[str, List[str]] = None,
to_tensor: bool = True,
): ):
preprocessor_list = preprocessors if isinstance( preprocessor_list = preprocessors if isinstance(
preprocessors, list) else [preprocessors] preprocessors, list) else [preprocessors]
@@ -350,29 +347,29 @@ class MsDataset:
columns = [ columns = [
key for key in self._hf_ds.features.keys() if key in columns key for key in self._hf_ds.features.keys() if key in columns
] ]
sample = next(iter(self._hf_ds))
retained_columns = []
if to_tensor:
sample = next(iter(self._hf_ds))


sample_res = {k: np.array(sample[k]) for k in columns}
for processor in preprocessor_list:
sample_res.update(
{k: np.array(v)
for k, v in processor(sample).items()})
sample_res = {k: np.array(sample[k]) for k in columns}
for processor in preprocessor_list:
sample_res.update(
{k: np.array(v)
for k, v in processor(sample).items()})


def is_numpy_number(value):
return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
value.dtype, np.floating) or np.issubdtype(
value.dtype, np.bool)
def is_numpy_number(value):
return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
value.dtype, np.floating)


retained_columns = []
for k in sample_res.keys():
if not is_numpy_number(sample_res[k]):
logger.warning(
f'Data of column {k} is non-numeric, will be removed')
# continue
retained_columns.append(k)
for k in sample_res.keys():
if not is_numpy_number(sample_res[k]):
logger.warning(
f'Data of column {k} is non-numeric, will be removed')
continue
retained_columns.append(k)


return MsIterableDataset(self._hf_ds, preprocessor_list,
retained_columns, columns)
return MsMapDataset(self._hf_ds, preprocessor_list, retained_columns,
columns, to_tensor)


def to_torch_dataset( def to_torch_dataset(
self, self,
@@ -380,6 +377,7 @@ class MsDataset:
preprocessors: Union[Callable, List[Callable]] = None, preprocessors: Union[Callable, List[Callable]] = None,
task_name: str = None, task_name: str = None,
task_data_config: ConfigDict = None, task_data_config: ConfigDict = None,
to_tensor: bool = True,
**format_kwargs, **format_kwargs,
): ):
"""Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to
@@ -387,13 +385,14 @@ class MsDataset:


Args: Args:
preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process
every sample of the dataset. The output type of processors is dict, and each numeric field of the dict
every sample of the dataset. The output type of processors is dict, and each (numeric) field of the dict
will be used as a field of torch.utils.data.Dataset. will be used as a field of torch.utils.data.Dataset.
columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only). If the
preprocessor is None, the arg columns must have at least one column. If the `preprocessors` is not None,
the output fields of processors will also be added.
columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only if
`to_tensor` is True). If the preprocessor is None, the arg columns must have at least one column.
If the `preprocessors` is not None, the output fields of processors will also be added.
task_name (str, default None): task name, refer to :obj:`Tasks` for more details task_name (str, default None): task name, refer to :obj:`Tasks` for more details
task_data_config (ConfigDict, default None): config dict for model object. task_data_config (ConfigDict, default None): config dict for model object.
to_tensor (bool, default True): whether to convert the data types of dataset column(s) to torch.tensor or not.
format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`. format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`.


Returns: Returns:
@@ -410,7 +409,7 @@ class MsDataset:
return build_task_dataset(task_data_config, task_name) return build_task_dataset(task_data_config, task_name)
if preprocessors is not None: if preprocessors is not None:
return self.to_torch_dataset_with_processors( return self.to_torch_dataset_with_processors(
preprocessors, columns=columns)
preprocessors, columns=columns, to_tensor=to_tensor)
else: else:
self._hf_ds.reset_format() self._hf_ds.reset_format()
self._hf_ds.set_format( self._hf_ds.set_format(


+ 2
- 0
modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py View File

@@ -1,3 +1,5 @@
# Part of the implementation is borrowed and modified from MMDetection, publicly available at
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py
import os.path as osp import os.path as osp


import numpy as np import numpy as np


+ 1
- 0
modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py View File

@@ -1 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .movie_scene_segmentation_dataset import MovieSceneSegmentationDataset from .movie_scene_segmentation_dataset import MovieSceneSegmentationDataset

+ 2
- 3
modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py View File

@@ -1,6 +1,5 @@
# ---------------------------------------------------------------------------------------------------
# The implementation is built upon BaSSL, publicly available at https://github.com/kakaobrain/bassl
# ---------------------------------------------------------------------------------------------------
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
import copy import copy
import os import os
import os.path as osp import os.path as osp


+ 22
- 19
modelscope/pipelines/audio/kws_farfield_pipeline.py View File

@@ -4,6 +4,9 @@ import io
import wave import wave
from typing import Any, Dict from typing import Any, Dict


import numpy
import soundfile as sf

from modelscope.fileio import File from modelscope.fileio import File
from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys from modelscope.outputs import OutputKeys
@@ -37,7 +40,6 @@ class KWSFarfieldPipeline(Pipeline):
self.model.eval() self.model.eval()
frame_size = self.INPUT_CHANNELS * self.SAMPLE_WIDTH frame_size = self.INPUT_CHANNELS * self.SAMPLE_WIDTH
self._nframe = self.model.size_in // frame_size self._nframe = self.model.size_in // frame_size
self.frame_count = 0


def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]: def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
if isinstance(inputs, bytes): if isinstance(inputs, bytes):
@@ -54,35 +56,36 @@ class KWSFarfieldPipeline(Pipeline):
input_file = inputs['input_file'] input_file = inputs['input_file']
if isinstance(input_file, str): if isinstance(input_file, str):
input_file = File.read(input_file) input_file = File.read(input_file)
if isinstance(input_file, bytes):
input_file = io.BytesIO(input_file)
self.frame_count = 0
frames, samplerate = sf.read(io.BytesIO(input_file), dtype='int16')
if len(frames.shape) == 1:
frames = numpy.stack((frames, frames, numpy.zeros_like(frames)), 1)

kws_list = [] kws_list = []
with wave.open(input_file, 'rb') as fin:
if 'output_file' in inputs:
with wave.open(inputs['output_file'], 'wb') as fout:
fout.setframerate(self.SAMPLE_RATE)
fout.setnchannels(self.OUTPUT_CHANNELS)
fout.setsampwidth(self.SAMPLE_WIDTH)
self._process(fin, kws_list, fout)
else:
self._process(fin, kws_list)
if 'output_file' in inputs:
with wave.open(inputs['output_file'], 'wb') as fout:
fout.setframerate(self.SAMPLE_RATE)
fout.setnchannels(self.OUTPUT_CHANNELS)
fout.setsampwidth(self.SAMPLE_WIDTH)
self._process(frames, kws_list, fout)
else:
self._process(frames, kws_list)
return {OutputKeys.KWS_LIST: kws_list} return {OutputKeys.KWS_LIST: kws_list}


def _process(self, def _process(self,
fin: wave.Wave_read,
frames: numpy.ndarray,
kws_list, kws_list,
fout: wave.Wave_write = None): fout: wave.Wave_write = None):
data = fin.readframes(self._nframe)
while len(data) >= self.model.size_in:
self.frame_count += self._nframe
for start_index in range(0, frames.shape[0], self._nframe):
end_index = start_index + self._nframe
if end_index > frames.shape[0]:
end_index = frames.shape[0]
data = frames[start_index:end_index, :].tobytes()
result = self.model.forward_decode(data) result = self.model.forward_decode(data)
if fout: if fout:
fout.writeframes(result['pcm']) fout.writeframes(result['pcm'])
if 'kws' in result: if 'kws' in result:
result['kws']['offset'] += self.frame_count / self.SAMPLE_RATE
result['kws']['offset'] += start_index / self.SAMPLE_RATE
kws_list.append(result['kws']) kws_list.append(result['kws'])
data = fin.readframes(self._nframe)


def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
return inputs return inputs

+ 2
- 0
modelscope/pipelines/cv/action_detection_pipeline.py View File

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import math import math
import os.path as osp import os.path as osp
from typing import Any, Dict from typing import Any, Dict


+ 7
- 4
modelscope/pipelines/cv/easycv_pipelines/base.py View File

@@ -10,6 +10,7 @@ from modelscope.hub.snapshot_download import snapshot_download
from modelscope.pipelines.util import is_official_hub_path from modelscope.pipelines.util import is_official_hub_path
from modelscope.utils.config import Config from modelscope.utils.config import Config
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
from modelscope.utils.device import create_device




class EasyCVPipeline(object): class EasyCVPipeline(object):
@@ -53,16 +54,19 @@ class EasyCVPipeline(object):
), f'Not find "{ModelFile.CONFIGURATION}" in model directory!' ), f'Not find "{ModelFile.CONFIGURATION}" in model directory!'


self.cfg = Config.from_file(self.config_file) self.cfg = Config.from_file(self.config_file)
self.predict_op = self._build_predict_op()
if 'device' in kwargs:
kwargs['device'] = create_device(kwargs['device'])
self.predict_op = self._build_predict_op(**kwargs)


def _build_predict_op(self):
def _build_predict_op(self, **kwargs):
"""Build EasyCV predictor.""" """Build EasyCV predictor."""
from easycv.predictors.builder import build_predictor from easycv.predictors.builder import build_predictor


easycv_config = self._to_easycv_config() easycv_config = self._to_easycv_config()
pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
'model_path': self.model_path, 'model_path': self.model_path,
'config_file': easycv_config
'config_file': easycv_config,
**kwargs
}) })
return pipeline_op return pipeline_op


@@ -91,5 +95,4 @@ class EasyCVPipeline(object):
return easycv_config return easycv_config


def __call__(self, inputs) -> Any: def __call__(self, inputs) -> Any:
# TODO: support image url
return self.predict_op(inputs) return self.predict_op(inputs)

+ 7
- 5
modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py View File

@@ -4,7 +4,6 @@ from typing import Any
from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys from modelscope.outputs import OutputKeys
from modelscope.pipelines.builder import PIPELINES from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import LoadImage
from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.constant import ModelFile, Tasks
from .base import EasyCVPipeline from .base import EasyCVPipeline


@@ -34,8 +33,11 @@ class Face2DKeypointsPipeline(EasyCVPipeline):
return self.predict_op.show_result(img, points, scale, save_path) return self.predict_op.show_result(img, points, scale, save_path)


def __call__(self, inputs) -> Any: def __call__(self, inputs) -> Any:
output = self.predict_op(inputs)[0][0]
points = output['point']
poses = output['pose']
outputs = self.predict_op(inputs)


return {OutputKeys.KEYPOINTS: points, OutputKeys.POSES: poses}
results = [{
OutputKeys.KEYPOINTS: output['point'],
OutputKeys.POSES: output['pose']
} for output in outputs]

return results

+ 3
- 2
modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py View File

@@ -28,7 +28,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
*args, *args,
**kwargs) **kwargs)


def _build_predict_op(self):
def _build_predict_op(self, **kwargs):
"""Build EasyCV predictor.""" """Build EasyCV predictor."""
from easycv.predictors.builder import build_predictor from easycv.predictors.builder import build_predictor
detection_predictor_type = self.cfg['DETECTION']['type'] detection_predictor_type = self.cfg['DETECTION']['type']
@@ -46,6 +46,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
easycv_config = self._to_easycv_config() easycv_config = self._to_easycv_config()
pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
'model_path': self.model_path, 'model_path': self.model_path,
'config_file': easycv_config
'config_file': easycv_config,
**kwargs
}) })
return pipeline_op return pipeline_op

+ 1
- 0
modelscope/pipelines/cv/image_instance_segmentation_pipeline.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os import os
from typing import Any, Dict, Optional, Union from typing import Any, Dict, Optional, Union




+ 7
- 1
modelscope/pipelines/cv/image_style_transfer_pipeline.py View File

@@ -61,7 +61,13 @@ class ImageStyleTransferPipeline(Pipeline):
def _sanitize_parameters(self, **pipeline_parameters): def _sanitize_parameters(self, **pipeline_parameters):
return pipeline_parameters, {}, {} return pipeline_parameters, {}, {}


def preprocess(self, content: Input, style: Input) -> Dict[str, Any]:
def preprocess(self,
content: Input,
style: Input = None) -> Dict[str, Any]:
if type(content) is dict: # for demo service
style = content['style']
content = content['content']

content = LoadImage.convert_to_ndarray(content) content = LoadImage.convert_to_ndarray(content)
if len(content.shape) == 2: if len(content.shape) == 2:
content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR) content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR)


+ 1
- 0
modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict from typing import Any, Dict


import torch import torch


+ 1
- 0
modelscope/preprocessors/movie_scene_segmentation/__init__.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING from typing import TYPE_CHECKING


from modelscope.utils.import_utils import LazyImportModule from modelscope.utils.import_utils import LazyImportModule


+ 2
- 6
modelscope/preprocessors/movie_scene_segmentation/transforms.py View File

@@ -1,9 +1,5 @@
# ------------------------------------------------------------------------------------
# The codes below partially refer to the BaSSL
# Copyright (c) 2021 KakaoBrain. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# Github: https://github.com/kakaobrain/bassl
# ------------------------------------------------------------------------------------
# The implementation here is modified based on BaSSL,
# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
import numbers import numbers
import os.path as osp import os.path as osp
import random import random


+ 2
- 1
modelscope/preprocessors/multi_modal.py View File

@@ -186,7 +186,8 @@ class MPlugPreprocessor(Preprocessor):
image = image.convert('RGB') image = image.convert('RGB')
image = self.patch_resize_transform(image) image = self.patch_resize_transform(image)
question = '' if self.cfg.task == Tasks.image_captioning \ question = '' if self.cfg.task == Tasks.image_captioning \
else data[1 if isinstance(data, tuple) else 'question']
else data[1 if isinstance(data, tuple)
else ('text' if 'text' in data else 'question')]
question = self.tokenizer( question = self.tokenizer(
question.lower(), question.lower(),
padding='max_length', padding='max_length',


+ 1
- 0
modelscope/trainers/cv/image_instance_segmentation_trainer.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Trainers from modelscope.metainfo import Trainers
from modelscope.trainers.builder import TRAINERS from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.trainer import EpochBasedTrainer from modelscope.trainers.trainer import EpochBasedTrainer


+ 1
- 0
modelscope/trainers/cv/movie_scene_segmentation_trainer.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Trainers from modelscope.metainfo import Trainers
from modelscope.trainers.builder import TRAINERS from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.trainer import EpochBasedTrainer from modelscope.trainers.trainer import EpochBasedTrainer


+ 20
- 10
modelscope/trainers/trainer.py View File

@@ -37,8 +37,8 @@ from modelscope.utils.device import create_device, verify_device
from modelscope.utils.file_utils import func_receive_dict_inputs from modelscope.utils.file_utils import func_receive_dict_inputs
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from modelscope.utils.registry import build_from_cfg from modelscope.utils.registry import build_from_cfg
from modelscope.utils.torch_utils import (get_dist_info, init_dist,
set_random_seed)
from modelscope.utils.torch_utils import (get_dist_info, get_local_rank,
init_dist, set_random_seed)
from .base import BaseTrainer from .base import BaseTrainer
from .builder import TRAINERS from .builder import TRAINERS
from .default_config import DEFAULT_CONFIG from .default_config import DEFAULT_CONFIG
@@ -155,8 +155,17 @@ class EpochBasedTrainer(BaseTrainer):
if self.eval_preprocessor is not None: if self.eval_preprocessor is not None:
self.eval_preprocessor.mode = ModeKeys.EVAL self.eval_preprocessor.mode = ModeKeys.EVAL


if kwargs.get('launcher', None) is not None:
init_dist(kwargs['launcher'])

_, world_size = get_dist_info()
self._dist = world_size > 1

device_name = kwargs.get('device', 'gpu') device_name = kwargs.get('device', 'gpu')
verify_device(device_name)
if self._dist:
local_rank = get_local_rank()
device_name = f'cuda:{local_rank}'

self.device = create_device(device_name) self.device = create_device(device_name)


self.train_dataset = self.to_task_dataset( self.train_dataset = self.to_task_dataset(
@@ -219,11 +228,6 @@ class EpochBasedTrainer(BaseTrainer):


self.use_fp16 = kwargs.get('use_fp16', False) self.use_fp16 = kwargs.get('use_fp16', False)


if kwargs.get('launcher', None) is not None:
init_dist(kwargs['launcher'])

self._dist = get_dist_info()[1] > 1

# model placement # model placement
if self.device.type == 'cuda': if self.device.type == 'cuda':
self.model.to(self.device) self.model.to(self.device)
@@ -532,8 +536,14 @@ class EpochBasedTrainer(BaseTrainer):
model.train() model.train()
self._mode = ModeKeys.TRAIN self._mode = ModeKeys.TRAIN
# call model forward but not __call__ to skip postprocess # call model forward but not __call__ to skip postprocess
if isinstance(inputs,
Mapping) and not func_receive_dict_inputs(model.forward):

if is_parallel(model):
receive_dict_inputs = func_receive_dict_inputs(
model.module.forward)
else:
receive_dict_inputs = func_receive_dict_inputs(model.forward)

if isinstance(inputs, Mapping) and not receive_dict_inputs:
train_outputs = model.forward(**inputs) train_outputs = model.forward(**inputs)
else: else:
train_outputs = model.forward(inputs) train_outputs = model.forward(inputs)


+ 6
- 6
modelscope/trainers/utils/inference.py View File

@@ -9,9 +9,9 @@ from collections.abc import Mapping


import torch import torch
from torch import distributed as dist from torch import distributed as dist
from torch.nn.parallel import DistributedDataParallel
from tqdm import tqdm from tqdm import tqdm


from modelscope.trainers.parallel.utils import is_parallel
from modelscope.utils.data_utils import to_device from modelscope.utils.data_utils import to_device
from modelscope.utils.file_utils import func_receive_dict_inputs from modelscope.utils.file_utils import func_receive_dict_inputs
from modelscope.utils.torch_utils import (broadcast, get_dist_info, is_master, from modelscope.utils.torch_utils import (broadcast, get_dist_info, is_master,
@@ -138,7 +138,10 @@ def multi_gpu_test(model,
data_len = data_loader_iters_per_gpu * world_size data_len = data_loader_iters_per_gpu * world_size
desc = 'Total test iterations with multi gpus' desc = 'Total test iterations with multi gpus'


time.sleep(2) # This line can prevent deadlock problem in some cases.
if is_parallel(model):
receive_dict_inputs = func_receive_dict_inputs(model.module.forward)
else:
receive_dict_inputs = func_receive_dict_inputs(model.forward)


count = 0 count = 0
with tqdm(total=data_len, desc=desc) as pbar: with tqdm(total=data_len, desc=desc) as pbar:
@@ -146,10 +149,7 @@ def multi_gpu_test(model,
data = to_device(data, device) data = to_device(data, device)
data_list.append(data) data_list.append(data)
with torch.no_grad(): with torch.no_grad():
forward_func = model.module.forward if \
isinstance(model, DistributedDataParallel) else model.forward
if isinstance(data, Mapping
) and not func_receive_dict_inputs(forward_func):
if isinstance(data, Mapping) and not receive_dict_inputs:
result = model.forward(**data) result = model.forward(**data)
else: else:
result = model.forward(data) result = model.forward(data)


+ 53
- 9
modelscope/utils/demo_utils.py View File

@@ -123,7 +123,7 @@ INPUT_EXAMPLES = {
'urlPaths': { 'urlPaths': {
'outUrls': [{ 'outUrls': [{
'outputKey': OutputKeys.OUTPUT_PCM, 'outputKey': OutputKeys.OUTPUT_PCM,
'fileType': 'wav'
'fileType': 'pcm'
}] }]
} }
}, },
@@ -134,7 +134,7 @@ INPUT_EXAMPLES = {
'urlPaths': { 'urlPaths': {
'outUrls': [{ 'outUrls': [{
'outputKey': OutputKeys.OUTPUT_PCM, 'outputKey': OutputKeys.OUTPUT_PCM,
'fileType': 'wav'
'fileType': 'pcm'
}] }]
} }
}, },
@@ -147,7 +147,13 @@ INPUT_EXAMPLES = {
'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png', 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png',
'a blue turtle-like pokemon with round head' 'a blue turtle-like pokemon with round head'
], ],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}]
}
}, },
TasksIODescriptions.visual_question_answering: { TasksIODescriptions.visual_question_answering: {
'task': 'task':
@@ -156,7 +162,16 @@ INPUT_EXAMPLES = {
'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png', 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png',
'what is grown on the plant?' 'what is grown on the plant?'
], ],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}],
'outUrls': [{
'outputKey': 'text'
}]
}
}, },
TasksIODescriptions.visual_entailment: { TasksIODescriptions.visual_entailment: {
'task': 'task':
@@ -165,7 +180,14 @@ INPUT_EXAMPLES = {
'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg', 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg',
'there are two birds.', 'test' 'there are two birds.', 'test'
], ],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}],
'outUrls': [{}]
}
}, },
TasksIODescriptions.generative_multi_modal_embedding: { TasksIODescriptions.generative_multi_modal_embedding: {
'task': 'task':
@@ -174,7 +196,14 @@ INPUT_EXAMPLES = {
'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg', 'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg',
'dogs playing in the grass' 'dogs playing in the grass'
], ],
'urlPaths': {}
'urlPaths': {
'inUrls': [{
'name': 'image'
}, {
'name': 'text'
}],
'outUrls': [{}]
}
}, },
} }


@@ -192,7 +221,13 @@ class DemoCompatibilityCheck(object):
print('testing demo: ', self.task, self.model_id) print('testing demo: ', self.task, self.model_id)
test_pipline = pipeline(self.task, self.model_id) test_pipline = pipeline(self.task, self.model_id)
req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]] req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]]
output = test_pipline(preprocess(req))
inputs = preprocess(req)
params = req.get('parameters', {})
# modelscope inference
if params != {}:
output = test_pipline(inputs, **params)
else:
output = test_pipline(inputs)
json.dumps(output, cls=NumpyEncoder) json.dumps(output, cls=NumpyEncoder)
result = postprocess(req, output) result = postprocess(req, output)
print(result) print(result)
@@ -215,11 +250,21 @@ class NumpyEncoder(json.JSONEncoder):




def preprocess(req): def preprocess(req):
in_urls = req.get('urlPaths').get('inUrls')
if len(req['inputs']) == 1: if len(req['inputs']) == 1:
inputs = req['inputs'][0] inputs = req['inputs'][0]
else: else:
inputs = tuple(req['inputs']) inputs = tuple(req['inputs'])
return inputs
if in_urls is None or len(in_urls) == 0:
return inputs

inputs_dict = {}
for i, in_url in enumerate(in_urls):
input_name = in_url.get('name')
if input_name is None or input_name == '':
return inputs
inputs_dict[input_name] = req['inputs'][i]
return inputs_dict




def postprocess(req, resp): def postprocess(req, resp):
@@ -242,4 +287,3 @@ def postprocess(req, resp):
out_mem_file = io.BytesIO() out_mem_file = io.BytesIO()
out_mem_file.write(new_resp.get(output_key)) out_mem_file.write(new_resp.get(output_key))
return type(out_mem_file) return type(out_mem_file)
# TODO(lingcai.wl): support more file type

+ 4
- 0
modelscope/utils/torch_utils.py View File

@@ -115,6 +115,10 @@ def get_dist_info() -> Tuple[int, int]:
return rank, world_size return rank, world_size




def get_local_rank():
return int(os.environ.get('LOCAL_RANK', 0))


def is_master(): def is_master():
rank, _ = get_dist_info() rank, _ = get_dist_info()
return rank == 0 return rank == 0


+ 1
- 1
modelscope/version.py View File

@@ -1 +1 @@
__version__ = '0.4.3'
__version__ = '0.4.4'

+ 1
- 1
requirements/cv.txt View File

@@ -14,7 +14,7 @@ mmcls>=0.21.0
mmdet>=2.25.0 mmdet>=2.25.0
networkx>=2.5 networkx>=2.5
onnxruntime>=1.10 onnxruntime>=1.10
pai-easycv>=0.6.0
pai-easycv>=0.6.3.4
pandas pandas
psutil psutil
regex regex


+ 40
- 19
tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py View File

@@ -1,10 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates. # Copyright (c) Alibaba, Inc. and its affiliates.
import unittest import unittest
from distutils.version import LooseVersion


import easycv
import numpy as np import numpy as np
from PIL import Image from PIL import Image


from modelscope.metainfo import Pipelines
from modelscope.pipelines import pipeline from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level from modelscope.utils.test_utils import test_level
@@ -14,7 +15,7 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase):


img_path = 'data/test/images/image_segmentation.jpg' img_path = 'data/test/images/image_segmentation.jpg'


def _internal_test__(self, model_id):
def _internal_test_(self, model_id):
img = np.asarray(Image.open(self.img_path)) img = np.asarray(Image.open(self.img_path))


semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id) semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id)
@@ -24,41 +25,61 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase):


results = outputs[0] results = outputs[0]
self.assertListEqual( self.assertListEqual(
list(img.shape)[:2], list(results['seg_pred'][0].shape))
self.assertListEqual(results['seg_pred'][0][1, 4:10].tolist(),
[161 for i in range(6)])
self.assertListEqual(results['seg_pred'][0][-1, -10:].tolist(),
[133 for i in range(10)])
list(img.shape)[:2], list(results['seg_pred'].shape))


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def _internal_test_batch_(self, model_id, num_samples=2, batch_size=2):
# TODO: support in the future
img = np.asarray(Image.open(self.img_path))
num_samples = num_samples
batch_size = batch_size
semantic_seg = pipeline(
task=Tasks.image_segmentation,
model=model_id,
batch_size=batch_size)
outputs = semantic_seg([self.img_path] * num_samples)

self.assertEqual(semantic_seg.predict_op.batch_size, batch_size)
self.assertEqual(len(outputs), num_samples)

for output in outputs:
self.assertListEqual(
list(img.shape)[:2], list(output['seg_pred'].shape))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b0(self): def test_segformer_b0(self):
model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k' model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b1(self): def test_segformer_b1(self):
model_id = 'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k' model_id = 'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b2(self): def test_segformer_b2(self):
model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k' model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b3(self): def test_segformer_b3(self):
model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k' model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b4(self): def test_segformer_b4(self):
model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k' model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_segformer_b5(self): def test_segformer_b5(self):
model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k' model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k'
self._internal_test__(model_id)
self._internal_test_(model_id)
self._internal_test_batch_(model_id)




if __name__ == '__main__': if __name__ == '__main__':


+ 1
- 1
tests/pipelines/test_face_2d_keypoints.py View File

@@ -18,7 +18,7 @@ class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase):


face_2d_keypoints_align = pipeline( face_2d_keypoints_align = pipeline(
task=Tasks.face_2d_keypoints, model=model_id) task=Tasks.face_2d_keypoints, model=model_id)
output = face_2d_keypoints_align(img_path)
output = face_2d_keypoints_align(img_path)[0]


output_keypoints = output[OutputKeys.KEYPOINTS] output_keypoints = output[OutputKeys.KEYPOINTS]
output_pose = output[OutputKeys.POSES] output_pose = output[OutputKeys.POSES]


+ 11
- 0
tests/pipelines/test_key_word_spotting_farfield.py View File

@@ -8,6 +8,7 @@ from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level from modelscope.utils.test_utils import test_level


TEST_SPEECH_FILE = 'data/test/audios/3ch_nihaomiya.wav' TEST_SPEECH_FILE = 'data/test/audios/3ch_nihaomiya.wav'
TEST_SPEECH_FILE_MONO = 'data/test/audios/1ch_nihaomiya.wav'
TEST_SPEECH_URL = 'https://modelscope.cn/api/v1/models/damo/' \ TEST_SPEECH_URL = 'https://modelscope.cn/api/v1/models/damo/' \
'speech_dfsmn_kws_char_farfield_16k_nihaomiya/repo' \ 'speech_dfsmn_kws_char_farfield_16k_nihaomiya/repo' \
'?Revision=master&FilePath=examples/3ch_nihaomiya.wav' '?Revision=master&FilePath=examples/3ch_nihaomiya.wav'
@@ -26,6 +27,16 @@ class KWSFarfieldTest(unittest.TestCase):
self.assertEqual(len(result['kws_list']), 5) self.assertEqual(len(result['kws_list']), 5)
print(result['kws_list'][-1]) print(result['kws_list'][-1])


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_mono(self):
kws = pipeline(Tasks.keyword_spotting, model=self.model_id)
inputs = {
'input_file': os.path.join(os.getcwd(), TEST_SPEECH_FILE_MONO)
}
result = kws(inputs)
self.assertEqual(len(result['kws_list']), 5)
print(result['kws_list'][-1])

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_url(self): def test_url(self):
kws = pipeline(Tasks.keyword_spotting, model=self.model_id) kws = pipeline(Tasks.keyword_spotting, model=self.model_id)


+ 8
- 8
tests/pipelines/test_mplug_tasks.py View File

@@ -44,8 +44,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
'damo/mplug_visual-question-answering_coco_large_en') 'damo/mplug_visual-question-answering_coco_large_en')
pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model)
image = Image.open('data/test/images/image_mplug_vqa.jpg') image = Image.open('data/test/images/image_mplug_vqa.jpg')
question = 'What is the woman doing?'
input = {'image': image, 'question': question}
text = 'What is the woman doing?'
input = {'image': image, 'text': text}
result = pipeline_vqa(input) result = pipeline_vqa(input)
print(result) print(result)


@@ -54,8 +54,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
model = 'damo/mplug_visual-question-answering_coco_large_en' model = 'damo/mplug_visual-question-answering_coco_large_en'
pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model)
image = Image.open('data/test/images/image_mplug_vqa.jpg') image = Image.open('data/test/images/image_mplug_vqa.jpg')
question = 'What is the woman doing?'
input = {'image': image, 'question': question}
text = 'What is the woman doing?'
input = {'image': image, 'text': text}
result = pipeline_vqa(input) result = pipeline_vqa(input)
print(result) print(result)


@@ -65,8 +65,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
'damo/mplug_image-text-retrieval_flickr30k_large_en') 'damo/mplug_image-text-retrieval_flickr30k_large_en')
pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model)
image = Image.open('data/test/images/image-text-retrieval.jpg') image = Image.open('data/test/images/image-text-retrieval.jpg')
question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'question': question}
text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'text': text}
result = pipeline_retrieval(input) result = pipeline_retrieval(input)
print(result) print(result)


@@ -75,8 +75,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
model = 'damo/mplug_image-text-retrieval_flickr30k_large_en' model = 'damo/mplug_image-text-retrieval_flickr30k_large_en'
pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model)
image = Image.open('data/test/images/image-text-retrieval.jpg') image = Image.open('data/test/images/image-text-retrieval.jpg')
question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'question': question}
text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
input = {'image': image, 'text': text}
result = pipeline_retrieval(input) result = pipeline_retrieval(input)
print(result) print(result)




+ 6
- 4
tests/pipelines/test_ofa_tasks.py View File

@@ -147,8 +147,10 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = ofa_pipe(input) result = ofa_pipe(input)
print(result) print(result)
image_name = image.split('/')[-2] image_name = image.split('/')[-2]
self.save_img(image, result[OutputKeys.BOXES],
osp.join('large_en_model_' + image_name + '.png'))
self.save_img(
image,
result[OutputKeys.BOXES][0], # just one box
osp.join('large_en_model_' + image_name + '.png'))


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_visual_grounding_with_name(self): def test_run_with_visual_grounding_with_name(self):
@@ -161,7 +163,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = ofa_pipe(input) result = ofa_pipe(input)
print(result) print(result)
image_name = image.split('/')[-2] image_name = image.split('/')[-2]
self.save_img(image, result[OutputKeys.BOXES],
self.save_img(image, result[OutputKeys.BOXES][0],
osp.join('large_en_name_' + image_name + '.png')) osp.join('large_en_name_' + image_name + '.png'))


@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@@ -174,7 +176,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck):
result = ofa_pipe(input) result = ofa_pipe(input)
print(result) print(result)
image_name = image.split('/')[-1] image_name = image.split('/')[-1]
self.save_img(image, result[OutputKeys.BOXES],
self.save_img(image, result[OutputKeys.BOXES][0],
osp.join('large_zh_name_' + image_name)) osp.join('large_zh_name_' + image_name))


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')


+ 1
- 0
tests/run_config.yaml View File

@@ -9,6 +9,7 @@ isolated: # test cases that may require excessive anmount of GPU memory, which
- test_image_super_resolution.py - test_image_super_resolution.py
- test_easycv_trainer.py - test_easycv_trainer.py
- test_segformer.py - test_segformer.py
- test_segmentation_pipeline.py


envs: envs:
default: # default env, case not in other env will in default, pytorch. default: # default env, case not in other env will in default, pytorch.


+ 32
- 2
tests/trainers/test_trainer_gpu.py View File

@@ -53,7 +53,18 @@ class DummyModel(nn.Module, Model):
return dict(logits=x, loss=loss) return dict(logits=x, loss=loss)




def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs):
class DummyModelForwardInputs(DummyModel):

def forward(self, inputs):
feat, labels = inputs['feat'], inputs['labels']
return super().forward(feat, labels)


def train_func(work_dir,
dist=False,
iterable_dataset=False,
forward_inputs=False,
**kwargs):
json_cfg = { json_cfg = {
'task': Tasks.image_classification, 'task': Tasks.image_classification,
'train': { 'train': {
@@ -81,7 +92,10 @@ def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs):
with open(config_path, 'w') as f: with open(config_path, 'w') as f:
json.dump(json_cfg, f) json.dump(json_cfg, f)


model = DummyModel()
if forward_inputs:
model = DummyModelForwardInputs()
else:
model = DummyModel()
optimmizer = SGD(model.parameters(), lr=0.01) optimmizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimmizer, 2) lr_scheduler = StepLR(optimmizer, 2)
trainer_name = Trainers.default trainer_name = Trainers.default
@@ -273,6 +287,22 @@ class TrainerTestMultiGpus(DistributedTestCase):
for i in [1, 3, 5]: for i in [1, 3, 5]:
self.assertIn(MetricKeys.ACCURACY, lines[i]) self.assertIn(MetricKeys.ACCURACY, lines[i])


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_multi_gpus_forward_inputs(self):
self.start(
train_func,
num_gpus=2,
work_dir=self.tmp_dir,
dist=True,
forward_inputs=True)

results_files = os.listdir(self.tmp_dir)
json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
self.assertEqual(len(json_files), 1)
self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files)
self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files)
self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files)

# TODO: support iters_per_epoch for dist mode # TODO: support iters_per_epoch for dist mode
@unittest.skipIf(True, 'need to adapt to DistributedSampler') @unittest.skipIf(True, 'need to adapt to DistributedSampler')
def test_multi_gpus_with_iters_per_epoch(self): def test_multi_gpus_with_iters_per_epoch(self):


Loading…
Cancel
Save