diff --git a/data/test/audios/1ch_nihaomiya.wav b/data/test/audios/1ch_nihaomiya.wav new file mode 100644 index 00000000..4618d412 --- /dev/null +++ b/data/test/audios/1ch_nihaomiya.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f7f5a0a4efca1e83463cb44460c66b56fb7cd673eb6da37924637bc05ef758d +size 1440044 diff --git a/modelscope/metrics/image_instance_segmentation_metric.py b/modelscope/metrics/image_instance_segmentation_metric.py index 7deafbce..86a19d13 100644 --- a/modelscope/metrics/image_instance_segmentation_metric.py +++ b/modelscope/metrics/image_instance_segmentation_metric.py @@ -1,3 +1,5 @@ +# Part of the implementation is borrowed and modified from MMDetection, publicly available at +# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py import os.path as osp import tempfile from collections import OrderedDict diff --git a/modelscope/metrics/movie_scene_segmentation_metric.py b/modelscope/metrics/movie_scene_segmentation_metric.py index 56bdbd1c..65725b6f 100644 --- a/modelscope/metrics/movie_scene_segmentation_metric.py +++ b/modelscope/metrics/movie_scene_segmentation_metric.py @@ -1,3 +1,5 @@ +# The implementation here is modified based on BaSSL, +# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl from typing import Dict import numpy as np diff --git a/modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py b/modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py index 3e7609e1..2007688d 100644 --- a/modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py +++ b/modelscope/models/cv/image_instance_segmentation/backbones/swin_transformer.py @@ -1,5 +1,5 @@ -# Modified from: https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py - +# The implementation is adopted from Swin Transformer, made publicly available under the MIT License at +# https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py import numpy as np import torch import torch.nn as nn diff --git a/modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py b/modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py index 30e70f82..ff83271e 100644 --- a/modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py +++ b/modelscope/models/cv/image_instance_segmentation/cascade_mask_rcnn_swin.py @@ -1,3 +1,5 @@ +# Part of the implementation is borrowed and modified from MMDetection, publicly available at +# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py import os from collections import OrderedDict diff --git a/modelscope/models/cv/image_instance_segmentation/datasets/__init__.py b/modelscope/models/cv/image_instance_segmentation/datasets/__init__.py index cca1432f..1b096fb3 100644 --- a/modelscope/models/cv/image_instance_segmentation/datasets/__init__.py +++ b/modelscope/models/cv/image_instance_segmentation/datasets/__init__.py @@ -1 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
from .transforms import build_preprocess_transform diff --git a/modelscope/models/cv/image_instance_segmentation/datasets/transforms.py b/modelscope/models/cv/image_instance_segmentation/datasets/transforms.py index c2c11286..f0dde759 100644 --- a/modelscope/models/cv/image_instance_segmentation/datasets/transforms.py +++ b/modelscope/models/cv/image_instance_segmentation/datasets/transforms.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. import os.path as osp import numpy as np @@ -51,9 +52,9 @@ class LoadImageFromFile: """Load an image from file. Required keys are "img_prefix" and "img_info" (a dict that must contain the - key "filename"). Added or updated keys are "filename", "img", "img_shape", - "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), - "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). + key "filename", "ann_file", and "classes"). Added or updated keys are + "filename", "ori_filename", "img", "img_shape", "ori_shape" (same as `img_shape`), + "img_fields", "ann_file" (path to annotation file) and "classes". Args: to_float32 (bool): Whether to convert the loaded image to a float32 @@ -73,7 +74,7 @@ class LoadImageFromFile: """Call functions to load image and get image meta information. Args: - results (dict): Result dict from :obj:`ImageInstanceSegmentationDataset`. + results (dict): Result dict from :obj:`ImageInstanceSegmentationCocoDataset`. Returns: dict: The dict contains loaded image and meta information. diff --git a/modelscope/models/cv/image_instance_segmentation/model.py b/modelscope/models/cv/image_instance_segmentation/model.py index 2be59623..a56a1608 100644 --- a/modelscope/models/cv/image_instance_segmentation/model.py +++ b/modelscope/models/cv/image_instance_segmentation/model.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. import os from typing import Any, Dict diff --git a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py index 531e2efd..6058cd73 100644 --- a/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py +++ b/modelscope/models/cv/image_instance_segmentation/postprocess_utils.py @@ -1,3 +1,5 @@ +# Part of the implementation is borrowed and modified from MMDetection, publicly available at +# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/visualization/image.py import itertools import cv2 diff --git a/modelscope/models/cv/movie_scene_segmentation/model.py b/modelscope/models/cv/movie_scene_segmentation/model.py index e9576963..676b5ac1 100644 --- a/modelscope/models/cv/movie_scene_segmentation/model.py +++ b/modelscope/models/cv/movie_scene_segmentation/model.py @@ -1,3 +1,6 @@ +# The implementation here is modified based on BaSSL, +# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl + import os import os.path as osp from typing import Any, Dict diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/__init__.py b/modelscope/models/cv/movie_scene_segmentation/utils/__init__.py index 3682726f..e5a929aa 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/__init__.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/__init__.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
from .save_op import get_pred_boundary, pred2scene, scene2video from .shot_encoder import resnet50 from .trn import TransformerCRN diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/head.py b/modelscope/models/cv/movie_scene_segmentation/utils/head.py index 20a87e66..d6468c53 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/head.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/head.py @@ -1,9 +1,5 @@ -# ------------------------------------------------------------------------------------ -# BaSSL -# Copyright (c) 2021 KakaoBrain. All Rights Reserved. -# Licensed under the Apache License, Version 2.0 [see LICENSE for details] -# Github: https://github.com/kakaobrain/bassl -# ------------------------------------------------------------------------------------ +# The implementation here is modified based on BaSSL, +# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl import torch.nn as nn import torch.nn.functional as F diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py index d7c8c0ed..cf26d21a 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/save_op.py @@ -1,7 +1,5 @@ -# ---------------------------------------------------------------------------------- -# The codes below partially refer to the SceneSeg LGSS. -# Github: https://github.com/AnyiRao/SceneSeg -# ---------------------------------------------------------------------------------- +# The implementation here is modified based on SceneSeg, +# originally Apache 2.0 License and publicly avaialbe at https://github.com/AnyiRao/SceneSeg import os import os.path as osp import subprocess diff --git a/modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py b/modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py index 7ad1907f..11d20b13 100644 --- a/modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py +++ b/modelscope/models/cv/movie_scene_segmentation/utils/shot_encoder.py @@ -1,6 +1,4 @@ -""" -Modified from original implementation in torchvision -""" +# The implementation is adopted from torchvision from typing import Any, Callable, List, Optional, Type, Union diff --git a/modelscope/models/cv/object_detection/mmdet_model.py b/modelscope/models/cv/object_detection/mmdet_model.py index 7bf81349..485d440a 100644 --- a/modelscope/models/cv/object_detection/mmdet_model.py +++ b/modelscope/models/cv/object_detection/mmdet_model.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
import os.path as osp import numpy as np diff --git a/modelscope/models/cv/object_detection/mmdet_ms/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/__init__.py index 2e47ce76..3a1fdd0b 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .backbones import ViT from .dense_heads import AnchorNHead, RPNNHead from .necks import FPNF diff --git a/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py index 3b34dad6..c0697d48 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/backbones/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .vit import ViT __all__ = ['ViT'] diff --git a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py index 0fba8c00..0d34e996 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .anchor_head import AnchorNHead from .rpn_head import RPNNHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py index b4114652..d4ea5282 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/anchor_head.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from mmdet.models.builder import HEADS from mmdet.models.dense_heads import AnchorHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py index f53368ce..8e934a5c 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/dense_heads/rpn_head.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet import copy import torch diff --git a/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py index 5b0b6210..d164987e 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/necks/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .fpn import FPNF __all__ = ['FPNF'] diff --git a/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py b/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py index 52529b28..5f8648ce 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/necks/fpn.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet import torch.nn as nn import torch.nn.functional as F from mmcv.runner import BaseModule, auto_fp16 diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py index a6be3775..658280df 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .bbox_heads import (ConvFCBBoxNHead, Shared2FCBBoxNHead, Shared4Conv1FCBBoxNHead) from .mask_heads import FCNMaskNHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py index 0d4d5b6b..61d93503 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .convfc_bbox_head import (ConvFCBBoxNHead, Shared2FCBBoxNHead, Shared4Conv1FCBBoxNHead) diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py index d2e04b80..726329a1 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/bbox_heads/convfc_bbox_head.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet import torch.nn as nn from mmdet.models.builder import HEADS from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py index 8f816850..043e62a0 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .fcn_mask_head import FCNMaskNHead __all__ = ['FCNMaskNHead'] diff --git a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py index e5aedc98..335f6b8f 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/roi_heads/mask_heads/fcn_mask_head.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from warnings import warn import numpy as np diff --git a/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py b/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py index 971a0232..34f240c6 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/utils/__init__.py @@ -1,3 +1,5 @@ +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from .checkpoint import load_checkpoint from .convModule_norm import ConvModule_Norm diff --git a/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py b/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py index 593af1cc..7833f592 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/utils/checkpoint.py @@ -1,5 +1,6 @@ # Copyright (c) Open-MMLab. All rights reserved. 
-# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet import io import os import os.path as osp diff --git a/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py b/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py index d81c24e1..a15780f7 100644 --- a/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py +++ b/modelscope/models/cv/object_detection/mmdet_ms/utils/convModule_norm.py @@ -1,5 +1,5 @@ -# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet - +# Implementation in this file is modified based on ViTAE-Transformer +# Originally Apache 2.0 License and publicly avaialbe at https://github.com/ViTAE-Transformer/ViTDet from mmcv.cnn import ConvModule diff --git a/modelscope/models/cv/salient_detection/models/__init__.py b/modelscope/models/cv/salient_detection/models/__init__.py index 0850c33d..8ea7a5d3 100644 --- a/modelscope/models/cv/salient_detection/models/__init__.py +++ b/modelscope/models/cv/salient_detection/models/__init__.py @@ -1 +1,3 @@ +# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License +# source code avaiable via https://github.com/xuebinqin/U-2-Net from .u2net import U2NET diff --git a/modelscope/models/cv/salient_detection/models/u2net.py b/modelscope/models/cv/salient_detection/models/u2net.py index 0a0a4511..05dbf7ad 100644 --- a/modelscope/models/cv/salient_detection/models/u2net.py +++ b/modelscope/models/cv/salient_detection/models/u2net.py @@ -1,4 +1,5 @@ -# Implementation in this file is modifed from source code avaiable via https://github.com/xuebinqin/U-2-Net +# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License +# source code avaiable via https://github.com/xuebinqin/U-2-Net import torch import torch.nn as nn import torch.nn.functional as F diff --git a/modelscope/models/cv/salient_detection/salient_model.py b/modelscope/models/cv/salient_detection/salient_model.py index 539d1f24..6e617f58 100644 --- a/modelscope/models/cv/salient_detection/salient_model.py +++ b/modelscope/models/cv/salient_detection/salient_model.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
import os.path as osp import cv2 diff --git a/modelscope/models/multi_modal/ofa_for_all_tasks.py b/modelscope/models/multi_modal/ofa_for_all_tasks.py index cb8d3826..ab9b0357 100644 --- a/modelscope/models/multi_modal/ofa_for_all_tasks.py +++ b/modelscope/models/multi_modal/ofa_for_all_tasks.py @@ -37,9 +37,7 @@ class OfaForAllTasks(TorchModel): def __init__(self, model_dir, *args, **kwargs): super().__init__(model_dir=model_dir, *args, **kwargs) - sd = torch.load(osp.join(model_dir, ModelFile.TORCH_MODEL_BIN_FILE)) - sd = sd if 'meta' not in sd else sd['state_dict'] - model = OFAModel.from_pretrained(model_dir, state_dict=sd) + model = OFAModel.from_pretrained(model_dir) self.cfg = Config.from_file( osp.join(model_dir, ModelFile.CONFIGURATION)) self.model = model.module if hasattr(model, 'module') else model diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index ca84db4f..361b8ae0 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -44,44 +44,40 @@ def format_list(para) -> List: return para -class MsIterableDataset(torch.utils.data.IterableDataset): +class MsMapDataset(torch.utils.data.Dataset): def __init__(self, dataset: Iterable, preprocessor_list, retained_columns, - columns): - super(MsIterableDataset).__init__() + columns, to_tensor): + super(MsDataset).__init__() self.dataset = dataset self.preprocessor_list = preprocessor_list + self.to_tensor = to_tensor self.retained_columns = retained_columns self.columns = columns def __len__(self): return len(self.dataset) - def __iter__(self): - worker_info = torch.utils.data.get_worker_info() - if worker_info is None: # single-process data loading - iter_start = 0 - iter_end = len(self.dataset) - else: # in a worker process - per_worker = math.ceil( - len(self.dataset) / float(worker_info.num_workers)) - worker_id = worker_info.id - iter_start = worker_id * per_worker - iter_end = min(iter_start + per_worker, len(self.dataset)) - - for idx in range(iter_start, iter_end): - item_dict = self.dataset[idx] - res = { - k: torch.tensor(item_dict[k]) - for k in self.columns if k in self.retained_columns - } - for preprocessor in self.preprocessor_list: - res.update({ - k: v # k: torch.tensor(v) - for k, v in preprocessor(item_dict).items() - if k in self.retained_columns - }) - yield res + def type_converter(self, x): + if self.to_tensor: + return torch.tensor(x) + else: + return x + + def __getitem__(self, index): + item_dict = self.dataset[index] + res = { + k: self.type_converter(item_dict[k]) + for k in self.columns + if (not self.to_tensor) or k in self.retained_columns + } + for preprocessor in self.preprocessor_list: + res.update({ + k: self.type_converter(v) + for k, v in preprocessor(item_dict).items() + if (not self.to_tensor) or k in self.retained_columns + }) + return res class MsDataset: @@ -341,6 +337,7 @@ class MsDataset: self, preprocessors: Union[Callable, List[Callable]], columns: Union[str, List[str]] = None, + to_tensor: bool = True, ): preprocessor_list = preprocessors if isinstance( preprocessors, list) else [preprocessors] @@ -350,29 +347,29 @@ class MsDataset: columns = [ key for key in self._hf_ds.features.keys() if key in columns ] - sample = next(iter(self._hf_ds)) + retained_columns = [] + if to_tensor: + sample = next(iter(self._hf_ds)) - sample_res = {k: np.array(sample[k]) for k in columns} - for processor in preprocessor_list: - sample_res.update( - {k: np.array(v) - for k, v in processor(sample).items()}) + sample_res = {k: 
np.array(sample[k]) for k in columns} + for processor in preprocessor_list: + sample_res.update( + {k: np.array(v) + for k, v in processor(sample).items()}) - def is_numpy_number(value): - return np.issubdtype(value.dtype, np.integer) or np.issubdtype( - value.dtype, np.floating) or np.issubdtype( - value.dtype, np.bool) + def is_numpy_number(value): + return np.issubdtype(value.dtype, np.integer) or np.issubdtype( + value.dtype, np.floating) - retained_columns = [] - for k in sample_res.keys(): - if not is_numpy_number(sample_res[k]): - logger.warning( - f'Data of column {k} is non-numeric, will be removed') - # continue - retained_columns.append(k) + for k in sample_res.keys(): + if not is_numpy_number(sample_res[k]): + logger.warning( + f'Data of column {k} is non-numeric, will be removed') + continue + retained_columns.append(k) - return MsIterableDataset(self._hf_ds, preprocessor_list, - retained_columns, columns) + return MsMapDataset(self._hf_ds, preprocessor_list, retained_columns, + columns, to_tensor) def to_torch_dataset( self, @@ -380,6 +377,7 @@ class MsDataset: preprocessors: Union[Callable, List[Callable]] = None, task_name: str = None, task_data_config: ConfigDict = None, + to_tensor: bool = True, **format_kwargs, ): """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to @@ -387,13 +385,14 @@ class MsDataset: Args: preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process - every sample of the dataset. The output type of processors is dict, and each numeric field of the dict + every sample of the dataset. The output type of processors is dict, and each (numeric) field of the dict will be used as a field of torch.utils.data.Dataset. - columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only). If the - preprocessor is None, the arg columns must have at least one column. If the `preprocessors` is not None, - the output fields of processors will also be added. + columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only if + `to_tensor` is True). If the preprocessor is None, the arg columns must have at least one column. + If the `preprocessors` is not None, the output fields of processors will also be added. task_name (str, default None): task name, refer to :obj:`Tasks` for more details task_data_config (ConfigDict, default None): config dict for model object. + to_tensor (bool, default None): whether convert the data types of dataset column(s) to torch.tensor or not. format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`. 
Returns: @@ -410,7 +409,7 @@ class MsDataset: return build_task_dataset(task_data_config, task_name) if preprocessors is not None: return self.to_torch_dataset_with_processors( - preprocessors, columns=columns) + preprocessors, columns=columns, to_tensor=to_tensor) else: self._hf_ds.reset_format() self._hf_ds.set_format( diff --git a/modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py b/modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py index 10cf7bfb..1c7bc249 100644 --- a/modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py +++ b/modelscope/msdatasets/task_datasets/image_instance_segmentation_coco_dataset.py @@ -1,3 +1,5 @@ +# Part of the implementation is borrowed and modified from MMDetection, publicly available at +# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py import os.path as osp import numpy as np diff --git a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py index e56039ac..b1bc40f8 100644 --- a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py +++ b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/__init__.py @@ -1 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. from .movie_scene_segmentation_dataset import MovieSceneSegmentationDataset diff --git a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py index 925d6281..68cbf918 100644 --- a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py +++ b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py @@ -1,6 +1,5 @@ -# --------------------------------------------------------------------------------------------------- -# The implementation is built upon BaSSL, publicly available at https://github.com/kakaobrain/bassl -# --------------------------------------------------------------------------------------------------- +# The implementation here is modified based on BaSSL, +# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl import copy import os import os.path as osp diff --git a/modelscope/pipelines/audio/kws_farfield_pipeline.py b/modelscope/pipelines/audio/kws_farfield_pipeline.py index 62f58fee..e2f618fa 100644 --- a/modelscope/pipelines/audio/kws_farfield_pipeline.py +++ b/modelscope/pipelines/audio/kws_farfield_pipeline.py @@ -4,6 +4,9 @@ import io import wave from typing import Any, Dict +import numpy +import soundfile as sf + from modelscope.fileio import File from modelscope.metainfo import Pipelines from modelscope.outputs import OutputKeys @@ -37,7 +40,6 @@ class KWSFarfieldPipeline(Pipeline): self.model.eval() frame_size = self.INPUT_CHANNELS * self.SAMPLE_WIDTH self._nframe = self.model.size_in // frame_size - self.frame_count = 0 def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]: if isinstance(inputs, bytes): @@ -54,35 +56,36 @@ class KWSFarfieldPipeline(Pipeline): input_file = inputs['input_file'] if isinstance(input_file, str): input_file = File.read(input_file) - if isinstance(input_file, bytes): - input_file = io.BytesIO(input_file) - self.frame_count = 0 + frames, samplerate = sf.read(io.BytesIO(input_file), dtype='int16') + if len(frames.shape) == 1: + frames = 
numpy.stack((frames, frames, numpy.zeros_like(frames)), 1) + kws_list = [] - with wave.open(input_file, 'rb') as fin: - if 'output_file' in inputs: - with wave.open(inputs['output_file'], 'wb') as fout: - fout.setframerate(self.SAMPLE_RATE) - fout.setnchannels(self.OUTPUT_CHANNELS) - fout.setsampwidth(self.SAMPLE_WIDTH) - self._process(fin, kws_list, fout) - else: - self._process(fin, kws_list) + if 'output_file' in inputs: + with wave.open(inputs['output_file'], 'wb') as fout: + fout.setframerate(self.SAMPLE_RATE) + fout.setnchannels(self.OUTPUT_CHANNELS) + fout.setsampwidth(self.SAMPLE_WIDTH) + self._process(frames, kws_list, fout) + else: + self._process(frames, kws_list) return {OutputKeys.KWS_LIST: kws_list} def _process(self, - fin: wave.Wave_read, + frames: numpy.ndarray, kws_list, fout: wave.Wave_write = None): - data = fin.readframes(self._nframe) - while len(data) >= self.model.size_in: - self.frame_count += self._nframe + for start_index in range(0, frames.shape[0], self._nframe): + end_index = start_index + self._nframe + if end_index > frames.shape[0]: + end_index = frames.shape[0] + data = frames[start_index:end_index, :].tobytes() result = self.model.forward_decode(data) if fout: fout.writeframes(result['pcm']) if 'kws' in result: - result['kws']['offset'] += self.frame_count / self.SAMPLE_RATE + result['kws']['offset'] += start_index / self.SAMPLE_RATE kws_list.append(result['kws']) - data = fin.readframes(self._nframe) def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: return inputs diff --git a/modelscope/pipelines/cv/action_detection_pipeline.py b/modelscope/pipelines/cv/action_detection_pipeline.py index 72335d5b..74d1862e 100644 --- a/modelscope/pipelines/cv/action_detection_pipeline.py +++ b/modelscope/pipelines/cv/action_detection_pipeline.py @@ -1,3 +1,5 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + import math import os.path as osp from typing import Any, Dict diff --git a/modelscope/pipelines/cv/easycv_pipelines/base.py b/modelscope/pipelines/cv/easycv_pipelines/base.py index d6495f0a..8aea1146 100644 --- a/modelscope/pipelines/cv/easycv_pipelines/base.py +++ b/modelscope/pipelines/cv/easycv_pipelines/base.py @@ -10,6 +10,7 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.pipelines.util import is_official_hub_path from modelscope.utils.config import Config from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile +from modelscope.utils.device import create_device class EasyCVPipeline(object): @@ -53,16 +54,19 @@ class EasyCVPipeline(object): ), f'Not find "{ModelFile.CONFIGURATION}" in model directory!' 
self.cfg = Config.from_file(self.config_file) - self.predict_op = self._build_predict_op() + if 'device' in kwargs: + kwargs['device'] = create_device(kwargs['device']) + self.predict_op = self._build_predict_op(**kwargs) - def _build_predict_op(self): + def _build_predict_op(self, **kwargs): """Build EasyCV predictor.""" from easycv.predictors.builder import build_predictor easycv_config = self._to_easycv_config() pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { 'model_path': self.model_path, - 'config_file': easycv_config + 'config_file': easycv_config, + **kwargs }) return pipeline_op @@ -91,5 +95,4 @@ class EasyCVPipeline(object): return easycv_config def __call__(self, inputs) -> Any: - # TODO: support image url return self.predict_op(inputs) diff --git a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py index eb4d6c15..7c32e0fc 100644 --- a/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py +++ b/modelscope/pipelines/cv/easycv_pipelines/face_2d_keypoints_pipeline.py @@ -4,7 +4,6 @@ from typing import Any from modelscope.metainfo import Pipelines from modelscope.outputs import OutputKeys from modelscope.pipelines.builder import PIPELINES -from modelscope.preprocessors import LoadImage from modelscope.utils.constant import ModelFile, Tasks from .base import EasyCVPipeline @@ -34,8 +33,11 @@ class Face2DKeypointsPipeline(EasyCVPipeline): return self.predict_op.show_result(img, points, scale, save_path) def __call__(self, inputs) -> Any: - output = self.predict_op(inputs)[0][0] - points = output['point'] - poses = output['pose'] + outputs = self.predict_op(inputs) - return {OutputKeys.KEYPOINTS: points, OutputKeys.POSES: poses} + results = [{ + OutputKeys.KEYPOINTS: output['point'], + OutputKeys.POSES: output['pose'] + } for output in outputs] + + return results diff --git a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py b/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py index db66f5d2..bad0c652 100644 --- a/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py +++ b/modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py @@ -28,7 +28,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline): *args, **kwargs) - def _build_predict_op(self): + def _build_predict_op(self, **kwargs): """Build EasyCV predictor.""" from easycv.predictors.builder import build_predictor detection_predictor_type = self.cfg['DETECTION']['type'] @@ -46,6 +46,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline): easycv_config = self._to_easycv_config() pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, { 'model_path': self.model_path, - 'config_file': easycv_config + 'config_file': easycv_config, + **kwargs }) return pipeline_op diff --git a/modelscope/pipelines/cv/image_instance_segmentation_pipeline.py b/modelscope/pipelines/cv/image_instance_segmentation_pipeline.py index ce0bf907..5a0f0d7e 100644 --- a/modelscope/pipelines/cv/image_instance_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/image_instance_segmentation_pipeline.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
import os from typing import Any, Dict, Optional, Union diff --git a/modelscope/pipelines/cv/image_style_transfer_pipeline.py b/modelscope/pipelines/cv/image_style_transfer_pipeline.py index 827a0d44..64e67115 100644 --- a/modelscope/pipelines/cv/image_style_transfer_pipeline.py +++ b/modelscope/pipelines/cv/image_style_transfer_pipeline.py @@ -61,7 +61,13 @@ class ImageStyleTransferPipeline(Pipeline): def _sanitize_parameters(self, **pipeline_parameters): return pipeline_parameters, {}, {} - def preprocess(self, content: Input, style: Input) -> Dict[str, Any]: + def preprocess(self, + content: Input, + style: Input = None) -> Dict[str, Any]: + if type(content) is dict: # for demo service + style = content['style'] + content = content['content'] + content = LoadImage.convert_to_ndarray(content) if len(content.shape) == 2: content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR) diff --git a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py index 0ef0261d..b5acf17a 100644 --- a/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/movie_scene_segmentation_pipeline.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. from typing import Any, Dict import torch diff --git a/modelscope/preprocessors/movie_scene_segmentation/__init__.py b/modelscope/preprocessors/movie_scene_segmentation/__init__.py index 73da792d..b28ccabc 100644 --- a/modelscope/preprocessors/movie_scene_segmentation/__init__.py +++ b/modelscope/preprocessors/movie_scene_segmentation/__init__.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule diff --git a/modelscope/preprocessors/movie_scene_segmentation/transforms.py b/modelscope/preprocessors/movie_scene_segmentation/transforms.py index b4e57420..5b84003c 100644 --- a/modelscope/preprocessors/movie_scene_segmentation/transforms.py +++ b/modelscope/preprocessors/movie_scene_segmentation/transforms.py @@ -1,9 +1,5 @@ -# ------------------------------------------------------------------------------------ -# The codes below partially refer to the BaSSL -# Copyright (c) 2021 KakaoBrain. All Rights Reserved. 
-# Licensed under the Apache License, Version 2.0 [see LICENSE for details] -# Github: https://github.com/kakaobrain/bassl -# ------------------------------------------------------------------------------------ +# The implementation here is modified based on BaSSL, +# originally Apache 2.0 License and publicly avaialbe at https://github.com/kakaobrain/bassl import numbers import os.path as osp import random diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py index 6cacb235..6d06bbb9 100644 --- a/modelscope/preprocessors/multi_modal.py +++ b/modelscope/preprocessors/multi_modal.py @@ -186,7 +186,8 @@ class MPlugPreprocessor(Preprocessor): image = image.convert('RGB') image = self.patch_resize_transform(image) question = '' if self.cfg.task == Tasks.image_captioning \ - else data[1 if isinstance(data, tuple) else 'question'] + else data[1 if isinstance(data, tuple) + else ('text' if 'text' in data else 'question')] question = self.tokenizer( question.lower(), padding='max_length', diff --git a/modelscope/trainers/cv/image_instance_segmentation_trainer.py b/modelscope/trainers/cv/image_instance_segmentation_trainer.py index 2e2415dc..a777bde1 100644 --- a/modelscope/trainers/cv/image_instance_segmentation_trainer.py +++ b/modelscope/trainers/cv/image_instance_segmentation_trainer.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. from modelscope.metainfo import Trainers from modelscope.trainers.builder import TRAINERS from modelscope.trainers.trainer import EpochBasedTrainer diff --git a/modelscope/trainers/cv/movie_scene_segmentation_trainer.py b/modelscope/trainers/cv/movie_scene_segmentation_trainer.py index ee4dd849..7645f9f3 100644 --- a/modelscope/trainers/cv/movie_scene_segmentation_trainer.py +++ b/modelscope/trainers/cv/movie_scene_segmentation_trainer.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
from modelscope.metainfo import Trainers from modelscope.trainers.builder import TRAINERS from modelscope.trainers.trainer import EpochBasedTrainer diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index d188ae6f..793092c8 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -37,8 +37,8 @@ from modelscope.utils.device import create_device, verify_device from modelscope.utils.file_utils import func_receive_dict_inputs from modelscope.utils.logger import get_logger from modelscope.utils.registry import build_from_cfg -from modelscope.utils.torch_utils import (get_dist_info, init_dist, - set_random_seed) +from modelscope.utils.torch_utils import (get_dist_info, get_local_rank, + init_dist, set_random_seed) from .base import BaseTrainer from .builder import TRAINERS from .default_config import DEFAULT_CONFIG @@ -155,8 +155,17 @@ class EpochBasedTrainer(BaseTrainer): if self.eval_preprocessor is not None: self.eval_preprocessor.mode = ModeKeys.EVAL + if kwargs.get('launcher', None) is not None: + init_dist(kwargs['launcher']) + + _, world_size = get_dist_info() + self._dist = world_size > 1 + device_name = kwargs.get('device', 'gpu') - verify_device(device_name) + if self._dist: + local_rank = get_local_rank() + device_name = f'cuda:{local_rank}' + self.device = create_device(device_name) self.train_dataset = self.to_task_dataset( @@ -219,11 +228,6 @@ class EpochBasedTrainer(BaseTrainer): self.use_fp16 = kwargs.get('use_fp16', False) - if kwargs.get('launcher', None) is not None: - init_dist(kwargs['launcher']) - - self._dist = get_dist_info()[1] > 1 - # model placement if self.device.type == 'cuda': self.model.to(self.device) @@ -532,8 +536,14 @@ class EpochBasedTrainer(BaseTrainer): model.train() self._mode = ModeKeys.TRAIN # call model forward but not __call__ to skip postprocess - if isinstance(inputs, - Mapping) and not func_receive_dict_inputs(model.forward): + + if is_parallel(model): + receive_dict_inputs = func_receive_dict_inputs( + model.module.forward) + else: + receive_dict_inputs = func_receive_dict_inputs(model.forward) + + if isinstance(inputs, Mapping) and not receive_dict_inputs: train_outputs = model.forward(**inputs) else: train_outputs = model.forward(inputs) diff --git a/modelscope/trainers/utils/inference.py b/modelscope/trainers/utils/inference.py index c6a291d9..1f8f8ed0 100644 --- a/modelscope/trainers/utils/inference.py +++ b/modelscope/trainers/utils/inference.py @@ -9,9 +9,9 @@ from collections.abc import Mapping import torch from torch import distributed as dist -from torch.nn.parallel import DistributedDataParallel from tqdm import tqdm +from modelscope.trainers.parallel.utils import is_parallel from modelscope.utils.data_utils import to_device from modelscope.utils.file_utils import func_receive_dict_inputs from modelscope.utils.torch_utils import (broadcast, get_dist_info, is_master, @@ -138,7 +138,10 @@ def multi_gpu_test(model, data_len = data_loader_iters_per_gpu * world_size desc = 'Total test iterations with multi gpus' - time.sleep(2) # This line can prevent deadlock problem in some cases. 
+ if is_parallel(model): + receive_dict_inputs = func_receive_dict_inputs(model.module.forward) + else: + receive_dict_inputs = func_receive_dict_inputs(model.forward) count = 0 with tqdm(total=data_len, desc=desc) as pbar: @@ -146,10 +149,7 @@ def multi_gpu_test(model, data = to_device(data, device) data_list.append(data) with torch.no_grad(): - forward_func = model.module.forward if \ - isinstance(model, DistributedDataParallel) else model.forward - if isinstance(data, Mapping - ) and not func_receive_dict_inputs(forward_func): + if isinstance(data, Mapping) and not receive_dict_inputs: result = model.forward(**data) else: result = model.forward(data) diff --git a/modelscope/utils/demo_utils.py b/modelscope/utils/demo_utils.py index 41ac0bca..363ae950 100644 --- a/modelscope/utils/demo_utils.py +++ b/modelscope/utils/demo_utils.py @@ -123,7 +123,7 @@ INPUT_EXAMPLES = { 'urlPaths': { 'outUrls': [{ 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'wav' + 'fileType': 'pcm' }] } }, @@ -134,7 +134,7 @@ INPUT_EXAMPLES = { 'urlPaths': { 'outUrls': [{ 'outputKey': OutputKeys.OUTPUT_PCM, - 'fileType': 'wav' + 'fileType': 'pcm' }] } }, @@ -147,7 +147,13 @@ INPUT_EXAMPLES = { 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png', 'a blue turtle-like pokemon with round head' ], - 'urlPaths': {} + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }] + } }, TasksIODescriptions.visual_question_answering: { 'task': @@ -156,7 +162,16 @@ INPUT_EXAMPLES = { 'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png', 'what is grown on the plant?' ], - 'urlPaths': {} + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }], + 'outUrls': [{ + 'outputKey': 'text' + }] + } }, TasksIODescriptions.visual_entailment: { 'task': @@ -165,7 +180,14 @@ INPUT_EXAMPLES = { 'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg', 'there are two birds.', 'test' ], - 'urlPaths': {} + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }], + 'outUrls': [{}] + } }, TasksIODescriptions.generative_multi_modal_embedding: { 'task': @@ -174,7 +196,14 @@ INPUT_EXAMPLES = { 'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg', 'dogs playing in the grass' ], - 'urlPaths': {} + 'urlPaths': { + 'inUrls': [{ + 'name': 'image' + }, { + 'name': 'text' + }], + 'outUrls': [{}] + } }, } @@ -192,7 +221,13 @@ class DemoCompatibilityCheck(object): print('testing demo: ', self.task, self.model_id) test_pipline = pipeline(self.task, self.model_id) req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]] - output = test_pipline(preprocess(req)) + inputs = preprocess(req) + params = req.get('parameters', {}) + # modelscope inference + if params != {}: + output = test_pipline(inputs, **params) + else: + output = test_pipline(inputs) json.dumps(output, cls=NumpyEncoder) result = postprocess(req, output) print(result) @@ -215,11 +250,21 @@ class NumpyEncoder(json.JSONEncoder): def preprocess(req): + in_urls = req.get('urlPaths').get('inUrls') if len(req['inputs']) == 1: inputs = req['inputs'][0] else: inputs = tuple(req['inputs']) - return inputs + if in_urls is None or len(in_urls) == 0: + return inputs + + inputs_dict = {} + for i, in_url in enumerate(in_urls): + input_name = in_url.get('name') + if input_name is None or input_name == '': + return inputs + inputs_dict[input_name] = req['inputs'][i] + return inputs_dict def postprocess(req, 
resp): @@ -242,4 +287,3 @@ def postprocess(req, resp): out_mem_file = io.BytesIO() out_mem_file.write(new_resp.get(output_key)) return type(out_mem_file) - # TODO(lingcai.wl): support more file type diff --git a/modelscope/utils/torch_utils.py b/modelscope/utils/torch_utils.py index 6d4132f6..74d9bb7b 100644 --- a/modelscope/utils/torch_utils.py +++ b/modelscope/utils/torch_utils.py @@ -115,6 +115,10 @@ def get_dist_info() -> Tuple[int, int]: return rank, world_size +def get_local_rank(): + return int(os.environ.get('LOCAL_RANK', 0)) + + def is_master(): rank, _ = get_dist_info() return rank == 0 diff --git a/modelscope/version.py b/modelscope/version.py index 908c0bb7..9a8e054a 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1 +1 @@ -__version__ = '0.4.3' +__version__ = '0.4.4' diff --git a/requirements/cv.txt b/requirements/cv.txt index ebb61851..8c06242a 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -14,7 +14,7 @@ mmcls>=0.21.0 mmdet>=2.25.0 networkx>=2.5 onnxruntime>=1.10 -pai-easycv>=0.6.0 +pai-easycv>=0.6.3.4 pandas psutil regex diff --git a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py b/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py index 6cfdacc6..80ab36a6 100644 --- a/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py +++ b/tests/pipelines/easycv_pipelines/test_segmentation_pipeline.py @@ -1,10 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import unittest +from distutils.version import LooseVersion +import easycv import numpy as np from PIL import Image -from modelscope.metainfo import Pipelines from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.test_utils import test_level @@ -14,7 +15,7 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase): img_path = 'data/test/images/image_segmentation.jpg' - def _internal_test__(self, model_id): + def _internal_test_(self, model_id): img = np.asarray(Image.open(self.img_path)) semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id) @@ -24,41 +25,61 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase): results = outputs[0] self.assertListEqual( - list(img.shape)[:2], list(results['seg_pred'][0].shape)) - self.assertListEqual(results['seg_pred'][0][1, 4:10].tolist(), - [161 for i in range(6)]) - self.assertListEqual(results['seg_pred'][0][-1, -10:].tolist(), - [133 for i in range(10)]) + list(img.shape)[:2], list(results['seg_pred'].shape)) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def _internal_test_batch_(self, model_id, num_samples=2, batch_size=2): + # TODO: support in the future + img = np.asarray(Image.open(self.img_path)) + num_samples = num_samples + batch_size = batch_size + semantic_seg = pipeline( + task=Tasks.image_segmentation, + model=model_id, + batch_size=batch_size) + outputs = semantic_seg([self.img_path] * num_samples) + + self.assertEqual(semantic_seg.predict_op.batch_size, batch_size) + self.assertEqual(len(outputs), num_samples) + + for output in outputs: + self.assertListEqual( + list(img.shape)[:2], list(output['seg_pred'].shape)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_segformer_b0(self): model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k' - self._internal_test__(model_id) + self._internal_test_(model_id) + self._internal_test_batch_(model_id) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + 
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_segformer_b1(self): model_id = 'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k' - self._internal_test__(model_id) + self._internal_test_(model_id) + self._internal_test_batch_(model_id) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_segformer_b2(self): model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k' - self._internal_test__(model_id) + self._internal_test_(model_id) + self._internal_test_batch_(model_id) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_segformer_b3(self): model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k' - self._internal_test__(model_id) + self._internal_test_(model_id) + self._internal_test_batch_(model_id) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_segformer_b4(self): model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k' - self._internal_test__(model_id) + self._internal_test_(model_id) + self._internal_test_batch_(model_id) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_segformer_b5(self): model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k' - self._internal_test__(model_id) + self._internal_test_(model_id) + self._internal_test_batch_(model_id) if __name__ == '__main__': diff --git a/tests/pipelines/test_face_2d_keypoints.py b/tests/pipelines/test_face_2d_keypoints.py index a5e347e8..667ecddc 100644 --- a/tests/pipelines/test_face_2d_keypoints.py +++ b/tests/pipelines/test_face_2d_keypoints.py @@ -18,7 +18,7 @@ class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase): face_2d_keypoints_align = pipeline( task=Tasks.face_2d_keypoints, model=model_id) - output = face_2d_keypoints_align(img_path) + output = face_2d_keypoints_align(img_path)[0] output_keypoints = output[OutputKeys.KEYPOINTS] output_pose = output[OutputKeys.POSES] diff --git a/tests/pipelines/test_key_word_spotting_farfield.py b/tests/pipelines/test_key_word_spotting_farfield.py index fea7afd7..f8c167de 100644 --- a/tests/pipelines/test_key_word_spotting_farfield.py +++ b/tests/pipelines/test_key_word_spotting_farfield.py @@ -8,6 +8,7 @@ from modelscope.utils.constant import Tasks from modelscope.utils.test_utils import test_level TEST_SPEECH_FILE = 'data/test/audios/3ch_nihaomiya.wav' +TEST_SPEECH_FILE_MONO = 'data/test/audios/1ch_nihaomiya.wav' TEST_SPEECH_URL = 'https://modelscope.cn/api/v1/models/damo/' \ 'speech_dfsmn_kws_char_farfield_16k_nihaomiya/repo' \ '?Revision=master&FilePath=examples/3ch_nihaomiya.wav' @@ -26,6 +27,16 @@ class KWSFarfieldTest(unittest.TestCase): self.assertEqual(len(result['kws_list']), 5) print(result['kws_list'][-1]) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_mono(self): + kws = pipeline(Tasks.keyword_spotting, model=self.model_id) + inputs = { + 'input_file': os.path.join(os.getcwd(), TEST_SPEECH_FILE_MONO) + } + result = kws(inputs) + self.assertEqual(len(result['kws_list']), 5) + print(result['kws_list'][-1]) + @unittest.skipUnless(test_level() >= 0, 'skip test in 
current test level') def test_url(self): kws = pipeline(Tasks.keyword_spotting, model=self.model_id) diff --git a/tests/pipelines/test_mplug_tasks.py b/tests/pipelines/test_mplug_tasks.py index 273d3105..a3ace62d 100644 --- a/tests/pipelines/test_mplug_tasks.py +++ b/tests/pipelines/test_mplug_tasks.py @@ -44,8 +44,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): 'damo/mplug_visual-question-answering_coco_large_en') pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) image = Image.open('data/test/images/image_mplug_vqa.jpg') - question = 'What is the woman doing?' - input = {'image': image, 'question': question} + text = 'What is the woman doing?' + input = {'image': image, 'text': text} result = pipeline_vqa(input) print(result) @@ -54,8 +54,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): model = 'damo/mplug_visual-question-answering_coco_large_en' pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) image = Image.open('data/test/images/image_mplug_vqa.jpg') - question = 'What is the woman doing?' - input = {'image': image, 'question': question} + text = 'What is the woman doing?' + input = {'image': image, 'text': text} result = pipeline_vqa(input) print(result) @@ -65,8 +65,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): 'damo/mplug_image-text-retrieval_flickr30k_large_en') pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) image = Image.open('data/test/images/image-text-retrieval.jpg') - question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' - input = {'image': image, 'question': question} + text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' + input = {'image': image, 'text': text} result = pipeline_retrieval(input) print(result) @@ -75,8 +75,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): model = 'damo/mplug_image-text-retrieval_flickr30k_large_en' pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) image = Image.open('data/test/images/image-text-retrieval.jpg') - question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' - input = {'image': image, 'question': question} + text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' 
+ input = {'image': image, 'text': text} result = pipeline_retrieval(input) print(result) diff --git a/tests/pipelines/test_ofa_tasks.py b/tests/pipelines/test_ofa_tasks.py index 9a72d1ff..e6638dfa 100644 --- a/tests/pipelines/test_ofa_tasks.py +++ b/tests/pipelines/test_ofa_tasks.py @@ -147,8 +147,10 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): result = ofa_pipe(input) print(result) image_name = image.split('/')[-2] - self.save_img(image, result[OutputKeys.BOXES], - osp.join('large_en_model_' + image_name + '.png')) + self.save_img( + image, + result[OutputKeys.BOXES][0], # just one box + osp.join('large_en_model_' + image_name + '.png')) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_visual_grounding_with_name(self): @@ -161,7 +163,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): result = ofa_pipe(input) print(result) image_name = image.split('/')[-2] - self.save_img(image, result[OutputKeys.BOXES], + self.save_img(image, result[OutputKeys.BOXES][0], osp.join('large_en_name_' + image_name + '.png')) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') @@ -174,7 +176,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): result = ofa_pipe(input) print(result) image_name = image.split('/')[-1] - self.save_img(image, result[OutputKeys.BOXES], + self.save_img(image, result[OutputKeys.BOXES][0], osp.join('large_zh_name_' + image_name)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') diff --git a/tests/run_config.yaml b/tests/run_config.yaml index fc983023..4c571b7f 100644 --- a/tests/run_config.yaml +++ b/tests/run_config.yaml @@ -9,6 +9,7 @@ isolated: # test cases that may require excessive anmount of GPU memory, which - test_image_super_resolution.py - test_easycv_trainer.py - test_segformer.py + - test_segmentation_pipeline.py envs: default: # default env, case not in other env will in default, pytorch. 
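# A minimal usage sketch of the input-key change made in modelscope/preprocessors/multi_modal.py
# and mirrored in tests/pipelines/test_mplug_tasks.py above: MPlug pipelines now read the text
# field from the 'text' key, with 'question' kept as a fallback in MPlugPreprocessor. Model id and
# image path are taken from the updated tests; nothing below is a new API.
from PIL import Image
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

pipeline_vqa = pipeline(
    Tasks.visual_question_answering,
    model='damo/mplug_visual-question-answering_coco_large_en')
image = Image.open('data/test/images/image_mplug_vqa.jpg')
# the dict key 'text' replaces the previous 'question' key
result = pipeline_vqa({'image': image, 'text': 'What is the woman doing?'})
print(result)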
diff --git a/tests/trainers/test_trainer_gpu.py b/tests/trainers/test_trainer_gpu.py index 1f622287..0176704a 100644 --- a/tests/trainers/test_trainer_gpu.py +++ b/tests/trainers/test_trainer_gpu.py @@ -53,7 +53,18 @@ class DummyModel(nn.Module, Model): return dict(logits=x, loss=loss) -def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs): +class DummyModelForwardInputs(DummyModel): + + def forward(self, inputs): + feat, labels = inputs['feat'], inputs['labels'] + return super().forward(feat, labels) + + +def train_func(work_dir, + dist=False, + iterable_dataset=False, + forward_inputs=False, + **kwargs): json_cfg = { 'task': Tasks.image_classification, 'train': { @@ -81,7 +92,10 @@ def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs): with open(config_path, 'w') as f: json.dump(json_cfg, f) - model = DummyModel() + if forward_inputs: + model = DummyModelForwardInputs() + else: + model = DummyModel() optimmizer = SGD(model.parameters(), lr=0.01) lr_scheduler = StepLR(optimmizer, 2) trainer_name = Trainers.default @@ -273,6 +287,22 @@ class TrainerTestMultiGpus(DistributedTestCase): for i in [1, 3, 5]: self.assertIn(MetricKeys.ACCURACY, lines[i]) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_multi_gpus_forward_inputs(self): + self.start( + train_func, + num_gpus=2, + work_dir=self.tmp_dir, + dist=True, + forward_inputs=True) + + results_files = os.listdir(self.tmp_dir) + json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) + # TODO: support iters_per_epoch for dist mode @unittest.skipIf(True, 'need to adapt to DistributedSampler') def test_multi_gpus_with_iters_per_epoch(self):
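# A minimal sketch of the dict-forward detection introduced in the trainer.py and inference.py
# hunks above and exercised by DummyModelForwardInputs: when the model is wrapped by a parallel
# wrapper, the real forward signature lives on model.module, so the check must look there.
# wants_dict_inputs is a hypothetical helper name for illustration; is_parallel and
# func_receive_dict_inputs are the utilities imported in the diff.
import torch.nn as nn

from modelscope.trainers.parallel.utils import is_parallel
from modelscope.utils.file_utils import func_receive_dict_inputs


def wants_dict_inputs(model: nn.Module) -> bool:
    # inspect the unwrapped module's forward, not the wrapper's generic *args signature
    target = model.module if is_parallel(model) else model
    return func_receive_dict_inputs(target.forward)


# callers then dispatch the way the trainer does for Mapping inputs:
#   outputs = model.forward(inputs) if wants_dict_inputs(model) else model.forward(**inputs)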