# Conflicts:
#	modelscope/models/multi_modal/ofa_for_all_tasks.py
#	modelscope/msdatasets/ms_dataset.py
#	modelscope/trainers/utils/inference.py
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f7f5a0a4efca1e83463cb44460c66b56fb7cd673eb6da37924637bc05ef758d
+size 1440044
@@ -1,3 +1,5 @@
+# Part of the implementation is borrowed and modified from MMDetection, publicly available at
+# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py
 import os.path as osp
 import tempfile
 from collections import OrderedDict
@@ -1,3 +1,5 @@
+# The implementation here is modified based on BaSSL,
+# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
 from typing import Dict

 import numpy as np
@@ -1,5 +1,5 @@
-# Modified from: https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
+# The implementation is adopted from Swin Transformer, made publicly available under the MIT License at
+# https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py
 import numpy as np
 import torch
 import torch.nn as nn
@@ -1,3 +1,5 @@
+# Part of the implementation is borrowed and modified from MMDetection, publicly available at
+# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py
 import os
 from collections import OrderedDict
@@ -1 +1,2 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from .transforms import build_preprocess_transform
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os.path as osp

 import numpy as np
@@ -51,9 +52,9 @@ class LoadImageFromFile:
     """Load an image from file.

     Required keys are "img_prefix" and "img_info" (a dict that must contain the
-    key "filename"). Added or updated keys are "filename", "img", "img_shape",
-    "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`),
-    "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1).
+    key "filename", "ann_file", and "classes"). Added or updated keys are
+    "filename", "ori_filename", "img", "img_shape", "ori_shape" (same as `img_shape`),
+    "img_fields", "ann_file" (path to annotation file) and "classes".

     Args:
         to_float32 (bool): Whether to convert the loaded image to a float32
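Editor's note: to make the updated key contract concrete, a hypothetical `results` input satisfying the docstring above could look like this (all paths and class names below are made up):

```python
# Hypothetical input to LoadImageFromFile, per the docstring above.
results = {
    'img_prefix': 'data/coco/images',
    'img_info': {
        'filename': 'train/0001.jpg',                    # required
        'ann_file': 'data/coco/annotations/train.json',  # required
        'classes': ('person', 'car'),                    # required
    },
}
```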
@@ -73,7 +74,7 @@ class LoadImageFromFile:
         """Call functions to load image and get image meta information.

         Args:
-            results (dict): Result dict from :obj:`ImageInstanceSegmentationDataset`.
+            results (dict): Result dict from :obj:`ImageInstanceSegmentationCocoDataset`.

         Returns:
             dict: The dict contains loaded image and meta information.
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 from typing import Any, Dict
@@ -1,3 +1,5 @@
+# Part of the implementation is borrowed and modified from MMDetection, publicly available at
+# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/visualization/image.py
 import itertools

 import cv2
@@ -1,3 +1,6 @@
+# The implementation here is modified based on BaSSL,
+# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
+
 import os
 import os.path as osp
 from typing import Any, Dict
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from .save_op import get_pred_boundary, pred2scene, scene2video
 from .shot_encoder import resnet50
 from .trn import TransformerCRN
@@ -1,9 +1,5 @@
-# ------------------------------------------------------------------------------------
-# BaSSL
-# Copyright (c) 2021 KakaoBrain. All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
-# Github: https://github.com/kakaobrain/bassl
-# ------------------------------------------------------------------------------------
+# The implementation here is modified based on BaSSL,
+# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
 import torch.nn as nn
 import torch.nn.functional as F
@@ -1,7 +1,5 @@
-# ----------------------------------------------------------------------------------
-# The codes below partially refer to the SceneSeg LGSS.
-# Github: https://github.com/AnyiRao/SceneSeg
-# ----------------------------------------------------------------------------------
+# The implementation here is modified based on SceneSeg,
+# originally Apache 2.0 License and publicly available at https://github.com/AnyiRao/SceneSeg
 import os
 import os.path as osp
 import subprocess
@@ -1,6 +1,4 @@
-"""
-Modified from original implementation in torchvision
-"""
+# The implementation is adopted from torchvision
 from typing import Any, Callable, List, Optional, Type, Union
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os.path as osp

 import numpy as np
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .backbones import ViT
 from .dense_heads import AnchorNHead, RPNNHead
 from .necks import FPNF
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .vit import ViT

 __all__ = ['ViT']
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .anchor_head import AnchorNHead
 from .rpn_head import RPNNHead
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from mmdet.models.builder import HEADS
 from mmdet.models.dense_heads import AnchorHead
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 import copy

 import torch
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .fpn import FPNF

 __all__ = ['FPNF']
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 import torch.nn as nn
 import torch.nn.functional as F
 from mmcv.runner import BaseModule, auto_fp16
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .bbox_heads import (ConvFCBBoxNHead, Shared2FCBBoxNHead,
                          Shared4Conv1FCBBoxNHead)
 from .mask_heads import FCNMaskNHead
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .convfc_bbox_head import (ConvFCBBoxNHead, Shared2FCBBoxNHead,
                                Shared4Conv1FCBBoxNHead)
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 import torch.nn as nn
 from mmdet.models.builder import HEADS
 from mmdet.models.roi_heads.bbox_heads.bbox_head import BBoxHead
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .fcn_mask_head import FCNMaskNHead

 __all__ = ['FCNMaskNHead']
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-# Implementation in this file is modifed from source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from warnings import warn

 import numpy as np
@@ -1,3 +1,5 @@
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from .checkpoint import load_checkpoint
 from .convModule_norm import ConvModule_Norm
@@ -1,5 +1,6 @@
 # Copyright (c) Open-MMLab. All rights reserved.
-# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 import io
 import os
 import os.path as osp
@@ -1,5 +1,5 @@
-# Implementation adopted from ViTAE-Transformer, source code avaiable via https://github.com/ViTAE-Transformer/ViTDet
+# Implementation in this file is modified based on ViTAE-Transformer
+# Originally Apache 2.0 License and publicly available at https://github.com/ViTAE-Transformer/ViTDet
 from mmcv.cnn import ConvModule
@@ -1 +1,3 @@
+# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License
+# source code available via https://github.com/xuebinqin/U-2-Net
 from .u2net import U2NET
@@ -1,4 +1,5 @@
-# Implementation in this file is modifed from source code avaiable via https://github.com/xuebinqin/U-2-Net
+# The implementation is adopted from U-2-Net, made publicly available under the Apache 2.0 License
+# source code available via https://github.com/xuebinqin/U-2-Net
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os.path as osp

 import cv2
@@ -37,9 +37,7 @@ class OfaForAllTasks(TorchModel):

     def __init__(self, model_dir, *args, **kwargs):
         super().__init__(model_dir=model_dir, *args, **kwargs)
-        sd = torch.load(osp.join(model_dir, ModelFile.TORCH_MODEL_BIN_FILE))
-        sd = sd if 'meta' not in sd else sd['state_dict']
-        model = OFAModel.from_pretrained(model_dir, state_dict=sd)
+        model = OFAModel.from_pretrained(model_dir)
         self.cfg = Config.from_file(
             osp.join(model_dir, ModelFile.CONFIGURATION))
         self.model = model.module if hasattr(model, 'module') else model
@@ -44,44 +44,40 @@ def format_list(para) -> List:
     return para


-class MsIterableDataset(torch.utils.data.IterableDataset):
+class MsMapDataset(torch.utils.data.Dataset):

     def __init__(self, dataset: Iterable, preprocessor_list, retained_columns,
-                 columns):
-        super(MsIterableDataset).__init__()
+                 columns, to_tensor):
+        super(MsDataset).__init__()
         self.dataset = dataset
         self.preprocessor_list = preprocessor_list
+        self.to_tensor = to_tensor
         self.retained_columns = retained_columns
         self.columns = columns

     def __len__(self):
         return len(self.dataset)

-    def __iter__(self):
-        worker_info = torch.utils.data.get_worker_info()
-        if worker_info is None:  # single-process data loading
-            iter_start = 0
-            iter_end = len(self.dataset)
-        else:  # in a worker process
-            per_worker = math.ceil(
-                len(self.dataset) / float(worker_info.num_workers))
-            worker_id = worker_info.id
-            iter_start = worker_id * per_worker
-            iter_end = min(iter_start + per_worker, len(self.dataset))
-        for idx in range(iter_start, iter_end):
-            item_dict = self.dataset[idx]
-            res = {
-                k: torch.tensor(item_dict[k])
-                for k in self.columns if k in self.retained_columns
-            }
-            for preprocessor in self.preprocessor_list:
-                res.update({
-                    k: v  # k: torch.tensor(v)
-                    for k, v in preprocessor(item_dict).items()
-                    if k in self.retained_columns
-                })
-            yield res
+    def type_converter(self, x):
+        if self.to_tensor:
+            return torch.tensor(x)
+        else:
+            return x
+
+    def __getitem__(self, index):
+        item_dict = self.dataset[index]
+        res = {
+            k: self.type_converter(item_dict[k])
+            for k in self.columns
+            if (not self.to_tensor) or k in self.retained_columns
+        }
+        for preprocessor in self.preprocessor_list:
+            res.update({
+                k: self.type_converter(v)
+                for k, v in preprocessor(item_dict).items()
+                if (not self.to_tensor) or k in self.retained_columns
+            })
+        return res


 class MsDataset:
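Editor's note: the hunk above swaps an IterableDataset, which had to shard indices by hand via get_worker_info(), for a map-style Dataset, where DataLoader's sampler distributes indices across workers on its own. A minimal, self-contained sketch of the map-style contract (toy data, hypothetical class name):

```python
import torch
from torch.utils.data import DataLoader, Dataset


class ToyMapDataset(Dataset):
    """Map-style dataset: __len__ plus __getitem__ is all DataLoader needs."""

    def __init__(self, items):
        self.items = items

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return torch.tensor(self.items[index])


loader = DataLoader(ToyMapDataset(list(range(8))), batch_size=4, num_workers=2)
for batch in loader:
    print(batch)  # indices are sharded across the two workers by the sampler
```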
@@ -341,6 +337,7 @@ class MsDataset:
         self,
         preprocessors: Union[Callable, List[Callable]],
         columns: Union[str, List[str]] = None,
+        to_tensor: bool = True,
     ):
         preprocessor_list = preprocessors if isinstance(
             preprocessors, list) else [preprocessors]
@@ -350,29 +347,29 @@ class MsDataset:
         columns = [
             key for key in self._hf_ds.features.keys() if key in columns
         ]
-        sample = next(iter(self._hf_ds))
-        sample_res = {k: np.array(sample[k]) for k in columns}
-        for processor in preprocessor_list:
-            sample_res.update(
-                {k: np.array(v)
-                 for k, v in processor(sample).items()})
-
-        def is_numpy_number(value):
-            return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
-                value.dtype, np.floating) or np.issubdtype(
-                    value.dtype, np.bool)
-
-        retained_columns = []
-        for k in sample_res.keys():
-            if not is_numpy_number(sample_res[k]):
-                logger.warning(
-                    f'Data of column {k} is non-numeric, will be removed')
-                # continue
-            retained_columns.append(k)
-
-        return MsIterableDataset(self._hf_ds, preprocessor_list,
-                                 retained_columns, columns)
+        retained_columns = []
+        if to_tensor:
+            sample = next(iter(self._hf_ds))
+            sample_res = {k: np.array(sample[k]) for k in columns}
+            for processor in preprocessor_list:
+                sample_res.update(
+                    {k: np.array(v)
+                     for k, v in processor(sample).items()})

+            def is_numpy_number(value):
+                return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
+                    value.dtype, np.floating)
+
+            for k in sample_res.keys():
+                if not is_numpy_number(sample_res[k]):
+                    logger.warning(
+                        f'Data of column {k} is non-numeric, will be removed')
+                    continue
+                retained_columns.append(k)
+
+        return MsMapDataset(self._hf_ds, preprocessor_list, retained_columns,
+                            columns, to_tensor)

     def to_torch_dataset(
         self,
@@ -380,6 +377,7 @@ class MsDataset:
         preprocessors: Union[Callable, List[Callable]] = None,
         task_name: str = None,
         task_data_config: ConfigDict = None,
+        to_tensor: bool = True,
         **format_kwargs,
     ):
         """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to
@@ -387,13 +385,14 @@ class MsDataset:
         Args:
             preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process
-                every sample of the dataset. The output type of processors is dict, and each numeric field of the dict
+                every sample of the dataset. The output type of processors is dict, and each (numeric) field of the dict
                 will be used as a field of torch.utils.data.Dataset.
-            columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only). If the
-                preprocessor is None, the arg columns must have at least one column. If the `preprocessors` is not None,
-                the output fields of processors will also be added.
+            columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only if
+                `to_tensor` is True). If the preprocessor is None, the arg columns must have at least one column.
+                If the `preprocessors` is not None, the output fields of processors will also be added.
             task_name (str, default None): task name, refer to :obj:`Tasks` for more details
             task_data_config (ConfigDict, default None): config dict for model object.
+            to_tensor (bool, default True): whether to convert the data types of dataset column(s) to torch.tensor.
             format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`.

         Returns:
@@ -410,7 +409,7 @@ class MsDataset:
             return build_task_dataset(task_data_config, task_name)
         if preprocessors is not None:
             return self.to_torch_dataset_with_processors(
-                preprocessors, columns=columns)
+                preprocessors, columns=columns, to_tensor=to_tensor)
         else:
             self._hf_ds.reset_format()
             self._hf_ds.set_format(
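Editor's note: a minimal usage sketch of the new `to_tensor` switch. The dataset id and the toy preprocessor below are hypothetical; the point is only that with `to_tensor=False`, non-numeric fields such as raw strings survive into the returned map-style dataset:

```python
from modelscope.msdatasets import MsDataset

ds = MsDataset.load('some_namespace/some_dataset', split='train')  # placeholder id
torch_ds = ds.to_torch_dataset(
    preprocessors=lambda sample: {'text': sample['text']},  # toy preprocessor
    columns=['label'],
    to_tensor=False)
print(torch_ds[0])  # random access via __getitem__; 'text' stays a plain string
```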
@@ -1,3 +1,5 @@
+# Part of the implementation is borrowed and modified from MMDetection, publicly available at
+# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py
 import os.path as osp

 import numpy as np
@@ -1 +1,2 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from .movie_scene_segmentation_dataset import MovieSceneSegmentationDataset
@@ -1,6 +1,5 @@
-# ---------------------------------------------------------------------------------------------------
-# The implementation is built upon BaSSL, publicly available at https://github.com/kakaobrain/bassl
-# ---------------------------------------------------------------------------------------------------
+# The implementation here is modified based on BaSSL,
+# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
 import copy
 import os
 import os.path as osp
@@ -4,6 +4,9 @@ import io
 import wave
 from typing import Any, Dict

+import numpy
+import soundfile as sf
+
 from modelscope.fileio import File
 from modelscope.metainfo import Pipelines
 from modelscope.outputs import OutputKeys
@@ -37,7 +40,6 @@ class KWSFarfieldPipeline(Pipeline):
         self.model.eval()
         frame_size = self.INPUT_CHANNELS * self.SAMPLE_WIDTH
         self._nframe = self.model.size_in // frame_size
-        self.frame_count = 0

     def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
         if isinstance(inputs, bytes):
@@ -54,35 +56,36 @@ class KWSFarfieldPipeline(Pipeline):
         input_file = inputs['input_file']
         if isinstance(input_file, str):
             input_file = File.read(input_file)
-        if isinstance(input_file, bytes):
-            input_file = io.BytesIO(input_file)
-        self.frame_count = 0
+        frames, samplerate = sf.read(io.BytesIO(input_file), dtype='int16')
+        if len(frames.shape) == 1:
+            frames = numpy.stack((frames, frames, numpy.zeros_like(frames)), 1)
         kws_list = []
-        with wave.open(input_file, 'rb') as fin:
-            if 'output_file' in inputs:
-                with wave.open(inputs['output_file'], 'wb') as fout:
-                    fout.setframerate(self.SAMPLE_RATE)
-                    fout.setnchannels(self.OUTPUT_CHANNELS)
-                    fout.setsampwidth(self.SAMPLE_WIDTH)
-                    self._process(fin, kws_list, fout)
-            else:
-                self._process(fin, kws_list)
+        if 'output_file' in inputs:
+            with wave.open(inputs['output_file'], 'wb') as fout:
+                fout.setframerate(self.SAMPLE_RATE)
+                fout.setnchannels(self.OUTPUT_CHANNELS)
+                fout.setsampwidth(self.SAMPLE_WIDTH)
+                self._process(frames, kws_list, fout)
+        else:
+            self._process(frames, kws_list)
         return {OutputKeys.KWS_LIST: kws_list}

     def _process(self,
-                 fin: wave.Wave_read,
+                 frames: numpy.ndarray,
                  kws_list,
                  fout: wave.Wave_write = None):
-        data = fin.readframes(self._nframe)
-        while len(data) >= self.model.size_in:
-            self.frame_count += self._nframe
+        for start_index in range(0, frames.shape[0], self._nframe):
+            end_index = start_index + self._nframe
+            if end_index > frames.shape[0]:
+                end_index = frames.shape[0]
+            data = frames[start_index:end_index, :].tobytes()
             result = self.model.forward_decode(data)
             if fout:
                 fout.writeframes(result['pcm'])
             if 'kws' in result:
-                result['kws']['offset'] += self.frame_count / self.SAMPLE_RATE
+                result['kws']['offset'] += start_index / self.SAMPLE_RATE
                 kws_list.append(result['kws'])
-            data = fin.readframes(self._nframe)

     def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
         return inputs
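Editor's note: two details are easy to miss in the hunk above. A mono file is expanded to the three-channel layout the model expects (the single channel duplicated, plus an all-zero third channel), and keyword offsets are now derived from the absolute sample index rather than a mutable frame counter. A standalone sketch of that chunking and offset arithmetic (the frame size here is illustrative; the pipeline derives it from model.size_in):

```python
import numpy as np

SAMPLE_RATE = 16000
nframe = 4800  # illustrative frames-per-call

mono = np.zeros(SAMPLE_RATE, dtype='int16')  # one second of stand-in audio
# Mono becomes (N, 3): channel duplicated twice plus a zero third channel,
# mirroring the numpy.stack(...) branch above.
frames = np.stack((mono, mono, np.zeros_like(mono)), 1)

for start_index in range(0, frames.shape[0], nframe):
    end_index = min(start_index + nframe, frames.shape[0])
    chunk = frames[start_index:end_index, :].tobytes()
    offset_sec = start_index / SAMPLE_RATE  # offset applied to 'kws' hits
    print(offset_sec, len(chunk))
```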
@@ -1,3 +1,5 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
 import math
 import os.path as osp
 from typing import Any, Dict
@@ -10,6 +10,7 @@ from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.pipelines.util import is_official_hub_path
 from modelscope.utils.config import Config
 from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
+from modelscope.utils.device import create_device


 class EasyCVPipeline(object):
@@ -53,16 +54,19 @@ class EasyCVPipeline(object):
         ), f'Not find "{ModelFile.CONFIGURATION}" in model directory!'

         self.cfg = Config.from_file(self.config_file)
-        self.predict_op = self._build_predict_op()
+        if 'device' in kwargs:
+            kwargs['device'] = create_device(kwargs['device'])
+        self.predict_op = self._build_predict_op(**kwargs)

-    def _build_predict_op(self):
+    def _build_predict_op(self, **kwargs):
         """Build EasyCV predictor."""
         from easycv.predictors.builder import build_predictor
         easycv_config = self._to_easycv_config()
         pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
             'model_path': self.model_path,
-            'config_file': easycv_config
+            'config_file': easycv_config,
+            **kwargs
         })
         return pipeline_op
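Editor's note: with this change, extra pipeline kwargs such as `device` are normalized via create_device() and forwarded into build_predictor(). A hypothetical call, using a model id that appears in this PR's tests:

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Assumption: the underlying EasyCV predictor accepts a 'device' argument.
segmentor = pipeline(
    task=Tasks.image_segmentation,
    model='damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k',
    device='cpu')
```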
@@ -91,5 +95,4 @@ class EasyCVPipeline(object):
         return easycv_config

     def __call__(self, inputs) -> Any:
-        # TODO: support image url
         return self.predict_op(inputs)
@@ -4,7 +4,6 @@ from typing import Any

 from modelscope.metainfo import Pipelines
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.builder import PIPELINES
-from modelscope.preprocessors import LoadImage
 from modelscope.utils.constant import ModelFile, Tasks
 from .base import EasyCVPipeline
@@ -34,8 +33,11 @@ class Face2DKeypointsPipeline(EasyCVPipeline):
         return self.predict_op.show_result(img, points, scale, save_path)

     def __call__(self, inputs) -> Any:
-        output = self.predict_op(inputs)[0][0]
-        points = output['point']
-        poses = output['pose']
+        outputs = self.predict_op(inputs)

-        return {OutputKeys.KEYPOINTS: points, OutputKeys.POSES: poses}
+        results = [{
+            OutputKeys.KEYPOINTS: output['point'],
+            OutputKeys.POSES: output['pose']
+        } for output in outputs]
+        return results
@@ -28,7 +28,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
             *args,
             **kwargs)

-    def _build_predict_op(self):
+    def _build_predict_op(self, **kwargs):
         """Build EasyCV predictor."""
         from easycv.predictors.builder import build_predictor
         detection_predictor_type = self.cfg['DETECTION']['type']
@@ -46,6 +46,7 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
         easycv_config = self._to_easycv_config()
         pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
             'model_path': self.model_path,
-            'config_file': easycv_config
+            'config_file': easycv_config,
+            **kwargs
         })
         return pipeline_op
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 import os
 from typing import Any, Dict, Optional, Union
@@ -61,7 +61,13 @@ class ImageStyleTransferPipeline(Pipeline):
     def _sanitize_parameters(self, **pipeline_parameters):
         return pipeline_parameters, {}, {}

-    def preprocess(self, content: Input, style: Input) -> Dict[str, Any]:
+    def preprocess(self,
+                   content: Input,
+                   style: Input = None) -> Dict[str, Any]:
+        if type(content) is dict:  # for demo service
+            style = content['style']
+            content = content['content']
+
         content = LoadImage.convert_to_ndarray(content)
         if len(content.shape) == 2:
             content = cv2.cvtColor(content, cv2.COLOR_GRAY2BGR)
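Editor's note: the dict branch lets the demo service pass both images in one payload. A sketch of that calling form, with a placeholder model id and illustrative paths:

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

style_transfer = pipeline(Tasks.image_style_transfer,
                          model='<style-transfer-model-id>')  # placeholder
# Demo-service form: both images in a single dict.
result = style_transfer({'content': 'content.jpg', 'style': 'style.jpg'})
```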
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from typing import Any, Dict

 import torch
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from typing import TYPE_CHECKING

 from modelscope.utils.import_utils import LazyImportModule
@@ -1,9 +1,5 @@
-# ------------------------------------------------------------------------------------
-# The codes below partially refer to the BaSSL
-# Copyright (c) 2021 KakaoBrain. All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
-# Github: https://github.com/kakaobrain/bassl
-# ------------------------------------------------------------------------------------
+# The implementation here is modified based on BaSSL,
+# originally Apache 2.0 License and publicly available at https://github.com/kakaobrain/bassl
 import numbers
 import os.path as osp
 import random
@@ -186,7 +186,8 @@ class MPlugPreprocessor(Preprocessor):
         image = image.convert('RGB')
         image = self.patch_resize_transform(image)
         question = '' if self.cfg.task == Tasks.image_captioning \
-            else data[1 if isinstance(data, tuple) else 'question']
+            else data[1 if isinstance(data, tuple)
+                      else ('text' if 'text' in data else 'question')]
         question = self.tokenizer(
             question.lower(),
             padding='max_length',
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from modelscope.metainfo import Trainers
 from modelscope.trainers.builder import TRAINERS
 from modelscope.trainers.trainer import EpochBasedTrainer
@@ -1,3 +1,4 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
 from modelscope.metainfo import Trainers
 from modelscope.trainers.builder import TRAINERS
 from modelscope.trainers.trainer import EpochBasedTrainer
@@ -37,8 +37,8 @@ from modelscope.utils.device import create_device, verify_device
 from modelscope.utils.file_utils import func_receive_dict_inputs
 from modelscope.utils.logger import get_logger
 from modelscope.utils.registry import build_from_cfg
-from modelscope.utils.torch_utils import (get_dist_info, init_dist,
-                                          set_random_seed)
+from modelscope.utils.torch_utils import (get_dist_info, get_local_rank,
+                                          init_dist, set_random_seed)
 from .base import BaseTrainer
 from .builder import TRAINERS
 from .default_config import DEFAULT_CONFIG
@@ -155,8 +155,17 @@ class EpochBasedTrainer(BaseTrainer):
         if self.eval_preprocessor is not None:
             self.eval_preprocessor.mode = ModeKeys.EVAL

+        if kwargs.get('launcher', None) is not None:
+            init_dist(kwargs['launcher'])
+
+        _, world_size = get_dist_info()
+        self._dist = world_size > 1
+
         device_name = kwargs.get('device', 'gpu')
-        verify_device(device_name)
+        if self._dist:
+            local_rank = get_local_rank()
+            device_name = f'cuda:{local_rank}'
+
         self.device = create_device(device_name)

         self.train_dataset = self.to_task_dataset(
@@ -219,11 +228,6 @@ class EpochBasedTrainer(BaseTrainer):
         self.use_fp16 = kwargs.get('use_fp16', False)

-        if kwargs.get('launcher', None) is not None:
-            init_dist(kwargs['launcher'])
-
-        self._dist = get_dist_info()[1] > 1
-
         # model placement
         if self.device.type == 'cuda':
             self.model.to(self.device)
@@ -532,8 +536,14 @@ class EpochBasedTrainer(BaseTrainer):
         model.train()
         self._mode = ModeKeys.TRAIN
         # call model forward but not __call__ to skip postprocess
-        if isinstance(inputs,
-                      Mapping) and not func_receive_dict_inputs(model.forward):
+        if is_parallel(model):
+            receive_dict_inputs = func_receive_dict_inputs(
+                model.module.forward)
+        else:
+            receive_dict_inputs = func_receive_dict_inputs(model.forward)
+
+        if isinstance(inputs, Mapping) and not receive_dict_inputs:
             train_outputs = model.forward(**inputs)
         else:
             train_outputs = model.forward(inputs)
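Editor's note: the reason for the is_parallel() branch is that once a model is wrapped (e.g. by DistributedDataParallel), the wrapper's forward signature is not the user model's, so signature inspection must look at model.module.forward instead. A toy illustration of the idea, with hypothetical stand-ins for the real helpers:

```python
import inspect


def toy_receive_dict_inputs(forward) -> bool:
    """Stand-in for func_receive_dict_inputs: does forward take a single
    positional argument (i.e. expect the whole inputs dict)?"""
    return len(inspect.signature(forward).parameters) == 1


class ToyWrapper:
    """Mimics a parallel wrapper: the real model hides behind .module."""

    def __init__(self, module):
        self.module = module


class ToyModel:

    def forward(self, inputs):
        return inputs


wrapped = ToyWrapper(ToyModel())
# Inspect the wrapped model's own forward, not an attribute of the wrapper:
print(toy_receive_dict_inputs(wrapped.module.forward))  # True
```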
@@ -9,9 +9,9 @@ from collections.abc import Mapping

 import torch
 from torch import distributed as dist
-from torch.nn.parallel import DistributedDataParallel
 from tqdm import tqdm

+from modelscope.trainers.parallel.utils import is_parallel
 from modelscope.utils.data_utils import to_device
 from modelscope.utils.file_utils import func_receive_dict_inputs
 from modelscope.utils.torch_utils import (broadcast, get_dist_info, is_master,
@@ -138,7 +138,10 @@ def multi_gpu_test(model,
         data_len = data_loader_iters_per_gpu * world_size
         desc = 'Total test iterations with multi gpus'
     time.sleep(2)  # This line can prevent deadlock problem in some cases.
+    if is_parallel(model):
+        receive_dict_inputs = func_receive_dict_inputs(model.module.forward)
+    else:
+        receive_dict_inputs = func_receive_dict_inputs(model.forward)

     count = 0
     with tqdm(total=data_len, desc=desc) as pbar:
@@ -146,10 +149,7 @@ def multi_gpu_test(model,
             data = to_device(data, device)
             data_list.append(data)
             with torch.no_grad():
-                forward_func = model.module.forward if \
-                    isinstance(model, DistributedDataParallel) else model.forward
-                if isinstance(data, Mapping
-                              ) and not func_receive_dict_inputs(forward_func):
+                if isinstance(data, Mapping) and not receive_dict_inputs:
                     result = model.forward(**data)
                 else:
                     result = model.forward(data)
@@ -123,7 +123,7 @@ INPUT_EXAMPLES = {
         'urlPaths': {
             'outUrls': [{
                 'outputKey': OutputKeys.OUTPUT_PCM,
-                'fileType': 'wav'
+                'fileType': 'pcm'
             }]
         }
     },
@@ -134,7 +134,7 @@ INPUT_EXAMPLES = {
         'urlPaths': {
             'outUrls': [{
                 'outputKey': OutputKeys.OUTPUT_PCM,
-                'fileType': 'wav'
+                'fileType': 'pcm'
             }]
         }
     },
@@ -147,7 +147,13 @@ INPUT_EXAMPLES = {
             'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-grounding/visual_grounding.png',
             'a blue turtle-like pokemon with round head'
         ],
-        'urlPaths': {}
+        'urlPaths': {
+            'inUrls': [{
+                'name': 'image'
+            }, {
+                'name': 'text'
+            }]
+        }
     },
     TasksIODescriptions.visual_question_answering: {
         'task':
@@ -156,7 +162,16 @@ INPUT_EXAMPLES = {
             'http://225252-file.oss-cn-hangzhou-zmf.aliyuncs.com/maas_demo/visual_question_answering.png',
             'what is grown on the plant?'
         ],
-        'urlPaths': {}
+        'urlPaths': {
+            'inUrls': [{
+                'name': 'image'
+            }, {
+                'name': 'text'
+            }],
+            'outUrls': [{
+                'outputKey': 'text'
+            }]
+        }
     },
     TasksIODescriptions.visual_entailment: {
         'task':
@@ -165,7 +180,14 @@ INPUT_EXAMPLES = {
             'http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/visual-entailment/visual_entailment.jpg',
             'there are two birds.', 'test'
         ],
-        'urlPaths': {}
+        'urlPaths': {
+            'inUrls': [{
+                'name': 'image'
+            }, {
+                'name': 'text'
+            }],
+            'outUrls': [{}]
+        }
     },
     TasksIODescriptions.generative_multi_modal_embedding: {
         'task':
@@ -174,7 +196,14 @@ INPUT_EXAMPLES = {
             'http://clip-multimodal.oss-cn-beijing.aliyuncs.com/lingchen/demo/dogs.jpg',
             'dogs playing in the grass'
         ],
-        'urlPaths': {}
+        'urlPaths': {
+            'inUrls': [{
+                'name': 'image'
+            }, {
+                'name': 'text'
+            }],
+            'outUrls': [{}]
+        }
     },
 }
@@ -192,7 +221,13 @@ class DemoCompatibilityCheck(object):
         print('testing demo: ', self.task, self.model_id)
         test_pipline = pipeline(self.task, self.model_id)
         req = INPUT_EXAMPLES[TASKS_INPUT_TEMPLATES[self.task]]
-        output = test_pipline(preprocess(req))
+        inputs = preprocess(req)
+        params = req.get('parameters', {})
+        # modelscope inference
+        if params != {}:
+            output = test_pipline(inputs, **params)
+        else:
+            output = test_pipline(inputs)
         json.dumps(output, cls=NumpyEncoder)
         result = postprocess(req, output)
         print(result)
@@ -215,11 +250,21 @@ class NumpyEncoder(json.JSONEncoder):


 def preprocess(req):
+    in_urls = req.get('urlPaths').get('inUrls')
     if len(req['inputs']) == 1:
         inputs = req['inputs'][0]
     else:
         inputs = tuple(req['inputs'])
-    return inputs
+    if in_urls is None or len(in_urls) == 0:
+        return inputs
+
+    inputs_dict = {}
+    for i, in_url in enumerate(in_urls):
+        input_name = in_url.get('name')
+        if input_name is None or input_name == '':
+            return inputs
+        inputs_dict[input_name] = req['inputs'][i]
+    return inputs_dict


 def postprocess(req, resp):
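Editor's note on the preprocess() change above: with named inUrls entries (as added to the examples earlier in this file), it now builds a keyword dict instead of a positional tuple, matching pipelines such as MPlug that expect {'image': ..., 'text': ...}. A toy trace of the mapping, re-implemented standalone for illustration:

```python
# Standalone re-implementation of the new name mapping, illustration only.
req = {
    'inputs': ['http://example.com/img.png', 'what is grown on the plant?'],
    'urlPaths': {
        'inUrls': [{'name': 'image'}, {'name': 'text'}]
    }
}
in_urls = req['urlPaths']['inUrls']
inputs_dict = {u['name']: req['inputs'][i] for i, u in enumerate(in_urls)}
print(inputs_dict)  # {'image': 'http://example.com/img.png', 'text': '...'}
```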
@@ -242,4 +287,3 @@ def postprocess(req, resp):
     out_mem_file = io.BytesIO()
     out_mem_file.write(new_resp.get(output_key))
     return type(out_mem_file)
-    # TODO(lingcai.wl): support more file type
@@ -115,6 +115,10 @@ def get_dist_info() -> Tuple[int, int]:
     return rank, world_size


+def get_local_rank():
+    return int(os.environ.get('LOCAL_RANK', 0))
+
+
 def is_master():
     rank, _ = get_dist_info()
     return rank == 0
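Editor's note: LOCAL_RANK is the per-node process index that launchers such as torchrun and torch.distributed.launch export into the environment; the trainer uses it to bind each worker to its own GPU (the `cuda:{local_rank}` assignment earlier in this PR). A self-contained sketch of the same idea:

```python
import os

import torch


def get_local_rank() -> int:
    # Distributed launchers set LOCAL_RANK per process;
    # default to 0 for plain single-process runs.
    return int(os.environ.get('LOCAL_RANK', 0))


device = torch.device(
    f'cuda:{get_local_rank()}' if torch.cuda.is_available() else 'cpu')
print(device)
```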
@@ -1 +1 @@
-__version__ = '0.4.3'
+__version__ = '0.4.4'
@@ -14,7 +14,7 @@ mmcls>=0.21.0
 mmdet>=2.25.0
 networkx>=2.5
 onnxruntime>=1.10
-pai-easycv>=0.6.0
+pai-easycv>=0.6.3.4
 pandas
 psutil
 regex
@@ -1,10 +1,11 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import unittest
+from distutils.version import LooseVersion

+import easycv
 import numpy as np
 from PIL import Image

+from modelscope.metainfo import Pipelines
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
 from modelscope.utils.test_utils import test_level
@@ -14,7 +15,7 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase):

     img_path = 'data/test/images/image_segmentation.jpg'

-    def _internal_test__(self, model_id):
+    def _internal_test_(self, model_id):
         img = np.asarray(Image.open(self.img_path))

         semantic_seg = pipeline(task=Tasks.image_segmentation, model=model_id)
@@ -24,41 +25,61 @@ class EasyCVSegmentationPipelineTest(unittest.TestCase):
         results = outputs[0]

         self.assertListEqual(
-            list(img.shape)[:2], list(results['seg_pred'][0].shape))
-        self.assertListEqual(results['seg_pred'][0][1, 4:10].tolist(),
-                             [161 for i in range(6)])
-        self.assertListEqual(results['seg_pred'][0][-1, -10:].tolist(),
-                             [133 for i in range(10)])
+            list(img.shape)[:2], list(results['seg_pred'].shape))

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def _internal_test_batch_(self, model_id, num_samples=2, batch_size=2):
+        # TODO: support in the future
+        img = np.asarray(Image.open(self.img_path))
+        num_samples = num_samples
+        batch_size = batch_size
+        semantic_seg = pipeline(
+            task=Tasks.image_segmentation,
+            model=model_id,
+            batch_size=batch_size)
+        outputs = semantic_seg([self.img_path] * num_samples)
+        self.assertEqual(semantic_seg.predict_op.batch_size, batch_size)
+        self.assertEqual(len(outputs), num_samples)
+        for output in outputs:
+            self.assertListEqual(
+                list(img.shape)[:2], list(output['seg_pred'].shape))
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_segformer_b0(self):
         model_id = 'damo/cv_segformer-b0_image_semantic-segmentation_coco-stuff164k'
-        self._internal_test__(model_id)
+        self._internal_test_(model_id)
+        self._internal_test_batch_(model_id)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_segformer_b1(self):
         model_id = 'damo/cv_segformer-b1_image_semantic-segmentation_coco-stuff164k'
-        self._internal_test__(model_id)
+        self._internal_test_(model_id)
+        self._internal_test_batch_(model_id)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_segformer_b2(self):
         model_id = 'damo/cv_segformer-b2_image_semantic-segmentation_coco-stuff164k'
-        self._internal_test__(model_id)
+        self._internal_test_(model_id)
+        self._internal_test_batch_(model_id)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_segformer_b3(self):
         model_id = 'damo/cv_segformer-b3_image_semantic-segmentation_coco-stuff164k'
-        self._internal_test__(model_id)
+        self._internal_test_(model_id)
+        self._internal_test_batch_(model_id)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_segformer_b4(self):
         model_id = 'damo/cv_segformer-b4_image_semantic-segmentation_coco-stuff164k'
-        self._internal_test__(model_id)
+        self._internal_test_(model_id)
+        self._internal_test_batch_(model_id)

-    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_segformer_b5(self):
         model_id = 'damo/cv_segformer-b5_image_semantic-segmentation_coco-stuff164k'
-        self._internal_test__(model_id)
+        self._internal_test_(model_id)
+        self._internal_test_batch_(model_id)


 if __name__ == '__main__':
@@ -18,7 +18,7 @@ class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase):
         face_2d_keypoints_align = pipeline(
             task=Tasks.face_2d_keypoints, model=model_id)

-        output = face_2d_keypoints_align(img_path)
+        output = face_2d_keypoints_align(img_path)[0]

         output_keypoints = output[OutputKeys.KEYPOINTS]
         output_pose = output[OutputKeys.POSES]
| @@ -8,6 +8,7 @@ from modelscope.utils.constant import Tasks | |||||
| from modelscope.utils.test_utils import test_level | from modelscope.utils.test_utils import test_level | ||||
| TEST_SPEECH_FILE = 'data/test/audios/3ch_nihaomiya.wav' | TEST_SPEECH_FILE = 'data/test/audios/3ch_nihaomiya.wav' | ||||
| TEST_SPEECH_FILE_MONO = 'data/test/audios/1ch_nihaomiya.wav' | |||||
| TEST_SPEECH_URL = 'https://modelscope.cn/api/v1/models/damo/' \ | TEST_SPEECH_URL = 'https://modelscope.cn/api/v1/models/damo/' \ | ||||
| 'speech_dfsmn_kws_char_farfield_16k_nihaomiya/repo' \ | 'speech_dfsmn_kws_char_farfield_16k_nihaomiya/repo' \ | ||||
| '?Revision=master&FilePath=examples/3ch_nihaomiya.wav' | '?Revision=master&FilePath=examples/3ch_nihaomiya.wav' | ||||
| @@ -26,6 +27,16 @@ class KWSFarfieldTest(unittest.TestCase): | |||||
| self.assertEqual(len(result['kws_list']), 5) | self.assertEqual(len(result['kws_list']), 5) | ||||
| print(result['kws_list'][-1]) | print(result['kws_list'][-1]) | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_mono(self): | |||||
| kws = pipeline(Tasks.keyword_spotting, model=self.model_id) | |||||
| inputs = { | |||||
| 'input_file': os.path.join(os.getcwd(), TEST_SPEECH_FILE_MONO) | |||||
| } | |||||
| result = kws(inputs) | |||||
| self.assertEqual(len(result['kws_list']), 5) | |||||
| print(result['kws_list'][-1]) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| def test_url(self): | def test_url(self): | ||||
| kws = pipeline(Tasks.keyword_spotting, model=self.model_id) | kws = pipeline(Tasks.keyword_spotting, model=self.model_id) | ||||
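The new `test_mono` case mirrors the existing multi-channel test but feeds the single-channel `1ch_nihaomiya.wav` file, asserting the same five keyword hits. Extracted as a standalone sketch; the model id is an assumption inferred from `TEST_SPEECH_URL` in the hunk above:

```python
import os

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

TEST_SPEECH_FILE_MONO = 'data/test/audios/1ch_nihaomiya.wav'
# Assumed: inferred from the TEST_SPEECH_URL download path above.
model_id = 'damo/speech_dfsmn_kws_char_farfield_16k_nihaomiya'

kws = pipeline(Tasks.keyword_spotting, model=model_id)
# The far-field KWS pipeline takes a dict with an absolute input path.
result = kws({'input_file': os.path.join(os.getcwd(), TEST_SPEECH_FILE_MONO)})
assert len(result['kws_list']) == 5  # five wake-word hits in the clip
print(result['kws_list'][-1])
```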
| @@ -44,8 +44,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| 'damo/mplug_visual-question-answering_coco_large_en') | 'damo/mplug_visual-question-answering_coco_large_en') | ||||
| pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) | pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) | ||||
| image = Image.open('data/test/images/image_mplug_vqa.jpg') | image = Image.open('data/test/images/image_mplug_vqa.jpg') | ||||
| question = 'What is the woman doing?' | |||||
| input = {'image': image, 'question': question} | |||||
| text = 'What is the woman doing?' | |||||
| input = {'image': image, 'text': text} | |||||
| result = pipeline_vqa(input) | result = pipeline_vqa(input) | ||||
| print(result) | print(result) | ||||
| @@ -54,8 +54,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| model = 'damo/mplug_visual-question-answering_coco_large_en' | model = 'damo/mplug_visual-question-answering_coco_large_en' | ||||
| pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) | pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model) | ||||
| image = Image.open('data/test/images/image_mplug_vqa.jpg') | image = Image.open('data/test/images/image_mplug_vqa.jpg') | ||||
| question = 'What is the woman doing?' | |||||
| input = {'image': image, 'question': question} | |||||
| text = 'What is the woman doing?' | |||||
| input = {'image': image, 'text': text} | |||||
| result = pipeline_vqa(input) | result = pipeline_vqa(input) | ||||
| print(result) | print(result) | ||||
| @@ -65,8 +65,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| 'damo/mplug_image-text-retrieval_flickr30k_large_en') | 'damo/mplug_image-text-retrieval_flickr30k_large_en') | ||||
| pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) | pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) | ||||
| image = Image.open('data/test/images/image-text-retrieval.jpg') | image = Image.open('data/test/images/image-text-retrieval.jpg') | ||||
| question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' | |||||
| input = {'image': image, 'question': question} | |||||
| text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' | |||||
| input = {'image': image, 'text': text} | |||||
| result = pipeline_retrieval(input) | result = pipeline_retrieval(input) | ||||
| print(result) | print(result) | ||||
| @@ -75,8 +75,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| model = 'damo/mplug_image-text-retrieval_flickr30k_large_en' | model = 'damo/mplug_image-text-retrieval_flickr30k_large_en' | ||||
| pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) | pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model) | ||||
| image = Image.open('data/test/images/image-text-retrieval.jpg') | image = Image.open('data/test/images/image-text-retrieval.jpg') | ||||
| question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' | |||||
| input = {'image': image, 'question': question} | |||||
| text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.' | |||||
| input = {'image': image, 'text': text} | |||||
| result = pipeline_retrieval(input) | result = pipeline_retrieval(input) | ||||
| print(result) | print(result) | ||||
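All four mplug hunks make the same interface change: the input dict key for the language side is renamed from `'question'` to `'text'`, so visual question answering and image-text retrieval now share one key. The updated calling convention, condensed from the test bodies above:

```python
from PIL import Image

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Both tasks now take the language input under the unified 'text' key.
pipeline_vqa = pipeline(
    Tasks.visual_question_answering,
    model='damo/mplug_visual-question-answering_coco_large_en')
image = Image.open('data/test/images/image_mplug_vqa.jpg')
print(pipeline_vqa({'image': image, 'text': 'What is the woman doing?'}))

pipeline_retrieval = pipeline(
    Tasks.image_text_retrieval,
    model='damo/mplug_image-text-retrieval_flickr30k_large_en')
image = Image.open('data/test/images/image-text-retrieval.jpg')
print(pipeline_retrieval({
    'image': image,
    'text': 'Two young guys with shaggy hair look at their hands '
            'while hanging out in the yard.'
}))
```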
| @@ -147,8 +147,10 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| result = ofa_pipe(input) | result = ofa_pipe(input) | ||||
| print(result) | print(result) | ||||
| image_name = image.split('/')[-2] | image_name = image.split('/')[-2] | ||||
| self.save_img(image, result[OutputKeys.BOXES], | |||||
| osp.join('large_en_model_' + image_name + '.png')) | |||||
| self.save_img( | |||||
| image, | |||||
| result[OutputKeys.BOXES][0], # just one box | |||||
| osp.join('large_en_model_' + image_name + '.png')) | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
| def test_run_with_visual_grounding_with_name(self): | def test_run_with_visual_grounding_with_name(self): | ||||
| @@ -161,7 +163,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| result = ofa_pipe(input) | result = ofa_pipe(input) | ||||
| print(result) | print(result) | ||||
| image_name = image.split('/')[-2] | image_name = image.split('/')[-2] | ||||
| self.save_img(image, result[OutputKeys.BOXES], | |||||
| self.save_img(image, result[OutputKeys.BOXES][0], | |||||
| osp.join('large_en_name_' + image_name + '.png')) | osp.join('large_en_name_' + image_name + '.png')) | ||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| @@ -174,7 +176,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| result = ofa_pipe(input) | result = ofa_pipe(input) | ||||
| print(result) | print(result) | ||||
| image_name = image.split('/')[-1] | image_name = image.split('/')[-1] | ||||
| self.save_img(image, result[OutputKeys.BOXES], | |||||
| self.save_img(image, result[OutputKeys.BOXES][0], | |||||
| osp.join('large_zh_name_' + image_name)) | osp.join('large_zh_name_' + image_name)) | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
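The three visual-grounding hunks all switch from passing the whole `result[OutputKeys.BOXES]` to `result[OutputKeys.BOXES][0]`: the output now holds a list of boxes, and these single-box tests draw only the first (`save_img` is the test class's own helper). A minimal sketch under that assumption, with the model id and inputs assumed:

```python
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Assumed model id and inputs; only the [0] indexing comes from the diff.
ofa_pipe = pipeline(
    Tasks.visual_grounding,
    model='damo/ofa_visual-grounding_refcoco_large_en')
input = {
    'image': 'data/test/images/visual_grounding.png',
    'text': 'a blue turtle-like pokemon with round head',
}
result = ofa_pipe(input)
first_box = result[OutputKeys.BOXES][0]  # BOXES now holds a list of boxes
print(first_box)
```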
| @@ -9,6 +9,7 @@ isolated: # test cases that may require excessive amount of GPU memory, which | |||||
| - test_image_super_resolution.py | - test_image_super_resolution.py | ||||
| - test_easycv_trainer.py | - test_easycv_trainer.py | ||||
| - test_segformer.py | - test_segformer.py | ||||
| - test_segmentation_pipeline.py | |||||
| envs: | envs: | ||||
| default: # default env, case not in other env will in default, pytorch. | default: # default env, case not in other env will in default, pytorch. | ||||
| @@ -53,7 +53,18 @@ class DummyModel(nn.Module, Model): | |||||
| return dict(logits=x, loss=loss) | return dict(logits=x, loss=loss) | ||||
| def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs): | |||||
| class DummyModelForwardInputs(DummyModel): | |||||
| def forward(self, inputs): | |||||
| feat, labels = inputs['feat'], inputs['labels'] | |||||
| return super().forward(feat, labels) | |||||
| def train_func(work_dir, | |||||
| dist=False, | |||||
| iterable_dataset=False, | |||||
| forward_inputs=False, | |||||
| **kwargs): | |||||
| json_cfg = { | json_cfg = { | ||||
| 'task': Tasks.image_classification, | 'task': Tasks.image_classification, | ||||
| 'train': { | 'train': { | ||||
| @@ -81,7 +92,10 @@ def train_func(work_dir, dist=False, iterable_dataset=False, **kwargs): | |||||
| with open(config_path, 'w') as f: | with open(config_path, 'w') as f: | ||||
| json.dump(json_cfg, f) | json.dump(json_cfg, f) | ||||
| model = DummyModel() | |||||
| if forward_inputs: | |||||
| model = DummyModelForwardInputs() | |||||
| else: | |||||
| model = DummyModel() | |||||
| optimizer = SGD(model.parameters(), lr=0.01) | optimizer = SGD(model.parameters(), lr=0.01) | ||||
| lr_scheduler = StepLR(optimizer, 2) | lr_scheduler = StepLR(optimizer, 2) | ||||
| trainer_name = Trainers.default | trainer_name = Trainers.default | ||||
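The new `DummyModelForwardInputs` subclass adapts the toy model so its `forward` accepts the trainer's collated batch dict directly, instead of unpacked positional tensors; `train_func` grows a `forward_inputs` flag to pick between the two. A self-contained sketch of the dict-unpacking pattern (plain `nn.Module` here; the real `DummyModel` also mixes in ModelScope's `Model`, and its layer sizes are assumptions):

```python
import torch
import torch.nn as nn


class DummyModel(nn.Module):
    """Toy classifier whose forward takes positional tensors (assumed body)."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(5, 4)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, feat, labels):
        logits = self.linear(feat)
        return dict(logits=logits, loss=self.loss_fn(logits, labels))


class DummyModelForwardInputs(DummyModel):
    """Variant that accepts the trainer's batch dict as a single argument."""

    def forward(self, inputs):
        feat, labels = inputs['feat'], inputs['labels']
        return super().forward(feat, labels)


# The trainer can now pass the collated batch dict straight to forward().
batch = {'feat': torch.randn(2, 5), 'labels': torch.tensor([0, 3])}
out = DummyModelForwardInputs()(batch)
print(out['loss'])
```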
| @@ -273,6 +287,22 @@ class TrainerTestMultiGpus(DistributedTestCase): | |||||
| for i in [1, 3, 5]: | for i in [1, 3, 5]: | ||||
| self.assertIn(MetricKeys.ACCURACY, lines[i]) | self.assertIn(MetricKeys.ACCURACY, lines[i]) | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_multi_gpus_forward_inputs(self): | |||||
| self.start( | |||||
| train_func, | |||||
| num_gpus=2, | |||||
| work_dir=self.tmp_dir, | |||||
| dist=True, | |||||
| forward_inputs=True) | |||||
| results_files = os.listdir(self.tmp_dir) | |||||
| json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) | |||||
| self.assertEqual(len(json_files), 1) | |||||
| self.assertIn(f'{LogKeys.EPOCH}_1.pth', results_files) | |||||
| self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) | |||||
| self.assertIn(f'{LogKeys.EPOCH}_3.pth', results_files) | |||||
| # TODO: support iters_per_epoch for dist mode | # TODO: support iters_per_epoch for dist mode | ||||
| @unittest.skipIf(True, 'need to adapt to DistributedSampler') | @unittest.skipIf(True, 'need to adapt to DistributedSampler') | ||||
| def test_multi_gpus_with_iters_per_epoch(self): | def test_multi_gpus_with_iters_per_epoch(self): | ||||
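The added `test_multi_gpus_forward_inputs` case reuses the section's standard post-run checks: exactly one `*.log.json` training log and an `{LogKeys.EPOCH}_N.pth` checkpoint per epoch in the work dir. A standalone sketch of that check, assuming `LogKeys.EPOCH` resolves to `'epoch'`:

```python
import glob
import os


def check_train_outputs(work_dir, num_epochs=3):
    # One JSON training log and one checkpoint per finished epoch.
    results_files = os.listdir(work_dir)
    json_files = glob.glob(os.path.join(work_dir, '*.log.json'))
    assert len(json_files) == 1, 'expected a single training log'
    for epoch in range(1, num_epochs + 1):
        assert f'epoch_{epoch}.pth' in results_files, \
            f'missing checkpoint for epoch {epoch}'
```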