
[to #46273042] feat: pipeline/trainer stat information from snapshot_download

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10837490
Branch: master^2
Author: mulin.lyh, 3 years ago
Parent commit: a4c36a2920
59 changed files with 271 additions and 401 deletions
  1. modelscope/models/base/base_model.py (+14, -3)
  2. modelscope/models/multi_modal/clip/model.py (+2, -2)
  3. modelscope/models/nlp/mglm/mglm_for_text_summarization.py (+0, -1)
  4. modelscope/pipelines/base.py (+7, -8)
  5. modelscope/pipelines/builder.py (+10, -3)
  6. modelscope/pipelines/cv/animal_recognition_pipeline.py (+4, -10)
  7. modelscope/pipelines/cv/body_3d_keypoints_pipeline.py (+1, -2)
  8. modelscope/pipelines/cv/easycv_pipelines/base.py (+5, -2)
  9. modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py (+0, -1)
  10. modelscope/pipelines/cv/general_recognition_pipeline.py (+4, -9)
  11. modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py (+0, -1)
  12. modelscope/pipelines/cv/image_classification_pipeline.py (+7, -14)
  13. modelscope/pipelines/cv/image_color_enhance_pipeline.py (+1, -3)
  14. modelscope/pipelines/cv/image_denoise_pipeline.py (+2, -5)
  15. modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py (+1, -1)
  16. modelscope/pipelines/cv/virtual_try_on_pipeline.py (+4, -10)
  17. modelscope/pipelines/multi_modal/image_captioning_pipeline.py (+6, -15)
  18. modelscope/pipelines/multi_modal/image_text_retrieval_pipeline.py (+3, -12)
  19. modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py (+4, -11)
  20. modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py (+4, -13)
  21. modelscope/pipelines/multi_modal/text_to_image_synthesis_pipeline.py (+3, -11)
  22. modelscope/pipelines/multi_modal/visual_entailment_pipeline.py (+6, -15)
  23. modelscope/pipelines/multi_modal/visual_grounding_pipeline.py (+6, -15)
  24. modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py (+6, -8)
  25. modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py (+3, -5)
  26. modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py (+4, -6)
  27. modelscope/pipelines/nlp/dialog_modeling_pipeline.py (+3, -6)
  28. modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py (+5, -8)
  29. modelscope/pipelines/nlp/document_segmentation_pipeline.py (+8, -13)
  30. modelscope/pipelines/nlp/faq_question_answering_pipeline.py (+5, -7)
  31. modelscope/pipelines/nlp/feature_extraction_pipeline.py (+6, -9)
  32. modelscope/pipelines/nlp/fill_mask_pipeline.py (+8, -12)
  33. modelscope/pipelines/nlp/information_extraction_pipeline.py (+4, -7)
  34. modelscope/pipelines/nlp/mglm_text_summarization_pipeline.py (+1, -1)
  35. modelscope/pipelines/nlp/named_entity_recognition_pipeline.py (+10, -17)
  36. modelscope/pipelines/nlp/sentence_embedding_pipeline.py (+4, -5)
  37. modelscope/pipelines/nlp/summarization_pipeline.py (+6, -15)
  38. modelscope/pipelines/nlp/table_question_answering_pipeline.py (+7, -8)
  39. modelscope/pipelines/nlp/text2text_generation_pipeline.py (+6, -8)
  40. modelscope/pipelines/nlp/text_classification_pipeline.py (+7, -8)
  41. modelscope/pipelines/nlp/text_error_correction_pipeline.py (+4, -6)
  42. modelscope/pipelines/nlp/text_generation_pipeline.py (+6, -7)
  43. modelscope/pipelines/nlp/text_ranking_pipeline.py (+3, -5)
  44. modelscope/pipelines/nlp/token_classification_pipeline.py (+5, -6)
  45. modelscope/pipelines/nlp/translation_quality_estimation_pipeline.py (+2, -2)
  46. modelscope/pipelines/nlp/word_segmentation_pipeline.py (+5, -6)
  47. modelscope/pipelines/nlp/zero_shot_classification_pipeline.py (+4, -6)
  48. modelscope/pipelines/science/protein_structure_pipeline.py (+8, -14)
  49. modelscope/preprocessors/base.py (+5, -2)
  50. modelscope/preprocessors/multi_modal.py (+4, -3)
  51. modelscope/trainers/audio/kws_farfield_trainer.py (+2, -7)
  52. modelscope/trainers/base.py (+14, -0)
  53. modelscope/trainers/multi_modal/clip/clip_trainer.py (+3, -2)
  54. modelscope/trainers/multi_modal/ofa/ofa_trainer.py (+3, -2)
  55. modelscope/trainers/multi_modal/team/team_trainer.py (+4, -8)
  56. modelscope/trainers/nlp/csanmt_translation_trainer.py (+1, -2)
  57. modelscope/trainers/nlp_trainer.py (+1, -6)
  58. modelscope/trainers/trainer.py (+2, -7)
  59. modelscope/utils/constant.py (+8, -0)
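
Every hunk below applies the same idea: each path into snapshot_download now attaches a user_agent entry keyed by Invoke.KEY, so the hub can attribute a download to a pipeline, a trainer, or a plain from_pretrained call. A minimal sketch of the mechanism, assuming an Invoke class shaped like the one this commit adds to modelscope/utils/constant.py (the hunks shown here only use Invoke.KEY, Invoke.PIPELINE and Invoke.PRETRAINED; KEY must be the literal string 'invoked_by' for the kwargs handshake between pipelines/base.py and base_model.py to line up, and the TRAINER member is inferred from the commit title):

from modelscope.hub.snapshot_download import snapshot_download


class Invoke:
    # Assumed body; only the attribute names are confirmed by the diffs below.
    KEY = 'invoked_by'
    PRETRAINED = 'from_pretrained'
    PIPELINE = 'pipeline'
    TRAINER = 'trainer'


def fetch(model_id: str, revision=None, invoked_by=Invoke.PRETRAINED):
    # snapshot_download sends user_agent with the hub request; that is
    # where the per-caller download statistics come from.
    return snapshot_download(
        model_id, revision, user_agent={Invoke.KEY: invoked_by})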

modelscope/models/base/base_model.py (+14, -3)

@@ -5,10 +5,10 @@ from abc import ABC, abstractmethod
 from typing import Any, Callable, Dict, List, Optional, Union

 from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.models.builder import MODELS, build_model
+from modelscope.models.builder import build_model
 from modelscope.utils.checkpoint import save_checkpoint, save_pretrained
 from modelscope.utils.config import Config
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile, Tasks
+from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile
 from modelscope.utils.device import verify_device
 from modelscope.utils.logger import get_logger

@@ -94,6 +94,10 @@ class Model(ABC):
         if prefetched is not None:
             kwargs.pop('model_prefetched')

+        invoked_by = kwargs.get(Invoke.KEY)
+        if invoked_by is not None:
+            kwargs.pop(Invoke.KEY)
+
         if osp.exists(model_name_or_path):
             local_model_dir = model_name_or_path
         else:

@@ -101,7 +105,13 @@ class Model(ABC):
                 raise RuntimeError(
                     'Expecting model is pre-fetched locally, but is not found.'
                 )
-            local_model_dir = snapshot_download(model_name_or_path, revision)
+            if invoked_by is not None:
+                invoked_by = {Invoke.KEY: invoked_by}
+            else:
+                invoked_by = {Invoke.KEY: Invoke.PRETRAINED}
+            local_model_dir = snapshot_download(
+                model_name_or_path, revision, user_agent=invoked_by)
         logger.info(f'initialize model from {local_model_dir}')
         if cfg_dict is not None:
             cfg = cfg_dict

@@ -133,6 +143,7 @@ class Model(ABC):
         model.cfg = cfg

         model.name = model_name_or_path
+        model.model_dir = local_model_dir
         return model

     def save_pretrained(self,
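
The net effect on Model.from_pretrained: the Invoke.KEY kwarg is popped before the model is constructed, forwarded to snapshot_download as the user_agent, defaults to Invoke.PRETRAINED, and the resolved directory is now kept on the instance as model.model_dir. A hypothetical call (the model id is invented for illustration, and Invoke.TRAINER is assumed since the trainer hunks are not shown above):

from modelscope.models.base import Model
from modelscope.utils.constant import Invoke

# 'invoked_by' never reaches the model constructor; it only tags the
# download as trainer-initiated in the hub statistics.
model = Model.from_pretrained('damo/some-model-id', invoked_by=Invoke.TRAINER)
print(model.model_dir)  # local snapshot directory, newly recorded by this commit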


modelscope/models/multi_modal/clip/model.py (+2, -2)

@@ -509,8 +509,8 @@ def convert_weights(model: nn.Module):
 @MODELS.register_module(Tasks.multi_modal_embedding, module_name=Models.clip)
 class CLIPForMultiModalEmbedding(TorchModel):

-    def __init__(self, model_dir, device_id=-1):
-        super().__init__(model_dir=model_dir, device_id=device_id)
+    def __init__(self, model_dir, *args, **kwargs):
+        super().__init__(model_dir=model_dir, *args, **kwargs)

         # Initialize the model.
         vision_model_config_file = '{}/vision_model_config.json'.format(


modelscope/models/nlp/mglm/mglm_for_text_summarization.py (+0, -1)

@@ -9,7 +9,6 @@ import numpy as np
 import torch
 import torch.nn.functional as F

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Models
 from modelscope.models.base import Tensor, TorchModel
 from modelscope.models.builder import MODELS


modelscope/pipelines/base.py (+7, -8)

@@ -16,7 +16,7 @@ from modelscope.outputs import TASK_OUTPUTS
 from modelscope.pipeline_inputs import TASK_INPUTS, check_input_type
 from modelscope.preprocessors import Preprocessor
 from modelscope.utils.config import Config
-from modelscope.utils.constant import Frameworks, ModelFile
+from modelscope.utils.constant import Frameworks, Invoke, ModelFile
 from modelscope.utils.device import (create_device, device_placement,
                                      verify_device)
 from modelscope.utils.hub import read_config, snapshot_download

@@ -47,8 +47,10 @@ class Pipeline(ABC):
             logger.info(f'initiate model from location {model}.')
             # expecting model has been prefetched to local cache beforehand
             return Model.from_pretrained(
-                model, model_prefetched=True,
-                device=self.device_name) if is_model(model) else model
+                model,
+                device=self.device_name,
+                model_prefetched=True,
+                invoked_by=Invoke.PIPELINE) if is_model(model) else model
         else:
             return model

@@ -383,15 +385,12 @@ class DistributedPipeline(Pipeline):
                  preprocessor: Union[Preprocessor, List[Preprocessor]] = None,
                  auto_collate=True,
                  **kwargs):
-        self.preprocessor = preprocessor
+        super().__init__(model=model, preprocessor=preprocessor, kwargs=kwargs)
         self._model_prepare = False
         self._model_prepare_lock = Lock()
         self._auto_collate = auto_collate

-        if os.path.exists(model):
-            self.model_dir = model
-        else:
-            self.model_dir = snapshot_download(model)
+        self.model_dir = self.model.model_dir
         self.cfg = read_config(self.model_dir)
         self.world_size = self.cfg.model.world_size
         self.model_pool = None


modelscope/pipelines/builder.py (+10, -3)

@@ -7,7 +7,7 @@ from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.base import Model
 from modelscope.utils.config import ConfigDict, check_config
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Tasks
+from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, Tasks
 from modelscope.utils.hub import read_config
 from modelscope.utils.registry import Registry, build_from_cfg
 from .base import Pipeline

@@ -209,6 +209,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
     Tasks.referring_video_object_segmentation:
     (Pipelines.referring_video_object_segmentation,
      'damo/cv_swin-t_referring_video-object-segmentation'),
+    Tasks.video_summarization: (Pipelines.video_summarization,
+                                'damo/cv_googlenet_pgl-video-summarization'),
 }

@@ -220,14 +222,19 @@ def normalize_model_input(model, model_revision):
         # skip revision download if model is a local directory
         if not os.path.exists(model):
             # note that if there is already a local copy, snapshot_download will check and skip downloading
-            model = snapshot_download(model, revision=model_revision)
+            model = snapshot_download(
+                model,
+                revision=model_revision,
+                user_agent={Invoke.KEY: Invoke.PIPELINE})
     elif isinstance(model, list) and isinstance(model[0], str):
         for idx in range(len(model)):
             if is_official_hub_path(
                     model[idx],
                     model_revision) and not os.path.exists(model[idx]):
                 model[idx] = snapshot_download(
-                    model[idx], revision=model_revision)
+                    model[idx],
+                    revision=model_revision,
+                    user_agent={Invoke.KEY: Invoke.PIPELINE})
     return model






modelscope/pipelines/cv/animal_recognition_pipeline.py (+4, -10)

@@ -8,14 +8,13 @@ import torch
 from PIL import Image
 from torchvision import transforms

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.animal_recognition import Bottleneck, ResNet
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Input, Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.preprocessors import LoadImage
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import Devices, ModelFile, Tasks
 from modelscope.utils.logger import get_logger

 logger = get_logger()

@@ -67,15 +66,10 @@ class AnimalRecognitionPipeline(Pipeline):
             filter_param(src_params, own_state)
             model.load_state_dict(own_state)

-        self.model = resnest101(num_classes=8288)
-        local_model_dir = model
-        if osp.exists(model):
-            local_model_dir = model
-        else:
-            local_model_dir = snapshot_download(model)
-        self.local_path = local_model_dir
+        self.local_path = self.model
         src_params = torch.load(
-            osp.join(local_model_dir, 'pytorch_model.pt'), 'cpu')
+            osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), Devices.cpu)
+        self.model = resnest101(num_classes=8288)
         load_pretrained(self.model, src_params)
         logger.info('load model done')




modelscope/pipelines/cv/body_3d_keypoints_pipeline.py (+1, -2)

@@ -120,8 +120,7 @@ class Body3DKeypointsPipeline(Pipeline):
         """
         super().__init__(model=model, **kwargs)

-        self.keypoint_model_3d = model if isinstance(
-            model, BodyKeypointsDetection3D) else Model.from_pretrained(model)
+        self.keypoint_model_3d = self.model
         self.keypoint_model_3d.eval()

         # init human body 2D keypoints detection pipeline


modelscope/pipelines/cv/easycv_pipelines/base.py (+5, -2)

@@ -11,7 +11,7 @@ from PIL import ImageFile
 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.pipelines.util import is_official_hub_path
 from modelscope.utils.config import Config
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
+from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile
 from modelscope.utils.device import create_device

@@ -37,7 +37,9 @@ class EasyCVPipeline(object):
             assert is_official_hub_path(
                 model), 'Only support local model path and official hub path!'
             model_dir = snapshot_download(
-                model_id=model, revision=DEFAULT_MODEL_REVISION)
+                model_id=model,
+                revision=DEFAULT_MODEL_REVISION,
+                user_agent={Invoke.KEY: Invoke.PIPELINE})

         assert osp.isdir(model_dir)
         model_files = glob.glob(

@@ -48,6 +50,7 @@ class EasyCVPipeline(object):

         model_path = model_files[0]
         self.model_path = model_path
+        self.model_dir = model_dir

         # get configuration file from source model dir
         self.config_file = os.path.join(model_dir, ModelFile.CONFIGURATION)


modelscope/pipelines/cv/easycv_pipelines/human_wholebody_keypoint_pipeline.py (+0, -1)

@@ -24,7 +24,6 @@ class HumanWholebodyKeypointsPipeline(EasyCVPipeline):
             model (str): model id on modelscope hub or local model path.
             model_file_pattern (str): model file pattern.
         """
-        self.model_dir = model
         super(HumanWholebodyKeypointsPipeline, self).__init__(
             model=model,
             model_file_pattern=model_file_pattern,


modelscope/pipelines/cv/general_recognition_pipeline.py (+4, -9)

@@ -8,7 +8,6 @@ import torch
 from PIL import Image
 from torchvision import transforms

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.animal_recognition import resnet
 from modelscope.outputs import OutputKeys

@@ -67,16 +66,12 @@ class GeneralRecognitionPipeline(Pipeline):
             filter_param(src_params, own_state)
             model.load_state_dict(own_state)

-        self.model = resnest101(num_classes=54092)
-        local_model_dir = model
         device = 'cpu'
-        if osp.exists(model):
-            local_model_dir = model
-        else:
-            local_model_dir = snapshot_download(model)
-        self.local_path = local_model_dir
+        self.local_path = self.model
         src_params = torch.load(
-            osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), device)
+            osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), device)
+
+        self.model = resnest101(num_classes=54092)
         load_pretrained(self.model, src_params)
         logger.info('load model done')




modelscope/pipelines/cv/hand_2d_keypoints_pipeline.py (+0, -1)

@@ -21,7 +21,6 @@ class Hand2DKeypointsPipeline(EasyCVPipeline):
             model (str): model id on modelscope hub or local model path.
             model_file_pattern (str): model file pattern.
         """
-        self.model_dir = model
         super(Hand2DKeypointsPipeline, self).__init__(
             model=model,
             model_file_pattern=model_file_pattern,


modelscope/pipelines/cv/image_classification_pipeline.py (+7, -14)

@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 import cv2
 import numpy as np

@@ -25,22 +25,15 @@ class ImageClassificationPipeline(Pipeline):

     def __init__(self,
                  model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                  **kwargs):
-        super().__init__(model=model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         assert isinstance(model, str) or isinstance(model, Model), \
             'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        pipe_model.to(get_device())
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
+        self.model.to(get_device())
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         return inputs
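
This file shows the refactor that the remaining pipeline diffs repeat almost verbatim: __init__ no longer resolves a str to a Model itself, but delegates to Pipeline.__init__ (which, per the pipelines/base.py hunk above, ends up in Model.from_pretrained with invoked_by=Invoke.PIPELINE) and afterwards touches only self.model and self.preprocessor. Schematically, not quoting any single file:

class SomeTaskPipeline(Pipeline):
    # Post-refactor constructor shape shared by the pipeline files below;
    # SomeTaskPreprocessor stands in for each task's real preprocessor.
    def __init__(self, model, preprocessor=None, **kwargs):
        # str -> Model resolution, device placement and download tagging
        # now all happen in the base class.
        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
        self.model.eval()
        if preprocessor is None:
            self.preprocessor = SomeTaskPreprocessor(self.model.model_dir)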


modelscope/pipelines/cv/image_color_enhance_pipeline.py (+1, -3)

@@ -32,10 +32,8 @@ class ImageColorEnhancePipeline(Pipeline):
         Args:
             model: model id on modelscope hub.
         """
-        model = model if isinstance(
-            model, ImageColorEnhance) else Model.from_pretrained(model)
-        model.eval()
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()

         if torch.cuda.is_available():
             self._device = torch.device('cuda')


modelscope/pipelines/cv/image_denoise_pipeline.py (+2, -5)

@@ -32,17 +32,14 @@ class ImageDenoisePipeline(Pipeline):
         Args:
             model: model id on modelscope hub.
         """
-        model = model if isinstance(
-            model, NAFNetForImageDenoise) else Model.from_pretrained(model)
-        model.eval()
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-        self.config = model.config
+        self.model.eval()
+        self.config = self.model.config

         if torch.cuda.is_available():
             self._device = torch.device('cuda')
         else:
             self._device = torch.device('cpu')
-        self.model = model
         logger.info('load image denoise model done')

     def preprocess(self, input: Input) -> Dict[str, Any]:


modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py (+1, -1)

@@ -44,7 +44,7 @@ class LanguageGuidedVideoSummarizationPipeline(Pipeline):
         """
         super().__init__(model=model, auto_collate=False, **kwargs)
         logger.info(f'loading model from {model}')
-        self.model_dir = model
+        self.model_dir = self.model.model_dir

         self.tmp_dir = kwargs.get('tmp_dir', None)
         if self.tmp_dir is None:


modelscope/pipelines/cv/virtual_try_on_pipeline.py (+4, -10)

@@ -9,7 +9,6 @@ import PIL
 import torch
 from PIL import Image

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.virual_tryon import SDAFNet_Tryon
 from modelscope.outputs import OutputKeys

@@ -52,17 +51,12 @@ class VirtualTryonPipeline(Pipeline):
             filter_param(src_params, own_state)
             model.load_state_dict(own_state)

-        self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device)
-        local_model_dir = model
-        if osp.exists(model):
-            local_model_dir = model
-        else:
-            local_model_dir = snapshot_download(model)
-        self.local_path = local_model_dir
+        self.local_path = self.model
         src_params = torch.load(
-            osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), 'cpu')
+            osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), 'cpu')
+        self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device)
         load_pretrained(self.model, src_params)
-        self.model = self.model.eval()
+        self.model.eval()
         self.size = 192
         from torchvision import transforms
         self.test_transforms = transforms.Compose([


modelscope/pipelines/multi_modal/image_captioning_pipeline.py (+6, -15)

@@ -29,22 +29,13 @@ class ImageCaptioningPipeline(Pipeline):
         Args:
             model: model id on modelscope hub.
         """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
         if preprocessor is None:
-            if isinstance(pipe_model, OfaForAllTasks):
-                preprocessor = OfaPreprocessor(pipe_model.model_dir)
-            elif isinstance(pipe_model, MPlugForAllTasks):
-                preprocessor = MPlugPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            if isinstance(self.model, OfaForAllTasks):
+                self.preprocessor = OfaPreprocessor(self.model.model_dir)
+            elif isinstance(self.model, MPlugForAllTasks):
+                self.preprocessor = MPlugPreprocessor(self.model.model_dir)

     def _batch(self, data):
         if isinstance(self.model, OfaForAllTasks):


modelscope/pipelines/multi_modal/image_text_retrieval_pipeline.py (+3, -12)

@@ -28,19 +28,10 @@ class ImageTextRetrievalPipeline(Pipeline):
         Args:
             model: model id on modelscope hub.
         """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            f'model must be a single str or Model, but got {type(model)}'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
         if preprocessor is None:
-            preprocessor = MPlugPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            self.preprocessor = MPlugPreprocessor(self.model.model_dir)

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py (+4, -11)

@@ -28,21 +28,14 @@ class MultiModalEmbeddingPipeline(Pipeline):
         Args:
             model: model id on modelscope hub.
         """
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError('model must be a single str')
-        pipe_model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
         if preprocessor is None:
-            if isinstance(pipe_model, CLIPForMultiModalEmbedding):
-                preprocessor = CLIPPreprocessor(pipe_model.model_dir)
+            if isinstance(self.model, CLIPForMultiModalEmbedding):
+                self.preprocessor = CLIPPreprocessor(self.model.model_dir)
             else:
                 raise NotImplementedError

-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
-
     def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
         return self.model(self.preprocess(input))




modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py (+4, -13)

@@ -28,20 +28,11 @@ class OcrRecognitionPipeline(Pipeline):
         Args:
             model: model id on modelscope hub.
         """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
         if preprocessor is None:
-            if isinstance(pipe_model, OfaForAllTasks):
-                preprocessor = OfaPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            if isinstance(self.model, OfaForAllTasks):
+                self.preprocessor = OfaPreprocessor(self.model.model_dir)

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


modelscope/pipelines/multi_modal/text_to_image_synthesis_pipeline.py (+3, -11)

@@ -31,18 +31,10 @@ class TextToImageSynthesisPipeline(Pipeline):
         Args:
             model: model id on modelscope hub.
         """
-        device_id = 0 if torch.cuda.is_available() else -1
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model, device_id=device_id)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError(
-                f'expecting a Model instance or str, but get {type(model)}.')
-        if preprocessor is None and isinstance(pipe_model,
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None and isinstance(self.model,
                                                OfaForTextToImageSynthesis):
-            preprocessor = OfaPreprocessor(pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+            self.preprocessor = OfaPreprocessor(self.model.model_dir)

     def preprocess(self, input: Input, **preprocess_params) -> Dict[str, Any]:
         if self.preprocessor is not None:


modelscope/pipelines/multi_modal/visual_entailment_pipeline.py (+6, -15)

@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 from modelscope.metainfo import Pipelines
 from modelscope.models.multi_modal import OfaForAllTasks

@@ -18,26 +18,17 @@ class VisualEntailmentPipeline(Pipeline):

     def __init__(self,
                  model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                  **kwargs):
         """
         use `model` and `preprocessor` to create a visual entailment pipeline for prediction
         Args:
             model: model id on modelscope hub.
         """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         return inputs

modelscope/pipelines/multi_modal/visual_grounding_pipeline.py (+6, -15)

@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 from modelscope.metainfo import Pipelines
 from modelscope.models.multi_modal import OfaForAllTasks

@@ -18,26 +18,17 @@ class VisualGroundingPipeline(Pipeline):

     def __init__(self,
                  model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                  **kwargs):
         """
         use `model` and `preprocessor` to create a visual grounding pipeline for prediction
         Args:
             model: model id on modelscope hub.
         """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.model.eval()
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         return inputs

modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py (+6, -8)

@@ -31,15 +31,13 @@ class VisualQuestionAnsweringPipeline(Pipeline):
             model (MPlugForVisualQuestionAnswering): a model instance
             preprocessor (MPlugVisualQuestionAnsweringPreprocessor): a preprocessor instance
         """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        if preprocessor is None:
-            if isinstance(model, OfaForAllTasks):
-                preprocessor = OfaPreprocessor(model.model_dir)
-            elif isinstance(model, MPlugForAllTasks):
-                preprocessor = MPlugPreprocessor(model.model_dir)
-        model.model.eval()
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            if isinstance(self.model, OfaForAllTasks):
+                self.preprocessor = OfaPreprocessor(self.model.model_dir)
+            elif isinstance(self.model, MPlugForAllTasks):
+                self.preprocessor = MPlugPreprocessor(self.model.model_dir)
+        self.model.eval()

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py (+3, -5)

@@ -32,12 +32,10 @@ class ConversationalTextToSqlPipeline(Pipeline):
             preprocessor (ConversationalTextToSqlPreprocessor):
                 a preprocessor instance
         """
-        model = model if isinstance(
-            model, StarForTextToSql) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = ConversationalTextToSqlPreprocessor(model.model_dir)
-
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = ConversationalTextToSqlPreprocessor(
+                self.model.model_dir)

     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
         """process the prediction results


modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py (+4, -6)

@@ -30,13 +30,11 @@ class DialogIntentPredictionPipeline(Pipeline):
                 or a SpaceForDialogIntent instance.
             preprocessor (DialogIntentPredictionPreprocessor): An optional preprocessor instance.
         """
-        model = model if isinstance(
-            model, SpaceForDialogIntent) else Model.from_pretrained(model)
-        if preprocessor is None:
-            preprocessor = DialogIntentPredictionPreprocessor(model.model_dir)
-        self.model = model
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-        self.categories = preprocessor.categories
+        if preprocessor is None:
+            self.preprocessor = DialogIntentPredictionPreprocessor(
+                self.model.model_dir)
+        self.categories = self.preprocessor.categories

     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
         """process the prediction results


modelscope/pipelines/nlp/dialog_modeling_pipeline.py (+3, -6)

@@ -29,13 +29,10 @@ class DialogModelingPipeline(Pipeline):
                 or a SpaceForDialogModeling instance.
             preprocessor (DialogModelingPreprocessor): An optional preprocessor instance.
         """
-        model = model if isinstance(
-            model, SpaceForDialogModeling) else Model.from_pretrained(model)
-        self.model = model
-        if preprocessor is None:
-            preprocessor = DialogModelingPreprocessor(model.model_dir)
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-        self.preprocessor = preprocessor
+        if preprocessor is None:
+            self.preprocessor = DialogModelingPreprocessor(
+                self.model.model_dir)

     def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
         """process the prediction results


modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py (+5, -8)

@@ -31,16 +31,13 @@ class DialogStateTrackingPipeline(Pipeline):
                 from the model hub, or a SpaceForDialogStateTracking instance.
             preprocessor (DialogStateTrackingPreprocessor): An optional preprocessor instance.
         """
-
-        model = model if isinstance(
-            model, SpaceForDST) else Model.from_pretrained(model)
-        self.model = model
-        if preprocessor is None:
-            preprocessor = DialogStateTrackingPreprocessor(model.model_dir)
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = DialogStateTrackingPreprocessor(
+                self.model.model_dir)

-        self.tokenizer = preprocessor.tokenizer
-        self.config = preprocessor.config
+        self.tokenizer = self.preprocessor.tokenizer
+        self.config = self.preprocessor.config

     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
         """process the prediction results


modelscope/pipelines/nlp/document_segmentation_pipeline.py (+8, -13)

@@ -31,27 +31,22 @@ class DocumentSegmentationPipeline(Pipeline):
                  model: Union[Model, str],
                  preprocessor: DocumentSegmentationPreprocessor = None,
                  **kwargs):
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-
-        self.model_dir = model.model_dir
-        self.model_cfg = model.forward()
+        self.model_dir = self.model.model_dir
+        self.model_cfg = self.model.forward()

         if self.model_cfg['type'] == 'bert':
-            config = BertConfig.from_pretrained(model.model_dir, num_labels=2)
+            config = BertConfig.from_pretrained(self.model_dir, num_labels=2)
         elif self.model_cfg['type'] == 'ponet':
-            config = PoNetConfig.from_pretrained(model.model_dir, num_labels=2)
+            config = PoNetConfig.from_pretrained(self.model_dir, num_labels=2)

-        self.document_segmentation_model = model.build_with_config(
+        self.document_segmentation_model = self.model.build_with_config(
             config=config)

         if preprocessor is None:
-            preprocessor = DocumentSegmentationPreprocessor(
-                self.model_dir, config)
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-
-        self.preprocessor = preprocessor
+            self.preprocessor = DocumentSegmentationPreprocessor(
+                self.model.model_dir, config)

     def __call__(
         self, documents: Union[List[List[str]], List[str],


modelscope/pipelines/nlp/faq_question_answering_pipeline.py (+5, -7)

@@ -21,12 +21,10 @@ class FaqQuestionAnsweringPipeline(Pipeline):
                  model: Union[str, Model],
                  preprocessor: Preprocessor = None,
                  **kwargs):
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
-        if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir, **kwargs)
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        if preprocessor is None:
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir, **kwargs)

     def _sanitize_parameters(self, **pipeline_parameters):
         return pipeline_parameters, pipeline_parameters, pipeline_parameters

@@ -37,11 +35,11 @@ class FaqQuestionAnsweringPipeline(Pipeline):
         sentence_vecs = sentence_vecs.detach().tolist()
         return sentence_vecs

-    def forward(self, inputs: [list, Dict[str, Any]],
+    def forward(self, inputs: Union[list, Dict[str, Any]],
                 **forward_params) -> Dict[str, Any]:
         return self.model(inputs)

-    def postprocess(self, inputs: [list, Dict[str, Any]],
+    def postprocess(self, inputs: Union[list, Dict[str, Any]],
                     **postprocess_params) -> Dict[str, Any]:
         scores = inputs['scores']
         labels = []


modelscope/pipelines/nlp/feature_extraction_pipeline.py (+6, -9)

@@ -46,21 +46,18 @@ class FeatureExtractionPipeline(Pipeline):

         """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

         if preprocessor is None:
-            preprocessor = NLPPreprocessor(
-                model.model_dir,
+            self.preprocessor = NLPPreprocessor(
+                self.model.model_dir,
                 padding=kwargs.pop('padding', False),
                 sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()

-        self.preprocessor = preprocessor
         self.config = Config.from_file(
-            os.path.join(model.model_dir, ModelFile.CONFIGURATION))
-        self.tokenizer = preprocessor.tokenizer
+            os.path.join(self.model.model_dir, ModelFile.CONFIGURATION))
+        self.tokenizer = self.preprocessor.tokenizer

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


modelscope/pipelines/nlp/fill_mask_pipeline.py (+8, -12)

@@ -53,22 +53,18 @@ class FillMaskPipeline(Pipeline):
         If the xlm-roberta(xlm-roberta, veco, etc.) based model is used, the mask token is '<mask>'.
         To view other examples plese check the tests/pipelines/test_fill_mask.py.
         """
-
-        fill_mask_model = Model.from_pretrained(model) if isinstance(
-            model, str) else model
-
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                fill_mask_model.model_dir,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir,
                 first_sequence=first_sequence,
                 second_sequence=None,
                 sequence_length=kwargs.pop('sequence_length', 128))
-        fill_mask_model.eval()
-        assert hasattr(
-            preprocessor, 'mask_id'
-        ), 'The input preprocessor should have the mask_id attribute.'
-        super().__init__(
-            model=fill_mask_model, preprocessor=preprocessor, **kwargs)
+        assert hasattr(
+            self.preprocessor, 'mask_id'
+        ), 'The input preprocessor should have the mask_id attribute.'
+
+        self.model.eval()

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


modelscope/pipelines/nlp/information_extraction_pipeline.py (+4, -7)

@@ -25,15 +25,12 @@ class InformationExtractionPipeline(Pipeline):
                  model: Union[Model, str],
                  preprocessor: Optional[Preprocessor] = None,
                  **kwargs):
-
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = RelationExtractionPreprocessor(
-                model.model_dir,
+            self.preprocessor = RelationExtractionPreprocessor(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 512))
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


modelscope/pipelines/nlp/mglm_text_summarization_pipeline.py (+1, -1)

@@ -21,7 +21,7 @@ class MGLMTextSummarizationPipeline(Pipeline):

     def __init__(self,
                  model: Union[MGLMForTextSummarization, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                  *args,
                  **kwargs):
         model = MGLMForTextSummarization(model) if isinstance(model,


modelscope/pipelines/nlp/named_entity_recognition_pipeline.py (+10, -17)

@@ -50,15 +50,12 @@ class NamedEntityRecognitionPipeline(TokenClassificationPipeline):

         To view other examples plese check the tests/pipelines/test_named_entity_recognition.py.
         """
-
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = TokenClassificationPreprocessor(
-                model.model_dir,
+            self.preprocessor = TokenClassificationPreprocessor(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
         self.id2label = kwargs.get('id2label')
         if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
             self.id2label = self.preprocessor.id2label

@@ -73,13 +70,11 @@ class NamedEntityRecognitionThaiPipeline(NamedEntityRecognitionPipeline):
                  model: Union[Model, str],
                  preprocessor: Optional[Preprocessor] = None,
                  **kwargs):
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = NERPreprocessorThai(
-                model.model_dir,
+            self.preprocessor = NERPreprocessorThai(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 512))
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

 @PIPELINES.register_module(
@@ -91,10 +86,8 @@ class NamedEntityRecognitionVietPipeline(NamedEntityRecognitionPipeline):
                  model: Union[Model, str],
                  preprocessor: Optional[Preprocessor] = None,
                  **kwargs):
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = NERPreprocessorViet(
-                model.model_dir,
+            self.preprocessor = NERPreprocessorViet(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 512))
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

modelscope/pipelines/nlp/sentence_embedding_pipeline.py (+4, -5)

@@ -32,14 +32,13 @@ class SentenceEmbeddingPipeline(Pipeline):
                 the model if supplied.
             sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value.
         """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir if isinstance(model, Model) else model,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir
+                if isinstance(self.model, Model) else model,
                 first_sequence=first_sequence,
                 sequence_length=kwargs.pop('sequence_length', 128))
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


modelscope/pipelines/nlp/summarization_pipeline.py (+6, -15)

@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union

 from modelscope.metainfo import Pipelines
 from modelscope.models.multi_modal import OfaForAllTasks

@@ -18,7 +18,7 @@ class SummarizationPipeline(Pipeline):

     def __init__(self,
                  model: Union[Model, str],
-                 preprocessor: [Preprocessor] = None,
+                 preprocessor: Optional[Preprocessor] = None,
                  **kwargs):
         """Use `model` and `preprocessor` to create a Summarization pipeline for prediction.

@@ -27,19 +27,10 @@ class SummarizationPipeline(Pipeline):
                 or a model id from the model hub, or a model instance.
             preprocessor (Preprocessor): An optional preprocessor instance.
         """
-        super().__init__(model=model)
-        assert isinstance(model, str) or isinstance(model, Model), \
-            'model must be a single str or OfaForAllTasks'
-        if isinstance(model, str):
-            pipe_model = Model.from_pretrained(model)
-        elif isinstance(model, Model):
-            pipe_model = model
-        else:
-            raise NotImplementedError
-        pipe_model.model.eval()
-        if preprocessor is None and isinstance(pipe_model, OfaForAllTasks):
-            preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir)
-        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
+        if preprocessor is None and isinstance(self.model, OfaForAllTasks):
+            self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir)

     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         return inputs

modelscope/pipelines/nlp/table_question_answering_pipeline.py (+7, -8)

@@ -41,21 +41,22 @@ class TableQuestionAnsweringPipeline(Pipeline):
             preprocessor (TableQuestionAnsweringPreprocessor): a preprocessor instance
             db (Database): a database to store tables in the database
         """
-        model = model if isinstance(
-            model, TableQuestionAnswering) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = TableQuestionAnsweringPreprocessor(model.model_dir)
+            self.preprocessor = TableQuestionAnsweringPreprocessor(
+                self.model.model_dir)

         # initilize tokenizer
         self.tokenizer = BertTokenizer(
-            os.path.join(model.model_dir, ModelFile.VOCAB_FILE))
+            os.path.join(self.model.model_dir, ModelFile.VOCAB_FILE))

         # initialize database
         if db is None:
             self.db = Database(
                 tokenizer=self.tokenizer,
-                table_file_path=os.path.join(model.model_dir, 'table.json'),
-                syn_dict_file_path=os.path.join(model.model_dir,
+                table_file_path=os.path.join(self.model.model_dir,
+                                             'table.json'),
+                syn_dict_file_path=os.path.join(self.model.model_dir,
                                                 'synonym.txt'))
         else:
             self.db = db

@@ -71,8 +72,6 @@ class TableQuestionAnsweringPipeline(Pipeline):
         self.schema_link_dict = constant.schema_link_dict
         self.limit_dict = constant.limit_dict

-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
-
     def post_process_multi_turn(self, history_sql, result, table):
         action = self.action_ops[result['action']]
         headers = table['header_name']


modelscope/pipelines/nlp/text2text_generation_pipeline.py (+6, -8)

@@ -63,16 +63,14 @@ class Text2TextGenerationPipeline(Pipeline):

         To view other examples plese check the tests/pipelines/test_text_generation.py.
         """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = Text2TextGenerationPreprocessor(
-                model.model_dir,
+            self.preprocessor = Text2TextGenerationPreprocessor(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 128))
-        self.tokenizer = preprocessor.tokenizer
-        self.pipeline = model.pipeline.type
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.tokenizer = self.preprocessor.tokenizer
+        self.pipeline = self.model.pipeline.type
+        self.model.eval()

     def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
         """ Provide specific preprocess for text2text generation pipeline in order to handl multi tasks

+ 7
- 8
modelscope/pipelines/nlp/text_classification_pipeline.py View File

@@ -53,25 +53,24 @@ class TextClassificationPipeline(Pipeline):
         NOTE: Inputs of type 'str' are also supported. In this scenario, the 'first_sequence' and 'second_sequence'
         param will have no affection.
         """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

         if preprocessor is None:
-            if model.__class__.__name__ == 'OfaForAllTasks':
-                preprocessor = Preprocessor.from_pretrained(
-                    model_name_or_path=model.model_dir,
+            if self.model.__class__.__name__ == 'OfaForAllTasks':
+                self.preprocessor = Preprocessor.from_pretrained(
+                    model_name_or_path=self.model.model_dir,
                     type=Preprocessors.ofa_tasks_preprocessor,
                     field=Fields.multi_modal)
             else:
                 first_sequence = kwargs.pop('first_sequence', 'first_sequence')
                 second_sequence = kwargs.pop('second_sequence', None)
-                preprocessor = Preprocessor.from_pretrained(
-                    model if isinstance(model, str) else model.model_dir,
+                self.preprocessor = Preprocessor.from_pretrained(
+                    self.model
+                    if isinstance(self.model, str) else self.model.model_dir,
                     first_sequence=first_sequence,
                     second_sequence=second_sequence,
                     sequence_length=kwargs.pop('sequence_length', 512))

-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         self.id2label = kwargs.get('id2label')
         if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
             self.id2label = self.preprocessor.id2label


+ 4
- 6
modelscope/pipelines/nlp/text_error_correction_pipeline.py View File

@@ -40,14 +40,12 @@ class TextErrorCorrectionPipeline(Pipeline):


         To view other examples plese check the tests/pipelines/test_text_error_correction.py.
         """
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

-        model = model if isinstance(
-            model,
-            BartForTextErrorCorrection) else Model.from_pretrained(model)
         if preprocessor is None:
-            preprocessor = TextErrorCorrectionPreprocessor(model.model_dir)
-        self.vocab = preprocessor.vocab
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+            self.preprocessor = TextErrorCorrectionPreprocessor(
+                self.model.model_dir)
+        self.vocab = self.preprocessor.vocab

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


+ 6
- 7
modelscope/pipelines/nlp/text_generation_pipeline.py View File

@@ -51,15 +51,14 @@ class TextGenerationPipeline(Pipeline):


         To view other examples plese check the tests/pipelines/test_text_generation.py.
         """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
-        cfg = read_config(model.model_dir)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        cfg = read_config(self.model.model_dir)
         self.postprocessor = cfg.pop('postprocessor', 'decode')
         if preprocessor is None:
             preprocessor_cfg = cfg.preprocessor
             preprocessor_cfg.update({
                 'model_dir':
-                model.model_dir,
+                self.model.model_dir,
                 'first_sequence':
                 first_sequence,
                 'second_sequence':
@@ -67,9 +66,9 @@ class TextGenerationPipeline(Pipeline):
                 'sequence_length':
                 kwargs.pop('sequence_length', 128)
             })
-            preprocessor = build_preprocessor(preprocessor_cfg, Fields.nlp)
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+            self.preprocessor = build_preprocessor(preprocessor_cfg,
+                                                   Fields.nlp)
+        self.model.eval()

     def _sanitize_parameters(self, **pipeline_parameters):
         return {}, pipeline_parameters, {}


+ 3
- 5
modelscope/pipelines/nlp/text_ranking_pipeline.py View File

@@ -32,14 +32,12 @@ class TextRankingPipeline(Pipeline):
             the model if supplied.
             sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value.
         """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

         if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 128))
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

     def forward(self, inputs: Dict[str, Any],
                 **forward_params) -> Dict[str, Any]:


+ 5
- 6
modelscope/pipelines/nlp/token_classification_pipeline.py View File

@@ -39,15 +39,14 @@ class TokenClassificationPipeline(Pipeline):
         model (str or Model): A model instance or a model local dir or a model id in the model hub.
         preprocessor (Preprocessor): a preprocessor instance, must not be None.
         """
-        model = Model.from_pretrained(model) if isinstance(model,
-                                                           str) else model
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)

         if preprocessor is None:
-            preprocessor = Preprocessor.from_pretrained(
-                model.model_dir,
+            self.preprocessor = Preprocessor.from_pretrained(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
         self.id2label = kwargs.get('id2label')
         if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
             self.id2label = self.preprocessor.id2label


+ 2
- 2
modelscope/pipelines/nlp/translation_quality_estimation_pipeline.py View File

@@ -27,10 +27,10 @@ class TranslationQualityEstimationPipeline(Pipeline):


     def __init__(self, model: str, device: str = 'gpu', **kwargs):
         super().__init__(model=model, device=device)
-        model_file = os.path.join(model, ModelFile.TORCH_MODEL_FILE)
+        model_file = os.path.join(self.model, ModelFile.TORCH_MODEL_FILE)
         with open(model_file, 'rb') as f:
             buffer = io.BytesIO(f.read())
-        self.tokenizer = XLMRobertaTokenizer.from_pretrained(model)
+        self.tokenizer = XLMRobertaTokenizer.from_pretrained(self.model)
         self.model = torch.jit.load(
             buffer, map_location=self.device).to(self.device)
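In this pipeline `self.model` starts out as the local model directory string set by the base class and is then rebound to the loaded TorchScript module. A minimal, self-contained sketch of that loading pattern, assuming a directory containing a scripted model file (the filename below is illustrative, not the ModelFile constant):

import io
import os

import torch


def load_scripted_model(model_dir: str, device: str = 'cpu'):
    # Read the serialized TorchScript program into memory, then
    # deserialize it directly onto the target device.
    model_file = os.path.join(model_dir, 'model.pt')  # illustrative name
    with open(model_file, 'rb') as f:
        buffer = io.BytesIO(f.read())
    return torch.jit.load(buffer, map_location=device).to(device)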




+ 5
- 6
modelscope/pipelines/nlp/word_segmentation_pipeline.py View File

@@ -49,14 +49,13 @@ class WordSegmentationPipeline(TokenClassificationPipeline):


         To view other examples plese check the tests/pipelines/test_word_segmentation.py.
         """
-        model = model if isinstance(model,
-                                    Model) else Model.from_pretrained(model)
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         if preprocessor is None:
-            preprocessor = TokenClassificationPreprocessor(
-                model.model_dir,
+            self.preprocessor = TokenClassificationPreprocessor(
+                self.model.model_dir,
                 sequence_length=kwargs.pop('sequence_length', 128))
-        model.eval()
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.model.eval()
         self.id2label = kwargs.get('id2label')
         if self.id2label is None and hasattr(self.preprocessor, 'id2label'):
             self.id2label = self.preprocessor.id2label


+ 4
- 6
modelscope/pipelines/nlp/zero_shot_classification_pipeline.py View File

@@ -59,16 +59,14 @@ class ZeroShotClassificationPipeline(Pipeline):
""" """
assert isinstance(model, str) or isinstance(model, Model), \ assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or Model' 'model must be a single str or Model'
model = model if isinstance(model,
Model) else Model.from_pretrained(model)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.entailment_id = 0 self.entailment_id = 0
self.contradiction_id = 2 self.contradiction_id = 2
if preprocessor is None: if preprocessor is None:
preprocessor = ZeroShotClassificationPreprocessor(
model.model_dir,
self.preprocessor = ZeroShotClassificationPreprocessor(
self.model.model_dir,
sequence_length=kwargs.pop('sequence_length', 512)) sequence_length=kwargs.pop('sequence_length', 512))
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model.eval()


def _sanitize_parameters(self, **kwargs): def _sanitize_parameters(self, **kwargs):
preprocess_params = {} preprocess_params = {}


+ 8
- 14
modelscope/pipelines/science/protein_structure_pipeline.py View File

@@ -105,22 +105,16 @@ class ProteinStructurePipeline(Pipeline):
         >>> print(pipeline_ins(protein))

         """
-        import copy
-        model_path = copy.deepcopy(model) if isinstance(model, str) else None
-        cfg = read_config(model_path)  # only model is str
-        self.cfg = cfg
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.cfg = read_config(self.model.model_dir)
         self.config = model_config(
-            cfg['pipeline']['model_name'])  # alphafold config
-        model = model if isinstance(
-            model, Model) else Model.from_pretrained(model_path)
-        self.postprocessor = cfg.pop('postprocessor', None)
+            self.cfg['pipeline']['model_name'])  # alphafold config
+        self.postprocessor = self.cfg.pop('postprocessor', None)
         if preprocessor is None:
-            preprocessor_cfg = cfg.preprocessor
-            preprocessor = build_preprocessor(preprocessor_cfg, Fields.science)
-        model.eval()
-        model.model.inference_mode()
-        model.model_dir = model_path
-        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+            preprocessor_cfg = self.cfg.preprocessor
+            self.preprocessor = build_preprocessor(preprocessor_cfg,
+                                                   Fields.science)
+        self.model.eval()

     def _sanitize_parameters(self, **pipeline_parameters):
         return pipeline_parameters, pipeline_parameters, pipeline_parameters


+ 5
- 2
modelscope/preprocessors/base.py View File

@@ -6,7 +6,8 @@ from typing import Any, Dict, Optional, Sequence


 from modelscope.metainfo import Models, Preprocessors
 from modelscope.utils.config import Config, ConfigDict
-from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModeKeys, Tasks
+from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke,
+                                       ModeKeys, Tasks)
 from modelscope.utils.hub import read_config, snapshot_download
 from modelscope.utils.logger import get_logger
 from .builder import build_preprocessor
@@ -194,7 +195,9 @@ class Preprocessor(ABC):
         """
         if not os.path.exists(model_name_or_path):
             model_dir = snapshot_download(
-                model_name_or_path, revision=revision)
+                model_name_or_path,
+                revision=revision,
+                user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
         else:
             model_dir = model_name_or_path
         if cfg_dict is None:
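The `user_agent` argument is the mechanism behind the stat reporting in this commit: `snapshot_download` forwards the dict to the hub with the request, so the hub can attribute each download to a pipeline, trainer, or preprocessor. A minimal usage sketch (the model id below is illustrative):

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.utils.constant import Invoke

# Tag this download as originating from a preprocessor; the hub records
# the invoker in its usage statistics.
model_dir = snapshot_download(
    'damo/some-model-id',  # illustrative id
    user_agent={Invoke.KEY: Invoke.PREPROCESSOR})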


+ 4
- 3
modelscope/preprocessors/multi_modal.py View File

@@ -14,7 +14,8 @@ from modelscope.metainfo import Preprocessors
 from modelscope.pipelines.base import Input
 from modelscope.preprocessors import load_image
 from modelscope.utils.config import Config
-from modelscope.utils.constant import Fields, ModeKeys, ModelFile, Tasks
+from modelscope.utils.constant import (Fields, Invoke, ModeKeys, ModelFile,
+                                       Tasks)
 from .base import Preprocessor
 from .builder import PREPROCESSORS
 from .ofa import *  # noqa
@@ -57,7 +58,7 @@ class OfaPreprocessor(Preprocessor):
             Tasks.auto_speech_recognition: OfaASRPreprocessor
         }
         model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
-            model_dir)
+            model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
         self.cfg = Config.from_file(
             osp.join(model_dir, ModelFile.CONFIGURATION))
         self.preprocess = preprocess_mapping[self.cfg.task](
@@ -131,7 +132,7 @@ class CLIPPreprocessor(Preprocessor):
         """
         super().__init__(*args, **kwargs)
         model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
-            model_dir)
+            model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR})
         self.mode = mode
         # text tokenizer
         from modelscope.models.multi_modal.clip.bert_tokenizer import FullTokenizer


+ 2
- 7
modelscope/trainers/audio/kws_farfield_trainer.py View File

@@ -8,7 +8,6 @@ import torch
 from torch import nn as nn
 from torch import optim as optim

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Trainers
 from modelscope.models import Model, TorchModel
 from modelscope.msdatasets.task_datasets.audio import KWSDataLoader, KWSDataset
@@ -54,12 +53,8 @@ class KWSFarfieldTrainer(BaseTrainer):
                  **kwargs):

         if isinstance(model, str):
-            if os.path.exists(model):
-                self.model_dir = model if os.path.isdir(
-                    model) else os.path.dirname(model)
-            else:
-                self.model_dir = snapshot_download(
-                    model, revision=model_revision)
+            self.model_dir = self.get_or_download_model_dir(
+                model, model_revision)
             if cfg_file is None:
                 cfg_file = os.path.join(self.model_dir,
                                         ModelFile.CONFIGURATION)


+ 14
- 0
modelscope/trainers/base.py View File

@@ -1,11 +1,14 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.

+import os
 import time
 from abc import ABC, abstractmethod
 from typing import Callable, Dict, List, Optional, Tuple, Union

+from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.trainers.builder import TRAINERS
 from modelscope.utils.config import Config
+from modelscope.utils.constant import Invoke
 from .utils.log_buffer import LogBuffer
@@ -32,6 +35,17 @@ class BaseTrainer(ABC):
         self.log_buffer = LogBuffer()
         self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())

+    def get_or_download_model_dir(self, model, model_revision=None):
+        if os.path.exists(model):
+            model_cache_dir = model if os.path.isdir(
+                model) else os.path.dirname(model)
+        else:
+            model_cache_dir = snapshot_download(
+                model,
+                revision=model_revision,
+                user_agent={Invoke.KEY: Invoke.TRAINER})
+        return model_cache_dir
+
     @abstractmethod
     def train(self, *args, **kwargs):
         """ Train (and evaluate) process


+ 3
- 2
modelscope/trainers/multi_modal/clip/clip_trainer.py View File

@@ -20,7 +20,7 @@ from modelscope.trainers.builder import TRAINERS
 from modelscope.trainers.optimizer.builder import build_optimizer
 from modelscope.utils.config import Config
 from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys,
-                                       ModeKeys)
+                                       Invoke, ModeKeys)
 from .clip_trainer_utils import get_loss, get_optimizer_params, get_schedule
@@ -52,7 +52,8 @@ class CLIPTrainer(EpochBasedTrainer):
                  model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
                  seed: int = 42,
                  **kwargs):
-        model = Model.from_pretrained(model, revision=model_revision)
+        model = Model.from_pretrained(
+            model, revision=model_revision, invoked_by=Invoke.TRAINER)
         # for training & eval, we convert the model from FP16 back to FP32
         # to compatible with modelscope amp training
         convert_models_to_fp32(model)


+ 3
- 2
modelscope/trainers/multi_modal/ofa/ofa_trainer.py View File

@@ -23,7 +23,7 @@ from modelscope.trainers.optimizer.builder import build_optimizer
 from modelscope.trainers.parallel.utils import is_parallel
 from modelscope.utils.config import Config
 from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys,
-                                       ModeKeys)
+                                       Invoke, ModeKeys)
 from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion,
                                 get_schedule)
@@ -49,7 +49,8 @@ class OFATrainer(EpochBasedTrainer):
                  model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
                  seed: int = 42,
                  **kwargs):
-        model = Model.from_pretrained(model, revision=model_revision)
+        model = Model.from_pretrained(
+            model, revision=model_revision, invoked_by=Invoke.TRAINER)
        model_dir = model.model_dir
        self.cfg_modify_fn = cfg_modify_fn
        cfg = self.rebuild_config(Config.from_file(cfg_file))
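Unlike the trainers that resolve a model directory themselves, the CLIP and OFA trainers delegate to `Model.from_pretrained` and pass `invoked_by=Invoke.TRAINER`. The `base_model.py` change in this commit presumably maps that argument onto the same `user_agent` payload; a hedged sketch of that resolution step, assuming it mirrors the `snapshot_download` calls visible elsewhere in this diff (not the verbatim implementation):

import os.path as osp

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.utils.constant import Invoke


def resolve_model_dir(model_name_or_path: str,
                      revision: str = None,
                      invoked_by: str = Invoke.PRETRAINED) -> str:
    """Hypothetical helper mirroring what from_pretrained plausibly does:
    use a local path as-is, otherwise download and tag the invoker."""
    if osp.exists(model_name_or_path):
        return model_name_or_path
    return snapshot_download(
        model_name_or_path,
        revision=revision,
        user_agent={Invoke.KEY: invoked_by})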


+ 4
- 8
modelscope/trainers/multi_modal/team/team_trainer.py View File

@@ -7,21 +7,17 @@ from typing import Callable, Dict, Optional
 import numpy as np
 import torch
 import torch.nn as nn
-import torchvision.datasets as datasets
-import torchvision.transforms as transforms
 from sklearn.metrics import confusion_matrix
-from torch.optim import AdamW
 from torch.utils.data import DataLoader, Dataset

 from modelscope.metainfo import Trainers
 from modelscope.models.base import Model
-from modelscope.msdatasets import MsDataset
 from modelscope.trainers.base import BaseTrainer
 from modelscope.trainers.builder import TRAINERS
-from modelscope.trainers.multi_modal.team.team_trainer_utils import (
-    get_optimizer, train_mapping, val_mapping)
+from modelscope.trainers.multi_modal.team.team_trainer_utils import \
+    get_optimizer
 from modelscope.utils.config import Config
-from modelscope.utils.constant import DownloadMode, ModeKeys
+from modelscope.utils.constant import Invoke
 from modelscope.utils.logger import get_logger

 logger = get_logger()
@@ -36,7 +32,7 @@ class TEAMImgClsTrainer(BaseTrainer):
         super().__init__(cfg_file)

         self.cfg = Config.from_file(cfg_file)
-        team_model = Model.from_pretrained(model)
+        team_model = Model.from_pretrained(model, invoked_by=Invoke.TRAINER)
         image_model = team_model.model.image_model.vision_transformer
         classification_model = nn.Sequential(
             OrderedDict([('encoder', image_model),


+ 1
- 2
modelscope/trainers/nlp/csanmt_translation_trainer.py View File

@@ -24,8 +24,7 @@ logger = get_logger()
 class CsanmtTranslationTrainer(BaseTrainer):

     def __init__(self, model: str, cfg_file: str = None, *args, **kwargs):
-        if not osp.exists(model):
-            model = snapshot_download(model)
+        model = self.get_or_download_model_dir(model)
         tf.reset_default_graph()

         self.model_dir = model


+ 1
- 6
modelscope/trainers/nlp_trainer.py View File

@@ -10,7 +10,6 @@ import torch
 from torch import nn
 from torch.utils.data import Dataset

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Trainers
 from modelscope.metrics.builder import build_metric
 from modelscope.models.base import Model, TorchModel
@@ -478,11 +477,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer):
         """

         if isinstance(model, str):
-            if os.path.exists(model):
-                model_dir = model if os.path.isdir(model) else os.path.dirname(
-                    model)
-            else:
-                model_dir = snapshot_download(model, revision=model_revision)
+            model_dir = self.get_or_download_model_dir(model, model_revision)
             if cfg_file is None:
                 cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
         else:


+ 2
- 7
modelscope/trainers/trainer.py View File

@@ -14,7 +14,6 @@ from torch.utils.data import DataLoader, Dataset
 from torch.utils.data.dataloader import default_collate
 from torch.utils.data.distributed import DistributedSampler

-from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.metainfo import Trainers
 from modelscope.metrics import build_metric, task_default_metrics
 from modelscope.models.base import Model, TorchModel
@@ -98,12 +97,8 @@ class EpochBasedTrainer(BaseTrainer):
         self._seed = seed
         set_random_seed(self._seed)
         if isinstance(model, str):
-            if os.path.exists(model):
-                self.model_dir = model if os.path.isdir(
-                    model) else os.path.dirname(model)
-            else:
-                self.model_dir = snapshot_download(
-                    model, revision=model_revision)
+            self.model_dir = self.get_or_download_model_dir(
+                model, model_revision)
             if cfg_file is None:
                 cfg_file = os.path.join(self.model_dir,
                                         ModelFile.CONFIGURATION)


+ 8
- 0
modelscope/utils/constant.py View File

@@ -291,6 +291,14 @@ class ModelFile(object):
     TS_MODEL_FILE = 'model.ts'


+class Invoke(object):
+    KEY = 'invoked_by'
+    PRETRAINED = 'from_pretrained'
+    PIPELINE = 'pipeline'
+    TRAINER = 'trainer'
+    PREPROCESSOR = 'preprocessor'
+
+
 class ConfigFields(object):
     """ First level keyword in configuration file
     """

