Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10837490master^2
| @@ -5,10 +5,10 @@ from abc import ABC, abstractmethod | |||
| from typing import Any, Callable, Dict, List, Optional, Union | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.models.builder import MODELS, build_model | |||
| from modelscope.models.builder import build_model | |||
| from modelscope.utils.checkpoint import save_checkpoint, save_pretrained | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile, Tasks | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile | |||
| from modelscope.utils.device import verify_device | |||
| from modelscope.utils.logger import get_logger | |||
| @@ -94,6 +94,10 @@ class Model(ABC): | |||
| if prefetched is not None: | |||
| kwargs.pop('model_prefetched') | |||
| invoked_by = kwargs.get(Invoke.KEY) | |||
| if invoked_by is not None: | |||
| kwargs.pop(Invoke.KEY) | |||
| if osp.exists(model_name_or_path): | |||
| local_model_dir = model_name_or_path | |||
| else: | |||
| @@ -101,7 +105,13 @@ class Model(ABC): | |||
| raise RuntimeError( | |||
| 'Expecting model is pre-fetched locally, but is not found.' | |||
| ) | |||
| local_model_dir = snapshot_download(model_name_or_path, revision) | |||
| if invoked_by is not None: | |||
| invoked_by = {Invoke.KEY: invoked_by} | |||
| else: | |||
| invoked_by = {Invoke.KEY: Invoke.PRETRAINED} | |||
| local_model_dir = snapshot_download( | |||
| model_name_or_path, revision, user_agent=invoked_by) | |||
| logger.info(f'initialize model from {local_model_dir}') | |||
| if cfg_dict is not None: | |||
| cfg = cfg_dict | |||
| @@ -133,6 +143,7 @@ class Model(ABC): | |||
| model.cfg = cfg | |||
| model.name = model_name_or_path | |||
| model.model_dir = local_model_dir | |||
| return model | |||
| def save_pretrained(self, | |||
| @@ -509,8 +509,8 @@ def convert_weights(model: nn.Module): | |||
| @MODELS.register_module(Tasks.multi_modal_embedding, module_name=Models.clip) | |||
| class CLIPForMultiModalEmbedding(TorchModel): | |||
| def __init__(self, model_dir, device_id=-1): | |||
| super().__init__(model_dir=model_dir, device_id=device_id) | |||
| def __init__(self, model_dir, *args, **kwargs): | |||
| super().__init__(model_dir=model_dir, *args, **kwargs) | |||
| # Initialize the model. | |||
| vision_model_config_file = '{}/vision_model_config.json'.format( | |||
| @@ -9,7 +9,6 @@ import numpy as np | |||
| import torch | |||
| import torch.nn.functional as F | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Models | |||
| from modelscope.models.base import Tensor, TorchModel | |||
| from modelscope.models.builder import MODELS | |||
| @@ -16,7 +16,7 @@ from modelscope.outputs import TASK_OUTPUTS | |||
| from modelscope.pipeline_inputs import TASK_INPUTS, check_input_type | |||
| from modelscope.preprocessors import Preprocessor | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import Frameworks, ModelFile | |||
| from modelscope.utils.constant import Frameworks, Invoke, ModelFile | |||
| from modelscope.utils.device import (create_device, device_placement, | |||
| verify_device) | |||
| from modelscope.utils.hub import read_config, snapshot_download | |||
| @@ -47,8 +47,10 @@ class Pipeline(ABC): | |||
| logger.info(f'initiate model from location {model}.') | |||
| # expecting model has been prefetched to local cache beforehand | |||
| return Model.from_pretrained( | |||
| model, model_prefetched=True, | |||
| device=self.device_name) if is_model(model) else model | |||
| model, | |||
| device=self.device_name, | |||
| model_prefetched=True, | |||
| invoked_by=Invoke.PIPELINE) if is_model(model) else model | |||
| else: | |||
| return model | |||
| @@ -383,15 +385,12 @@ class DistributedPipeline(Pipeline): | |||
| preprocessor: Union[Preprocessor, List[Preprocessor]] = None, | |||
| auto_collate=True, | |||
| **kwargs): | |||
| self.preprocessor = preprocessor | |||
| super().__init__(model=model, preprocessor=preprocessor, kwargs=kwargs) | |||
| self._model_prepare = False | |||
| self._model_prepare_lock = Lock() | |||
| self._auto_collate = auto_collate | |||
| if os.path.exists(model): | |||
| self.model_dir = model | |||
| else: | |||
| self.model_dir = snapshot_download(model) | |||
| self.model_dir = self.model.model_dir | |||
| self.cfg = read_config(self.model_dir) | |||
| self.world_size = self.cfg.model.world_size | |||
| self.model_pool = None | |||
| @@ -7,7 +7,7 @@ from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Pipelines | |||
| from modelscope.models.base import Model | |||
| from modelscope.utils.config import ConfigDict, check_config | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Tasks | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, Tasks | |||
| from modelscope.utils.hub import read_config | |||
| from modelscope.utils.registry import Registry, build_from_cfg | |||
| from .base import Pipeline | |||
| @@ -209,6 +209,8 @@ DEFAULT_MODEL_FOR_PIPELINE = { | |||
| Tasks.referring_video_object_segmentation: | |||
| (Pipelines.referring_video_object_segmentation, | |||
| 'damo/cv_swin-t_referring_video-object-segmentation'), | |||
| Tasks.video_summarization: (Pipelines.video_summarization, | |||
| 'damo/cv_googlenet_pgl-video-summarization'), | |||
| } | |||
| @@ -220,14 +222,19 @@ def normalize_model_input(model, model_revision): | |||
| # skip revision download if model is a local directory | |||
| if not os.path.exists(model): | |||
| # note that if there is already a local copy, snapshot_download will check and skip downloading | |||
| model = snapshot_download(model, revision=model_revision) | |||
| model = snapshot_download( | |||
| model, | |||
| revision=model_revision, | |||
| user_agent={Invoke.KEY: Invoke.PIPELINE}) | |||
| elif isinstance(model, list) and isinstance(model[0], str): | |||
| for idx in range(len(model)): | |||
| if is_official_hub_path( | |||
| model[idx], | |||
| model_revision) and not os.path.exists(model[idx]): | |||
| model[idx] = snapshot_download( | |||
| model[idx], revision=model_revision) | |||
| model[idx], | |||
| revision=model_revision, | |||
| user_agent={Invoke.KEY: Invoke.PIPELINE}) | |||
| return model | |||
| @@ -8,14 +8,13 @@ import torch | |||
| from PIL import Image | |||
| from torchvision import transforms | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Pipelines | |||
| from modelscope.models.cv.animal_recognition import Bottleneck, ResNet | |||
| from modelscope.outputs import OutputKeys | |||
| from modelscope.pipelines.base import Input, Pipeline | |||
| from modelscope.pipelines.builder import PIPELINES | |||
| from modelscope.preprocessors import LoadImage | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.constant import Devices, ModelFile, Tasks | |||
| from modelscope.utils.logger import get_logger | |||
| logger = get_logger() | |||
| @@ -67,15 +66,10 @@ class AnimalRecognitionPipeline(Pipeline): | |||
| filter_param(src_params, own_state) | |||
| model.load_state_dict(own_state) | |||
| self.model = resnest101(num_classes=8288) | |||
| local_model_dir = model | |||
| if osp.exists(model): | |||
| local_model_dir = model | |||
| else: | |||
| local_model_dir = snapshot_download(model) | |||
| self.local_path = local_model_dir | |||
| self.local_path = self.model | |||
| src_params = torch.load( | |||
| osp.join(local_model_dir, 'pytorch_model.pt'), 'cpu') | |||
| osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), Devices.cpu) | |||
| self.model = resnest101(num_classes=8288) | |||
| load_pretrained(self.model, src_params) | |||
| logger.info('load model done') | |||
| @@ -120,8 +120,7 @@ class Body3DKeypointsPipeline(Pipeline): | |||
| """ | |||
| super().__init__(model=model, **kwargs) | |||
| self.keypoint_model_3d = model if isinstance( | |||
| model, BodyKeypointsDetection3D) else Model.from_pretrained(model) | |||
| self.keypoint_model_3d = self.model | |||
| self.keypoint_model_3d.eval() | |||
| # init human body 2D keypoints detection pipeline | |||
| @@ -11,7 +11,7 @@ from PIL import ImageFile | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.pipelines.util import is_official_hub_path | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile | |||
| from modelscope.utils.device import create_device | |||
| @@ -37,7 +37,9 @@ class EasyCVPipeline(object): | |||
| assert is_official_hub_path( | |||
| model), 'Only support local model path and official hub path!' | |||
| model_dir = snapshot_download( | |||
| model_id=model, revision=DEFAULT_MODEL_REVISION) | |||
| model_id=model, | |||
| revision=DEFAULT_MODEL_REVISION, | |||
| user_agent={Invoke.KEY: Invoke.PIPELINE}) | |||
| assert osp.isdir(model_dir) | |||
| model_files = glob.glob( | |||
| @@ -48,6 +50,7 @@ class EasyCVPipeline(object): | |||
| model_path = model_files[0] | |||
| self.model_path = model_path | |||
| self.model_dir = model_dir | |||
| # get configuration file from source model dir | |||
| self.config_file = os.path.join(model_dir, ModelFile.CONFIGURATION) | |||
| @@ -24,7 +24,6 @@ class HumanWholebodyKeypointsPipeline(EasyCVPipeline): | |||
| model (str): model id on modelscope hub or local model path. | |||
| model_file_pattern (str): model file pattern. | |||
| """ | |||
| self.model_dir = model | |||
| super(HumanWholebodyKeypointsPipeline, self).__init__( | |||
| model=model, | |||
| model_file_pattern=model_file_pattern, | |||
| @@ -8,7 +8,6 @@ import torch | |||
| from PIL import Image | |||
| from torchvision import transforms | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Pipelines | |||
| from modelscope.models.cv.animal_recognition import resnet | |||
| from modelscope.outputs import OutputKeys | |||
| @@ -67,16 +66,12 @@ class GeneralRecognitionPipeline(Pipeline): | |||
| filter_param(src_params, own_state) | |||
| model.load_state_dict(own_state) | |||
| self.model = resnest101(num_classes=54092) | |||
| local_model_dir = model | |||
| device = 'cpu' | |||
| if osp.exists(model): | |||
| local_model_dir = model | |||
| else: | |||
| local_model_dir = snapshot_download(model) | |||
| self.local_path = local_model_dir | |||
| self.local_path = self.model | |||
| src_params = torch.load( | |||
| osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), device) | |||
| osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), device) | |||
| self.model = resnest101(num_classes=54092) | |||
| load_pretrained(self.model, src_params) | |||
| logger.info('load model done') | |||
| @@ -21,7 +21,6 @@ class Hand2DKeypointsPipeline(EasyCVPipeline): | |||
| model (str): model id on modelscope hub or local model path. | |||
| model_file_pattern (str): model file pattern. | |||
| """ | |||
| self.model_dir = model | |||
| super(Hand2DKeypointsPipeline, self).__init__( | |||
| model=model, | |||
| model_file_pattern=model_file_pattern, | |||
| @@ -1,5 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import Any, Dict, Union | |||
| from typing import Any, Dict, Optional, Union | |||
| import cv2 | |||
| import numpy as np | |||
| @@ -25,22 +25,15 @@ class ImageClassificationPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[Model, str], | |||
| preprocessor: [Preprocessor] = None, | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| **kwargs): | |||
| super().__init__(model=model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| 'model must be a single str or OfaForAllTasks' | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError | |||
| pipe_model.model.eval() | |||
| pipe_model.to(get_device()) | |||
| if preprocessor is None and isinstance(pipe_model, OfaForAllTasks): | |||
| preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| self.model.to(get_device()) | |||
| if preprocessor is None and isinstance(self.model, OfaForAllTasks): | |||
| self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir) | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||
| return inputs | |||
| @@ -32,10 +32,8 @@ class ImageColorEnhancePipeline(Pipeline): | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| model = model if isinstance( | |||
| model, ImageColorEnhance) else Model.from_pretrained(model) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| if torch.cuda.is_available(): | |||
| self._device = torch.device('cuda') | |||
| @@ -32,17 +32,14 @@ class ImageDenoisePipeline(Pipeline): | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| model = model if isinstance( | |||
| model, NAFNetForImageDenoise) else Model.from_pretrained(model) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.config = model.config | |||
| self.model.eval() | |||
| self.config = self.model.config | |||
| if torch.cuda.is_available(): | |||
| self._device = torch.device('cuda') | |||
| else: | |||
| self._device = torch.device('cpu') | |||
| self.model = model | |||
| logger.info('load image denoise model done') | |||
| def preprocess(self, input: Input) -> Dict[str, Any]: | |||
| @@ -44,7 +44,7 @@ class LanguageGuidedVideoSummarizationPipeline(Pipeline): | |||
| """ | |||
| super().__init__(model=model, auto_collate=False, **kwargs) | |||
| logger.info(f'loading model from {model}') | |||
| self.model_dir = model | |||
| self.model_dir = self.model.model_dir | |||
| self.tmp_dir = kwargs.get('tmp_dir', None) | |||
| if self.tmp_dir is None: | |||
| @@ -9,7 +9,6 @@ import PIL | |||
| import torch | |||
| from PIL import Image | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Pipelines | |||
| from modelscope.models.cv.virual_tryon import SDAFNet_Tryon | |||
| from modelscope.outputs import OutputKeys | |||
| @@ -52,17 +51,12 @@ class VirtualTryonPipeline(Pipeline): | |||
| filter_param(src_params, own_state) | |||
| model.load_state_dict(own_state) | |||
| self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device) | |||
| local_model_dir = model | |||
| if osp.exists(model): | |||
| local_model_dir = model | |||
| else: | |||
| local_model_dir = snapshot_download(model) | |||
| self.local_path = local_model_dir | |||
| self.local_path = self.model | |||
| src_params = torch.load( | |||
| osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), 'cpu') | |||
| osp.join(self.local_path, ModelFile.TORCH_MODEL_FILE), 'cpu') | |||
| self.model = SDAFNet_Tryon(ref_in_channel=6).to(self.device) | |||
| load_pretrained(self.model, src_params) | |||
| self.model = self.model.eval() | |||
| self.model.eval() | |||
| self.size = 192 | |||
| from torchvision import transforms | |||
| self.test_transforms = transforms.Compose([ | |||
| @@ -29,22 +29,13 @@ class ImageCaptioningPipeline(Pipeline): | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| super().__init__(model=model) | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| 'model must be a single str or OfaForAllTasks' | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError | |||
| pipe_model.model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| if preprocessor is None: | |||
| if isinstance(pipe_model, OfaForAllTasks): | |||
| preprocessor = OfaPreprocessor(pipe_model.model_dir) | |||
| elif isinstance(pipe_model, MPlugForAllTasks): | |||
| preprocessor = MPlugPreprocessor(pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| if isinstance(self.model, OfaForAllTasks): | |||
| self.preprocessor = OfaPreprocessor(self.model.model_dir) | |||
| elif isinstance(self.model, MPlugForAllTasks): | |||
| self.preprocessor = MPlugPreprocessor(self.model.model_dir) | |||
| def _batch(self, data): | |||
| if isinstance(self.model, OfaForAllTasks): | |||
| @@ -28,19 +28,10 @@ class ImageTextRetrievalPipeline(Pipeline): | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| super().__init__(model=model) | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| f'model must be a single str or Model, but got {type(model)}' | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError | |||
| pipe_model.model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| if preprocessor is None: | |||
| preprocessor = MPlugPreprocessor(pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| self.preprocessor = MPlugPreprocessor(self.model.model_dir) | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -28,21 +28,14 @@ class MultiModalEmbeddingPipeline(Pipeline): | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError('model must be a single str') | |||
| pipe_model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| if preprocessor is None: | |||
| if isinstance(pipe_model, CLIPForMultiModalEmbedding): | |||
| preprocessor = CLIPPreprocessor(pipe_model.model_dir) | |||
| if isinstance(self.model, CLIPForMultiModalEmbedding): | |||
| self.preprocessor = CLIPPreprocessor(self.model.model_dir) | |||
| else: | |||
| raise NotImplementedError | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | |||
| return self.model(self.preprocess(input)) | |||
| @@ -28,20 +28,11 @@ class OcrRecognitionPipeline(Pipeline): | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| super().__init__(model=model) | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| 'model must be a single str or OfaForAllTasks' | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError | |||
| pipe_model.model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| if preprocessor is None: | |||
| if isinstance(pipe_model, OfaForAllTasks): | |||
| preprocessor = OfaPreprocessor(pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| if isinstance(self.model, OfaForAllTasks): | |||
| self.preprocessor = OfaPreprocessor(self.model.model_dir) | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -31,18 +31,10 @@ class TextToImageSynthesisPipeline(Pipeline): | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| device_id = 0 if torch.cuda.is_available() else -1 | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model, device_id=device_id) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError( | |||
| f'expecting a Model instance or str, but get {type(model)}.') | |||
| if preprocessor is None and isinstance(pipe_model, | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None and isinstance(self.model, | |||
| OfaForTextToImageSynthesis): | |||
| preprocessor = OfaPreprocessor(pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| self.preprocessor = OfaPreprocessor(self.model.model_dir) | |||
| def preprocess(self, input: Input, **preprocess_params) -> Dict[str, Any]: | |||
| if self.preprocessor is not None: | |||
| @@ -1,5 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import Any, Dict, Union | |||
| from typing import Any, Dict, Optional, Union | |||
| from modelscope.metainfo import Pipelines | |||
| from modelscope.models.multi_modal import OfaForAllTasks | |||
| @@ -18,26 +18,17 @@ class VisualEntailmentPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[Model, str], | |||
| preprocessor: [Preprocessor] = None, | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| **kwargs): | |||
| """ | |||
| use `model` and `preprocessor` to create a visual entailment pipeline for prediction | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| super().__init__(model=model) | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| 'model must be a single str or OfaForAllTasks' | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError | |||
| pipe_model.model.eval() | |||
| if preprocessor is None and isinstance(pipe_model, OfaForAllTasks): | |||
| preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| if preprocessor is None and isinstance(self.model, OfaForAllTasks): | |||
| self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir) | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||
| return inputs | |||
| @@ -1,5 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import Any, Dict, Union | |||
| from typing import Any, Dict, Optional, Union | |||
| from modelscope.metainfo import Pipelines | |||
| from modelscope.models.multi_modal import OfaForAllTasks | |||
| @@ -18,26 +18,17 @@ class VisualGroundingPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[Model, str], | |||
| preprocessor: [Preprocessor] = None, | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| **kwargs): | |||
| """ | |||
| use `model` and `preprocessor` to create a visual grounding pipeline for prediction | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| super().__init__(model=model) | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| 'model must be a single str or OfaForAllTasks' | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError | |||
| pipe_model.model.eval() | |||
| if preprocessor is None and isinstance(pipe_model, OfaForAllTasks): | |||
| preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.model.eval() | |||
| if preprocessor is None and isinstance(self.model, OfaForAllTasks): | |||
| self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir) | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||
| return inputs | |||
| @@ -31,15 +31,13 @@ class VisualQuestionAnsweringPipeline(Pipeline): | |||
| model (MPlugForVisualQuestionAnswering): a model instance | |||
| preprocessor (MPlugVisualQuestionAnsweringPreprocessor): a preprocessor instance | |||
| """ | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| if preprocessor is None: | |||
| if isinstance(model, OfaForAllTasks): | |||
| preprocessor = OfaPreprocessor(model.model_dir) | |||
| elif isinstance(model, MPlugForAllTasks): | |||
| preprocessor = MPlugPreprocessor(model.model_dir) | |||
| model.model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| if isinstance(self.model, OfaForAllTasks): | |||
| self.preprocessor = OfaPreprocessor(self.model.model_dir) | |||
| elif isinstance(self.model, MPlugForAllTasks): | |||
| self.preprocessor = MPlugPreprocessor(self.model.model_dir) | |||
| self.model.eval() | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -32,12 +32,10 @@ class ConversationalTextToSqlPipeline(Pipeline): | |||
| preprocessor (ConversationalTextToSqlPreprocessor): | |||
| a preprocessor instance | |||
| """ | |||
| model = model if isinstance( | |||
| model, StarForTextToSql) else Model.from_pretrained(model) | |||
| if preprocessor is None: | |||
| preprocessor = ConversationalTextToSqlPreprocessor(model.model_dir) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| self.preprocessor = ConversationalTextToSqlPreprocessor( | |||
| self.model.model_dir) | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: | |||
| """process the prediction results | |||
| @@ -30,13 +30,11 @@ class DialogIntentPredictionPipeline(Pipeline): | |||
| or a SpaceForDialogIntent instance. | |||
| preprocessor (DialogIntentPredictionPreprocessor): An optional preprocessor instance. | |||
| """ | |||
| model = model if isinstance( | |||
| model, SpaceForDialogIntent) else Model.from_pretrained(model) | |||
| if preprocessor is None: | |||
| preprocessor = DialogIntentPredictionPreprocessor(model.model_dir) | |||
| self.model = model | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.categories = preprocessor.categories | |||
| if preprocessor is None: | |||
| self.preprocessor = DialogIntentPredictionPreprocessor( | |||
| self.model.model_dir) | |||
| self.categories = self.preprocessor.categories | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: | |||
| """process the prediction results | |||
| @@ -29,13 +29,10 @@ class DialogModelingPipeline(Pipeline): | |||
| or a SpaceForDialogModeling instance. | |||
| preprocessor (DialogModelingPreprocessor): An optional preprocessor instance. | |||
| """ | |||
| model = model if isinstance( | |||
| model, SpaceForDialogModeling) else Model.from_pretrained(model) | |||
| self.model = model | |||
| if preprocessor is None: | |||
| preprocessor = DialogModelingPreprocessor(model.model_dir) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.preprocessor = preprocessor | |||
| if preprocessor is None: | |||
| self.preprocessor = DialogModelingPreprocessor( | |||
| self.model.model_dir) | |||
| def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]: | |||
| """process the prediction results | |||
| @@ -31,16 +31,13 @@ class DialogStateTrackingPipeline(Pipeline): | |||
| from the model hub, or a SpaceForDialogStateTracking instance. | |||
| preprocessor (DialogStateTrackingPreprocessor): An optional preprocessor instance. | |||
| """ | |||
| model = model if isinstance( | |||
| model, SpaceForDST) else Model.from_pretrained(model) | |||
| self.model = model | |||
| if preprocessor is None: | |||
| preprocessor = DialogStateTrackingPreprocessor(model.model_dir) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| self.preprocessor = DialogStateTrackingPreprocessor( | |||
| self.model.model_dir) | |||
| self.tokenizer = preprocessor.tokenizer | |||
| self.config = preprocessor.config | |||
| self.tokenizer = self.preprocessor.tokenizer | |||
| self.config = self.preprocessor.config | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: | |||
| """process the prediction results | |||
| @@ -31,27 +31,22 @@ class DocumentSegmentationPipeline(Pipeline): | |||
| model: Union[Model, str], | |||
| preprocessor: DocumentSegmentationPreprocessor = None, | |||
| **kwargs): | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| self.model_dir = model.model_dir | |||
| self.model_cfg = model.forward() | |||
| self.model_dir = self.model.model_dir | |||
| self.model_cfg = self.model.forward() | |||
| if self.model_cfg['type'] == 'bert': | |||
| config = BertConfig.from_pretrained(model.model_dir, num_labels=2) | |||
| config = BertConfig.from_pretrained(self.model_dir, num_labels=2) | |||
| elif self.model_cfg['type'] == 'ponet': | |||
| config = PoNetConfig.from_pretrained(model.model_dir, num_labels=2) | |||
| config = PoNetConfig.from_pretrained(self.model_dir, num_labels=2) | |||
| self.document_segmentation_model = model.build_with_config( | |||
| self.document_segmentation_model = self.model.build_with_config( | |||
| config=config) | |||
| if preprocessor is None: | |||
| preprocessor = DocumentSegmentationPreprocessor( | |||
| self.model_dir, config) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.preprocessor = preprocessor | |||
| self.preprocessor = DocumentSegmentationPreprocessor( | |||
| self.model.model_dir, config) | |||
| def __call__( | |||
| self, documents: Union[List[List[str]], List[str], | |||
| @@ -21,12 +21,10 @@ class FaqQuestionAnsweringPipeline(Pipeline): | |||
| model: Union[str, Model], | |||
| preprocessor: Preprocessor = None, | |||
| **kwargs): | |||
| model = Model.from_pretrained(model) if isinstance(model, | |||
| str) else model | |||
| if preprocessor is None: | |||
| preprocessor = Preprocessor.from_pretrained( | |||
| model.model_dir, **kwargs) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| self.preprocessor = Preprocessor.from_pretrained( | |||
| self.model.model_dir, **kwargs) | |||
| def _sanitize_parameters(self, **pipeline_parameters): | |||
| return pipeline_parameters, pipeline_parameters, pipeline_parameters | |||
| @@ -37,11 +35,11 @@ class FaqQuestionAnsweringPipeline(Pipeline): | |||
| sentence_vecs = sentence_vecs.detach().tolist() | |||
| return sentence_vecs | |||
| def forward(self, inputs: [list, Dict[str, Any]], | |||
| def forward(self, inputs: Union[list, Dict[str, Any]], | |||
| **forward_params) -> Dict[str, Any]: | |||
| return self.model(inputs) | |||
| def postprocess(self, inputs: [list, Dict[str, Any]], | |||
| def postprocess(self, inputs: Union[list, Dict[str, Any]], | |||
| **postprocess_params) -> Dict[str, Any]: | |||
| scores = inputs['scores'] | |||
| labels = [] | |||
| @@ -46,21 +46,18 @@ class FeatureExtractionPipeline(Pipeline): | |||
| """ | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = NLPPreprocessor( | |||
| model.model_dir, | |||
| self.preprocessor = NLPPreprocessor( | |||
| self.model.model_dir, | |||
| padding=kwargs.pop('padding', False), | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| self.preprocessor = preprocessor | |||
| self.config = Config.from_file( | |||
| os.path.join(model.model_dir, ModelFile.CONFIGURATION)) | |||
| self.tokenizer = preprocessor.tokenizer | |||
| os.path.join(self.model.model_dir, ModelFile.CONFIGURATION)) | |||
| self.tokenizer = self.preprocessor.tokenizer | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -53,22 +53,18 @@ class FillMaskPipeline(Pipeline): | |||
| If the xlm-roberta(xlm-roberta, veco, etc.) based model is used, the mask token is '<mask>'. | |||
| To view other examples plese check the tests/pipelines/test_fill_mask.py. | |||
| """ | |||
| fill_mask_model = Model.from_pretrained(model) if isinstance( | |||
| model, str) else model | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = Preprocessor.from_pretrained( | |||
| fill_mask_model.model_dir, | |||
| self.preprocessor = Preprocessor.from_pretrained( | |||
| self.model.model_dir, | |||
| first_sequence=first_sequence, | |||
| second_sequence=None, | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| fill_mask_model.eval() | |||
| assert hasattr( | |||
| preprocessor, 'mask_id' | |||
| ), 'The input preprocessor should have the mask_id attribute.' | |||
| super().__init__( | |||
| model=fill_mask_model, preprocessor=preprocessor, **kwargs) | |||
| assert hasattr( | |||
| self.preprocessor, 'mask_id' | |||
| ), 'The input preprocessor should have the mask_id attribute.' | |||
| self.model.eval() | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -25,15 +25,12 @@ class InformationExtractionPipeline(Pipeline): | |||
| model: Union[Model, str], | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| **kwargs): | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = RelationExtractionPreprocessor( | |||
| model.model_dir, | |||
| self.preprocessor = RelationExtractionPreprocessor( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 512)) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -21,7 +21,7 @@ class MGLMTextSummarizationPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[MGLMForTextSummarization, str], | |||
| preprocessor: [Preprocessor] = None, | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| *args, | |||
| **kwargs): | |||
| model = MGLMForTextSummarization(model) if isinstance(model, | |||
| @@ -50,15 +50,12 @@ class NamedEntityRecognitionPipeline(TokenClassificationPipeline): | |||
| To view other examples plese check the tests/pipelines/test_named_entity_recognition.py. | |||
| """ | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = TokenClassificationPreprocessor( | |||
| model.model_dir, | |||
| self.preprocessor = TokenClassificationPreprocessor( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| self.id2label = kwargs.get('id2label') | |||
| if self.id2label is None and hasattr(self.preprocessor, 'id2label'): | |||
| self.id2label = self.preprocessor.id2label | |||
| @@ -73,13 +70,11 @@ class NamedEntityRecognitionThaiPipeline(NamedEntityRecognitionPipeline): | |||
| model: Union[Model, str], | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| **kwargs): | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = NERPreprocessorThai( | |||
| model.model_dir, | |||
| self.preprocessor = NERPreprocessorThai( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 512)) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| @PIPELINES.register_module( | |||
| @@ -91,10 +86,8 @@ class NamedEntityRecognitionVietPipeline(NamedEntityRecognitionPipeline): | |||
| model: Union[Model, str], | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| **kwargs): | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = NERPreprocessorViet( | |||
| model.model_dir, | |||
| self.preprocessor = NERPreprocessorViet( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 512)) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| @@ -32,14 +32,13 @@ class SentenceEmbeddingPipeline(Pipeline): | |||
| the model if supplied. | |||
| sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value. | |||
| """ | |||
| model = Model.from_pretrained(model) if isinstance(model, | |||
| str) else model | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = Preprocessor.from_pretrained( | |||
| model.model_dir if isinstance(model, Model) else model, | |||
| self.preprocessor = Preprocessor.from_pretrained( | |||
| self.model.model_dir | |||
| if isinstance(self.model, Model) else model, | |||
| first_sequence=first_sequence, | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -1,5 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import Any, Dict, Union | |||
| from typing import Any, Dict, Optional, Union | |||
| from modelscope.metainfo import Pipelines | |||
| from modelscope.models.multi_modal import OfaForAllTasks | |||
| @@ -18,7 +18,7 @@ class SummarizationPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[Model, str], | |||
| preprocessor: [Preprocessor] = None, | |||
| preprocessor: Optional[Preprocessor] = None, | |||
| **kwargs): | |||
| """Use `model` and `preprocessor` to create a Summarization pipeline for prediction. | |||
| @@ -27,19 +27,10 @@ class SummarizationPipeline(Pipeline): | |||
| or a model id from the model hub, or a model instance. | |||
| preprocessor (Preprocessor): An optional preprocessor instance. | |||
| """ | |||
| super().__init__(model=model) | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| 'model must be a single str or OfaForAllTasks' | |||
| if isinstance(model, str): | |||
| pipe_model = Model.from_pretrained(model) | |||
| elif isinstance(model, Model): | |||
| pipe_model = model | |||
| else: | |||
| raise NotImplementedError | |||
| pipe_model.model.eval() | |||
| if preprocessor is None and isinstance(pipe_model, OfaForAllTasks): | |||
| preprocessor = OfaPreprocessor(model_dir=pipe_model.model_dir) | |||
| super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| if preprocessor is None and isinstance(self.model, OfaForAllTasks): | |||
| self.preprocessor = OfaPreprocessor(model_dir=self.model.model_dir) | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||
| return inputs | |||
| @@ -41,21 +41,22 @@ class TableQuestionAnsweringPipeline(Pipeline): | |||
| preprocessor (TableQuestionAnsweringPreprocessor): a preprocessor instance | |||
| db (Database): a database to store tables in the database | |||
| """ | |||
| model = model if isinstance( | |||
| model, TableQuestionAnswering) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = TableQuestionAnsweringPreprocessor(model.model_dir) | |||
| self.preprocessor = TableQuestionAnsweringPreprocessor( | |||
| self.model.model_dir) | |||
| # initilize tokenizer | |||
| self.tokenizer = BertTokenizer( | |||
| os.path.join(model.model_dir, ModelFile.VOCAB_FILE)) | |||
| os.path.join(self.model.model_dir, ModelFile.VOCAB_FILE)) | |||
| # initialize database | |||
| if db is None: | |||
| self.db = Database( | |||
| tokenizer=self.tokenizer, | |||
| table_file_path=os.path.join(model.model_dir, 'table.json'), | |||
| syn_dict_file_path=os.path.join(model.model_dir, | |||
| table_file_path=os.path.join(self.model.model_dir, | |||
| 'table.json'), | |||
| syn_dict_file_path=os.path.join(self.model.model_dir, | |||
| 'synonym.txt')) | |||
| else: | |||
| self.db = db | |||
| @@ -71,8 +72,6 @@ class TableQuestionAnsweringPipeline(Pipeline): | |||
| self.schema_link_dict = constant.schema_link_dict | |||
| self.limit_dict = constant.limit_dict | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| def post_process_multi_turn(self, history_sql, result, table): | |||
| action = self.action_ops[result['action']] | |||
| headers = table['header_name'] | |||
| @@ -63,16 +63,14 @@ class Text2TextGenerationPipeline(Pipeline): | |||
| To view other examples plese check the tests/pipelines/test_text_generation.py. | |||
| """ | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = Text2TextGenerationPreprocessor( | |||
| model.model_dir, | |||
| self.preprocessor = Text2TextGenerationPreprocessor( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| self.tokenizer = preprocessor.tokenizer | |||
| self.pipeline = model.pipeline.type | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.tokenizer = self.preprocessor.tokenizer | |||
| self.pipeline = self.model.pipeline.type | |||
| self.model.eval() | |||
| def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]: | |||
| """ Provide specific preprocess for text2text generation pipeline in order to handl multi tasks | |||
| @@ -53,25 +53,24 @@ class TextClassificationPipeline(Pipeline): | |||
| NOTE: Inputs of type 'str' are also supported. In this scenario, the 'first_sequence' and 'second_sequence' | |||
| param will have no affection. | |||
| """ | |||
| model = Model.from_pretrained(model) if isinstance(model, | |||
| str) else model | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| if model.__class__.__name__ == 'OfaForAllTasks': | |||
| preprocessor = Preprocessor.from_pretrained( | |||
| model_name_or_path=model.model_dir, | |||
| if self.model.__class__.__name__ == 'OfaForAllTasks': | |||
| self.preprocessor = Preprocessor.from_pretrained( | |||
| model_name_or_path=self.model.model_dir, | |||
| type=Preprocessors.ofa_tasks_preprocessor, | |||
| field=Fields.multi_modal) | |||
| else: | |||
| first_sequence = kwargs.pop('first_sequence', 'first_sequence') | |||
| second_sequence = kwargs.pop('second_sequence', None) | |||
| preprocessor = Preprocessor.from_pretrained( | |||
| model if isinstance(model, str) else model.model_dir, | |||
| self.preprocessor = Preprocessor.from_pretrained( | |||
| self.model | |||
| if isinstance(self.model, str) else self.model.model_dir, | |||
| first_sequence=first_sequence, | |||
| second_sequence=second_sequence, | |||
| sequence_length=kwargs.pop('sequence_length', 512)) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.id2label = kwargs.get('id2label') | |||
| if self.id2label is None and hasattr(self.preprocessor, 'id2label'): | |||
| self.id2label = self.preprocessor.id2label | |||
| @@ -40,14 +40,12 @@ class TextErrorCorrectionPipeline(Pipeline): | |||
| To view other examples plese check the tests/pipelines/test_text_error_correction.py. | |||
| """ | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| model = model if isinstance( | |||
| model, | |||
| BartForTextErrorCorrection) else Model.from_pretrained(model) | |||
| if preprocessor is None: | |||
| preprocessor = TextErrorCorrectionPreprocessor(model.model_dir) | |||
| self.vocab = preprocessor.vocab | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.preprocessor = TextErrorCorrectionPreprocessor( | |||
| self.model.model_dir) | |||
| self.vocab = self.preprocessor.vocab | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -51,15 +51,14 @@ class TextGenerationPipeline(Pipeline): | |||
| To view other examples plese check the tests/pipelines/test_text_generation.py. | |||
| """ | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| cfg = read_config(model.model_dir) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| cfg = read_config(self.model.model_dir) | |||
| self.postprocessor = cfg.pop('postprocessor', 'decode') | |||
| if preprocessor is None: | |||
| preprocessor_cfg = cfg.preprocessor | |||
| preprocessor_cfg.update({ | |||
| 'model_dir': | |||
| model.model_dir, | |||
| self.model.model_dir, | |||
| 'first_sequence': | |||
| first_sequence, | |||
| 'second_sequence': | |||
| @@ -67,9 +66,9 @@ class TextGenerationPipeline(Pipeline): | |||
| 'sequence_length': | |||
| kwargs.pop('sequence_length', 128) | |||
| }) | |||
| preprocessor = build_preprocessor(preprocessor_cfg, Fields.nlp) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.preprocessor = build_preprocessor(preprocessor_cfg, | |||
| Fields.nlp) | |||
| self.model.eval() | |||
| def _sanitize_parameters(self, **pipeline_parameters): | |||
| return {}, pipeline_parameters, {} | |||
| @@ -32,14 +32,12 @@ class TextRankingPipeline(Pipeline): | |||
| the model if supplied. | |||
| sequence_length: Max sequence length in the user's custom scenario. 128 will be used as a default value. | |||
| """ | |||
| model = Model.from_pretrained(model) if isinstance(model, | |||
| str) else model | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = Preprocessor.from_pretrained( | |||
| model.model_dir, | |||
| self.preprocessor = Preprocessor.from_pretrained( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| def forward(self, inputs: Dict[str, Any], | |||
| **forward_params) -> Dict[str, Any]: | |||
| @@ -39,15 +39,14 @@ class TokenClassificationPipeline(Pipeline): | |||
| model (str or Model): A model instance or a model local dir or a model id in the model hub. | |||
| preprocessor (Preprocessor): a preprocessor instance, must not be None. | |||
| """ | |||
| model = Model.from_pretrained(model) if isinstance(model, | |||
| str) else model | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = Preprocessor.from_pretrained( | |||
| model.model_dir, | |||
| self.preprocessor = Preprocessor.from_pretrained( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| self.id2label = kwargs.get('id2label') | |||
| if self.id2label is None and hasattr(self.preprocessor, 'id2label'): | |||
| self.id2label = self.preprocessor.id2label | |||
| @@ -27,10 +27,10 @@ class TranslationQualityEstimationPipeline(Pipeline): | |||
| def __init__(self, model: str, device: str = 'gpu', **kwargs): | |||
| super().__init__(model=model, device=device) | |||
| model_file = os.path.join(model, ModelFile.TORCH_MODEL_FILE) | |||
| model_file = os.path.join(self.model, ModelFile.TORCH_MODEL_FILE) | |||
| with open(model_file, 'rb') as f: | |||
| buffer = io.BytesIO(f.read()) | |||
| self.tokenizer = XLMRobertaTokenizer.from_pretrained(model) | |||
| self.tokenizer = XLMRobertaTokenizer.from_pretrained(self.model) | |||
| self.model = torch.jit.load( | |||
| buffer, map_location=self.device).to(self.device) | |||
| @@ -49,14 +49,13 @@ class WordSegmentationPipeline(TokenClassificationPipeline): | |||
| To view other examples plese check the tests/pipelines/test_word_segmentation.py. | |||
| """ | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| if preprocessor is None: | |||
| preprocessor = TokenClassificationPreprocessor( | |||
| model.model_dir, | |||
| self.preprocessor = TokenClassificationPreprocessor( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 128)) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| self.id2label = kwargs.get('id2label') | |||
| if self.id2label is None and hasattr(self.preprocessor, 'id2label'): | |||
| self.id2label = self.preprocessor.id2label | |||
| @@ -59,16 +59,14 @@ class ZeroShotClassificationPipeline(Pipeline): | |||
| """ | |||
| assert isinstance(model, str) or isinstance(model, Model), \ | |||
| 'model must be a single str or Model' | |||
| model = model if isinstance(model, | |||
| Model) else Model.from_pretrained(model) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.entailment_id = 0 | |||
| self.contradiction_id = 2 | |||
| if preprocessor is None: | |||
| preprocessor = ZeroShotClassificationPreprocessor( | |||
| model.model_dir, | |||
| self.preprocessor = ZeroShotClassificationPreprocessor( | |||
| self.model.model_dir, | |||
| sequence_length=kwargs.pop('sequence_length', 512)) | |||
| model.eval() | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.model.eval() | |||
| def _sanitize_parameters(self, **kwargs): | |||
| preprocess_params = {} | |||
| @@ -105,22 +105,16 @@ class ProteinStructurePipeline(Pipeline): | |||
| >>> print(pipeline_ins(protein)) | |||
| """ | |||
| import copy | |||
| model_path = copy.deepcopy(model) if isinstance(model, str) else None | |||
| cfg = read_config(model_path) # only model is str | |||
| self.cfg = cfg | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.cfg = read_config(self.model.model_dir) | |||
| self.config = model_config( | |||
| cfg['pipeline']['model_name']) # alphafold config | |||
| model = model if isinstance( | |||
| model, Model) else Model.from_pretrained(model_path) | |||
| self.postprocessor = cfg.pop('postprocessor', None) | |||
| self.cfg['pipeline']['model_name']) # alphafold config | |||
| self.postprocessor = self.cfg.pop('postprocessor', None) | |||
| if preprocessor is None: | |||
| preprocessor_cfg = cfg.preprocessor | |||
| preprocessor = build_preprocessor(preprocessor_cfg, Fields.science) | |||
| model.eval() | |||
| model.model.inference_mode() | |||
| model.model_dir = model_path | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| preprocessor_cfg = self.cfg.preprocessor | |||
| self.preprocessor = build_preprocessor(preprocessor_cfg, | |||
| Fields.science) | |||
| self.model.eval() | |||
| def _sanitize_parameters(self, **pipeline_parameters): | |||
| return pipeline_parameters, pipeline_parameters, pipeline_parameters | |||
| @@ -6,7 +6,8 @@ from typing import Any, Dict, Optional, Sequence | |||
| from modelscope.metainfo import Models, Preprocessors | |||
| from modelscope.utils.config import Config, ConfigDict | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModeKeys, Tasks | |||
| from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, Invoke, | |||
| ModeKeys, Tasks) | |||
| from modelscope.utils.hub import read_config, snapshot_download | |||
| from modelscope.utils.logger import get_logger | |||
| from .builder import build_preprocessor | |||
| @@ -194,7 +195,9 @@ class Preprocessor(ABC): | |||
| """ | |||
| if not os.path.exists(model_name_or_path): | |||
| model_dir = snapshot_download( | |||
| model_name_or_path, revision=revision) | |||
| model_name_or_path, | |||
| revision=revision, | |||
| user_agent={Invoke.KEY: Invoke.PREPROCESSOR}) | |||
| else: | |||
| model_dir = model_name_or_path | |||
| if cfg_dict is None: | |||
| @@ -14,7 +14,8 @@ from modelscope.metainfo import Preprocessors | |||
| from modelscope.pipelines.base import Input | |||
| from modelscope.preprocessors import load_image | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import Fields, ModeKeys, ModelFile, Tasks | |||
| from modelscope.utils.constant import (Fields, Invoke, ModeKeys, ModelFile, | |||
| Tasks) | |||
| from .base import Preprocessor | |||
| from .builder import PREPROCESSORS | |||
| from .ofa import * # noqa | |||
| @@ -57,7 +58,7 @@ class OfaPreprocessor(Preprocessor): | |||
| Tasks.auto_speech_recognition: OfaASRPreprocessor | |||
| } | |||
| model_dir = model_dir if osp.exists(model_dir) else snapshot_download( | |||
| model_dir) | |||
| model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR}) | |||
| self.cfg = Config.from_file( | |||
| osp.join(model_dir, ModelFile.CONFIGURATION)) | |||
| self.preprocess = preprocess_mapping[self.cfg.task]( | |||
| @@ -131,7 +132,7 @@ class CLIPPreprocessor(Preprocessor): | |||
| """ | |||
| super().__init__(*args, **kwargs) | |||
| model_dir = model_dir if osp.exists(model_dir) else snapshot_download( | |||
| model_dir) | |||
| model_dir, user_agent={Invoke.KEY: Invoke.PREPROCESSOR}) | |||
| self.mode = mode | |||
| # text tokenizer | |||
| from modelscope.models.multi_modal.clip.bert_tokenizer import FullTokenizer | |||
| @@ -8,7 +8,6 @@ import torch | |||
| from torch import nn as nn | |||
| from torch import optim as optim | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.models import Model, TorchModel | |||
| from modelscope.msdatasets.task_datasets.audio import KWSDataLoader, KWSDataset | |||
| @@ -54,12 +53,8 @@ class KWSFarfieldTrainer(BaseTrainer): | |||
| **kwargs): | |||
| if isinstance(model, str): | |||
| if os.path.exists(model): | |||
| self.model_dir = model if os.path.isdir( | |||
| model) else os.path.dirname(model) | |||
| else: | |||
| self.model_dir = snapshot_download( | |||
| model, revision=model_revision) | |||
| self.model_dir = self.get_or_download_model_dir( | |||
| model, model_revision) | |||
| if cfg_file is None: | |||
| cfg_file = os.path.join(self.model_dir, | |||
| ModelFile.CONFIGURATION) | |||
| @@ -1,11 +1,14 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| import time | |||
| from abc import ABC, abstractmethod | |||
| from typing import Callable, Dict, List, Optional, Tuple, Union | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.trainers.builder import TRAINERS | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import Invoke | |||
| from .utils.log_buffer import LogBuffer | |||
| @@ -32,6 +35,17 @@ class BaseTrainer(ABC): | |||
| self.log_buffer = LogBuffer() | |||
| self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) | |||
| def get_or_download_model_dir(self, model, model_revision=None): | |||
| if os.path.exists(model): | |||
| model_cache_dir = model if os.path.isdir( | |||
| model) else os.path.dirname(model) | |||
| else: | |||
| model_cache_dir = snapshot_download( | |||
| model, | |||
| revision=model_revision, | |||
| user_agent={Invoke.KEY: Invoke.TRAINER}) | |||
| return model_cache_dir | |||
| @abstractmethod | |||
| def train(self, *args, **kwargs): | |||
| """ Train (and evaluate) process | |||
| @@ -20,7 +20,7 @@ from modelscope.trainers.builder import TRAINERS | |||
| from modelscope.trainers.optimizer.builder import build_optimizer | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys, | |||
| ModeKeys) | |||
| Invoke, ModeKeys) | |||
| from .clip_trainer_utils import get_loss, get_optimizer_params, get_schedule | |||
| @@ -52,7 +52,8 @@ class CLIPTrainer(EpochBasedTrainer): | |||
| model_revision: Optional[str] = DEFAULT_MODEL_REVISION, | |||
| seed: int = 42, | |||
| **kwargs): | |||
| model = Model.from_pretrained(model, revision=model_revision) | |||
| model = Model.from_pretrained( | |||
| model, revision=model_revision, invoked_by=Invoke.TRAINER) | |||
| # for training & eval, we convert the model from FP16 back to FP32 | |||
| # to compatible with modelscope amp training | |||
| convert_models_to_fp32(model) | |||
| @@ -23,7 +23,7 @@ from modelscope.trainers.optimizer.builder import build_optimizer | |||
| from modelscope.trainers.parallel.utils import is_parallel | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys, | |||
| ModeKeys) | |||
| Invoke, ModeKeys) | |||
| from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion, | |||
| get_schedule) | |||
| @@ -49,7 +49,8 @@ class OFATrainer(EpochBasedTrainer): | |||
| model_revision: Optional[str] = DEFAULT_MODEL_REVISION, | |||
| seed: int = 42, | |||
| **kwargs): | |||
| model = Model.from_pretrained(model, revision=model_revision) | |||
| model = Model.from_pretrained( | |||
| model, revision=model_revision, invoked_by=Invoke.TRAINER) | |||
| model_dir = model.model_dir | |||
| self.cfg_modify_fn = cfg_modify_fn | |||
| cfg = self.rebuild_config(Config.from_file(cfg_file)) | |||
| @@ -7,21 +7,17 @@ from typing import Callable, Dict, Optional | |||
| import numpy as np | |||
| import torch | |||
| import torch.nn as nn | |||
| import torchvision.datasets as datasets | |||
| import torchvision.transforms as transforms | |||
| from sklearn.metrics import confusion_matrix | |||
| from torch.optim import AdamW | |||
| from torch.utils.data import DataLoader, Dataset | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.models.base import Model | |||
| from modelscope.msdatasets import MsDataset | |||
| from modelscope.trainers.base import BaseTrainer | |||
| from modelscope.trainers.builder import TRAINERS | |||
| from modelscope.trainers.multi_modal.team.team_trainer_utils import ( | |||
| get_optimizer, train_mapping, val_mapping) | |||
| from modelscope.trainers.multi_modal.team.team_trainer_utils import \ | |||
| get_optimizer | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import DownloadMode, ModeKeys | |||
| from modelscope.utils.constant import Invoke | |||
| from modelscope.utils.logger import get_logger | |||
| logger = get_logger() | |||
| @@ -36,7 +32,7 @@ class TEAMImgClsTrainer(BaseTrainer): | |||
| super().__init__(cfg_file) | |||
| self.cfg = Config.from_file(cfg_file) | |||
| team_model = Model.from_pretrained(model) | |||
| team_model = Model.from_pretrained(model, invoked_by=Invoke.TRAINER) | |||
| image_model = team_model.model.image_model.vision_transformer | |||
| classification_model = nn.Sequential( | |||
| OrderedDict([('encoder', image_model), | |||
| @@ -24,8 +24,7 @@ logger = get_logger() | |||
| class CsanmtTranslationTrainer(BaseTrainer): | |||
| def __init__(self, model: str, cfg_file: str = None, *args, **kwargs): | |||
| if not osp.exists(model): | |||
| model = snapshot_download(model) | |||
| model = self.get_or_download_model_dir(model) | |||
| tf.reset_default_graph() | |||
| self.model_dir = model | |||
| @@ -10,7 +10,6 @@ import torch | |||
| from torch import nn | |||
| from torch.utils.data import Dataset | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics.builder import build_metric | |||
| from modelscope.models.base import Model, TorchModel | |||
| @@ -478,11 +477,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer): | |||
| """ | |||
| if isinstance(model, str): | |||
| if os.path.exists(model): | |||
| model_dir = model if os.path.isdir(model) else os.path.dirname( | |||
| model) | |||
| else: | |||
| model_dir = snapshot_download(model, revision=model_revision) | |||
| model_dir = self.get_or_download_model_dir(model, model_revision) | |||
| if cfg_file is None: | |||
| cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION) | |||
| else: | |||
| @@ -14,7 +14,6 @@ from torch.utils.data import DataLoader, Dataset | |||
| from torch.utils.data.dataloader import default_collate | |||
| from torch.utils.data.distributed import DistributedSampler | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.metainfo import Trainers | |||
| from modelscope.metrics import build_metric, task_default_metrics | |||
| from modelscope.models.base import Model, TorchModel | |||
| @@ -98,12 +97,8 @@ class EpochBasedTrainer(BaseTrainer): | |||
| self._seed = seed | |||
| set_random_seed(self._seed) | |||
| if isinstance(model, str): | |||
| if os.path.exists(model): | |||
| self.model_dir = model if os.path.isdir( | |||
| model) else os.path.dirname(model) | |||
| else: | |||
| self.model_dir = snapshot_download( | |||
| model, revision=model_revision) | |||
| self.model_dir = self.get_or_download_model_dir( | |||
| model, model_revision) | |||
| if cfg_file is None: | |||
| cfg_file = os.path.join(self.model_dir, | |||
| ModelFile.CONFIGURATION) | |||
| @@ -291,6 +291,14 @@ class ModelFile(object): | |||
| TS_MODEL_FILE = 'model.ts' | |||
| class Invoke(object): | |||
| KEY = 'invoked_by' | |||
| PRETRAINED = 'from_pretrained' | |||
| PIPELINE = 'pipeline' | |||
| TRAINER = 'trainer' | |||
| PREPROCESSOR = 'preprocessor' | |||
| class ConfigFields(object): | |||
| """ First level keyword in configuration file | |||
| """ | |||