| @@ -37,6 +37,7 @@ do | |||||
| -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \ | -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \ | ||||
| -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ | -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ | ||||
| -e TEST_LEVEL=$TEST_LEVEL \ | -e TEST_LEVEL=$TEST_LEVEL \ | ||||
| -e MODELSCOPE_ENVIRONMENT='ci' \ | |||||
| -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ | -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ | ||||
| -e MODEL_TAG_URL=$MODEL_TAG_URL \ | -e MODEL_TAG_URL=$MODEL_TAG_URL \ | ||||
| --workdir=$CODE_DIR_IN_CONTAINER \ | --workdir=$CODE_DIR_IN_CONTAINER \ | ||||
| @@ -59,6 +60,7 @@ do | |||||
| -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \ | -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \ | ||||
| -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ | -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ | ||||
| -e TEST_LEVEL=$TEST_LEVEL \ | -e TEST_LEVEL=$TEST_LEVEL \ | ||||
| -e MODELSCOPE_ENVIRONMENT='ci' \ | |||||
| -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ | -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ | ||||
| -e MODEL_TAG_URL=$MODEL_TAG_URL \ | -e MODEL_TAG_URL=$MODEL_TAG_URL \ | ||||
| --workdir=$CODE_DIR_IN_CONTAINER \ | --workdir=$CODE_DIR_IN_CONTAINER \ | ||||
| @@ -1,16 +1,26 @@ | |||||
| # Introduction | # Introduction | ||||
| ModelScope library is targeted to support training, evaluation and inference for the state of the art models provided by Mind and further support third-party models provided by users outside alibaba. | |||||
| [ModelScope]( https://www.modelscope.cn) is a “Model-as-a-Service” (MaaS) platform that seeks to bringing together most advanced machine learning models from the AI community, and to streamlining the process of leveraging and applying AI models . The core ModelScope library enables developers to perform model inference, training and evaluation, through rich layers of API designs that facilitate a unified experience across state-of-the-art models from different AI domains. | |||||
| # Design doc | |||||
| The Python library offers the layered-APIs necessary for model contributors to integrate models from CV, NLP, Speech, Multi-Modality, as well as Scientific-computation, into the ModelScope ecosystem. Implementations for all these different models are encapsulated within the library in a way that allows easy and unified access. With such integration, model inference, finetuning, and evaluations can be done within only a few lines of codes. In the meantime, flexibilities are provided so that different components in the model applications can be customized as well, where necessary. | |||||
| Please refer to alidoc [link](https://alidocs.dingtalk.com/i/nodes/OBldywvrKxo89xmAO05yJQk2ngpNbLz4?nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA&iframeQuery=utm_source%3Dportal%26utm_medium%3Dportal_space_file_tree) | |||||
| Apart from harboring implementations of various models, ModelScope library also enables the necessary interactions with the backend services of ModelScope, particularly with the Model-Hub and Dataset-Hub. Such interactions facilitate various entity (models and datasets) management to be performed seamlessly under-the-hood, such as entity lookup, version control, and cache management. | |||||
| # Development doc | |||||
| # Installation | |||||
| Please refer to [develop.md](docs/source/develop.md) | |||||
| Please refer to [installation](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85). | |||||
| # ChangeLog | |||||
| * 20/05/2022 First release version | |||||
| # Get Started | |||||
| Refer to [change_log.md](docs/source/change_log.md) for more details | |||||
| You can refer to [quick_start](https://modelscope.cn/docs/%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B) for quick start. | |||||
| We also provide other documentations including: | |||||
| * [Introduction to tasks](https://modelscope.cn/docs/%E4%BB%BB%E5%8A%A1%E7%9A%84%E4%BB%8B%E7%BB%8D) | |||||
| * [Use pipeline for model inference](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E6%8E%A8%E7%90%86Pipeline) | |||||
| * [Finetune example](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AE%AD%E7%BB%83Train) | |||||
| * [Preprocessing of data](https://modelscope.cn/docs/%E6%95%B0%E6%8D%AE%E7%9A%84%E9%A2%84%E5%A4%84%E7%90%86) | |||||
| * [Evaluation metrics](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AF%84%E4%BC%B0) | |||||
| # License | |||||
| This project is licensed under the [Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE). | |||||
| @@ -128,7 +128,7 @@ class TorchModelExporter(Exporter): | |||||
| args_list = list(args) | args_list = list(args) | ||||
| else: | else: | ||||
| args_list = [args] | args_list = [args] | ||||
| if isinstance(args_list[-1], dict): | |||||
| if isinstance(args_list[-1], Mapping): | |||||
| args_dict = args_list[-1] | args_dict = args_list[-1] | ||||
| args_list = args_list[:-1] | args_list = args_list[:-1] | ||||
| n_nonkeyword = len(args_list) | n_nonkeyword = len(args_list) | ||||
| @@ -284,9 +284,8 @@ class TorchModelExporter(Exporter): | |||||
| 'Model property dummy_inputs must be set.') | 'Model property dummy_inputs must be set.') | ||||
| dummy_inputs = collate_fn(dummy_inputs, device) | dummy_inputs = collate_fn(dummy_inputs, device) | ||||
| if isinstance(dummy_inputs, Mapping): | if isinstance(dummy_inputs, Mapping): | ||||
| dummy_inputs = self._decide_input_format(model, dummy_inputs) | |||||
| dummy_inputs_filter = [] | dummy_inputs_filter = [] | ||||
| for _input in dummy_inputs: | |||||
| for _input in self._decide_input_format(model, dummy_inputs): | |||||
| if _input is not None: | if _input is not None: | ||||
| dummy_inputs_filter.append(_input) | dummy_inputs_filter.append(_input) | ||||
| else: | else: | ||||
| @@ -23,7 +23,8 @@ from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA, | |||||
| API_RESPONSE_FIELD_MESSAGE, | API_RESPONSE_FIELD_MESSAGE, | ||||
| API_RESPONSE_FIELD_USERNAME, | API_RESPONSE_FIELD_USERNAME, | ||||
| DEFAULT_CREDENTIALS_PATH, | DEFAULT_CREDENTIALS_PATH, | ||||
| MODELSCOPE_ENVIRONMENT, ONE_YEAR_SECONDS, | |||||
| MODELSCOPE_ENVIRONMENT, | |||||
| MODELSCOPE_USERNAME, ONE_YEAR_SECONDS, | |||||
| Licenses, ModelVisibility) | Licenses, ModelVisibility) | ||||
| from modelscope.hub.errors import (InvalidParameter, NotExistError, | from modelscope.hub.errors import (InvalidParameter, NotExistError, | ||||
| NotLoginException, NoValidRevisionError, | NotLoginException, NoValidRevisionError, | ||||
| @@ -38,8 +39,8 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, | |||||
| DEFAULT_MODEL_REVISION, | DEFAULT_MODEL_REVISION, | ||||
| DEFAULT_REPOSITORY_REVISION, | DEFAULT_REPOSITORY_REVISION, | ||||
| MASTER_MODEL_BRANCH, DatasetFormations, | MASTER_MODEL_BRANCH, DatasetFormations, | ||||
| DatasetMetaFormats, DownloadMode, | |||||
| ModelFile) | |||||
| DatasetMetaFormats, DownloadChannel, | |||||
| DownloadMode, ModelFile) | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from .utils.utils import (get_endpoint, get_release_datetime, | from .utils.utils import (get_endpoint, get_release_datetime, | ||||
| model_id_to_group_owner_name) | model_id_to_group_owner_name) | ||||
| @@ -645,6 +646,25 @@ class HubApi: | |||||
| def check_local_cookies(self, use_cookies) -> CookieJar: | def check_local_cookies(self, use_cookies) -> CookieJar: | ||||
| return self._check_cookie(use_cookies=use_cookies) | return self._check_cookie(use_cookies=use_cookies) | ||||
| def dataset_download_uv(self, dataset_name: str, namespace: str): | |||||
| if not dataset_name or not namespace: | |||||
| raise ValueError('dataset_name or namespace cannot be empty!') | |||||
| # get channel and user_name | |||||
| channel = DownloadChannel.LOCAL.value | |||||
| user_name = '' | |||||
| if MODELSCOPE_ENVIRONMENT in os.environ: | |||||
| channel = os.environ[MODELSCOPE_ENVIRONMENT] | |||||
| if MODELSCOPE_USERNAME in os.environ: | |||||
| user_name = os.environ[MODELSCOPE_USERNAME] | |||||
| url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}?user={user_name}' | |||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| r = requests.post(url, cookies=cookies, headers=self.headers) | |||||
| resp = r.json() | |||||
| raise_on_error(resp) | |||||
| return resp['Message'] | |||||
| class ModelScopeConfig: | class ModelScopeConfig: | ||||
| path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) | path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) | ||||
| @@ -760,14 +780,18 @@ class ModelScopeConfig: | |||||
| env = 'custom' | env = 'custom' | ||||
| if MODELSCOPE_ENVIRONMENT in os.environ: | if MODELSCOPE_ENVIRONMENT in os.environ: | ||||
| env = os.environ[MODELSCOPE_ENVIRONMENT] | env = os.environ[MODELSCOPE_ENVIRONMENT] | ||||
| user_name = 'unknown' | |||||
| if MODELSCOPE_USERNAME in os.environ: | |||||
| user_name = os.environ[MODELSCOPE_USERNAME] | |||||
| ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s' % ( | |||||
| ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % ( | |||||
| __version__, | __version__, | ||||
| platform.python_version(), | platform.python_version(), | ||||
| ModelScopeConfig.get_user_session_id(), | ModelScopeConfig.get_user_session_id(), | ||||
| platform.platform(), | platform.platform(), | ||||
| platform.processor(), | platform.processor(), | ||||
| env, | env, | ||||
| user_name, | |||||
| ) | ) | ||||
| if isinstance(user_agent, dict): | if isinstance(user_agent, dict): | ||||
| ua = '; '.join(f'{k}/{v}' for k, v in user_agent.items()) | ua = '; '.join(f'{k}/{v}' for k, v in user_agent.items()) | ||||
| @@ -18,6 +18,7 @@ API_RESPONSE_FIELD_EMAIL = 'Email' | |||||
| API_RESPONSE_FIELD_MESSAGE = 'Message' | API_RESPONSE_FIELD_MESSAGE = 'Message' | ||||
| MODELSCOPE_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT' | MODELSCOPE_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT' | ||||
| MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG' | MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG' | ||||
| MODELSCOPE_USERNAME = 'MODELSCOPE_USERNAME' | |||||
| ONE_YEAR_SECONDS = 24 * 365 * 60 * 60 | ONE_YEAR_SECONDS = 24 * 365 * 60 * 60 | ||||
| @@ -5,6 +5,8 @@ import os | |||||
| from datetime import datetime | from datetime import datetime | ||||
| from typing import Optional | from typing import Optional | ||||
| import requests | |||||
| from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, | from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, | ||||
| DEFAULT_MODELSCOPE_GROUP, | DEFAULT_MODELSCOPE_GROUP, | ||||
| MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, | MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, | ||||
| @@ -85,3 +87,16 @@ def file_integrity_validation(file_path, expected_sha256): | |||||
| msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path | msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path | ||||
| logger.error(msg) | logger.error(msg) | ||||
| raise FileIntegrityError(msg) | raise FileIntegrityError(msg) | ||||
| def create_library_statistics(method: str, name: str, cn_name: Optional[str]): | |||||
| try: | |||||
| from modelscope.hub.api import ModelScopeConfig | |||||
| path = f'{get_endpoint()}/api/v1/statistics/library' | |||||
| headers = {'user-agent': ModelScopeConfig.get_user_agent()} | |||||
| params = {'Method': method, 'Name': name, 'CnName': cn_name} | |||||
| r = requests.post(path, params=params, headers=headers) | |||||
| r.raise_for_status() | |||||
| except Exception: | |||||
| pass | |||||
| return | |||||
| @@ -1,6 +1,7 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | # Copyright (c) Alibaba, Inc. and its affiliates. | ||||
| import os | import os | ||||
| import tempfile | |||||
| from typing import Dict, Optional | from typing import Dict, Optional | ||||
| from modelscope.metainfo import Models | from modelscope.metainfo import Models | ||||
| @@ -36,12 +37,15 @@ class FSMNSeleNetV2Decorator(TorchModel): | |||||
| else: | else: | ||||
| sc_config_file = os.path.join(model_dir, self.SC_CONFIG) | sc_config_file = os.path.join(model_dir, self.SC_CONFIG) | ||||
| model_txt_file = os.path.join(model_dir, self.MODEL_TXT) | model_txt_file = os.path.join(model_dir, self.MODEL_TXT) | ||||
| self.tmp_dir = tempfile.TemporaryDirectory() | |||||
| new_config_file = os.path.join(self.tmp_dir.name, self.SC_CONFIG) | |||||
| self._sc = None | self._sc = None | ||||
| if os.path.exists(model_txt_file): | if os.path.exists(model_txt_file): | ||||
| conf_dict = dict(mode=56542, kws_model=model_txt_file) | conf_dict = dict(mode=56542, kws_model=model_txt_file) | ||||
| update_conf(sc_config_file, sc_config_file, conf_dict) | |||||
| update_conf(sc_config_file, new_config_file, conf_dict) | |||||
| import py_sound_connect | import py_sound_connect | ||||
| self._sc = py_sound_connect.SoundConnect(sc_config_file) | |||||
| self._sc = py_sound_connect.SoundConnect(new_config_file) | |||||
| self.size_in = self._sc.bytesPerBlockIn() | self.size_in = self._sc.bytesPerBlockIn() | ||||
| self.size_out = self._sc.bytesPerBlockOut() | self.size_out = self._sc.bytesPerBlockOut() | ||||
| else: | else: | ||||
| @@ -49,6 +53,9 @@ class FSMNSeleNetV2Decorator(TorchModel): | |||||
| f'Invalid model directory! Failed to load model file: {model_txt_file}.' | f'Invalid model directory! Failed to load model file: {model_txt_file}.' | ||||
| ) | ) | ||||
| def __del__(self): | |||||
| self.tmp_dir.cleanup() | |||||
| def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | ||||
| return self.model.forward(input) | return self.model.forward(input) | ||||
| @@ -131,6 +131,8 @@ class Model(ABC): | |||||
| if not hasattr(model, 'cfg'): | if not hasattr(model, 'cfg'): | ||||
| model.cfg = cfg | model.cfg = cfg | ||||
| model.name = model_name_or_path | |||||
| return model | return model | ||||
| def save_pretrained(self, | def save_pretrained(self, | ||||
| @@ -349,11 +349,13 @@ class CLIP(nn.Module): | |||||
| text_num_hidden_layers: int, | text_num_hidden_layers: int, | ||||
| text_type_vocab_size: int, | text_type_vocab_size: int, | ||||
| tokenizer: FullTokenizer, | tokenizer: FullTokenizer, | ||||
| # vision_head_width, added this param for ViT-H | |||||
| vision_head_width: int = 64, | |||||
| ): | ): | ||||
| super().__init__() | super().__init__() | ||||
| if isinstance(vision_layers, (tuple, list)): | if isinstance(vision_layers, (tuple, list)): | ||||
| vision_heads = vision_width * 32 // 64 | |||||
| vision_heads = vision_width * 32 // vision_head_width | |||||
| self.visual = ModifiedResNet( | self.visual = ModifiedResNet( | ||||
| layers=vision_layers, | layers=vision_layers, | ||||
| output_dim=embed_dim, | output_dim=embed_dim, | ||||
| @@ -361,7 +363,7 @@ class CLIP(nn.Module): | |||||
| input_resolution=image_resolution, | input_resolution=image_resolution, | ||||
| width=vision_width) | width=vision_width) | ||||
| else: | else: | ||||
| vision_heads = vision_width // 64 | |||||
| vision_heads = vision_width // vision_head_width | |||||
| self.visual = VisualTransformer( | self.visual = VisualTransformer( | ||||
| input_resolution=image_resolution, | input_resolution=image_resolution, | ||||
| patch_size=vision_patch_size, | patch_size=vision_patch_size, | ||||
| @@ -136,6 +136,12 @@ class OFAConfig(PretrainedConfig): | |||||
| entangle_position_embedding=False, | entangle_position_embedding=False, | ||||
| interpolate_position=False, | interpolate_position=False, | ||||
| orig_patch_image_size=224, | orig_patch_image_size=224, | ||||
| share_attn_bias=False, | |||||
| use_image_feature=True, | |||||
| disable_entangle=False, | |||||
| use_ofasys=False, | |||||
| vit_type='vit_base', | |||||
| vit_drop_path_rate=0.0, | |||||
| **kwargs): | **kwargs): | ||||
| self.vocab_size = vocab_size | self.vocab_size = vocab_size | ||||
| self.max_position_embeddings = max_position_embeddings | self.max_position_embeddings = max_position_embeddings | ||||
| @@ -178,6 +184,13 @@ class OFAConfig(PretrainedConfig): | |||||
| self.interpolate_position = interpolate_position | self.interpolate_position = interpolate_position | ||||
| self.orig_patch_image_size = orig_patch_image_size | self.orig_patch_image_size = orig_patch_image_size | ||||
| self.share_attn_bias = share_attn_bias | |||||
| self.use_image_feature = use_image_feature | |||||
| self.disable_entangle = disable_entangle | |||||
| self.use_ofasys = use_ofasys | |||||
| self.vit_type = vit_type | |||||
| self.vit_drop_path_rate = vit_drop_path_rate | |||||
| super().__init__( | super().__init__( | ||||
| pad_token_id=pad_token_id, | pad_token_id=pad_token_id, | ||||
| bos_token_id=bos_token_id, | bos_token_id=bos_token_id, | ||||
| @@ -35,6 +35,8 @@ from transformers.utils import logging | |||||
| from .configuration_ofa import OFAConfig | from .configuration_ofa import OFAConfig | ||||
| from .generate import utils | from .generate import utils | ||||
| from .resnet import ResNet | from .resnet import ResNet | ||||
| from .utils.utils import DropPath | |||||
| from .vit import vit_base, vit_huge, vit_large, vit_large_336 | |||||
| logger = logging.get_logger(__name__) | logger = logging.get_logger(__name__) | ||||
| @@ -249,45 +251,6 @@ class LayerDropModuleList(nn.ModuleList): | |||||
| yield m | yield m | ||||
| def drop_path(x, drop_prob: float = 0.0, training: bool = False): | |||||
| r""" | |||||
| Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). | |||||
| Args: | |||||
| x (`nn.Modules`): input nn layers. | |||||
| drop_prob (`float`): drop path ratio. | |||||
| training (`bool`): whether is training or inference. | |||||
| """ | |||||
| if drop_prob == 0.0 or not training: | |||||
| return x | |||||
| keep_prob = 1 - drop_prob | |||||
| shape = (1, x.shape[1], 1) | |||||
| random_tensor = keep_prob + torch.rand( | |||||
| shape, dtype=x.dtype, device=x.device) | |||||
| random_tensor.floor_() # binarize | |||||
| output = x.div(keep_prob) * random_tensor | |||||
| return output | |||||
| class DropPath(nn.Module): | |||||
| r""" | |||||
| Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). | |||||
| Args: | |||||
| drop_prob: drop path ratio. | |||||
| """ | |||||
| def __init__(self, drop_prob=None): | |||||
| super().__init__() | |||||
| self.drop_prob = drop_prob | |||||
| def forward(self, x): | |||||
| return drop_path(x, self.drop_prob, self.training) | |||||
| def extra_repr(self) -> str: | |||||
| return 'p={}'.format(self.drop_prob) | |||||
| class OFAAttention(nn.Module): | class OFAAttention(nn.Module): | ||||
| r""" | r""" | ||||
| Multi-headed attention, with additional implementation for NormFormer. | Multi-headed attention, with additional implementation for NormFormer. | ||||
| @@ -898,31 +861,49 @@ class OFAEncoder(OFAPreTrainedModel): | |||||
| self.padding_idx) | self.padding_idx) | ||||
| if config.add_type_embedding: | if config.add_type_embedding: | ||||
| self.type_embedding = Embedding(2, embed_dim, padding_idx=None) | |||||
| if config.use_image_feature: | |||||
| self.type_embedding = Embedding(2, embed_dim, padding_idx=None) | |||||
| else: | |||||
| self.type_embedding = Embedding(1, embed_dim, padding_idx=None) | |||||
| else: | else: | ||||
| self.type_embedding = None | self.type_embedding = None | ||||
| if config.resnet_type == 'resnet18': | |||||
| self.embed_images = ResNet( | |||||
| [2, 2, 2], drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet34': | |||||
| self.embed_images = ResNet( | |||||
| [3, 4, 6], drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet50': | |||||
| self.embed_images = ResNet( | |||||
| [3, 4, 6], drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet101': | |||||
| self.embed_images = ResNet( | |||||
| [3, 4, 23], drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet152': | |||||
| self.embed_images = ResNet( | |||||
| [3, 8, 36], drop_path_rate=config.resnet_drop_path_rate) | |||||
| else: | |||||
| raise NotImplementedError | |||||
| if config.use_image_feature: | |||||
| if config.use_ofasys: | |||||
| vit_backbone = { | |||||
| 'vit_base': vit_base, | |||||
| 'vit_large': vit_large, | |||||
| 'vit_large_336': vit_large_336, | |||||
| 'vit_huge': vit_huge, | |||||
| }[config.vit_type] | |||||
| self.embed_images = vit_backbone(config.vit_drop_path_rate) | |||||
| self.image_proj = Linear(1024, embed_dim) | |||||
| self.image_proj = Linear(self.embed_images.width, embed_dim) | |||||
| if config.resnet_model_path: | |||||
| else: | |||||
| if config.resnet_type == 'resnet18': | |||||
| self.embed_images = ResNet( | |||||
| [2, 2, 2], drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet34': | |||||
| self.embed_images = ResNet( | |||||
| [3, 4, 6], drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet50': | |||||
| self.embed_images = ResNet( | |||||
| [3, 4, 6], drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet101': | |||||
| self.embed_images = ResNet( | |||||
| [3, 4, 23], | |||||
| drop_path_rate=config.resnet_drop_path_rate) | |||||
| elif config.resnet_type == 'resnet152': | |||||
| self.embed_images = ResNet( | |||||
| [3, 8, 36], | |||||
| drop_path_rate=config.resnet_drop_path_rate) | |||||
| else: | |||||
| raise NotImplementedError | |||||
| self.image_proj = Linear(1024, embed_dim) | |||||
| if not config.use_ofasys and config.resnet_model_path: | |||||
| print('load resnet {}'.format(config.resnet_model_path)) | print('load resnet {}'.format(config.resnet_model_path)) | ||||
| resnet_state_dict = torch.load(config.resnet_model_path) | resnet_state_dict = torch.load(config.resnet_model_path) | ||||
| self.embed_images.load_state_dict(resnet_state_dict) | self.embed_images.load_state_dict(resnet_state_dict) | ||||
| @@ -933,14 +914,21 @@ class OFAEncoder(OFAPreTrainedModel): | |||||
| self.embed_positions = Embedding(self.max_source_positions + 2, | self.embed_positions = Embedding(self.max_source_positions + 2, | ||||
| embed_dim) | embed_dim) | ||||
| self.embed_image_positions = Embedding(config.image_bucket_size**2 + 1, | |||||
| embed_dim) | |||||
| self.pos_ln = LayerNorm(embed_dim) | |||||
| self.image_pos_ln = LayerNorm(embed_dim) | |||||
| if config.use_image_feature: | |||||
| self.embed_image_positions = Embedding( | |||||
| config.image_bucket_size**2 + 1, embed_dim) | |||||
| if not config.use_ofasys: | |||||
| self.pos_ln = LayerNorm(embed_dim) | |||||
| if config.use_image_feature: | |||||
| self.image_pos_ln = LayerNorm(embed_dim) | |||||
| self.pos_scaling = float(embed_dim / self.num_attention_heads | self.pos_scaling = float(embed_dim / self.num_attention_heads | ||||
| * config.attn_scale_factor)**-0.5 | * config.attn_scale_factor)**-0.5 | ||||
| self.pos_q_linear = nn.Linear(embed_dim, embed_dim) | |||||
| self.pos_k_linear = nn.Linear(embed_dim, embed_dim) | |||||
| if not (config.use_ofasys and config.entangle_position_embedding): | |||||
| self.pos_q_linear = nn.Linear(embed_dim, embed_dim) | |||||
| self.pos_k_linear = nn.Linear(embed_dim, embed_dim) | |||||
| if self.encoder_layerdrop > 0.0: | if self.encoder_layerdrop > 0.0: | ||||
| self.layers = LayerDropModuleList(p=self.encoder_layerdrop) | self.layers = LayerDropModuleList(p=self.encoder_layerdrop) | ||||
| @@ -965,22 +953,28 @@ class OFAEncoder(OFAPreTrainedModel): | |||||
| self.token_bucket_size = config.token_bucket_size | self.token_bucket_size = config.token_bucket_size | ||||
| token_num_rel_dis = 2 * config.token_bucket_size - 1 | token_num_rel_dis = 2 * config.token_bucket_size - 1 | ||||
| token_rp_bucket = make_token_bucket_position(config.token_bucket_size) | token_rp_bucket = make_token_bucket_position(config.token_bucket_size) | ||||
| self.share_attn_bias = config.share_attn_bias | |||||
| num_rel_pos_tables = 1 if config.share_attn_bias else config.encoder_layers | |||||
| self.token_rel_pos_table_list = nn.ModuleList([ | self.token_rel_pos_table_list = nn.ModuleList([ | ||||
| Embedding( | Embedding( | ||||
| token_num_rel_dis, self.num_attention_heads, zero_init=True) | token_num_rel_dis, self.num_attention_heads, zero_init=True) | ||||
| for _ in range(config.encoder_layers) | |||||
| for _ in range(num_rel_pos_tables) | |||||
| ]) | ]) | ||||
| self.image_bucket_size = config.image_bucket_size | |||||
| image_num_rel_dis = (2 * config.image_bucket_size | |||||
| - 1) * (2 * config.image_bucket_size - 1) + 3 | |||||
| image_rp_bucket = make_image_bucket_position(config.image_bucket_size, | |||||
| image_num_rel_dis) | |||||
| self.image_rel_pos_table_list = nn.ModuleList([ | |||||
| Embedding( | |||||
| image_num_rel_dis, self.num_attention_heads, zero_init=True) | |||||
| for _ in range(config.encoder_layers) | |||||
| ]) | |||||
| if config.use_image_feature: | |||||
| self.image_bucket_size = config.image_bucket_size | |||||
| image_num_rel_dis = (2 * config.image_bucket_size | |||||
| - 1) * (2 * config.image_bucket_size - 1) + 3 | |||||
| image_rp_bucket = make_image_bucket_position( | |||||
| config.image_bucket_size, image_num_rel_dis) | |||||
| self.image_rel_pos_table_list = nn.ModuleList([ | |||||
| Embedding( | |||||
| image_num_rel_dis, | |||||
| self.num_attention_heads, | |||||
| zero_init=True) for _ in range(num_rel_pos_tables) | |||||
| ]) | |||||
| self.register_buffer('image_rp_bucket', image_rp_bucket) | |||||
| if config.layernorm_embedding: | if config.layernorm_embedding: | ||||
| self.layernorm_embedding = LayerNorm(embed_dim) | self.layernorm_embedding = LayerNorm(embed_dim) | ||||
| @@ -988,12 +982,12 @@ class OFAEncoder(OFAPreTrainedModel): | |||||
| self.layernorm_embedding = None | self.layernorm_embedding = None | ||||
| self.register_buffer('token_rp_bucket', token_rp_bucket) | self.register_buffer('token_rp_bucket', token_rp_bucket) | ||||
| self.register_buffer('image_rp_bucket', image_rp_bucket) | |||||
| self.entangle_position_embedding = config.entangle_position_embedding | self.entangle_position_embedding = config.entangle_position_embedding | ||||
| self.gradient_checkpointing = False | self.gradient_checkpointing = False | ||||
| # Initialize weights and apply final processing | # Initialize weights and apply final processing | ||||
| self.post_init() | self.post_init() | ||||
| self.use_ofasys = config.use_ofasys | |||||
| def get_input_embeddings(self): | def get_input_embeddings(self): | ||||
| r""" | r""" | ||||
| @@ -1305,21 +1299,41 @@ class OFAEncoder(OFAPreTrainedModel): | |||||
| if has_pads: | if has_pads: | ||||
| x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x)) | x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x)) | ||||
| pos_embed = self.pos_ln(pos_embed) | |||||
| if patch_images is not None: | |||||
| image_pos_embed = self.image_pos_ln(image_pos_embed) | |||||
| pos_embed = torch.cat([image_pos_embed, pos_embed], dim=1) | |||||
| if patch_images_2 is not None: | |||||
| image_pos_embed_2 = self.image_pos_ln(image_pos_embed_2) | |||||
| pos_embed = torch.cat([image_pos_embed_2, pos_embed], dim=1) | |||||
| if self.use_ofasys: | |||||
| if patch_images is not None: | |||||
| pos_embed = torch.cat([image_pos_embed, pos_embed], dim=1) | |||||
| if patch_images_2 is not None: | |||||
| pos_embed = torch.cat([image_pos_embed_2, pos_embed], dim=1) | |||||
| else: | |||||
| pos_embed = self.pos_ln(pos_embed) | |||||
| if patch_images is not None: | |||||
| image_pos_embed = self.image_pos_ln(image_pos_embed) | |||||
| pos_embed = torch.cat([image_pos_embed, pos_embed], dim=1) | |||||
| if patch_images_2 is not None: | |||||
| image_pos_embed_2 = self.image_pos_ln(image_pos_embed_2) | |||||
| pos_embed = torch.cat([image_pos_embed_2, pos_embed], dim=1) | |||||
| def build_abs_pos_bias(pos_embed): | |||||
| batch_size, seq_length = pos_embed.size(0), pos_embed.size(1) | |||||
| if not (self.use_ofasys and self.entangle_position_embedding): | |||||
| pos_q = self.pos_q_linear(pos_embed).view( | |||||
| batch_size, seq_length, self.num_attention_heads, | |||||
| -1).transpose(1, 2) * self.pos_scaling | |||||
| pos_k = self.pos_k_linear(pos_embed).view( | |||||
| batch_size, seq_length, self.num_attention_heads, | |||||
| -1).transpose(1, 2) | |||||
| abs_pos_bias = torch.matmul(pos_q, pos_k.transpose(2, 3)) | |||||
| else: | |||||
| abs_pos_bias = torch.zeros( | |||||
| batch_size, | |||||
| self.num_attention_heads, | |||||
| seq_length, | |||||
| seq_length, | |||||
| dtype=pos_embed.dtype, | |||||
| device=pos_embed.device) | |||||
| return abs_pos_bias | |||||
| pos_q = self.pos_q_linear(pos_embed).view( | |||||
| x.size(0), x.size(1), self.num_attention_heads, -1).transpose( | |||||
| 1, 2) * self.pos_scaling | |||||
| pos_k = self.pos_k_linear(pos_embed).view( | |||||
| x.size(0), x.size(1), self.num_attention_heads, | |||||
| -1).transpose(1, 2) | |||||
| abs_pos_bias = torch.matmul(pos_q, pos_k.transpose(2, 3)) | |||||
| abs_pos_bias = build_abs_pos_bias(pos_embed) | |||||
| # expand attention_mask | # expand attention_mask | ||||
| if has_pads: | if has_pads: | ||||
| @@ -1334,19 +1348,22 @@ class OFAEncoder(OFAPreTrainedModel): | |||||
| if output_hidden_states: | if output_hidden_states: | ||||
| encoder_states += (x, ) | encoder_states += (x, ) | ||||
| self_attn_bias = abs_pos_bias.clone() | self_attn_bias = abs_pos_bias.clone() | ||||
| real_idx = 0 if self.share_attn_bias else idx | |||||
| self_attn_bias[:, :, -input_ids.size(1):, | self_attn_bias[:, :, -input_ids.size(1):, | ||||
| -input_ids.size(1):] += self.get_rel_pos_bias( | -input_ids.size(1):] += self.get_rel_pos_bias( | ||||
| input_ids, idx) | |||||
| input_ids, real_idx) | |||||
| if patch_images_2 is not None: | if patch_images_2 is not None: | ||||
| self_attn_bias[:, :, :image_num_patches_2, :image_num_patches_2] += \ | self_attn_bias[:, :, :image_num_patches_2, :image_num_patches_2] += \ | ||||
| self.get_image_rel_pos_bias(image_position_ids_2, idx) | |||||
| self.get_image_rel_pos_bias(image_position_ids_2, real_idx) | |||||
| self_attn_bias[:, :, | self_attn_bias[:, :, | ||||
| image_num_patches_2:image_num_patches_2 + image_num_patches, # noqa | image_num_patches_2:image_num_patches_2 + image_num_patches, # noqa | ||||
| image_num_patches_2:image_num_patches_2 + image_num_patches] += \ | image_num_patches_2:image_num_patches_2 + image_num_patches] += \ | ||||
| self.get_image_rel_pos_bias(image_position_ids, idx) # noqa | |||||
| self.get_image_rel_pos_bias(image_position_ids, real_idx) # noqa | |||||
| elif patch_images is not None: | elif patch_images is not None: | ||||
| self_attn_bias[:, :, :x.size(1) - input_ids.size(1), :x.size(1) - input_ids.size(1)] += \ | self_attn_bias[:, :, :x.size(1) - input_ids.size(1), :x.size(1) - input_ids.size(1)] += \ | ||||
| self.get_image_rel_pos_bias(image_position_ids, idx) | |||||
| self.get_image_rel_pos_bias(image_position_ids, real_idx) | |||||
| self_attn_bias = self_attn_bias.reshape(-1, x.size(1), x.size(1)) | self_attn_bias = self_attn_bias.reshape(-1, x.size(1), x.size(1)) | ||||
| hidden_outputs = layer( | hidden_outputs = layer( | ||||
| @@ -1398,6 +1415,8 @@ class OFADecoder(OFAPreTrainedModel): | |||||
| self._future_mask = torch.empty(0) | self._future_mask = torch.empty(0) | ||||
| self.share_input_output_embed = config.share_decoder_input_output_embed | self.share_input_output_embed = config.share_decoder_input_output_embed | ||||
| self.num_attention_heads = config.decoder_attention_heads | self.num_attention_heads = config.decoder_attention_heads | ||||
| self.use_ofasys = config.use_ofasys | |||||
| self.disable_entangle = config.disable_entangle | |||||
| if embed_tokens is not None: | if embed_tokens is not None: | ||||
| self.embed_tokens = embed_tokens | self.embed_tokens = embed_tokens | ||||
| @@ -1415,18 +1434,31 @@ class OFADecoder(OFAPreTrainedModel): | |||||
| else: | else: | ||||
| self.layernorm_embedding = None | self.layernorm_embedding = None | ||||
| if config.use_ofasys: | |||||
| if config.add_type_embedding: | |||||
| self.type_embedding = Embedding( | |||||
| 1, self.embed_dim, padding_idx=None) | |||||
| else: | |||||
| self.type_embedding = None | |||||
| self.window_size = config.code_image_size // 8 | self.window_size = config.code_image_size // 8 | ||||
| self.embed_positions = Embedding(self.max_target_positions + 2, | self.embed_positions = Embedding(self.max_target_positions + 2, | ||||
| self.embed_dim) | self.embed_dim) | ||||
| self.embed_image_positions = Embedding(config.image_bucket_size**2 + 1, | |||||
| self.embed_dim) | |||||
| self.pos_ln = LayerNorm(self.embed_dim) | |||||
| self.image_pos_ln = LayerNorm(self.embed_dim) | |||||
| if not config.use_ofasys: | |||||
| self.embed_image_positions = Embedding( | |||||
| config.image_bucket_size**2 + 1, self.embed_dim) | |||||
| if not config.use_ofasys: | |||||
| self.pos_ln = LayerNorm(self.embed_dim) | |||||
| self.image_pos_ln = LayerNorm(self.embed_dim) | |||||
| self.pos_scaling = float(self.embed_dim / self.num_attention_heads | self.pos_scaling = float(self.embed_dim / self.num_attention_heads | ||||
| * config.attn_scale_factor)**-0.5 | * config.attn_scale_factor)**-0.5 | ||||
| self.self_pos_q_linear = nn.Linear(self.embed_dim, self.embed_dim) | |||||
| self.self_pos_k_linear = nn.Linear(self.embed_dim, self.embed_dim) | |||||
| if not (config.use_ofasys and config.entangle_position_embedding): | |||||
| self.self_pos_q_linear = nn.Linear(self.embed_dim, self.embed_dim) | |||||
| self.self_pos_k_linear = nn.Linear(self.embed_dim, self.embed_dim) | |||||
| self.cross_pos_q_linear = nn.Linear(self.embed_dim, self.embed_dim) | self.cross_pos_q_linear = nn.Linear(self.embed_dim, self.embed_dim) | ||||
| self.cross_pos_k_linear = nn.Linear(self.embed_dim, self.embed_dim) | self.cross_pos_k_linear = nn.Linear(self.embed_dim, self.embed_dim) | ||||
| @@ -1463,33 +1495,41 @@ class OFADecoder(OFAPreTrainedModel): | |||||
| self.token_bucket_size = config.token_bucket_size | self.token_bucket_size = config.token_bucket_size | ||||
| token_num_rel_dis = 2 * config.token_bucket_size - 1 | token_num_rel_dis = 2 * config.token_bucket_size - 1 | ||||
| token_rp_bucket = make_token_bucket_position(config.token_bucket_size) | token_rp_bucket = make_token_bucket_position(config.token_bucket_size) | ||||
| self.share_attn_bias = config.share_attn_bias | |||||
| num_rel_pos_tables = 1 if config.share_attn_bias else config.decoder_layers | |||||
| self.token_rel_pos_table_list = nn.ModuleList([ | self.token_rel_pos_table_list = nn.ModuleList([ | ||||
| Embedding( | Embedding( | ||||
| token_num_rel_dis, self.num_attention_heads, zero_init=True) | token_num_rel_dis, self.num_attention_heads, zero_init=True) | ||||
| for _ in range(config.decoder_layers) | |||||
| for _ in range(num_rel_pos_tables) | |||||
| ]) | ]) | ||||
| self.image_bucket_size = config.image_bucket_size | |||||
| image_num_rel_dis = (2 * config.image_bucket_size | |||||
| - 1) * (2 * config.image_bucket_size - 1) + 3 | |||||
| image_rp_bucket = make_image_bucket_position(config.image_bucket_size, | |||||
| image_num_rel_dis) | |||||
| image_position_idx = torch.arange(self.window_size).unsqueeze(0).expand(self.window_size, self.window_size) + \ | |||||
| torch.arange(self.window_size).unsqueeze(1) * config.image_bucket_size + 1 # noqa | |||||
| image_position_idx = torch.cat( | |||||
| [torch.tensor([0]), image_position_idx.view(-1)]) | |||||
| image_position_idx = torch.cat( | |||||
| [image_position_idx, | |||||
| torch.tensor([1024] * 768)]) | |||||
| self.image_rel_pos_table_list = nn.ModuleList([ | |||||
| Embedding( | |||||
| image_num_rel_dis, self.num_attention_heads, zero_init=True) | |||||
| for _ in range(config.decoder_layers) | |||||
| ]) | |||||
| if config.use_image_feature: | |||||
| if not config.use_ofasys: | |||||
| self.image_bucket_size = config.image_bucket_size | |||||
| image_num_rel_dis = (2 * config.image_bucket_size - 1) * ( | |||||
| 2 * config.image_bucket_size - 1) + 3 | |||||
| image_rp_bucket = make_image_bucket_position( | |||||
| config.image_bucket_size, image_num_rel_dis) | |||||
| image_position_idx = torch.arange(self.window_size).unsqueeze(0).expand(self.window_size, self.window_size) + \ | |||||
| torch.arange(self.window_size).unsqueeze(1) * config.image_bucket_size + 1 # noqa | |||||
| image_position_idx = torch.cat( | |||||
| [torch.tensor([0]), | |||||
| image_position_idx.view(-1)]) | |||||
| image_position_idx = torch.cat( | |||||
| [image_position_idx, | |||||
| torch.tensor([1024] * 768)]) | |||||
| self.register_buffer('image_position_idx', image_position_idx) | |||||
| self.image_rel_pos_table_list = nn.ModuleList([ | |||||
| Embedding( | |||||
| image_num_rel_dis, | |||||
| self.num_attention_heads, | |||||
| zero_init=True) for _ in range(num_rel_pos_tables) | |||||
| ]) | |||||
| self.register_buffer('image_rp_bucket', image_rp_bucket) | |||||
| self.register_buffer('token_rp_bucket', token_rp_bucket) | self.register_buffer('token_rp_bucket', token_rp_bucket) | ||||
| self.register_buffer('image_rp_bucket', image_rp_bucket) | |||||
| self.register_buffer('image_position_idx', image_position_idx) | |||||
| self.entangle_position_embedding = config.entangle_position_embedding | self.entangle_position_embedding = config.entangle_position_embedding | ||||
| self.gradient_checkpointing = False | self.gradient_checkpointing = False | ||||
| @@ -1556,26 +1596,46 @@ class OFADecoder(OFAPreTrainedModel): | |||||
| batch_size = tgt_pos_embed.size(0) | batch_size = tgt_pos_embed.size(0) | ||||
| tgt_len = tgt_pos_embed.size(1) | tgt_len = tgt_pos_embed.size(1) | ||||
| tgt_pos_embed = self.image_pos_ln( | |||||
| tgt_pos_embed) if use_image else self.pos_ln(tgt_pos_embed) | |||||
| if not self.use_ofasys: | |||||
| tgt_pos_embed = self.image_pos_ln( | |||||
| tgt_pos_embed) if use_image else self.pos_ln(tgt_pos_embed) | |||||
| if src_pos_embed is not None: | if src_pos_embed is not None: | ||||
| src_len = src_pos_embed.size(1) | src_len = src_pos_embed.size(1) | ||||
| pos_q = self.cross_pos_q_linear(tgt_pos_embed).view( | |||||
| batch_size, tgt_len, self.num_attention_heads, -1).transpose( | |||||
| 1, 2) * self.pos_scaling | |||||
| pos_k = self.cross_pos_k_linear(src_pos_embed).view( | |||||
| batch_size, src_len, self.num_attention_heads, | |||||
| -1).transpose(1, 2) | |||||
| if not (self.entangle_position_embedding and self.use_ofasys): | |||||
| pos_q = self.cross_pos_q_linear(tgt_pos_embed).view( | |||||
| batch_size, tgt_len, self.num_attention_heads, | |||||
| -1).transpose(1, 2) * self.pos_scaling | |||||
| pos_k = self.cross_pos_k_linear(src_pos_embed).view( | |||||
| batch_size, src_len, self.num_attention_heads, | |||||
| -1).transpose(1, 2) | |||||
| abs_pos_bias = torch.matmul(pos_q, pos_k.transpose(2, 3)) | |||||
| else: | |||||
| abs_pos_bias = torch.zeros( | |||||
| batch_size, | |||||
| self.num_attention_heads, | |||||
| tgt_len, | |||||
| src_len, | |||||
| dtype=tgt_pos_embed.dtype, | |||||
| device=tgt_pos_embed.device) | |||||
| else: | else: | ||||
| src_len = tgt_pos_embed.size(1) | |||||
| pos_q = self.self_pos_q_linear(tgt_pos_embed).view( | |||||
| batch_size, tgt_len, self.num_attention_heads, -1).transpose( | |||||
| 1, 2) * self.pos_scaling | |||||
| pos_k = self.self_pos_k_linear(tgt_pos_embed).view( | |||||
| batch_size, src_len, self.num_attention_heads, | |||||
| -1).transpose(1, 2) | |||||
| abs_pos_bias = torch.matmul(pos_q, pos_k.transpose(2, 3)) | |||||
| # batch_size, seq_length = tgt_pos_embed.size(0), tgt_pos_embed.size(1) | |||||
| if not (self.entangle_position_embedding and self.use_ofasys): | |||||
| pos_q = self.self_pos_q_linear(tgt_pos_embed).view( | |||||
| batch_size, tgt_len, self.num_attention_heads, | |||||
| -1).transpose(1, 2) * self.pos_scaling | |||||
| pos_k = self.self_pos_k_linear(tgt_pos_embed).view( | |||||
| batch_size, tgt_len, self.num_attention_heads, | |||||
| -1).transpose(1, 2) | |||||
| abs_pos_bias = torch.matmul(pos_q, pos_k.transpose(2, 3)) | |||||
| else: | |||||
| abs_pos_bias = torch.zeros( | |||||
| batch_size, | |||||
| self.num_attention_heads, | |||||
| tgt_len, | |||||
| tgt_len, | |||||
| dtype=tgt_pos_embed.dtype, | |||||
| device=tgt_pos_embed.device) | |||||
| return abs_pos_bias | return abs_pos_bias | ||||
| @@ -1809,17 +1869,18 @@ class OFADecoder(OFAPreTrainedModel): | |||||
| past_key_values) > 0 else None | past_key_values) > 0 else None | ||||
| self_attn_bias = self_abs_pos_bias.clone() | self_attn_bias = self_abs_pos_bias.clone() | ||||
| real_idx = 0 if self.share_attn_bias else idx | |||||
| if code_masks is None or not code_masks.any(): | if code_masks is None or not code_masks.any(): | ||||
| self_attn_bias += self.get_rel_pos_bias( | self_attn_bias += self.get_rel_pos_bias( | ||||
| all_prev_output_tokens, idx).unsqueeze(0) | |||||
| all_prev_output_tokens, real_idx).unsqueeze(0) | |||||
| elif code_masks is not None and code_masks.all(): | elif code_masks is not None and code_masks.all(): | ||||
| self_attn_bias += self.get_image_rel_pos_bias( | self_attn_bias += self.get_image_rel_pos_bias( | ||||
| all_prev_output_tokens, idx).unsqueeze(0) | |||||
| all_prev_output_tokens, real_idx).unsqueeze(0) | |||||
| else: | else: | ||||
| self_attn_bias[~code_masks] += self.get_rel_pos_bias( | self_attn_bias[~code_masks] += self.get_rel_pos_bias( | ||||
| all_prev_output_tokens, idx).unsqueeze(0) | |||||
| all_prev_output_tokens, real_idx).unsqueeze(0) | |||||
| self_attn_bias[code_masks] += self.get_image_rel_pos_bias( | self_attn_bias[code_masks] += self.get_image_rel_pos_bias( | ||||
| all_prev_output_tokens, idx).unsqueeze(0) | |||||
| all_prev_output_tokens, real_idx).unsqueeze(0) | |||||
| self_attn_bias = self_attn_bias.reshape( | self_attn_bias = self_attn_bias.reshape( | ||||
| -1, | -1, | ||||
| *self_attn_bias.size()[-2:]) | *self_attn_bias.size()[-2:]) | ||||
| @@ -1892,6 +1953,7 @@ class OFAModel(OFAPreTrainedModel): | |||||
| self.encoder = OFAEncoder(config, shared) | self.encoder = OFAEncoder(config, shared) | ||||
| self.decoder = OFADecoder(config, shared) | self.decoder = OFADecoder(config, shared) | ||||
| self.use_ofasys = config.use_ofasys | |||||
| # Initialize weights and apply final processing | # Initialize weights and apply final processing | ||||
| self.post_init() | self.post_init() | ||||
| @@ -2,6 +2,7 @@ | |||||
| from typing import Optional | from typing import Optional | ||||
| import torch | import torch | ||||
| import torch.nn as nn | |||||
| def expand_mask(mask: torch.Tensor, | def expand_mask(mask: torch.Tensor, | ||||
| @@ -17,3 +18,42 @@ def expand_mask(mask: torch.Tensor, | |||||
| src_len).to(dtype) | src_len).to(dtype) | ||||
| return expanded_mask.masked_fill(expanded_mask.bool(), | return expanded_mask.masked_fill(expanded_mask.bool(), | ||||
| torch.finfo(dtype).min) | torch.finfo(dtype).min) | ||||
| def drop_path(x, drop_prob: float = 0.0, training: bool = False): | |||||
| r""" | |||||
| Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). | |||||
| Args: | |||||
| x (`nn.Modules`): input nn layers. | |||||
| drop_prob (`float`): drop path ratio. | |||||
| training (`bool`): whether is training or inference. | |||||
| """ | |||||
| if drop_prob == 0.0 or not training: | |||||
| return x | |||||
| keep_prob = 1 - drop_prob | |||||
| shape = (1, x.shape[1], 1) | |||||
| random_tensor = keep_prob + torch.rand( | |||||
| shape, dtype=x.dtype, device=x.device) | |||||
| random_tensor.floor_() # binarize | |||||
| output = x.div(keep_prob) * random_tensor | |||||
| return output | |||||
| class DropPath(nn.Module): | |||||
| r""" | |||||
| Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). | |||||
| Args: | |||||
| drop_prob: drop path ratio. | |||||
| """ | |||||
| def __init__(self, drop_prob=None): | |||||
| super().__init__() | |||||
| self.drop_prob = drop_prob | |||||
| def forward(self, x): | |||||
| return drop_path(x, self.drop_prob, self.training) | |||||
| def extra_repr(self) -> str: | |||||
| return 'p={}'.format(self.drop_prob) | |||||
| @@ -0,0 +1,155 @@ | |||||
| from collections import OrderedDict | |||||
| import torch | |||||
| import torch.nn.functional as F | |||||
| from fairseq.modules import LayerNorm | |||||
| from torch import nn | |||||
| from .utils.utils import DropPath | |||||
| __all__ = [ | |||||
| 'vit_base', | |||||
| 'vit_large', | |||||
| 'vit_large_336', | |||||
| 'vit_huge', | |||||
| ] | |||||
| class QuickGELU(nn.Module): | |||||
| def forward(self, x: torch.Tensor): | |||||
| return x * torch.sigmoid(1.702 * x) | |||||
| class ResidualAttentionBlock(nn.Module): | |||||
| def __init__(self, | |||||
| d_model: int, | |||||
| n_head: int, | |||||
| attn_mask: torch.Tensor = None, | |||||
| drop_path_rate=0.0): | |||||
| super().__init__() | |||||
| self.attn = nn.MultiheadAttention(d_model, n_head) | |||||
| self.ln_1 = LayerNorm(d_model) | |||||
| self.mlp = nn.Sequential( | |||||
| OrderedDict([ | |||||
| ('c_fc', nn.Linear(d_model, d_model * 4)), | |||||
| ('gelu', QuickGELU()), | |||||
| ('c_proj', nn.Linear(d_model * 4, d_model)), | |||||
| ])) | |||||
| self.ln_2 = LayerNorm(d_model) | |||||
| self.attn_mask = attn_mask | |||||
| self.drop_path = DropPath(drop_path_rate) | |||||
| def attention(self, x: torch.Tensor): | |||||
| self.attn_mask = ( | |||||
| self.attn_mask.to(dtype=x.dtype, device=x.device) | |||||
| if self.attn_mask is not None else None) | |||||
| return self.attn( | |||||
| x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] | |||||
| def forward(self, x: torch.Tensor): | |||||
| x = x + self.drop_path(self.attention(self.ln_1(x))) | |||||
| x = x + self.drop_path(self.mlp(self.ln_2(x))) | |||||
| return x | |||||
| class Transformer(nn.Module): | |||||
| def __init__( | |||||
| self, | |||||
| width: int, | |||||
| layers: int, | |||||
| heads: int, | |||||
| attn_mask: torch.Tensor = None, | |||||
| drop_path_rate: float = 0.0, | |||||
| ): | |||||
| super().__init__() | |||||
| self.width = width | |||||
| self.layers = layers | |||||
| self.resblocks = nn.Sequential(*[ | |||||
| ResidualAttentionBlock(width, heads, attn_mask, drop_path_rate) | |||||
| for _ in range(layers) | |||||
| ]) | |||||
| def forward(self, x: torch.Tensor): | |||||
| return self.resblocks(x) | |||||
| class VisionTransformer(nn.Module): | |||||
| def __init__( | |||||
| self, | |||||
| input_resolution: int, | |||||
| patch_size: int, | |||||
| width: int, | |||||
| layers: int, | |||||
| heads: int, | |||||
| drop_path_rate: float = 0.0, | |||||
| ): | |||||
| super().__init__() | |||||
| self.input_resolution = input_resolution | |||||
| self.patch_size = patch_size | |||||
| self.conv1 = nn.Conv2d( | |||||
| in_channels=3, | |||||
| out_channels=width, | |||||
| kernel_size=patch_size, | |||||
| stride=patch_size, | |||||
| bias=False, | |||||
| ) | |||||
| scale = width**-0.5 | |||||
| self.width = width | |||||
| self.positional_embedding = nn.Parameter(scale * torch.randn( | |||||
| (input_resolution // patch_size)**2 + 1, width)) | |||||
| self.ln_pre = LayerNorm(width) | |||||
| self.transformer = Transformer( | |||||
| width, layers, heads, drop_path_rate=drop_path_rate) | |||||
| def forward(self, x: torch.Tensor): | |||||
| resolution = x.shape[-2] | |||||
| height, width = x.shape[-2] // self.patch_size, x.shape[ | |||||
| -1] // self.patch_size | |||||
| x = self.conv1(x) # shape = [*, width, grid, grid] | |||||
| x = x.reshape(x.shape[0], x.shape[1], | |||||
| -1) # shape = [*, width, grid ** 2] | |||||
| x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] | |||||
| if resolution != self.input_resolution: | |||||
| old_pe = self.positional_embedding[1:] | |||||
| patch_num = self.input_resolution // self.patch_size | |||||
| old_pe = old_pe.reshape(1, patch_num, patch_num, | |||||
| -1).permute(0, 3, 1, 2) | |||||
| new_pe = F.interpolate( | |||||
| old_pe, size=(height, width), mode='bilinear') | |||||
| new_pe = new_pe.permute(0, 2, 3, 1).reshape(height * width, -1) | |||||
| x = x + new_pe.to(x.dtype) | |||||
| else: | |||||
| x = x + self.positional_embedding[1:].to(x.dtype) | |||||
| x = self.ln_pre(x) | |||||
| x = x.permute(1, 0, 2) # NLD -> LND | |||||
| x = self.transformer(x) | |||||
| x = x.permute(1, 0, 2) # LND -> NLD | |||||
| bz, seq, hidden = x.shape | |||||
| x = x.transpose(1, 2).reshape(bz, hidden, height, width) | |||||
| return x | |||||
| def vit_base(drop_path_rate: float = 0.0): | |||||
| return VisionTransformer(224, 16, 768, 9, 12, drop_path_rate) | |||||
| def vit_large(drop_path_rate: float = 0.0): | |||||
| return VisionTransformer(224, 14, 1024, 18, 16, drop_path_rate) | |||||
| def vit_large_336(drop_path_rate: float = 0.0): | |||||
| return VisionTransformer(336, 14, 1024, 18, 16, drop_path_rate) | |||||
| def vit_huge(drop_path_rate: float = 0.0): | |||||
| return VisionTransformer(224, 14, 1280, 24, 16, drop_path_rate) | |||||
| @@ -1,6 +1,7 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | # Copyright (c) Alibaba, Inc. and its affiliates. | ||||
| import math | import math | ||||
| import os | import os | ||||
| import re | |||||
| import string | import string | ||||
| from functools import partial | from functools import partial | ||||
| from os import path as osp | from os import path as osp | ||||
| @@ -53,8 +54,11 @@ class OfaForAllTasks(TorchModel): | |||||
| raise NotImplementedError | raise NotImplementedError | ||||
| # there is some diff between here and our ofa code, | # there is some diff between here and our ofa code, | ||||
| # there will be no need to use param: use_bpe | # there will be no need to use param: use_bpe | ||||
| self.tokenizer.add_tokens(['<code_{}>'.format(i) for i in range(8192)]) | |||||
| self.tokenizer.add_tokens(['<bin_{}>'.format(i) for i in range(1000)]) | |||||
| if not model.use_ofasys: | |||||
| self.tokenizer.add_tokens( | |||||
| ['<code_{}>'.format(i) for i in range(8192)]) | |||||
| self.tokenizer.add_tokens( | |||||
| ['<bin_{}>'.format(i) for i in range(1000)]) | |||||
| self.cfg.update({'num_bins': 1000, 'num_codes': 8192}) | self.cfg.update({'num_bins': 1000, 'num_codes': 8192}) | ||||
| self.batch_size = self.cfg.model.get('batch_size', 1) | self.batch_size = self.cfg.model.get('batch_size', 1) | ||||
| self.patch_image_size = self.cfg.model.get('patch_image_size', 480) | self.patch_image_size = self.cfg.model.get('patch_image_size', 480) | ||||
| @@ -107,6 +111,8 @@ class OfaForAllTasks(TorchModel): | |||||
| Tasks.text_classification: inference_d[self.gen_type], | Tasks.text_classification: inference_d[self.gen_type], | ||||
| Tasks.image_classification: inference_d[self.gen_type], | Tasks.image_classification: inference_d[self.gen_type], | ||||
| } | } | ||||
| pattern_str = '((?<=[^ a-zA-Z0-9.,:!?]) +| +(?=[^ a-zA-Z0-9.,:!?]))' | |||||
| self.pattern = re.compile(pattern_str) | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| input = move_to_device(input, self.model.device) | input = move_to_device(input, self.model.device) | ||||
| @@ -132,8 +138,18 @@ class OfaForAllTasks(TorchModel): | |||||
| caption = input[OutputKeys.CAPTION] | caption = input[OutputKeys.CAPTION] | ||||
| result_l = list() | result_l = list() | ||||
| for cap in caption: | for cap in caption: | ||||
| result_l.append(cap.translate(self.transtab).strip()) | |||||
| if self.language == 'en': | |||||
| result_l.append(cap.translate(self.transtab).strip()) | |||||
| else: | |||||
| result_l.append(cap) | |||||
| input[OutputKeys.CAPTION] = result_l | input[OutputKeys.CAPTION] = result_l | ||||
| if self.gen_type == 'generation' and self.language in [ | |||||
| 'zh', 'cn' | |||||
| ] and self.cfg.task != Tasks.visual_grounding: | |||||
| ret_l = list() | |||||
| for text in input[OFA_TASK_KEY_MAPPING[self.cfg.task]]: | |||||
| ret_l.append(self.detokenizer(text)) | |||||
| input[OFA_TASK_KEY_MAPPING[self.cfg.task]] = ret_l | |||||
| return input | return input | ||||
| def _text_gen_inference(self, input): | def _text_gen_inference(self, input): | ||||
| @@ -311,3 +327,6 @@ class OfaForAllTasks(TorchModel): | |||||
| save_function=partial(save_function, with_meta=False), | save_function=partial(save_function, with_meta=False), | ||||
| config=config, | config=config, | ||||
| **kwargs) | **kwargs) | ||||
| def detokenizer(self, text): | |||||
| return self.pattern.sub('', text) | |||||
| @@ -1,3 +1,6 @@ | |||||
| # The Uni-fold implementation is also open-sourced by the authors under Apache-2.0 license, | |||||
| # and is publicly available at https://github.com/dptech-corp/Uni-Fold. | |||||
| import copy | import copy | ||||
| import logging | import logging | ||||
| import os | import os | ||||
| @@ -1,3 +1,6 @@ | |||||
| # The Uni-fold implementation is also open-sourced by the authors under Apache-2.0 license, | |||||
| # and is publicly available at https://github.com/dptech-corp/Uni-Fold. | |||||
| import argparse | import argparse | ||||
| import os | import os | ||||
| from typing import Any | from typing import Any | ||||
| @@ -0,0 +1,3 @@ | |||||
| # The Uni-fold implementation is also open-sourced by the authors under Apache-2.0 license, | |||||
| # and is publicly available at https://github.com/dptech-corp/Uni-Fold. | |||||
| """Unifold Modules.""" | |||||
| @@ -274,6 +274,8 @@ class MsDataset: | |||||
| try: | try: | ||||
| api.on_dataset_download( | api.on_dataset_download( | ||||
| dataset_name=download_dataset, namespace=namespace) | dataset_name=download_dataset, namespace=namespace) | ||||
| api.dataset_download_uv( | |||||
| dataset_name=download_dataset, namespace=namespace) | |||||
| except Exception as e: | except Exception as e: | ||||
| logger.error(e) | logger.error(e) | ||||
| @@ -69,11 +69,23 @@ TASK_OUTPUTS = { | |||||
| # face 2d keypoint result for single sample | # face 2d keypoint result for single sample | ||||
| # { | # { | ||||
| # "keypoints": [ | # "keypoints": [ | ||||
| # [x1, y1]*106 | |||||
| # [[x, y]*106], | |||||
| # [[x, y]*106], | |||||
| # [[x, y]*106], | |||||
| # ], | # ], | ||||
| # "poses": [pitch, roll, yaw] | |||||
| # "poses": [ | |||||
| # [pitch, roll, yaw], | |||||
| # [pitch, roll, yaw], | |||||
| # [pitch, roll, yaw], | |||||
| # ], | |||||
| # "boxes": [ | |||||
| # [x1, y1, x2, y2], | |||||
| # [x1, y1, x2, y2], | |||||
| # [x1, y1, x2, y2], | |||||
| # ] | |||||
| # } | # } | ||||
| Tasks.face_2d_keypoints: [OutputKeys.KEYPOINTS, OutputKeys.POSES], | |||||
| Tasks.face_2d_keypoints: | |||||
| [OutputKeys.KEYPOINTS, OutputKeys.POSES, OutputKeys.BOXES], | |||||
| # face detection result for single sample | # face detection result for single sample | ||||
| # { | # { | ||||
| @@ -479,17 +491,8 @@ TASK_OUTPUTS = { | |||||
| # word segmentation result for single sample | # word segmentation result for single sample | ||||
| # { | # { | ||||
| # "output": "今天 天气 不错 , 适合 出去 游玩" | # "output": "今天 天气 不错 , 适合 出去 游玩" | ||||
| # "labels": [ | |||||
| # {'word': '今天', 'label': 'PROPN'}, | |||||
| # {'word': '天气', 'label': 'PROPN'}, | |||||
| # {'word': '不错', 'label': 'VERB'}, | |||||
| # {'word': ',', 'label': 'NUM'}, | |||||
| # {'word': '适合', 'label': 'NOUN'}, | |||||
| # {'word': '出去', 'label': 'PART'}, | |||||
| # {'word': '游玩', 'label': 'ADV'}, | |||||
| # ] | |||||
| # } | # } | ||||
| Tasks.word_segmentation: [OutputKeys.OUTPUT, OutputKeys.LABELS], | |||||
| Tasks.word_segmentation: [OutputKeys.OUTPUT], | |||||
| # TODO @wenmeng.zwm support list of result check | # TODO @wenmeng.zwm support list of result check | ||||
| # named entity recognition result for single sample | # named entity recognition result for single sample | ||||
| @@ -699,8 +702,9 @@ TASK_OUTPUTS = { | |||||
| # "text_embedding": np.array with shape [1, D], | # "text_embedding": np.array with shape [1, D], | ||||
| # "caption": "this is an image caption text." | # "caption": "this is an image caption text." | ||||
| # } | # } | ||||
| Tasks.generative_multi_modal_embedding: | |||||
| [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION], | |||||
| Tasks.generative_multi_modal_embedding: [ | |||||
| OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION | |||||
| ], | |||||
| # multi-modal similarity result for single sample | # multi-modal similarity result for single sample | ||||
| # { | # { | ||||
| @@ -10,6 +10,7 @@ from typing import Any, Dict, Generator, List, Mapping, Union | |||||
| import numpy as np | import numpy as np | ||||
| from modelscope.hub.utils.utils import create_library_statistics | |||||
| from modelscope.models.base import Model | from modelscope.models.base import Model | ||||
| from modelscope.msdatasets import MsDataset | from modelscope.msdatasets import MsDataset | ||||
| from modelscope.outputs import TASK_OUTPUTS | from modelscope.outputs import TASK_OUTPUTS | ||||
| @@ -151,7 +152,9 @@ class Pipeline(ABC): | |||||
| **kwargs) -> Union[Dict[str, Any], Generator]: | **kwargs) -> Union[Dict[str, Any], Generator]: | ||||
| # model provider should leave it as it is | # model provider should leave it as it is | ||||
| # modelscope library developer will handle this function | # modelscope library developer will handle this function | ||||
| for single_model in self.models: | |||||
| if hasattr(single_model, 'name'): | |||||
| create_library_statistics('pipeline', single_model.name, None) | |||||
| # place model to cpu or gpu | # place model to cpu or gpu | ||||
| if (self.model or (self.has_multiple_models and self.models[0])): | if (self.model or (self.has_multiple_models and self.models[0])): | ||||
| if not self._model_prepare: | if not self._model_prepare: | ||||
| @@ -93,9 +93,8 @@ DEFAULT_MODEL_FOR_PIPELINE = { | |||||
| 'damo/cv_resnet50_live-category'), | 'damo/cv_resnet50_live-category'), | ||||
| Tasks.video_category: (Pipelines.video_category, | Tasks.video_category: (Pipelines.video_category, | ||||
| 'damo/cv_resnet50_video-category'), | 'damo/cv_resnet50_video-category'), | ||||
| Tasks.multi_modal_embedding: | |||||
| (Pipelines.multi_modal_embedding, | |||||
| 'damo/multi-modal_clip-vit-large-patch14_zh'), | |||||
| Tasks.multi_modal_embedding: (Pipelines.multi_modal_embedding, | |||||
| 'damo/multi-modal_clip-vit-base-patch16_zh'), | |||||
| Tasks.generative_multi_modal_embedding: | Tasks.generative_multi_modal_embedding: | ||||
| (Pipelines.generative_multi_modal_embedding, | (Pipelines.generative_multi_modal_embedding, | ||||
| 'damo/multi-modal_gemm-vit-large-patch14_generative-multi-modal-embedding' | 'damo/multi-modal_gemm-vit-large-patch14_generative-multi-modal-embedding' | ||||
| @@ -132,8 +132,8 @@ class Body3DKeypointsPipeline(Pipeline): | |||||
| device='gpu' if torch.cuda.is_available() else 'cpu') | device='gpu' if torch.cuda.is_available() else 'cpu') | ||||
| def preprocess(self, input: Input) -> Dict[str, Any]: | def preprocess(self, input: Input) -> Dict[str, Any]: | ||||
| video_url = input | |||||
| video_frames = self.read_video_frames(video_url) | |||||
| self.video_url = input | |||||
| video_frames = self.read_video_frames(self.video_url) | |||||
| if 0 == len(video_frames): | if 0 == len(video_frames): | ||||
| res = {'success': False, 'msg': 'get video frame failed.'} | res = {'success': False, 'msg': 'get video frame failed.'} | ||||
| return res | return res | ||||
| @@ -198,7 +198,7 @@ class Body3DKeypointsPipeline(Pipeline): | |||||
| } | } | ||||
| if not input['success']: | if not input['success']: | ||||
| pass | |||||
| res[OutputKeys.OUTPUT_VIDEO] = self.video_url | |||||
| else: | else: | ||||
| poses = input[KeypointsTypes.POSES_CAMERA] | poses = input[KeypointsTypes.POSES_CAMERA] | ||||
| pred_3d_pose = poses.data.cpu().numpy()[ | pred_3d_pose = poses.data.cpu().numpy()[ | ||||
| @@ -1,12 +1,22 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | # Copyright (c) Alibaba, Inc. and its affiliates. | ||||
| import copy | |||||
| import math | |||||
| from typing import Any | from typing import Any | ||||
| import cv2 | |||||
| import numpy as np | |||||
| from modelscope.metainfo import Pipelines | from modelscope.metainfo import Pipelines | ||||
| from modelscope.outputs import OutputKeys | from modelscope.outputs import OutputKeys | ||||
| from modelscope.pipelines import pipeline | |||||
| from modelscope.pipelines.builder import PIPELINES | from modelscope.pipelines.builder import PIPELINES | ||||
| from modelscope.preprocessors import LoadImage | |||||
| from modelscope.utils.constant import ModelFile, Tasks | from modelscope.utils.constant import ModelFile, Tasks | ||||
| from modelscope.utils.logger import get_logger | |||||
| from .base import EasyCVPipeline | from .base import EasyCVPipeline | ||||
| logger = get_logger() | |||||
| @PIPELINES.register_module( | @PIPELINES.register_module( | ||||
| Tasks.face_2d_keypoints, module_name=Pipelines.face_2d_keypoints) | Tasks.face_2d_keypoints, module_name=Pipelines.face_2d_keypoints) | ||||
| @@ -29,18 +39,206 @@ class Face2DKeypointsPipeline(EasyCVPipeline): | |||||
| *args, | *args, | ||||
| **kwargs) | **kwargs) | ||||
| # face detect pipeline | |||||
| det_model_id = 'damo/cv_resnet_facedetection_scrfd10gkps' | |||||
| self.face_detection = pipeline( | |||||
| Tasks.face_detection, model=det_model_id) | |||||
| def show_result(self, img, points, scale=2, save_path=None): | def show_result(self, img, points, scale=2, save_path=None): | ||||
| return self.predict_op.show_result(img, points, scale, save_path) | return self.predict_op.show_result(img, points, scale, save_path) | ||||
| def _choose_face(self, det_result, min_face=10): | |||||
| """ | |||||
| choose face with maximum area | |||||
| Args: | |||||
| det_result: output of face detection pipeline | |||||
| min_face: minimum size of valid face w/h | |||||
| """ | |||||
| bboxes = np.array(det_result[OutputKeys.BOXES]) | |||||
| landmarks = np.array(det_result[OutputKeys.KEYPOINTS]) | |||||
| if bboxes.shape[0] == 0: | |||||
| logger.warn('No face detected!') | |||||
| return None | |||||
| # face idx with enough size | |||||
| face_idx = [] | |||||
| for i in range(bboxes.shape[0]): | |||||
| box = bboxes[i] | |||||
| if (box[2] - box[0]) >= min_face and (box[3] - box[1]) >= min_face: | |||||
| face_idx += [i] | |||||
| if len(face_idx) == 0: | |||||
| logger.warn( | |||||
| f'Face size not enough, less than {min_face}x{min_face}!') | |||||
| return None | |||||
| bboxes = bboxes[face_idx] | |||||
| landmarks = landmarks[face_idx] | |||||
| return bboxes, landmarks | |||||
| def expend_box(self, box, w, h, scalex=0.3, scaley=0.5): | |||||
| x1 = box[0] | |||||
| y1 = box[1] | |||||
| wb = box[2] - x1 | |||||
| hb = box[3] - y1 | |||||
| deltax = int(wb * scalex) | |||||
| deltay1 = int(hb * scaley) | |||||
| deltay2 = int(hb * scalex) | |||||
| x1 = x1 - deltax | |||||
| y1 = y1 - deltay1 | |||||
| if x1 < 0: | |||||
| deltax = deltax + x1 | |||||
| x1 = 0 | |||||
| if y1 < 0: | |||||
| deltay1 = deltay1 + y1 | |||||
| y1 = 0 | |||||
| x2 = x1 + wb + 2 * deltax | |||||
| y2 = y1 + hb + deltay1 + deltay2 | |||||
| x2 = np.clip(x2, 0, w - 1) | |||||
| y2 = np.clip(y2, 0, h - 1) | |||||
| return [x1, y1, x2, y2] | |||||
| def rotate_point(self, angle, center, landmark): | |||||
| rad = angle * np.pi / 180.0 | |||||
| alpha = np.cos(rad) | |||||
| beta = np.sin(rad) | |||||
| M = np.zeros((2, 3), dtype=np.float32) | |||||
| M[0, 0] = alpha | |||||
| M[0, 1] = beta | |||||
| M[0, 2] = (1 - alpha) * center[0] - beta * center[1] | |||||
| M[1, 0] = -beta | |||||
| M[1, 1] = alpha | |||||
| M[1, 2] = beta * center[0] + (1 - alpha) * center[1] | |||||
| landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2], | |||||
| M[1, 0] * x + M[1, 1] * y + M[1, 2]) | |||||
| for (x, y) in landmark]) | |||||
| return M, landmark_ | |||||
| def rotate_crop_img(self, img, pts, M): | |||||
| imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0]))) | |||||
| x1 = pts[5][0] | |||||
| x2 = pts[5][0] | |||||
| y1 = pts[5][1] | |||||
| y2 = pts[5][1] | |||||
| for i in range(0, 9): | |||||
| x1 = min(x1, pts[i][0]) | |||||
| x2 = max(x2, pts[i][0]) | |||||
| y1 = min(y1, pts[i][1]) | |||||
| y2 = max(y2, pts[i][1]) | |||||
| height, width, _ = imgT.shape | |||||
| x1 = min(max(0, int(x1)), width) | |||||
| y1 = min(max(0, int(y1)), height) | |||||
| x2 = min(max(0, int(x2)), width) | |||||
| y2 = min(max(0, int(y2)), height) | |||||
| sub_imgT = imgT[y1:y2, x1:x2] | |||||
| return sub_imgT, imgT, [x1, y1, x2, y2] | |||||
| def crop_img(self, imgT, pts): | |||||
| enlarge_ratio = 1.1 | |||||
| x1 = np.min(pts[:, 0]) | |||||
| x2 = np.max(pts[:, 0]) | |||||
| y1 = np.min(pts[:, 1]) | |||||
| y2 = np.max(pts[:, 1]) | |||||
| w = x2 - x1 + 1 | |||||
| h = y2 - y1 + 1 | |||||
| x1 = int(x1 - (enlarge_ratio - 1.0) / 2.0 * w) | |||||
| y1 = int(y1 - (enlarge_ratio - 1.0) / 2.0 * h) | |||||
| x1 = max(0, x1) | |||||
| y1 = max(0, y1) | |||||
| new_w = int(enlarge_ratio * w) | |||||
| new_h = int(enlarge_ratio * h) | |||||
| new_x1 = x1 | |||||
| new_y1 = y1 | |||||
| new_x2 = new_x1 + new_w | |||||
| new_y2 = new_y1 + new_h | |||||
| height, width, _ = imgT.shape | |||||
| new_x1 = min(max(0, new_x1), width) | |||||
| new_y1 = min(max(0, new_y1), height) | |||||
| new_x2 = max(min(width, new_x2), 0) | |||||
| new_y2 = max(min(height, new_y2), 0) | |||||
| sub_imgT = imgT[new_y1:new_y2, new_x1:new_x2] | |||||
| return sub_imgT, [new_x1, new_y1, new_x2, new_y2] | |||||
| def __call__(self, inputs) -> Any: | def __call__(self, inputs) -> Any: | ||||
| outputs = self.predict_op(inputs) | |||||
| img = LoadImage.convert_to_ndarray(inputs) | |||||
| h, w, c = img.shape | |||||
| img_rgb = copy.deepcopy(img) | |||||
| img_rgb = img_rgb[:, :, ::-1] | |||||
| det_result = self.face_detection(img_rgb) | |||||
| bboxes = np.array(det_result[OutputKeys.BOXES]) | |||||
| if bboxes.shape[0] == 0: | |||||
| logger.warn('No face detected!') | |||||
| results = { | |||||
| OutputKeys.KEYPOINTS: [], | |||||
| OutputKeys.POSES: [], | |||||
| OutputKeys.BOXES: [] | |||||
| } | |||||
| return results | |||||
| boxes, keypoints = self._choose_face(det_result) | |||||
| output_boxes = [] | |||||
| output_keypoints = [] | |||||
| output_poses = [] | |||||
| for index, box_ori in enumerate(boxes): | |||||
| box = self.expend_box(box_ori, w, h, scalex=0.1, scaley=0.1) | |||||
| y0 = int(box[1]) | |||||
| y1 = int(box[3]) | |||||
| x0 = int(box[0]) | |||||
| x1 = int(box[2]) | |||||
| sub_img = img[y0:y1, x0:x1] | |||||
| keypoint = keypoints[index] | |||||
| pts = [[keypoint[0], keypoint[1]], [keypoint[2], keypoint[3]], | |||||
| [keypoint[4], keypoint[5]], [keypoint[6], keypoint[7]], | |||||
| [keypoint[8], keypoint[9]], [box[0], box[1]], | |||||
| [box[2], box[1]], [box[0], box[3]], [box[2], box[3]]] | |||||
| # radian | |||||
| angle = math.atan2((pts[1][1] - pts[0][1]), | |||||
| (pts[1][0] - pts[0][0])) | |||||
| # angle | |||||
| theta = angle * (180 / np.pi) | |||||
| center = [w // 2, h // 2] | |||||
| cx, cy = center | |||||
| M, landmark_ = self.rotate_point(theta, (cx, cy), pts) | |||||
| sub_imgT, imgT, bbox = self.rotate_crop_img(img, landmark_, M) | |||||
| outputs = self.predict_op([sub_imgT])[0] | |||||
| tmp_keypoints = outputs['point'] | |||||
| for idx in range(0, len(tmp_keypoints)): | |||||
| tmp_keypoints[idx][0] += bbox[0] | |||||
| tmp_keypoints[idx][1] += bbox[1] | |||||
| for idx in range(0, 6): | |||||
| sub_img, bbox = self.crop_img(imgT, tmp_keypoints) | |||||
| outputs = self.predict_op([sub_img])[0] | |||||
| tmp_keypoints = outputs['point'] | |||||
| for idx in range(0, len(tmp_keypoints)): | |||||
| tmp_keypoints[idx][0] += bbox[0] | |||||
| tmp_keypoints[idx][1] += bbox[1] | |||||
| M2, tmp_keypoints = self.rotate_point(-theta, (cx, cy), | |||||
| tmp_keypoints) | |||||
| results = [{ | |||||
| OutputKeys.KEYPOINTS: output['point'], | |||||
| OutputKeys.POSES: output['pose'] | |||||
| } for output in outputs] | |||||
| output_keypoints.append(np.array(tmp_keypoints)) | |||||
| output_poses.append(np.array(outputs['pose'])) | |||||
| output_boxes.append(np.array(box_ori)) | |||||
| if self._is_single_inputs(inputs): | |||||
| results = results[0] | |||||
| results = { | |||||
| OutputKeys.KEYPOINTS: output_keypoints, | |||||
| OutputKeys.POSES: output_poses, | |||||
| OutputKeys.BOXES: output_boxes | |||||
| } | |||||
| return results | return results | ||||
| @@ -109,13 +109,13 @@ class TokenClassificationPipeline(Pipeline): | |||||
| chunk['span'] = text[chunk['start']:chunk['end']] | chunk['span'] = text[chunk['start']:chunk['end']] | ||||
| chunks.append(chunk) | chunks.append(chunk) | ||||
| # for cws output | |||||
| # for cws outputs | |||||
| if len(chunks) > 0 and chunks[0]['type'] == 'cws': | if len(chunks) > 0 and chunks[0]['type'] == 'cws': | ||||
| spans = [ | spans = [ | ||||
| chunk['span'] for chunk in chunks if chunk['span'].strip() | chunk['span'] for chunk in chunks if chunk['span'].strip() | ||||
| ] | ] | ||||
| seg_result = ' '.join(spans) | seg_result = ' '.join(spans) | ||||
| outputs = {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []} | |||||
| outputs = {OutputKeys.OUTPUT: seg_result} | |||||
| # for ner outputs | # for ner outputs | ||||
| else: | else: | ||||
| @@ -115,15 +115,15 @@ class WordSegmentationPipeline(Pipeline): | |||||
| chunk['span'] = text[chunk['start']:chunk['end']] | chunk['span'] = text[chunk['start']:chunk['end']] | ||||
| chunks.append(chunk) | chunks.append(chunk) | ||||
| # for cws output | |||||
| # for cws outputs | |||||
| if len(chunks) > 0 and chunks[0]['type'] == 'cws': | if len(chunks) > 0 and chunks[0]['type'] == 'cws': | ||||
| spans = [ | spans = [ | ||||
| chunk['span'] for chunk in chunks if chunk['span'].strip() | chunk['span'] for chunk in chunks if chunk['span'].strip() | ||||
| ] | ] | ||||
| seg_result = ' '.join(spans) | seg_result = ' '.join(spans) | ||||
| outputs = {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []} | |||||
| outputs = {OutputKeys.OUTPUT: seg_result} | |||||
| # for ner outpus | |||||
| # for ner output | |||||
| else: | else: | ||||
| outputs = {OutputKeys.OUTPUT: chunks} | outputs = {OutputKeys.OUTPUT: chunks} | ||||
| return outputs | return outputs | ||||
| @@ -77,7 +77,7 @@ class OfaPreprocessor(Preprocessor): | |||||
| data[key] = item | data[key] = item | ||||
| return data | return data | ||||
| def _ofa_input_compatibility_conversion(self, data): | |||||
| def _ofa_input_compatibility_conversion(self, data): # fake | |||||
| if 'image' in data and self.cfg.model.get('type', None) == 'ofa': | if 'image' in data and self.cfg.model.get('type', None) == 'ofa': | ||||
| if isinstance(data['image'], str): | if isinstance(data['image'], str): | ||||
| image = load_image(data['image']) | image = load_image(data['image']) | ||||
| @@ -34,6 +34,7 @@ class NLPBasePreprocessor(Preprocessor, ABC): | |||||
| label=None, | label=None, | ||||
| label2id=None, | label2id=None, | ||||
| mode=ModeKeys.INFERENCE, | mode=ModeKeys.INFERENCE, | ||||
| use_fast=None, | |||||
| **kwargs): | **kwargs): | ||||
| """The NLP preprocessor base class. | """The NLP preprocessor base class. | ||||
| @@ -45,14 +46,18 @@ class NLPBasePreprocessor(Preprocessor, ABC): | |||||
| label2id: An optional label2id mapping, the class will try to call utils.parse_label_mapping | label2id: An optional label2id mapping, the class will try to call utils.parse_label_mapping | ||||
| if this mapping is not supplied. | if this mapping is not supplied. | ||||
| mode: Run this preprocessor in either 'train'/'eval'/'inference' mode | mode: Run this preprocessor in either 'train'/'eval'/'inference' mode | ||||
| use_fast: use the fast version of tokenizer | |||||
| """ | """ | ||||
| self.model_dir = model_dir | self.model_dir = model_dir | ||||
| self.first_sequence = first_sequence | self.first_sequence = first_sequence | ||||
| self.second_sequence = second_sequence | self.second_sequence = second_sequence | ||||
| self.label = label | self.label = label | ||||
| self.use_fast = kwargs.pop('use_fast', None) | |||||
| if self.use_fast is None and os.path.isfile( | |||||
| self.use_fast = use_fast | |||||
| if self.use_fast is None and model_dir is None: | |||||
| self.use_fast = False | |||||
| elif self.use_fast is None and os.path.isfile( | |||||
| os.path.join(model_dir, 'tokenizer_config.json')): | os.path.join(model_dir, 'tokenizer_config.json')): | ||||
| with open(os.path.join(model_dir, 'tokenizer_config.json'), | with open(os.path.join(model_dir, 'tokenizer_config.json'), | ||||
| 'r') as f: | 'r') as f: | ||||
| @@ -61,8 +66,8 @@ class NLPBasePreprocessor(Preprocessor, ABC): | |||||
| self.use_fast = False if self.use_fast is None else self.use_fast | self.use_fast = False if self.use_fast is None else self.use_fast | ||||
| self.label2id = label2id | self.label2id = label2id | ||||
| if self.label2id is None: | |||||
| self.label2id = parse_label_mapping(self.model_dir) | |||||
| if self.label2id is None and model_dir is not None: | |||||
| self.label2id = parse_label_mapping(model_dir) | |||||
| super().__init__(mode, **kwargs) | super().__init__(mode, **kwargs) | ||||
| @property | @property | ||||
| @@ -106,6 +111,7 @@ class NLPTokenizerPreprocessorBase(NLPBasePreprocessor): | |||||
| label: str = 'label', | label: str = 'label', | ||||
| label2id: dict = None, | label2id: dict = None, | ||||
| mode: str = ModeKeys.INFERENCE, | mode: str = ModeKeys.INFERENCE, | ||||
| use_fast: bool = None, | |||||
| **kwargs): | **kwargs): | ||||
| """The NLP tokenizer preprocessor base class. | """The NLP tokenizer preprocessor base class. | ||||
| @@ -122,11 +128,12 @@ class NLPTokenizerPreprocessorBase(NLPBasePreprocessor): | |||||
| - config.json label2id/id2label | - config.json label2id/id2label | ||||
| - label_mapping.json | - label_mapping.json | ||||
| mode: Run this preprocessor in either 'train'/'eval'/'inference' mode, the behavior may be different. | mode: Run this preprocessor in either 'train'/'eval'/'inference' mode, the behavior may be different. | ||||
| use_fast: use the fast version of tokenizer | |||||
| kwargs: These kwargs will be directly fed into the tokenizer. | kwargs: These kwargs will be directly fed into the tokenizer. | ||||
| """ | """ | ||||
| super().__init__(model_dir, first_sequence, second_sequence, label, | super().__init__(model_dir, first_sequence, second_sequence, label, | ||||
| label2id, mode) | |||||
| label2id, mode, use_fast, **kwargs) | |||||
| self.model_dir = model_dir | self.model_dir = model_dir | ||||
| self.tokenize_kwargs = kwargs | self.tokenize_kwargs = kwargs | ||||
| self.tokenizer = self.build_tokenizer(model_dir) | self.tokenizer = self.build_tokenizer(model_dir) | ||||
| @@ -2,6 +2,7 @@ | |||||
| from typing import Any, Dict, Tuple, Union | from typing import Any, Dict, Tuple, Union | ||||
| import numpy as np | |||||
| import torch | import torch | ||||
| from modelscope.metainfo import Preprocessors | from modelscope.metainfo import Preprocessors | ||||
| @@ -20,9 +21,7 @@ class WordSegmentationBlankSetToLabelPreprocessor(NLPBasePreprocessor): | |||||
| """ | """ | ||||
| def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
| super().__init__(**kwargs) | |||||
| self.first_sequence: str = kwargs.pop('first_sequence', | |||||
| 'first_sequence') | |||||
| self.first_sequence: str = kwargs.pop('first_sequence', 'tokens') | |||||
| self.label = kwargs.pop('label', OutputKeys.LABELS) | self.label = kwargs.pop('label', OutputKeys.LABELS) | ||||
| def __call__(self, data: str) -> Union[Dict[str, Any], Tuple]: | def __call__(self, data: str) -> Union[Dict[str, Any], Tuple]: | ||||
| @@ -80,10 +79,9 @@ class TokenClassificationPreprocessor(NLPTokenizerPreprocessorBase): | |||||
| 'is_split_into_words', False) | 'is_split_into_words', False) | ||||
| if 'label2id' in kwargs: | if 'label2id' in kwargs: | ||||
| kwargs.pop('label2id') | kwargs.pop('label2id') | ||||
| self.tokenize_kwargs = kwargs | |||||
| @type_assert(object, str) | |||||
| def __call__(self, data: str) -> Dict[str, Any]: | |||||
| @type_assert(object, (str, dict)) | |||||
| def __call__(self, data: Union[dict, str]) -> Dict[str, Any]: | |||||
| """process the raw input data | """process the raw input data | ||||
| Args: | Args: | ||||
| @@ -99,18 +97,24 @@ class TokenClassificationPreprocessor(NLPTokenizerPreprocessorBase): | |||||
| text = None | text = None | ||||
| labels_list = None | labels_list = None | ||||
| if isinstance(data, str): | if isinstance(data, str): | ||||
| # for inference inputs without label | |||||
| text = data | text = data | ||||
| self.tokenize_kwargs['add_special_tokens'] = False | |||||
| elif isinstance(data, dict): | elif isinstance(data, dict): | ||||
| # for finetune inputs with label | |||||
| text = data.get(self.first_sequence) | text = data.get(self.first_sequence) | ||||
| labels_list = data.get(self.label) | labels_list = data.get(self.label) | ||||
| if isinstance(text, list): | |||||
| self.tokenize_kwargs['is_split_into_words'] = True | |||||
| input_ids = [] | input_ids = [] | ||||
| label_mask = [] | label_mask = [] | ||||
| offset_mapping = [] | offset_mapping = [] | ||||
| if self.is_split_into_words: | |||||
| for offset, token in enumerate(list(data)): | |||||
| subtoken_ids = self.tokenizer.encode( | |||||
| token, add_special_tokens=False) | |||||
| token_type_ids = [] | |||||
| if self.is_split_into_words and self._mode == ModeKeys.INFERENCE: | |||||
| for offset, token in enumerate(list(text)): | |||||
| subtoken_ids = self.tokenizer.encode(token, | |||||
| **self.tokenize_kwargs) | |||||
| if len(subtoken_ids) == 0: | if len(subtoken_ids) == 0: | ||||
| subtoken_ids = [self.tokenizer.unk_token_id] | subtoken_ids = [self.tokenizer.unk_token_id] | ||||
| input_ids.extend(subtoken_ids) | input_ids.extend(subtoken_ids) | ||||
| @@ -119,10 +123,9 @@ class TokenClassificationPreprocessor(NLPTokenizerPreprocessorBase): | |||||
| else: | else: | ||||
| if self.tokenizer.is_fast: | if self.tokenizer.is_fast: | ||||
| encodings = self.tokenizer( | encodings = self.tokenizer( | ||||
| text, | |||||
| add_special_tokens=False, | |||||
| return_offsets_mapping=True, | |||||
| **self.tokenize_kwargs) | |||||
| text, return_offsets_mapping=True, **self.tokenize_kwargs) | |||||
| attention_mask = encodings['attention_mask'] | |||||
| token_type_ids = encodings['token_type_ids'] | |||||
| input_ids = encodings['input_ids'] | input_ids = encodings['input_ids'] | ||||
| word_ids = encodings.word_ids() | word_ids = encodings.word_ids() | ||||
| for i in range(len(word_ids)): | for i in range(len(word_ids)): | ||||
| @@ -137,75 +140,85 @@ class TokenClassificationPreprocessor(NLPTokenizerPreprocessorBase): | |||||
| label_mask.append(1) | label_mask.append(1) | ||||
| offset_mapping.append(encodings['offset_mapping'][i]) | offset_mapping.append(encodings['offset_mapping'][i]) | ||||
| else: | else: | ||||
| encodings = self.tokenizer( | |||||
| text, add_special_tokens=False, **self.tokenize_kwargs) | |||||
| encodings = self.tokenizer(text, **self.tokenize_kwargs) | |||||
| input_ids = encodings['input_ids'] | input_ids = encodings['input_ids'] | ||||
| label_mask, offset_mapping = self.get_label_mask_and_offset_mapping( | label_mask, offset_mapping = self.get_label_mask_and_offset_mapping( | ||||
| text) | text) | ||||
| if len(input_ids) >= self.sequence_length - 2: | |||||
| input_ids = input_ids[:self.sequence_length - 2] | |||||
| label_mask = label_mask[:self.sequence_length - 2] | |||||
| input_ids = [self.tokenizer.cls_token_id | |||||
| ] + input_ids + [self.tokenizer.sep_token_id] | |||||
| label_mask = [0] + label_mask + [0] | |||||
| attention_mask = [1] * len(input_ids) | |||||
| offset_mapping = offset_mapping[:sum(label_mask)] | |||||
| if self._mode == ModeKeys.INFERENCE: | |||||
| if len(input_ids) >= self.sequence_length - 2: | |||||
| input_ids = input_ids[:self.sequence_length - 2] | |||||
| label_mask = label_mask[:self.sequence_length - 2] | |||||
| input_ids = [self.tokenizer.cls_token_id | |||||
| ] + input_ids + [self.tokenizer.sep_token_id] | |||||
| label_mask = [0] + label_mask + [0] | |||||
| attention_mask = [1] * len(input_ids) | |||||
| offset_mapping = offset_mapping[:sum(label_mask)] | |||||
| if not self.is_transformer_based_model: | |||||
| input_ids = input_ids[1:-1] | |||||
| attention_mask = attention_mask[1:-1] | |||||
| label_mask = label_mask[1:-1] | |||||
| if not self.is_transformer_based_model: | |||||
| input_ids = input_ids[1:-1] | |||||
| attention_mask = attention_mask[1:-1] | |||||
| label_mask = label_mask[1:-1] | |||||
| if self._mode == ModeKeys.INFERENCE: | |||||
| input_ids = torch.tensor(input_ids).unsqueeze(0) | input_ids = torch.tensor(input_ids).unsqueeze(0) | ||||
| attention_mask = torch.tensor(attention_mask).unsqueeze(0) | attention_mask = torch.tensor(attention_mask).unsqueeze(0) | ||||
| label_mask = torch.tensor( | label_mask = torch.tensor( | ||||
| label_mask, dtype=torch.bool).unsqueeze(0) | label_mask, dtype=torch.bool).unsqueeze(0) | ||||
| # the token classification | |||||
| output = { | |||||
| 'text': text, | |||||
| 'input_ids': input_ids, | |||||
| 'attention_mask': attention_mask, | |||||
| 'label_mask': label_mask, | |||||
| 'offset_mapping': offset_mapping | |||||
| } | |||||
| # align the labels with tokenized text | |||||
| if labels_list is not None: | |||||
| assert self.label2id is not None | |||||
| # Map that sends B-Xxx label to its I-Xxx counterpart | |||||
| b_to_i_label = [] | |||||
| label_enumerate_values = [ | |||||
| k for k, v in sorted( | |||||
| self.label2id.items(), key=lambda item: item[1]) | |||||
| ] | |||||
| for idx, label in enumerate(label_enumerate_values): | |||||
| if label.startswith('B-') and label.replace( | |||||
| 'B-', 'I-') in label_enumerate_values: | |||||
| b_to_i_label.append( | |||||
| label_enumerate_values.index( | |||||
| label.replace('B-', 'I-'))) | |||||
| else: | |||||
| b_to_i_label.append(idx) | |||||
| # the token classification | |||||
| output = { | |||||
| 'text': text, | |||||
| 'input_ids': input_ids, | |||||
| 'attention_mask': attention_mask, | |||||
| 'label_mask': label_mask, | |||||
| 'offset_mapping': offset_mapping | |||||
| } | |||||
| else: | |||||
| output = { | |||||
| 'input_ids': input_ids, | |||||
| 'token_type_ids': token_type_ids, | |||||
| 'attention_mask': attention_mask, | |||||
| 'label_mask': label_mask, | |||||
| } | |||||
| label_row = [self.label2id[lb] for lb in labels_list] | |||||
| previous_word_idx = None | |||||
| label_ids = [] | |||||
| for word_idx in word_ids: | |||||
| if word_idx is None: | |||||
| label_ids.append(-100) | |||||
| elif word_idx != previous_word_idx: | |||||
| label_ids.append(label_row[word_idx]) | |||||
| else: | |||||
| if self.label_all_tokens: | |||||
| label_ids.append(b_to_i_label[label_row[word_idx]]) | |||||
| # align the labels with tokenized text | |||||
| if labels_list is not None: | |||||
| assert self.label2id is not None | |||||
| # Map that sends B-Xxx label to its I-Xxx counterpart | |||||
| b_to_i_label = [] | |||||
| label_enumerate_values = [ | |||||
| k for k, v in sorted( | |||||
| self.label2id.items(), key=lambda item: item[1]) | |||||
| ] | |||||
| for idx, label in enumerate(label_enumerate_values): | |||||
| if label.startswith('B-') and label.replace( | |||||
| 'B-', 'I-') in label_enumerate_values: | |||||
| b_to_i_label.append( | |||||
| label_enumerate_values.index( | |||||
| label.replace('B-', 'I-'))) | |||||
| else: | else: | ||||
| b_to_i_label.append(idx) | |||||
| label_row = [self.label2id[lb] for lb in labels_list] | |||||
| previous_word_idx = None | |||||
| label_ids = [] | |||||
| for word_idx in word_ids: | |||||
| if word_idx is None: | |||||
| label_ids.append(-100) | label_ids.append(-100) | ||||
| previous_word_idx = word_idx | |||||
| labels = label_ids | |||||
| output['labels'] = labels | |||||
| elif word_idx != previous_word_idx: | |||||
| label_ids.append(label_row[word_idx]) | |||||
| else: | |||||
| if self.label_all_tokens: | |||||
| label_ids.append(b_to_i_label[label_row[word_idx]]) | |||||
| else: | |||||
| label_ids.append(-100) | |||||
| previous_word_idx = word_idx | |||||
| labels = label_ids | |||||
| output['labels'] = labels | |||||
| output = { | |||||
| k: np.array(v) if isinstance(v, list) else v | |||||
| for k, v in output.items() | |||||
| } | |||||
| return output | return output | ||||
| def get_tokenizer_class(self): | def get_tokenizer_class(self): | ||||
| @@ -74,8 +74,8 @@ class OfaOcrRecognitionPreprocessor(OfaBasePreprocessor): | |||||
| self.patch_resize_transform = transforms.Compose([ | self.patch_resize_transform = transforms.Compose([ | ||||
| lambda image: ocr_resize( | lambda image: ocr_resize( | ||||
| image, | image, | ||||
| self.cfg.model.patch_image_size, | |||||
| is_document=self.cfg.model.is_document), | |||||
| self.patch_image_size, | |||||
| is_document=self.cfg.model.get('is_document', False)), | |||||
| transforms.ToTensor(), | transforms.ToTensor(), | ||||
| transforms.Normalize(mean=self.mean, std=self.std), | transforms.Normalize(mean=self.mean, std=self.std), | ||||
| ]) | ]) | ||||
| @@ -69,11 +69,14 @@ class KWSFarfieldTrainer(BaseTrainer): | |||||
| super().__init__(cfg_file, arg_parse_fn) | super().__init__(cfg_file, arg_parse_fn) | ||||
| self.model = self.build_model() | |||||
| self.work_dir = work_dir | |||||
| # the number of model output dimension | # the number of model output dimension | ||||
| # should update config outside the trainer, if user need more wake word | # should update config outside the trainer, if user need more wake word | ||||
| num_syn = kwargs.get('num_syn', None) | |||||
| if num_syn: | |||||
| self.cfg.model.num_syn = num_syn | |||||
| self._num_classes = self.cfg.model.num_syn | self._num_classes = self.cfg.model.num_syn | ||||
| self.model = self.build_model() | |||||
| self.work_dir = work_dir | |||||
| if kwargs.get('launcher', None) is not None: | if kwargs.get('launcher', None) is not None: | ||||
| init_dist(kwargs['launcher']) | init_dist(kwargs['launcher']) | ||||
| @@ -103,20 +103,20 @@ class AdjustLabelSmoothedCrossEntropyCriterion(_Loss): | |||||
| def __init__(self, args): | def __init__(self, args): | ||||
| super().__init__() | super().__init__() | ||||
| self.sentence_avg = args.sentence_avg | |||||
| self.eps = args.label_smoothing | |||||
| self.ignore_prefix_size = args.ignore_prefix_size | |||||
| self.ignore_eos = args.ignore_eos | |||||
| self.report_accuracy = args.report_accuracy | |||||
| self.drop_worst_ratio = args.drop_worst_ratio | |||||
| self.drop_worst_after = args.drop_worst_after | |||||
| self.use_rdrop = args.use_rdrop | |||||
| self.reg_alpha = args.reg_alpha | |||||
| self.sample_patch_num = args.sample_patch_num | |||||
| self.sentence_avg = args.get('sentence_avg', False) | |||||
| self.eps = args.get('label_smoothing', 0.1) | |||||
| self.ignore_prefix_size = args.get('ignore_prefix_size', 0) | |||||
| self.ignore_eos = args.get('ignore_eos', False) | |||||
| self.report_accuracy = args.get('report_accuracy', False) | |||||
| self.drop_worst_ratio = args.get('drop_worst_ratio', 0.0) | |||||
| self.drop_worst_after = args.get('drop_worst_after', 0) | |||||
| self.use_rdrop = args.get('use_rdrop', False) | |||||
| self.reg_alpha = args.get('reg_alpha', 1.0) | |||||
| self.sample_patch_num = args.get('sample_patch_num', 196) | |||||
| self.constraint_start = None | self.constraint_start = None | ||||
| self.constraint_end = None | self.constraint_end = None | ||||
| if args.constraint_range: | |||||
| if args.get('constraint_range', None): | |||||
| constraint_start, constraint_end = args.constraint_range.split(',') | constraint_start, constraint_end = args.constraint_range.split(',') | ||||
| self.constraint_start = int(constraint_start) | self.constraint_start = int(constraint_start) | ||||
| self.constraint_end = int(constraint_end) | self.constraint_end = int(constraint_end) | ||||
| @@ -18,7 +18,7 @@ class TextGenerationTrainer(NlpEpochBasedTrainer): | |||||
| return tokenizer.decode(tokens.tolist(), skip_special_tokens=True) | return tokenizer.decode(tokens.tolist(), skip_special_tokens=True) | ||||
| def evaluation_step(self, data): | def evaluation_step(self, data): | ||||
| model = self.model | |||||
| model = self.model.module if self._dist else self.model | |||||
| model.eval() | model.eval() | ||||
| with torch.no_grad(): | with torch.no_grad(): | ||||
| @@ -586,14 +586,16 @@ class NlpEpochBasedTrainer(EpochBasedTrainer): | |||||
| preprocessor_mode=ModeKeys.TRAIN, | preprocessor_mode=ModeKeys.TRAIN, | ||||
| **model_args, | **model_args, | ||||
| **self.train_keys, | **self.train_keys, | ||||
| mode=ModeKeys.TRAIN) | |||||
| mode=ModeKeys.TRAIN, | |||||
| use_fast=True) | |||||
| eval_preprocessor = Preprocessor.from_pretrained( | eval_preprocessor = Preprocessor.from_pretrained( | ||||
| self.model_dir, | self.model_dir, | ||||
| cfg_dict=self.cfg, | cfg_dict=self.cfg, | ||||
| preprocessor_mode=ModeKeys.EVAL, | preprocessor_mode=ModeKeys.EVAL, | ||||
| **model_args, | **model_args, | ||||
| **self.eval_keys, | **self.eval_keys, | ||||
| mode=ModeKeys.EVAL) | |||||
| mode=ModeKeys.EVAL, | |||||
| use_fast=True) | |||||
| return train_preprocessor, eval_preprocessor | return train_preprocessor, eval_preprocessor | ||||
| @@ -15,6 +15,7 @@ from torch.utils.data.dataloader import default_collate | |||||
| from torch.utils.data.distributed import DistributedSampler | from torch.utils.data.distributed import DistributedSampler | ||||
| from modelscope.hub.snapshot_download import snapshot_download | from modelscope.hub.snapshot_download import snapshot_download | ||||
| from modelscope.hub.utils.utils import create_library_statistics | |||||
| from modelscope.metainfo import Trainers | from modelscope.metainfo import Trainers | ||||
| from modelscope.metrics import build_metric, task_default_metrics | from modelscope.metrics import build_metric, task_default_metrics | ||||
| from modelscope.models.base import Model, TorchModel | from modelscope.models.base import Model, TorchModel | ||||
| @@ -436,6 +437,8 @@ class EpochBasedTrainer(BaseTrainer): | |||||
| def train(self, checkpoint_path=None, *args, **kwargs): | def train(self, checkpoint_path=None, *args, **kwargs): | ||||
| self._mode = ModeKeys.TRAIN | self._mode = ModeKeys.TRAIN | ||||
| if hasattr(self.model, 'name'): | |||||
| create_library_statistics('train', self.model.name, None) | |||||
| if self.train_dataset is None: | if self.train_dataset is None: | ||||
| self.train_dataloader = self.get_train_dataloader() | self.train_dataloader = self.get_train_dataloader() | ||||
| @@ -456,6 +459,8 @@ class EpochBasedTrainer(BaseTrainer): | |||||
| self.train_loop(self.train_dataloader) | self.train_loop(self.train_dataloader) | ||||
| def evaluate(self, checkpoint_path=None): | def evaluate(self, checkpoint_path=None): | ||||
| if hasattr(self.model, 'name'): | |||||
| create_library_statistics('evaluate', self.model.name, None) | |||||
| if checkpoint_path is not None and os.path.isfile(checkpoint_path): | if checkpoint_path is not None and os.path.isfile(checkpoint_path): | ||||
| from modelscope.trainers.hooks import CheckpointHook | from modelscope.trainers.hooks import CheckpointHook | ||||
| CheckpointHook.load_checkpoint(checkpoint_path, self) | CheckpointHook.load_checkpoint(checkpoint_path, self) | ||||
| @@ -876,7 +881,7 @@ class EpochBasedTrainer(BaseTrainer): | |||||
| Subclass and override to inject custom behavior. | Subclass and override to inject custom behavior. | ||||
| """ | """ | ||||
| model = self.model | |||||
| model = self.model.module if self._dist else self.model | |||||
| model.eval() | model.eval() | ||||
| if is_parallel(model): | if is_parallel(model): | ||||
| @@ -238,6 +238,14 @@ class DownloadMode(enum.Enum): | |||||
| FORCE_REDOWNLOAD = 'force_redownload' | FORCE_REDOWNLOAD = 'force_redownload' | ||||
| class DownloadChannel(enum.Enum): | |||||
| """ Channels of datasets downloading for uv/pv counting. | |||||
| """ | |||||
| LOCAL = 'local' | |||||
| DSW = 'dsw' | |||||
| EAIS = 'eais' | |||||
| class UploadMode(enum.Enum): | class UploadMode(enum.Enum): | ||||
| """ How to upload object to remote. | """ How to upload object to remote. | ||||
| """ | """ | ||||
| @@ -91,6 +91,71 @@ def draw_keypoints(output, original_image): | |||||
| return image | return image | ||||
| def draw_106face_keypoints(in_path, | |||||
| keypoints, | |||||
| boxes, | |||||
| scale=4.0, | |||||
| save_path=None): | |||||
| face_contour_point_index = [ | |||||
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 | |||||
| ] | |||||
| left_eye_brow_point_index = [33, 34, 35, 36, 37, 38, 39, 40, 41, 33] | |||||
| right_eye_brow_point_index = [42, 43, 44, 45, 46, 47, 48, 49, 50, 42] | |||||
| left_eye_point_index = [66, 67, 68, 69, 70, 71, 72, 73, 66] | |||||
| right_eye_point_index = [75, 76, 77, 78, 79, 80, 81, 82, 75] | |||||
| nose_bridge_point_index = [51, 52, 53, 54] | |||||
| nose_contour_point_index = [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65] | |||||
| mouth_outer_point_index = [ | |||||
| 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 84 | |||||
| ] | |||||
| mouth_inter_point_index = [96, 97, 98, 99, 100, 101, 102, 103, 96] | |||||
| img = cv2.imread(in_path) | |||||
| for i in range(len(boxes)): | |||||
| draw_box(img, np.array(boxes[i])) | |||||
| image = cv2.resize(img, dsize=None, fx=scale, fy=scale) | |||||
| def draw_line(point_index, image, point): | |||||
| for i in range(len(point_index) - 1): | |||||
| cur_index = point_index[i] | |||||
| next_index = point_index[i + 1] | |||||
| cur_pt = (int(point[cur_index][0] * scale), | |||||
| int(point[cur_index][1] * scale)) | |||||
| next_pt = (int(point[next_index][0] * scale), | |||||
| int(point[next_index][1] * scale)) | |||||
| cv2.line(image, cur_pt, next_pt, (0, 0, 255), thickness=2) | |||||
| for i in range(len(keypoints)): | |||||
| points = keypoints[i] | |||||
| draw_line(face_contour_point_index, image, points) | |||||
| draw_line(left_eye_brow_point_index, image, points) | |||||
| draw_line(right_eye_brow_point_index, image, points) | |||||
| draw_line(left_eye_point_index, image, points) | |||||
| draw_line(right_eye_point_index, image, points) | |||||
| draw_line(nose_bridge_point_index, image, points) | |||||
| draw_line(nose_contour_point_index, image, points) | |||||
| draw_line(mouth_outer_point_index, image, points) | |||||
| draw_line(mouth_inter_point_index, image, points) | |||||
| size = len(points) | |||||
| for i in range(size): | |||||
| x = int(points[i][0]) | |||||
| y = int(points[i][1]) | |||||
| cv2.putText(image, str(i), (int(x * scale), int(y * scale)), | |||||
| cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) | |||||
| cv2.circle(image, (int(x * scale), int(y * scale)), 2, (0, 255, 0), | |||||
| cv2.FILLED) | |||||
| if save_path is not None: | |||||
| cv2.imwrite(save_path, image) | |||||
| return image | |||||
| def draw_face_detection_no_lm_result(img_path, detection_result): | def draw_face_detection_no_lm_result(img_path, detection_result): | ||||
| bboxes = np.array(detection_result[OutputKeys.BOXES]) | bboxes = np.array(detection_result[OutputKeys.BOXES]) | ||||
| scores = np.array(detection_result[OutputKeys.SCORES]) | scores = np.array(detection_result[OutputKeys.SCORES]) | ||||
| @@ -1,6 +1,7 @@ | |||||
| addict | addict | ||||
| attrs | attrs | ||||
| datasets | |||||
| # version beyond 2.5.2 introduces compatbility issue and is being resolved | |||||
| datasets<=2.5.2 | |||||
| easydict | easydict | ||||
| einops | einops | ||||
| filelock>=3.3.0 | filelock>=3.3.0 | ||||
| @@ -2,6 +2,8 @@ ftfy>=6.0.3 | |||||
| ofa>=0.0.2 | ofa>=0.0.2 | ||||
| pycocoevalcap>=1.2 | pycocoevalcap>=1.2 | ||||
| pycocotools>=2.0.4 | pycocotools>=2.0.4 | ||||
| # compatible with taming-transformers-rom1504 | |||||
| pytorch_lightning<=1.7.7 | |||||
| # rough-score was just recently updated from 0.0.4 to 0.0.7 | # rough-score was just recently updated from 0.0.4 to 0.0.7 | ||||
| # which introduced compatability issues that are being investigated | # which introduced compatability issues that are being investigated | ||||
| rouge_score<=0.0.4 | rouge_score<=0.0.4 | ||||
| @@ -1,4 +1,6 @@ | |||||
| biopython | |||||
| iopath | iopath | ||||
| ipdb | |||||
| lmdb | lmdb | ||||
| ml_collections | ml_collections | ||||
| scipy | scipy | ||||
| @@ -8,7 +8,8 @@ import zipfile | |||||
| from modelscope.msdatasets import MsDataset | from modelscope.msdatasets import MsDataset | ||||
| from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects | from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects | ||||
| from modelscope.utils import logger as logging | from modelscope.utils import logger as logging | ||||
| from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile | |||||
| from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, | |||||
| ModelFile) | |||||
| from modelscope.utils.test_utils import test_level | from modelscope.utils.test_utils import test_level | ||||
| logger = logging.get_logger(__name__) | logger = logging.get_logger(__name__) | ||||
| @@ -104,7 +105,10 @@ class DatasetUploadTest(unittest.TestCase): | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
| def test_ds_download_dir(self): | def test_ds_download_dir(self): | ||||
| test_ds = MsDataset.load(self.dataset_name, self.namespace) | |||||
| test_ds = MsDataset.load( | |||||
| self.dataset_name, | |||||
| namespace=self.namespace, | |||||
| download_mode=DownloadMode.FORCE_REDOWNLOAD) | |||||
| assert test_ds.config_kwargs['split_config'].values() | assert test_ds.config_kwargs['split_config'].values() | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
| @@ -21,9 +21,10 @@ class TestModelOutput(unittest.TestCase): | |||||
| self.assertEqual(outputs['logits'], torch.Tensor([1])) | self.assertEqual(outputs['logits'], torch.Tensor([1])) | ||||
| self.assertEqual(outputs[0], torch.Tensor([1])) | self.assertEqual(outputs[0], torch.Tensor([1])) | ||||
| self.assertEqual(outputs.logits, torch.Tensor([1])) | self.assertEqual(outputs.logits, torch.Tensor([1])) | ||||
| outputs.loss = torch.Tensor([2]) | |||||
| logits, loss = outputs | logits, loss = outputs | ||||
| self.assertEqual(logits, torch.Tensor([1])) | self.assertEqual(logits, torch.Tensor([1])) | ||||
| self.assertTrue(loss is None) | |||||
| self.assertTrue(loss is not None) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -1,11 +1,10 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | # Copyright (c) Alibaba, Inc. and its affiliates. | ||||
| import unittest | import unittest | ||||
| import cv2 | |||||
| from modelscope.outputs import OutputKeys | from modelscope.outputs import OutputKeys | ||||
| from modelscope.pipelines import pipeline | from modelscope.pipelines import pipeline | ||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from modelscope.utils.cv.image_utils import draw_106face_keypoints | |||||
| from modelscope.utils.test_utils import test_level | from modelscope.utils.test_utils import test_level | ||||
| @@ -13,7 +12,7 @@ class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase): | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| def test_face_2d_keypoints(self): | def test_face_2d_keypoints(self): | ||||
| img_path = 'data/test/images/keypoints_detect/test_img_face_2d_keypoints.png' | |||||
| img_path = 'data/test/images/face_detection.png' | |||||
| model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' | model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' | ||||
| face_2d_keypoints_align = pipeline( | face_2d_keypoints_align = pipeline( | ||||
| @@ -21,15 +20,21 @@ class EasyCVFace2DKeypointsPipelineTest(unittest.TestCase): | |||||
| output = face_2d_keypoints_align(img_path) | output = face_2d_keypoints_align(img_path) | ||||
| output_keypoints = output[OutputKeys.KEYPOINTS] | output_keypoints = output[OutputKeys.KEYPOINTS] | ||||
| output_pose = output[OutputKeys.POSES] | |||||
| img = cv2.imread(img_path) | |||||
| img = face_2d_keypoints_align.show_result( | |||||
| img, output_keypoints, scale=2, save_path='face_keypoints.jpg') | |||||
| self.assertEqual(output_keypoints.shape[0], 106) | |||||
| self.assertEqual(output_keypoints.shape[1], 2) | |||||
| self.assertEqual(output_pose.shape[0], 3) | |||||
| output_poses = output[OutputKeys.POSES] | |||||
| output_boxes = output[OutputKeys.BOXES] | |||||
| draw_106face_keypoints( | |||||
| img_path, | |||||
| output_keypoints, | |||||
| output_boxes, | |||||
| scale=2, | |||||
| save_path='face_keypoints.jpg') | |||||
| for idx in range(len(output_keypoints)): | |||||
| self.assertEqual(output_keypoints[idx].shape[0], 106) | |||||
| self.assertEqual(output_keypoints[idx].shape[1], 2) | |||||
| self.assertEqual(output_poses[idx].shape[0], 3) | |||||
| self.assertEqual(output_boxes[idx].shape[0], 4) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -19,9 +19,11 @@ class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| self.task = Tasks.named_entity_recognition | self.task = Tasks.named_entity_recognition | ||||
| self.model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news' | self.model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news' | ||||
| english_model_id = 'damo/nlp_raner_named-entity-recognition_english-large-ecom' | |||||
| tcrf_model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news' | tcrf_model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news' | ||||
| lcrf_model_id = 'damo/nlp_lstm_named-entity-recognition_chinese-news' | lcrf_model_id = 'damo/nlp_lstm_named-entity-recognition_chinese-news' | ||||
| sentence = '这与温岭市新河镇的一个神秘的传说有关。' | sentence = '这与温岭市新河镇的一个神秘的传说有关。' | ||||
| sentence_en = 'pizza shovel' | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| def test_run_tcrf_by_direct_model_download(self): | def test_run_tcrf_by_direct_model_download(self): | ||||
| @@ -89,6 +91,12 @@ class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| task=Tasks.named_entity_recognition, model=self.lcrf_model_id) | task=Tasks.named_entity_recognition, model=self.lcrf_model_id) | ||||
| print(pipeline_ins(input=self.sentence)) | print(pipeline_ins(input=self.sentence)) | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_english_with_model_name(self): | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.named_entity_recognition, model=self.english_model_id) | |||||
| print(pipeline_ins(input='pizza shovel')) | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| def test_run_with_default_model(self): | def test_run_with_default_model(self): | ||||
| pipeline_ins = pipeline(task=Tasks.named_entity_recognition) | pipeline_ins = pipeline(task=Tasks.named_entity_recognition) | ||||
| @@ -19,7 +19,7 @@ class UnifoldProteinStructureTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| self.protein_multimer = 'GAMGLPEEPSSPQESTLKALSLYEAHLSSYIMYLQTFLVKTKQKVNNKNYPEFTLFDTSKLKKDQTLKSIKT' + \ | self.protein_multimer = 'GAMGLPEEPSSPQESTLKALSLYEAHLSSYIMYLQTFLVKTKQKVNNKNYPEFTLFDTSKLKKDQTLKSIKT' + \ | ||||
| 'NIAALKNHIDKIKPIAMQIYKKYSKNIP' | 'NIAALKNHIDKIKPIAMQIYKKYSKNIP' | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_by_direct_model_download(self): | def test_run_by_direct_model_download(self): | ||||
| model_dir = snapshot_download(self.model_id) | model_dir = snapshot_download(self.model_id) | ||||
| mono_pipeline_ins = pipeline(task=self.task, model=model_dir) | mono_pipeline_ins = pipeline(task=self.task, model=model_dir) | ||||
| @@ -87,7 +87,7 @@ class TestFinetuneTokenClassification(unittest.TestCase): | |||||
| cfg['dataset'] = { | cfg['dataset'] = { | ||||
| 'train': { | 'train': { | ||||
| 'labels': label_enumerate_values, | 'labels': label_enumerate_values, | ||||
| 'first_sequence': 'first_sequence', | |||||
| 'first_sequence': 'tokens', | |||||
| 'label': 'labels', | 'label': 'labels', | ||||
| } | } | ||||
| } | } | ||||