@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b37b706885849037b5fa7fa44a3b78a6375f768d95ce46bfcb8e7329d038a692
size 181725
| @@ -9,7 +9,7 @@ import requests | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from .constants import MODELSCOPE_URL_SCHEME | from .constants import MODELSCOPE_URL_SCHEME | ||||
| from .errors import NotExistError, is_ok, raise_on_error | |||||
| from .errors import InvalidParameter, NotExistError, is_ok, raise_on_error | |||||
| from .utils.utils import (get_endpoint, get_gitlab_domain, | from .utils.utils import (get_endpoint, get_gitlab_domain, | ||||
| model_id_to_group_owner_name) | model_id_to_group_owner_name) | ||||
| @@ -61,17 +61,21 @@ class HubApi: | |||||
| return d['Data']['AccessToken'], cookies | return d['Data']['AccessToken'], cookies | ||||
| def create_model(self, model_id: str, chinese_name: str, visibility: int, | |||||
| license: str) -> str: | |||||
| def create_model( | |||||
| self, | |||||
| model_id: str, | |||||
visibility: int,
| license: str, | |||||
| chinese_name: Optional[str] = None, | |||||
| ) -> str: | |||||
| """ | """ | ||||
| Create model repo at ModelScopeHub | Create model repo at ModelScopeHub | ||||
| Args: | Args: | ||||
model_id (`str`): The model id
| chinese_name(`str`): chinese name of the model | |||||
| visibility(`int`): visibility of the model(1-private, 3-internal, 5-public) | |||||
| license(`str`): license of the model, candidates can be found at: TBA | |||||
visibility(`int`): visibility of the model (1-private, 5-public), defaults to public.
license(`str`): license of the model, defaults to none.
chinese_name(`str`, *optional*): Chinese name of the model.
| Returns: | Returns: | ||||
| name of the model created | name of the model created | ||||
| @@ -79,6 +83,8 @@ class HubApi: | |||||
| model_id = {owner}/{name} | model_id = {owner}/{name} | ||||
| </Tip> | </Tip> | ||||
| """ | """ | ||||
| if model_id is None: | |||||
| raise InvalidParameter('model_id is required!') | |||||
| cookies = ModelScopeConfig.get_cookies() | cookies = ModelScopeConfig.get_cookies() | ||||
| if cookies is None: | if cookies is None: | ||||
| raise ValueError('Token does not exist, please login first.') | raise ValueError('Token does not exist, please login first.') | ||||
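For reference, a minimal usage sketch of the new signature (assumes a prior login so a local cookie exists; the model id and license string below are hypothetical):

from modelscope.hub.api import HubApi

api = HubApi()
# visibility uses the documented integer codes: 1 = private, 5 = public
api.create_model(
    model_id='my-org/my-model',      # hypothetical {owner}/{name}
    visibility=5,
    license='Apache License 2.0',    # hypothetical license string
    chinese_name=None,               # now optional
)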
| @@ -151,11 +157,33 @@ class HubApi: | |||||
| else: | else: | ||||
| r.raise_for_status() | r.raise_for_status() | ||||
| def _check_cookie(self, | |||||
| use_cookies: Union[bool, | |||||
| CookieJar] = False) -> CookieJar: | |||||
| cookies = None | |||||
| if isinstance(use_cookies, CookieJar): | |||||
| cookies = use_cookies | |||||
| elif use_cookies: | |||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| if cookies is None: | |||||
| raise ValueError('Token does not exist, please login first.') | |||||
| return cookies | |||||
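A sketch of how the three `use_cookies` cases resolve (illustrative only, not part of the diff):

# use_cookies=False        -> returns None (anonymous request)
# use_cookies=True         -> loads the cookie from the local config, or raises ValueError
# use_cookies=<CookieJar>  -> returns that jar unchanged
cookies = api._check_cookie(use_cookies=True)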
| def get_model_branches_and_tags( | def get_model_branches_and_tags( | ||||
| self, | self, | ||||
| model_id: str, | model_id: str, | ||||
| use_cookies: Union[bool, CookieJar] = False | |||||
| ) -> Tuple[List[str], List[str]]: | ) -> Tuple[List[str], List[str]]: | ||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| """Get model branch and tags. | |||||
| Args: | |||||
| model_id (str): The model id | |||||
| use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, will | |||||
| will load cookie from local. Defaults to False. | |||||
| Returns: | |||||
| Tuple[List[str], List[str]]: _description_ | |||||
| """ | |||||
| cookies = self._check_cookie(use_cookies) | |||||
| path = f'{self.endpoint}/api/v1/models/{model_id}/revisions' | path = f'{self.endpoint}/api/v1/models/{model_id}/revisions' | ||||
| r = requests.get(path, cookies=cookies) | r = requests.get(path, cookies=cookies) | ||||
| @@ -169,23 +197,33 @@ class HubApi: | |||||
| ] if info['RevisionMap']['Tags'] else [] | ] if info['RevisionMap']['Tags'] else [] | ||||
| return branches, tags | return branches, tags | ||||
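With the new parameter, listing revisions of a private model becomes a one-liner; a sketch (model id hypothetical):

branches, tags = api.get_model_branches_and_tags(
    'my-org/my-model', use_cookies=True)  # True: read the local login cookie
print(branches, tags)                     # e.g. ['master'], ['v1.0']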
| def get_model_files( | |||||
| self, | |||||
| model_id: str, | |||||
| revision: Optional[str] = 'master', | |||||
| root: Optional[str] = None, | |||||
| recursive: Optional[str] = False, | |||||
| use_cookies: Union[bool, CookieJar] = False) -> List[dict]: | |||||
| def get_model_files(self, | |||||
| model_id: str, | |||||
| revision: Optional[str] = 'master', | |||||
| root: Optional[str] = None, | |||||
| recursive: Optional[str] = False, | |||||
| use_cookies: Union[bool, CookieJar] = False, | |||||
| is_snapshot: Optional[bool] = True) -> List[dict]: | |||||
| """List the models files. | |||||
| cookies = None | |||||
| if isinstance(use_cookies, CookieJar): | |||||
| cookies = use_cookies | |||||
| elif use_cookies: | |||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| if cookies is None: | |||||
| raise ValueError('Token does not exist, please login first.') | |||||
Args:
    model_id (str): The model id.
    revision (Optional[str], optional): The branch or tag name. Defaults to 'master'.
    root (Optional[str], optional): The root path. Defaults to None.
    recursive (Optional[str], optional): Whether to list files recursively. Defaults to False.
    use_cookies (Union[bool, CookieJar], optional): If a CookieJar, use it directly;
        if True, load the cookie from the local config. Defaults to False.
    is_snapshot (Optional[bool], optional): True when called from snapshot_download, otherwise False.
| path = f'{self.endpoint}/api/v1/models/{model_id}/repo/files?Revision={revision}&Recursive={recursive}' | |||||
Raises:
    ValueError: If use_cookies is True but there is no local cookie.

Returns:
    List[dict]: The model file list.
"""
| path = '%s/api/v1/models/%s/repo/files?Revision=%s&Recursive=%s&Snapshot=%s' % ( | |||||
| self.endpoint, model_id, revision, recursive, is_snapshot) | |||||
| cookies = self._check_cookie(use_cookies) | |||||
| if root is not None: | if root is not None: | ||||
| path = path + f'&Root={root}' | path = path + f'&Root={root}' | ||||
| @@ -10,6 +10,10 @@ class GitError(Exception): | |||||
| pass | pass | ||||
| class InvalidParameter(Exception): | |||||
| pass | |||||
| def is_ok(rsp): | def is_ok(rsp): | ||||
| """ Check the request is ok | """ Check the request is ok | ||||
| @@ -32,3 +36,18 @@ def raise_on_error(rsp): | |||||
| return True | return True | ||||
| else: | else: | ||||
| raise RequestError(rsp['Message']) | raise RequestError(rsp['Message']) | ||||
# TODO: use raise_on_error instead once the modelhub and datahub responses have a uniform structure.
| def datahub_raise_on_error(url, rsp): | |||||
| """If response error, raise exception | |||||
| Args: | |||||
| rsp (_type_): The server response | |||||
| """ | |||||
| if rsp.get('Code') == 200: | |||||
| return True | |||||
| else: | |||||
| raise RequestError( | |||||
| f"Url = {url}, Status = {rsp.get('status')}, error = {rsp.get('error')}, message = {rsp.get('message')}" | |||||
| ) | |||||
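A sketch of the intended behaviour (the response dicts below are invented for illustration):

ok = {'Code': 200}
datahub_raise_on_error('https://example/api/datasets', ok)    # returns True

bad = {'Code': 400, 'status': 400, 'error': 'Bad Request', 'message': 'invalid id'}
datahub_raise_on_error('https://example/api/datasets', bad)   # raises RequestError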
| @@ -7,6 +7,7 @@ import tempfile | |||||
| import time | import time | ||||
| from functools import partial | from functools import partial | ||||
| from hashlib import sha256 | from hashlib import sha256 | ||||
| from http.cookiejar import CookieJar | |||||
| from pathlib import Path | from pathlib import Path | ||||
| from typing import BinaryIO, Dict, Optional, Union | from typing import BinaryIO, Dict, Optional, Union | ||||
| from uuid import uuid4 | from uuid import uuid4 | ||||
| @@ -107,7 +108,9 @@ def model_file_download( | |||||
| _api = HubApi() | _api = HubApi() | ||||
| headers = {'user-agent': http_user_agent(user_agent=user_agent, )} | headers = {'user-agent': http_user_agent(user_agent=user_agent, )} | ||||
| branches, tags = _api.get_model_branches_and_tags(model_id) | |||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| branches, tags = _api.get_model_branches_and_tags( | |||||
| model_id, use_cookies=False if cookies is None else cookies) | |||||
| file_to_download_info = None | file_to_download_info = None | ||||
| is_commit_id = False | is_commit_id = False | ||||
| if revision in branches or revision in tags: # The revision is version or tag, | if revision in branches or revision in tags: # The revision is version or tag, | ||||
| @@ -117,18 +120,19 @@ def model_file_download( | |||||
| model_id=model_id, | model_id=model_id, | ||||
| revision=revision, | revision=revision, | ||||
| recursive=True, | recursive=True, | ||||
| ) | |||||
| use_cookies=False if cookies is None else cookies, | |||||
| is_snapshot=False) | |||||
| for model_file in model_files: | for model_file in model_files: | ||||
| if model_file['Type'] == 'tree': | if model_file['Type'] == 'tree': | ||||
| continue | continue | ||||
| if model_file['Path'] == file_path: | if model_file['Path'] == file_path: | ||||
| model_file['Branch'] = revision | |||||
| if cache.exists(model_file): | if cache.exists(model_file): | ||||
| return cache.get_file_by_info(model_file) | return cache.get_file_by_info(model_file) | ||||
| else: | else: | ||||
| file_to_download_info = model_file | file_to_download_info = model_file | ||||
| break | |||||
| if file_to_download_info is None: | if file_to_download_info is None: | ||||
| raise NotExistError('The file path: %s not exist in: %s' % | raise NotExistError('The file path: %s not exist in: %s' % | ||||
| @@ -141,8 +145,6 @@ def model_file_download( | |||||
| return cached_file_path # the file is in cache. | return cached_file_path # the file is in cache. | ||||
| is_commit_id = True | is_commit_id = True | ||||
| # we need to download again | # we need to download again | ||||
| # TODO: skip using JWT for authorization, use cookie instead | |||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| url_to_download = get_file_download_url(model_id, file_path, revision) | url_to_download = get_file_download_url(model_id, file_path, revision) | ||||
| file_to_download_info = { | file_to_download_info = { | ||||
| 'Path': file_path, | 'Path': file_path, | ||||
| @@ -202,7 +204,7 @@ def http_get_file( | |||||
| url: str, | url: str, | ||||
| local_dir: str, | local_dir: str, | ||||
| file_name: str, | file_name: str, | ||||
| cookies: Dict[str, str], | |||||
| cookies: CookieJar, | |||||
| headers: Optional[Dict[str, str]] = None, | headers: Optional[Dict[str, str]] = None, | ||||
| ): | ): | ||||
| """ | """ | ||||
| @@ -217,7 +219,7 @@ def http_get_file( | |||||
| local directory where the downloaded file stores | local directory where the downloaded file stores | ||||
| file_name(`str`): | file_name(`str`): | ||||
| name of the file stored in `local_dir` | name of the file stored in `local_dir` | ||||
| cookies(`Dict[str, str]`): | |||||
| cookies(`CookieJar`): | |||||
| cookies used to authentication the user, which is used for downloading private repos | cookies used to authentication the user, which is used for downloading private repos | ||||
| headers(`Optional[Dict[str, str]] = None`): | headers(`Optional[Dict[str, str]] = None`): | ||||
| http headers to carry necessary info when requesting the remote file | http headers to carry necessary info when requesting the remote file | ||||
| @@ -70,6 +70,14 @@ class GitCommandWrapper(metaclass=Singleton): | |||||
| except GitError: | except GitError: | ||||
| return False | return False | ||||
| def git_lfs_install(self, repo_dir): | |||||
| cmd = ['git', '-C', repo_dir, 'lfs', 'install'] | |||||
| try: | |||||
| self._run_git_command(*cmd) | |||||
| return True | |||||
| except GitError: | |||||
| return False | |||||
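The wrapper is equivalent to running `git -C <repo_dir> lfs install` by hand; a usage sketch (path hypothetical):

ok = GitCommandWrapper().git_lfs_install('/path/to/repo')  # False if the command fails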
| def clone(self, | def clone(self, | ||||
| repo_base_dir: str, | repo_base_dir: str, | ||||
| token: str, | token: str, | ||||
| @@ -1,7 +1,7 @@ | |||||
| import os | import os | ||||
| from typing import List, Optional | from typing import List, Optional | ||||
| from modelscope.hub.errors import GitError | |||||
| from modelscope.hub.errors import GitError, InvalidParameter | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from .api import ModelScopeConfig | from .api import ModelScopeConfig | ||||
| from .constants import MODELSCOPE_URL_SCHEME | from .constants import MODELSCOPE_URL_SCHEME | ||||
| @@ -49,6 +49,8 @@ class Repository: | |||||
| git_wrapper = GitCommandWrapper() | git_wrapper = GitCommandWrapper() | ||||
| if not git_wrapper.is_lfs_installed(): | if not git_wrapper.is_lfs_installed(): | ||||
| logger.error('git lfs is not installed, please install.') | logger.error('git lfs is not installed, please install.') | ||||
| else: | |||||
| git_wrapper.git_lfs_install(self.model_dir) # init repo lfs | |||||
| self.git_wrapper = GitCommandWrapper(git_path) | self.git_wrapper = GitCommandWrapper(git_path) | ||||
| os.makedirs(self.model_dir, exist_ok=True) | os.makedirs(self.model_dir, exist_ok=True) | ||||
| @@ -74,8 +76,6 @@ class Repository: | |||||
| def push(self, | def push(self, | ||||
| commit_message: str, | commit_message: str, | ||||
| files: List[str] = list(), | |||||
| all_files: bool = False, | |||||
| branch: Optional[str] = 'master', | branch: Optional[str] = 'master', | ||||
| force: bool = False): | force: bool = False): | ||||
| """Push local to remote, this method will do. | """Push local to remote, this method will do. | ||||
| @@ -86,8 +86,12 @@ class Repository: | |||||
| commit_message (str): commit message | commit_message (str): commit message | ||||
branch (Optional[str], optional): which branch to push. Defaults to 'master'.
| """ | """ | ||||
| if commit_message is None: | |||||
| msg = 'commit_message must be provided!' | |||||
| raise InvalidParameter(msg) | |||||
| url = self.git_wrapper.get_repo_remote_url(self.model_dir) | url = self.git_wrapper.get_repo_remote_url(self.model_dir) | ||||
| self.git_wrapper.add(self.model_dir, files, all_files) | |||||
| self.git_wrapper.pull(self.model_dir) | |||||
| self.git_wrapper.add(self.model_dir, all_files=True) | |||||
| self.git_wrapper.commit(self.model_dir, commit_message) | self.git_wrapper.commit(self.model_dir, commit_message) | ||||
| self.git_wrapper.push( | self.git_wrapper.push( | ||||
| repo_dir=self.model_dir, | repo_dir=self.model_dir, | ||||
| @@ -20,8 +20,7 @@ def snapshot_download(model_id: str, | |||||
| revision: Optional[str] = 'master', | revision: Optional[str] = 'master', | ||||
| cache_dir: Union[str, Path, None] = None, | cache_dir: Union[str, Path, None] = None, | ||||
| user_agent: Optional[Union[Dict, str]] = None, | user_agent: Optional[Union[Dict, str]] = None, | ||||
| local_files_only: Optional[bool] = False, | |||||
| private: Optional[bool] = False) -> str: | |||||
| local_files_only: Optional[bool] = False) -> str: | |||||
| """Download all files of a repo. | """Download all files of a repo. | ||||
| Downloads a whole snapshot of a repo's files at the specified revision. This | Downloads a whole snapshot of a repo's files at the specified revision. This | ||||
| is useful when you want all files from a repo, because you don't know which | is useful when you want all files from a repo, because you don't know which | ||||
| @@ -79,8 +78,10 @@ def snapshot_download(model_id: str, | |||||
| # make headers | # make headers | ||||
| headers = {'user-agent': http_user_agent(user_agent=user_agent, )} | headers = {'user-agent': http_user_agent(user_agent=user_agent, )} | ||||
| _api = HubApi() | _api = HubApi() | ||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| # get file list from model repo | # get file list from model repo | ||||
| branches, tags = _api.get_model_branches_and_tags(model_id) | |||||
| branches, tags = _api.get_model_branches_and_tags( | |||||
| model_id, use_cookies=False if cookies is None else cookies) | |||||
| if revision not in branches and revision not in tags: | if revision not in branches and revision not in tags: | ||||
| raise NotExistError('The specified branch or tag : %s not exist!' | raise NotExistError('The specified branch or tag : %s not exist!' | ||||
| % revision) | % revision) | ||||
| @@ -89,11 +90,8 @@ def snapshot_download(model_id: str, | |||||
| model_id=model_id, | model_id=model_id, | ||||
| revision=revision, | revision=revision, | ||||
| recursive=True, | recursive=True, | ||||
| use_cookies=private) | |||||
| cookies = None | |||||
| if private: | |||||
| cookies = ModelScopeConfig.get_cookies() | |||||
| use_cookies=False if cookies is None else cookies, | |||||
| is_snapshot=True) | |||||
| for model_file in model_files: | for model_file in model_files: | ||||
| if model_file['Type'] == 'tree': | if model_file['Type'] == 'tree': | ||||
| @@ -116,7 +114,7 @@ def snapshot_download(model_id: str, | |||||
| local_dir=tempfile.gettempdir(), | local_dir=tempfile.gettempdir(), | ||||
| file_name=model_file['Name'], | file_name=model_file['Name'], | ||||
| headers=headers, | headers=headers, | ||||
| cookies=None if cookies is None else cookies.get_dict()) | |||||
| cookies=cookies) | |||||
| # put file to cache | # put file to cache | ||||
| cache.put_file( | cache.put_file( | ||||
| model_file, | model_file, | ||||
| @@ -101,8 +101,9 @@ class FileSystemCache(object): | |||||
| Args: | Args: | ||||
| key (dict): The cache key. | key (dict): The cache key. | ||||
| """ | """ | ||||
| self.cached_files.remove(key) | |||||
| self.save_cached_files() | |||||
| if key in self.cached_files: | |||||
| self.cached_files.remove(key) | |||||
| self.save_cached_files() | |||||
| def exists(self, key): | def exists(self, key): | ||||
| for cache_file in self.cached_files: | for cache_file in self.cached_files: | ||||
| @@ -204,6 +205,7 @@ class ModelFileSystemCache(FileSystemCache): | |||||
| return orig_path | return orig_path | ||||
| else: | else: | ||||
| self.remove_key(cached_file) | self.remove_key(cached_file) | ||||
| break | |||||
| return None | return None | ||||
| @@ -230,6 +232,7 @@ class ModelFileSystemCache(FileSystemCache): | |||||
| cached_key['Revision'].startswith(key['Revision']) | cached_key['Revision'].startswith(key['Revision']) | ||||
| or key['Revision'].startswith(cached_key['Revision'])): | or key['Revision'].startswith(cached_key['Revision'])): | ||||
| is_exists = True | is_exists = True | ||||
| break | |||||
| file_path = os.path.join(self.cache_root_location, | file_path = os.path.join(self.cache_root_location, | ||||
| model_file_info['Path']) | model_file_info['Path']) | ||||
| if is_exists: | if is_exists: | ||||
| @@ -253,6 +256,7 @@ class ModelFileSystemCache(FileSystemCache): | |||||
| cached_file['Path']) | cached_file['Path']) | ||||
| if os.path.exists(file_path): | if os.path.exists(file_path): | ||||
| os.remove(file_path) | os.remove(file_path) | ||||
| break | |||||
| def put_file(self, model_file_info, model_file_location): | def put_file(self, model_file_info, model_file_location): | ||||
| """Put model on model_file_location to cache, the model first download to /tmp, and move to cache. | """Put model on model_file_location to cache, the model first download to /tmp, and move to cache. | ||||
| @@ -21,11 +21,13 @@ class Models(object): | |||||
| sambert_hifi_16k = 'sambert-hifi-16k' | sambert_hifi_16k = 'sambert-hifi-16k' | ||||
| generic_tts_frontend = 'generic-tts-frontend' | generic_tts_frontend = 'generic-tts-frontend' | ||||
| hifigan16k = 'hifigan16k' | hifigan16k = 'hifigan16k' | ||||
| speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' | |||||
| kws_kwsbp = 'kws-kwsbp' | kws_kwsbp = 'kws-kwsbp' | ||||
| # multi-modal models | # multi-modal models | ||||
| ofa = 'ofa' | ofa = 'ofa' | ||||
| clip = 'clip-multi-modal-embedding' | clip = 'clip-multi-modal-embedding' | ||||
| mplug = 'mplug' | |||||
| class Pipelines(object): | class Pipelines(object): | ||||
| @@ -43,6 +45,7 @@ class Pipelines(object): | |||||
| person_image_cartoon = 'unet-person-image-cartoon' | person_image_cartoon = 'unet-person-image-cartoon' | ||||
| ocr_detection = 'resnet18-ocr-detection' | ocr_detection = 'resnet18-ocr-detection' | ||||
| action_recognition = 'TAdaConv_action-recognition' | action_recognition = 'TAdaConv_action-recognition' | ||||
| animal_recognation = 'resnet101-animal_recog' | |||||
| # nlp tasks | # nlp tasks | ||||
| sentence_similarity = 'sentence-similarity' | sentence_similarity = 'sentence-similarity' | ||||
| @@ -55,15 +58,18 @@ class Pipelines(object): | |||||
| dialog_intent_prediction = 'dialog-intent-prediction' | dialog_intent_prediction = 'dialog-intent-prediction' | ||||
| dialog_modeling = 'dialog-modeling' | dialog_modeling = 'dialog-modeling' | ||||
| dialog_state_tracking = 'dialog-state-tracking' | dialog_state_tracking = 'dialog-state-tracking' | ||||
| zero_shot_classification = 'zero-shot-classification' | |||||
| # audio tasks | # audio tasks | ||||
| sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts' | sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts' | ||||
| speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k' | speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k' | ||||
| speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' | |||||
| kws_kwsbp = 'kws-kwsbp' | kws_kwsbp = 'kws-kwsbp' | ||||
| # multi-modal tasks | # multi-modal tasks | ||||
| image_caption = 'image-caption' | image_caption = 'image-caption' | ||||
| multi_modal_embedding = 'multi-modal-embedding' | multi_modal_embedding = 'multi-modal-embedding' | ||||
| visual_question_answering = 'visual-question-answering' | |||||
| class Trainers(object): | class Trainers(object): | ||||
| @@ -99,6 +105,8 @@ class Preprocessors(object): | |||||
| token_cls_tokenizer = 'token-cls-tokenizer' | token_cls_tokenizer = 'token-cls-tokenizer' | ||||
| nli_tokenizer = 'nli-tokenizer' | nli_tokenizer = 'nli-tokenizer' | ||||
| sen_cls_tokenizer = 'sen-cls-tokenizer' | sen_cls_tokenizer = 'sen-cls-tokenizer' | ||||
| sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer' | |||||
| zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer' | |||||
| # audio preprocessor | # audio preprocessor | ||||
| linear_aec_fbank = 'linear-aec-fbank' | linear_aec_fbank = 'linear-aec-fbank' | ||||
| @@ -107,3 +115,4 @@ class Preprocessors(object): | |||||
| # multi-modal | # multi-modal | ||||
| ofa_image_caption = 'ofa-image-caption' | ofa_image_caption = 'ofa-image-caption' | ||||
| mplug_visual_question_answering = 'mplug-visual-question-answering' | |||||
| @@ -1,12 +1,15 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | # Copyright (c) Alibaba, Inc. and its affiliates. | ||||
| from .audio.ans.frcrn import FRCRNModel | |||||
| from .audio.kws import GenericKeyWordSpotting | from .audio.kws import GenericKeyWordSpotting | ||||
| from .audio.tts.am import SambertNetHifi16k | from .audio.tts.am import SambertNetHifi16k | ||||
| from .audio.tts.vocoder import Hifigan16k | from .audio.tts.vocoder import Hifigan16k | ||||
| from .base import Model | from .base import Model | ||||
| from .builder import MODELS, build_model | from .builder import MODELS, build_model | ||||
| from .multi_modal import OfaForImageCaptioning | from .multi_modal import OfaForImageCaptioning | ||||
| from .nlp import (BertForSequenceClassification, SbertForNLI, | |||||
| from .nlp import (BertForMaskedLM, BertForSequenceClassification, SbertForNLI, | |||||
| SbertForSentenceSimilarity, SbertForSentimentClassification, | SbertForSentenceSimilarity, SbertForSentimentClassification, | ||||
| SbertForTokenClassification, StructBertForMaskedLM, | |||||
| SbertForTokenClassification, SpaceForDialogIntentModel, | |||||
| SpaceForDialogModelingModel, | |||||
| SpaceForDialogStateTrackingModel, StructBertForMaskedLM, | |||||
| VecoForMaskedLM) | VecoForMaskedLM) | ||||
| @@ -0,0 +1,248 @@ | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.functional as F | |||||
| class UniDeepFsmn(nn.Module): | |||||
| def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None): | |||||
| super(UniDeepFsmn, self).__init__() | |||||
| self.input_dim = input_dim | |||||
| self.output_dim = output_dim | |||||
| if lorder is None: | |||||
| return | |||||
| self.lorder = lorder | |||||
| self.hidden_size = hidden_size | |||||
| self.linear = nn.Linear(input_dim, hidden_size) | |||||
| self.project = nn.Linear(hidden_size, output_dim, bias=False) | |||||
| self.conv1 = nn.Conv2d( | |||||
| output_dim, | |||||
| output_dim, [lorder, 1], [1, 1], | |||||
| groups=output_dim, | |||||
| bias=False) | |||||
| def forward(self, input): | |||||
| r""" | |||||
| Args: | |||||
| input: torch with shape: batch (b) x sequence(T) x feature (h) | |||||
| Returns: | |||||
| batch (b) x channel (c) x sequence(T) x feature (h) | |||||
| """ | |||||
| f1 = F.relu(self.linear(input)) | |||||
| p1 = self.project(f1) | |||||
| x = torch.unsqueeze(p1, 1) | |||||
| # x: batch (b) x channel (c) x sequence(T) x feature (h) | |||||
| x_per = x.permute(0, 3, 2, 1) | |||||
| # x_per: batch (b) x feature (h) x sequence(T) x channel (c) | |||||
| y = F.pad(x_per, [0, 0, self.lorder - 1, 0]) | |||||
| out = x_per + self.conv1(y) | |||||
| out1 = out.permute(0, 3, 2, 1) | |||||
| # out1: batch (b) x channel (c) x sequence(T) x feature (h) | |||||
| return input + out1.squeeze() | |||||
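A quick shape sanity check for the FSMN block (a sketch; dimensions arbitrary, and input_dim must equal output_dim for the residual add to work):

import torch

fsmn = UniDeepFsmn(input_dim=256, output_dim=256, lorder=20, hidden_size=128)
x = torch.randn(6, 106, 256)   # batch x sequence x feature
y = fsmn(x)
assert y.shape == x.shape      # residual output keeps batch x sequence x feature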
| class ComplexUniDeepFsmn(nn.Module): | |||||
| def __init__(self, nIn, nHidden=128, nOut=128): | |||||
| super(ComplexUniDeepFsmn, self).__init__() | |||||
| self.fsmn_re_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) | |||||
| self.fsmn_im_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) | |||||
| self.fsmn_re_L2 = UniDeepFsmn(nHidden, nOut, 20, nHidden) | |||||
| self.fsmn_im_L2 = UniDeepFsmn(nHidden, nOut, 20, nHidden) | |||||
| def forward(self, x): | |||||
| r""" | |||||
| Args: | |||||
| x: torch with shape [batch, channel, feature, sequence, 2], eg: [6, 256, 1, 106, 2] | |||||
| Returns: | |||||
| [batch, feature, sequence, 2], eg: [6, 99, 1024, 2] | |||||
| """ | |||||
| # | |||||
| b, c, h, T, d = x.size() | |||||
| x = torch.reshape(x, (b, c * h, T, d)) | |||||
| # x: [b,h,T,2], [6, 256, 106, 2] | |||||
| x = torch.transpose(x, 1, 2) | |||||
| # x: [b,T,h,2], [6, 106, 256, 2] | |||||
| real_L1 = self.fsmn_re_L1(x[..., 0]) - self.fsmn_im_L1(x[..., 1]) | |||||
| imaginary_L1 = self.fsmn_re_L1(x[..., 1]) + self.fsmn_im_L1(x[..., 0]) | |||||
# second complex FSMN layer
| real = self.fsmn_re_L2(real_L1) - self.fsmn_im_L2(imaginary_L1) | |||||
| imaginary = self.fsmn_re_L2(imaginary_L1) + self.fsmn_im_L2(real_L1) | |||||
| # output: [b,T,h,2], [99, 6, 1024, 2] | |||||
| output = torch.stack((real, imaginary), dim=-1) | |||||
| # output: [b,h,T,2], [6, 99, 1024, 2] | |||||
| output = torch.transpose(output, 1, 2) | |||||
| output = torch.reshape(output, (b, c, h, T, d)) | |||||
| return output | |||||
| class ComplexUniDeepFsmn_L1(nn.Module): | |||||
| def __init__(self, nIn, nHidden=128, nOut=128): | |||||
| super(ComplexUniDeepFsmn_L1, self).__init__() | |||||
| self.fsmn_re_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) | |||||
| self.fsmn_im_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) | |||||
| def forward(self, x): | |||||
| r""" | |||||
| Args: | |||||
| x: torch with shape [batch, channel, feature, sequence, 2], eg: [6, 256, 1, 106, 2] | |||||
| """ | |||||
| b, c, h, T, d = x.size() | |||||
| # x : [b,T,h,c,2] | |||||
| x = torch.transpose(x, 1, 3) | |||||
| x = torch.reshape(x, (b * T, h, c, d)) | |||||
| real = self.fsmn_re_L1(x[..., 0]) - self.fsmn_im_L1(x[..., 1]) | |||||
| imaginary = self.fsmn_re_L1(x[..., 1]) + self.fsmn_im_L1(x[..., 0]) | |||||
| # output: [b*T,h,c,2], [6*106, h, 256, 2] | |||||
| output = torch.stack((real, imaginary), dim=-1) | |||||
| output = torch.reshape(output, (b, T, h, c, d)) | |||||
| output = torch.transpose(output, 1, 3) | |||||
| return output | |||||
| class ComplexConv2d(nn.Module): | |||||
| # https://github.com/litcoderr/ComplexCNN/blob/master/complexcnn/modules.py | |||||
| def __init__(self, | |||||
| in_channel, | |||||
| out_channel, | |||||
| kernel_size, | |||||
| stride=1, | |||||
| padding=0, | |||||
| dilation=1, | |||||
| groups=1, | |||||
| bias=True, | |||||
| **kwargs): | |||||
| super().__init__() | |||||
| # Model components | |||||
| self.conv_re = nn.Conv2d( | |||||
| in_channel, | |||||
| out_channel, | |||||
| kernel_size, | |||||
| stride=stride, | |||||
| padding=padding, | |||||
| dilation=dilation, | |||||
| groups=groups, | |||||
| bias=bias, | |||||
| **kwargs) | |||||
| self.conv_im = nn.Conv2d( | |||||
| in_channel, | |||||
| out_channel, | |||||
| kernel_size, | |||||
| stride=stride, | |||||
| padding=padding, | |||||
| dilation=dilation, | |||||
| groups=groups, | |||||
| bias=bias, | |||||
| **kwargs) | |||||
| def forward(self, x): | |||||
| r""" | |||||
| Args: | |||||
| x: torch with shape: [batch,channel,axis1,axis2,2] | |||||
| """ | |||||
| real = self.conv_re(x[..., 0]) - self.conv_im(x[..., 1]) | |||||
| imaginary = self.conv_re(x[..., 1]) + self.conv_im(x[..., 0]) | |||||
| output = torch.stack((real, imaginary), dim=-1) | |||||
| return output | |||||
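The forward realizes the complex product (a+bi)(c+di) = (ac-bd) + (ad+bc)i with two real convolutions; a shape sketch:

import torch

conv = ComplexConv2d(in_channel=1, out_channel=4, kernel_size=3, padding=1)
x = torch.randn(2, 1, 16, 16, 2)   # [batch, channel, H, W, 2], real/imag in the last dim
assert conv(x).shape == (2, 4, 16, 16, 2)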
| class ComplexConvTranspose2d(nn.Module): | |||||
| def __init__(self, | |||||
| in_channel, | |||||
| out_channel, | |||||
| kernel_size, | |||||
| stride=1, | |||||
| padding=0, | |||||
| output_padding=0, | |||||
| dilation=1, | |||||
| groups=1, | |||||
| bias=True, | |||||
| **kwargs): | |||||
| super().__init__() | |||||
| # Model components | |||||
| self.tconv_re = nn.ConvTranspose2d( | |||||
| in_channel, | |||||
| out_channel, | |||||
| kernel_size=kernel_size, | |||||
| stride=stride, | |||||
| padding=padding, | |||||
| output_padding=output_padding, | |||||
| groups=groups, | |||||
| bias=bias, | |||||
| dilation=dilation, | |||||
| **kwargs) | |||||
| self.tconv_im = nn.ConvTranspose2d( | |||||
| in_channel, | |||||
| out_channel, | |||||
| kernel_size=kernel_size, | |||||
| stride=stride, | |||||
| padding=padding, | |||||
| output_padding=output_padding, | |||||
| groups=groups, | |||||
| bias=bias, | |||||
| dilation=dilation, | |||||
| **kwargs) | |||||
def forward(self, x):  # shape of x: [batch, channel, axis1, axis2, 2]
| real = self.tconv_re(x[..., 0]) - self.tconv_im(x[..., 1]) | |||||
| imaginary = self.tconv_re(x[..., 1]) + self.tconv_im(x[..., 0]) | |||||
| output = torch.stack((real, imaginary), dim=-1) | |||||
| return output | |||||
| class ComplexBatchNorm2d(nn.Module): | |||||
| def __init__(self, | |||||
| num_features, | |||||
| eps=1e-5, | |||||
| momentum=0.1, | |||||
| affine=True, | |||||
| track_running_stats=True, | |||||
| **kwargs): | |||||
| super().__init__() | |||||
| self.bn_re = nn.BatchNorm2d( | |||||
| num_features=num_features, | |||||
| momentum=momentum, | |||||
| affine=affine, | |||||
| eps=eps, | |||||
| track_running_stats=track_running_stats, | |||||
| **kwargs) | |||||
| self.bn_im = nn.BatchNorm2d( | |||||
| num_features=num_features, | |||||
| momentum=momentum, | |||||
| affine=affine, | |||||
| eps=eps, | |||||
| track_running_stats=track_running_stats, | |||||
| **kwargs) | |||||
| def forward(self, x): | |||||
| real = self.bn_re(x[..., 0]) | |||||
| imag = self.bn_im(x[..., 1]) | |||||
| output = torch.stack((real, imag), dim=-1) | |||||
| return output | |||||
| @@ -0,0 +1,112 @@ | |||||
| import numpy as np | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.functional as F | |||||
| from scipy.signal import get_window | |||||
| def init_kernels(win_len, win_inc, fft_len, win_type=None, invers=False): | |||||
| if win_type == 'None' or win_type is None: | |||||
| window = np.ones(win_len) | |||||
| else: | |||||
| window = get_window(win_type, win_len, fftbins=True)**0.5 | |||||
| N = fft_len | |||||
| fourier_basis = np.fft.rfft(np.eye(N))[:win_len] | |||||
| real_kernel = np.real(fourier_basis) | |||||
| imag_kernel = np.imag(fourier_basis) | |||||
| kernel = np.concatenate([real_kernel, imag_kernel], 1).T | |||||
| if invers: | |||||
| kernel = np.linalg.pinv(kernel).T | |||||
| kernel = kernel * window | |||||
| kernel = kernel[:, None, :] | |||||
| return torch.from_numpy(kernel.astype(np.float32)), torch.from_numpy( | |||||
| window[None, :, None].astype(np.float32)) | |||||
| class ConvSTFT(nn.Module): | |||||
| def __init__(self, | |||||
| win_len, | |||||
| win_inc, | |||||
| fft_len=None, | |||||
| win_type='hamming', | |||||
| feature_type='real', | |||||
| fix=True): | |||||
| super(ConvSTFT, self).__init__() | |||||
| if fft_len is None: | |||||
self.fft_len = int(2**np.ceil(np.log2(win_len)))
| else: | |||||
| self.fft_len = fft_len | |||||
| kernel, _ = init_kernels(win_len, win_inc, self.fft_len, win_type) | |||||
| self.weight = nn.Parameter(kernel, requires_grad=(not fix)) | |||||
| self.feature_type = feature_type | |||||
| self.stride = win_inc | |||||
| self.win_len = win_len | |||||
| self.dim = self.fft_len | |||||
| def forward(self, inputs): | |||||
| if inputs.dim() == 2: | |||||
| inputs = torch.unsqueeze(inputs, 1) | |||||
| outputs = F.conv1d(inputs, self.weight, stride=self.stride) | |||||
| if self.feature_type == 'complex': | |||||
| return outputs | |||||
| else: | |||||
| dim = self.dim // 2 + 1 | |||||
| real = outputs[:, :dim, :] | |||||
| imag = outputs[:, dim:, :] | |||||
| mags = torch.sqrt(real**2 + imag**2) | |||||
| phase = torch.atan2(imag, real) | |||||
| return mags, phase | |||||
| class ConviSTFT(nn.Module): | |||||
| def __init__(self, | |||||
| win_len, | |||||
| win_inc, | |||||
| fft_len=None, | |||||
| win_type='hamming', | |||||
| feature_type='real', | |||||
| fix=True): | |||||
| super(ConviSTFT, self).__init__() | |||||
| if fft_len is None: | |||||
self.fft_len = int(2**np.ceil(np.log2(win_len)))
| else: | |||||
| self.fft_len = fft_len | |||||
| kernel, window = init_kernels( | |||||
| win_len, win_inc, self.fft_len, win_type, invers=True) | |||||
| self.weight = nn.Parameter(kernel, requires_grad=(not fix)) | |||||
| self.feature_type = feature_type | |||||
| self.win_type = win_type | |||||
| self.win_len = win_len | |||||
| self.win_inc = win_inc | |||||
| self.stride = win_inc | |||||
| self.dim = self.fft_len | |||||
| self.register_buffer('window', window) | |||||
| self.register_buffer('enframe', torch.eye(win_len)[:, None, :]) | |||||
| def forward(self, inputs, phase=None): | |||||
| """ | |||||
| Args: | |||||
| inputs : [B, N+2, T] (complex spec) or [B, N//2+1, T] (mags) | |||||
| phase: [B, N//2+1, T] (if not none) | |||||
| """ | |||||
| if phase is not None: | |||||
| real = inputs * torch.cos(phase) | |||||
| imag = inputs * torch.sin(phase) | |||||
| inputs = torch.cat([real, imag], 1) | |||||
| outputs = F.conv_transpose1d(inputs, self.weight, stride=self.stride) | |||||
| # this is from torch-stft: https://github.com/pseeth/torch-stft | |||||
| t = self.window.repeat(1, 1, inputs.size(-1))**2 | |||||
| coff = F.conv_transpose1d(t, self.enframe, stride=self.stride) | |||||
| outputs = outputs / (coff + 1e-8) | |||||
| return outputs | |||||
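Analysis and synthesis are meant to be approximate inverses; a round-trip sketch using the framing parameters FRCRN defaults to (win_len=400, win_inc=100, fft_len=512):

import torch

stft = ConvSTFT(400, 100, 512, win_type='hanning', feature_type='complex')
istft = ConviSTFT(400, 100, 512, win_type='hanning', feature_type='complex')
wav = torch.randn(1, 16000)    # one second at 16 kHz
spec = stft(wav)               # [B, fft_len + 2, T], real and imaginary halves stacked
rec = istft(spec)              # reconstruction, up to windowing/normalization error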
| @@ -0,0 +1,309 @@ | |||||
| import os | |||||
| from typing import Dict | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| import torch.nn.functional as F | |||||
| from modelscope.metainfo import Models | |||||
| from modelscope.models.builder import MODELS | |||||
| from modelscope.utils.constant import ModelFile, Tasks | |||||
| from ...base import Model, Tensor | |||||
| from .conv_stft import ConviSTFT, ConvSTFT | |||||
| from .unet import UNet | |||||
| class FTB(nn.Module): | |||||
| def __init__(self, input_dim=257, in_channel=9, r_channel=5): | |||||
| super(FTB, self).__init__() | |||||
| self.in_channel = in_channel | |||||
| self.conv1 = nn.Sequential( | |||||
| nn.Conv2d(in_channel, r_channel, kernel_size=[1, 1]), | |||||
| nn.BatchNorm2d(r_channel), nn.ReLU()) | |||||
| self.conv1d = nn.Sequential( | |||||
| nn.Conv1d( | |||||
| r_channel * input_dim, in_channel, kernel_size=9, padding=4), | |||||
| nn.BatchNorm1d(in_channel), nn.ReLU()) | |||||
| self.freq_fc = nn.Linear(input_dim, input_dim, bias=False) | |||||
| self.conv2 = nn.Sequential( | |||||
| nn.Conv2d(in_channel * 2, in_channel, kernel_size=[1, 1]), | |||||
| nn.BatchNorm2d(in_channel), nn.ReLU()) | |||||
| def forward(self, inputs): | |||||
| ''' | |||||
| inputs should be [Batch, Ca, Dim, Time] | |||||
| ''' | |||||
| # T-F attention | |||||
| conv1_out = self.conv1(inputs) | |||||
| B, C, D, T = conv1_out.size() | |||||
| reshape1_out = torch.reshape(conv1_out, [B, C * D, T]) | |||||
| conv1d_out = self.conv1d(reshape1_out) | |||||
| conv1d_out = torch.reshape(conv1d_out, [B, self.in_channel, 1, T]) | |||||
| # now is also [B,C,D,T] | |||||
| att_out = conv1d_out * inputs | |||||
# transpose to [B,C,T,D]
| att_out = torch.transpose(att_out, 2, 3) | |||||
| freqfc_out = self.freq_fc(att_out) | |||||
| att_out = torch.transpose(freqfc_out, 2, 3) | |||||
| cat_out = torch.cat([att_out, inputs], 1) | |||||
| outputs = self.conv2(cat_out) | |||||
| return outputs | |||||
| @MODELS.register_module( | |||||
| Tasks.speech_signal_process, module_name=Models.speech_frcrn_ans_cirm_16k) | |||||
| class FRCRNModel(Model): | |||||
| r""" A decorator of FRCRN for integrating into modelscope framework """ | |||||
| def __init__(self, model_dir: str, *args, **kwargs): | |||||
| """initialize the frcrn model from the `model_dir` path. | |||||
| Args: | |||||
| model_dir (str): the model path. | |||||
| """ | |||||
| super().__init__(model_dir, *args, **kwargs) | |||||
| self._model = FRCRN(*args, **kwargs) | |||||
| model_bin_file = os.path.join(model_dir, | |||||
| ModelFile.TORCH_MODEL_BIN_FILE) | |||||
| if os.path.exists(model_bin_file): | |||||
| checkpoint = torch.load(model_bin_file) | |||||
| self._model.load_state_dict(checkpoint, strict=False) | |||||
| def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||||
| output = self._model.forward(input) | |||||
| return { | |||||
| 'spec_l1': output[0], | |||||
| 'wav_l1': output[1], | |||||
| 'mask_l1': output[2], | |||||
| 'spec_l2': output[3], | |||||
| 'wav_l2': output[4], | |||||
| 'mask_l2': output[5] | |||||
| } | |||||
| def to(self, *args, **kwargs): | |||||
| self._model = self._model.to(*args, **kwargs) | |||||
| return self | |||||
| def eval(self): | |||||
| self._model = self._model.train(False) | |||||
| return self | |||||
| class FRCRN(nn.Module): | |||||
| r""" Frequency Recurrent CRN """ | |||||
| def __init__(self, | |||||
| complex, | |||||
| model_complexity, | |||||
| model_depth, | |||||
| log_amp, | |||||
| padding_mode, | |||||
| win_len=400, | |||||
| win_inc=100, | |||||
| fft_len=512, | |||||
| win_type='hanning'): | |||||
| r""" | |||||
| Args: | |||||
| complex: Whether to use complex networks. | |||||
| model_complexity: define the model complexity with the number of layers | |||||
| model_depth: Only two options are available : 10, 20 | |||||
| log_amp: Whether to use log amplitude to estimate signals | |||||
| padding_mode: Encoder's convolution filter. 'zeros', 'reflect' | |||||
| win_len: length of window used for defining one frame of sample points | |||||
| win_inc: length of window shifting (equivalent to hop_size) | |||||
| fft_len: number of Short Time Fourier Transform (STFT) points | |||||
| win_type: windowing type used in STFT, eg. 'hanning', 'hamming' | |||||
| """ | |||||
| super().__init__() | |||||
| self.feat_dim = fft_len // 2 + 1 | |||||
| self.win_len = win_len | |||||
| self.win_inc = win_inc | |||||
| self.fft_len = fft_len | |||||
| self.win_type = win_type | |||||
| fix = True | |||||
| self.stft = ConvSTFT( | |||||
| self.win_len, | |||||
| self.win_inc, | |||||
| self.fft_len, | |||||
| self.win_type, | |||||
| feature_type='complex', | |||||
| fix=fix) | |||||
| self.istft = ConviSTFT( | |||||
| self.win_len, | |||||
| self.win_inc, | |||||
| self.fft_len, | |||||
| self.win_type, | |||||
| feature_type='complex', | |||||
| fix=fix) | |||||
| self.unet = UNet( | |||||
| 1, | |||||
| complex=complex, | |||||
| model_complexity=model_complexity, | |||||
| model_depth=model_depth, | |||||
| padding_mode=padding_mode) | |||||
| self.unet2 = UNet( | |||||
| 1, | |||||
| complex=complex, | |||||
| model_complexity=model_complexity, | |||||
| model_depth=model_depth, | |||||
| padding_mode=padding_mode) | |||||
| def forward(self, inputs): | |||||
| out_list = [] | |||||
| # [B, D*2, T] | |||||
| cmp_spec = self.stft(inputs) | |||||
| # [B, 1, D*2, T] | |||||
| cmp_spec = torch.unsqueeze(cmp_spec, 1) | |||||
| # to [B, 2, D, T] real_part/imag_part | |||||
| cmp_spec = torch.cat([ | |||||
| cmp_spec[:, :, :self.feat_dim, :], | |||||
| cmp_spec[:, :, self.feat_dim:, :], | |||||
| ], 1) | |||||
| # [B, 2, D, T] | |||||
| cmp_spec = torch.unsqueeze(cmp_spec, 4) | |||||
| # [B, 1, D, T, 2] | |||||
| cmp_spec = torch.transpose(cmp_spec, 1, 4) | |||||
| unet1_out = self.unet(cmp_spec) | |||||
| cmp_mask1 = torch.tanh(unet1_out) | |||||
| unet2_out = self.unet2(unet1_out) | |||||
| cmp_mask2 = torch.tanh(unet2_out) | |||||
| est_spec, est_wav, est_mask = self.apply_mask(cmp_spec, cmp_mask1) | |||||
| out_list.append(est_spec) | |||||
| out_list.append(est_wav) | |||||
| out_list.append(est_mask) | |||||
| cmp_mask2 = cmp_mask2 + cmp_mask1 | |||||
| est_spec, est_wav, est_mask = self.apply_mask(cmp_spec, cmp_mask2) | |||||
| out_list.append(est_spec) | |||||
| out_list.append(est_wav) | |||||
| out_list.append(est_mask) | |||||
| return out_list | |||||
| def apply_mask(self, cmp_spec, cmp_mask): | |||||
| est_spec = torch.cat([ | |||||
| cmp_spec[:, :, :, :, 0] * cmp_mask[:, :, :, :, 0] | |||||
| - cmp_spec[:, :, :, :, 1] * cmp_mask[:, :, :, :, 1], | |||||
| cmp_spec[:, :, :, :, 0] * cmp_mask[:, :, :, :, 1] | |||||
| + cmp_spec[:, :, :, :, 1] * cmp_mask[:, :, :, :, 0] | |||||
| ], 1) | |||||
| est_spec = torch.cat([est_spec[:, 0, :, :], est_spec[:, 1, :, :]], 1) | |||||
| cmp_mask = torch.squeeze(cmp_mask, 1) | |||||
| cmp_mask = torch.cat([cmp_mask[:, :, :, 0], cmp_mask[:, :, :, 1]], 1) | |||||
| est_wav = self.istft(est_spec) | |||||
| est_wav = torch.squeeze(est_wav, 1) | |||||
| return est_spec, est_wav, cmp_mask | |||||
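apply_mask is an elementwise complex ratio-mask multiply, S = Y * M; the two branches of the first cat are the real and imaginary parts of (a+bi)(c+di). A sketch of the same product via torch.complex (assuming the [B, 1, D, T, 2] layout):

y = torch.complex(cmp_spec[..., 0], cmp_spec[..., 1])
m = torch.complex(cmp_mask[..., 0], cmp_mask[..., 1])
s = y * m   # real: ac - bd, imag: ad + bc, matching the cat above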
| def get_params(self, weight_decay=0.0): | |||||
| # add L2 penalty | |||||
| weights, biases = [], [] | |||||
| for name, param in self.named_parameters(): | |||||
| if 'bias' in name: | |||||
| biases += [param] | |||||
| else: | |||||
| weights += [param] | |||||
| params = [{ | |||||
| 'params': weights, | |||||
| 'weight_decay': weight_decay, | |||||
| }, { | |||||
| 'params': biases, | |||||
| 'weight_decay': 0.0, | |||||
| }] | |||||
| return params | |||||
| def loss(self, noisy, labels, out_list, mode='Mix'): | |||||
| if mode == 'SiSNR': | |||||
| count = 0 | |||||
| while count < len(out_list): | |||||
| est_spec = out_list[count] | |||||
| count = count + 1 | |||||
| est_wav = out_list[count] | |||||
| count = count + 1 | |||||
| est_mask = out_list[count] | |||||
| count = count + 1 | |||||
| if count != 3: | |||||
| loss = self.loss_1layer(noisy, est_spec, est_wav, labels, | |||||
| est_mask, mode) | |||||
| return loss | |||||
| elif mode == 'Mix': | |||||
| count = 0 | |||||
| while count < len(out_list): | |||||
| est_spec = out_list[count] | |||||
| count = count + 1 | |||||
| est_wav = out_list[count] | |||||
| count = count + 1 | |||||
| est_mask = out_list[count] | |||||
| count = count + 1 | |||||
| if count != 3: | |||||
| amp_loss, phase_loss, SiSNR_loss = self.loss_1layer( | |||||
| noisy, est_spec, est_wav, labels, est_mask, mode) | |||||
| loss = amp_loss + phase_loss + SiSNR_loss | |||||
| return loss, amp_loss, phase_loss | |||||
| def loss_1layer(self, noisy, est, est_wav, labels, cmp_mask, mode='Mix'): | |||||
| r""" Compute the loss by mode | |||||
| mode == 'Mix' | |||||
| est: [B, F*2, T] | |||||
| labels: [B, F*2,T] | |||||
| mode == 'SiSNR' | |||||
| est: [B, T] | |||||
| labels: [B, T] | |||||
| """ | |||||
| if mode == 'SiSNR': | |||||
| if labels.dim() == 3: | |||||
| labels = torch.squeeze(labels, 1) | |||||
| if est_wav.dim() == 3: | |||||
| est_wav = torch.squeeze(est_wav, 1) | |||||
| return -si_snr(est_wav, labels) | |||||
| elif mode == 'Mix': | |||||
| if labels.dim() == 3: | |||||
| labels = torch.squeeze(labels, 1) | |||||
| if est_wav.dim() == 3: | |||||
| est_wav = torch.squeeze(est_wav, 1) | |||||
| SiSNR_loss = -si_snr(est_wav, labels) | |||||
| b, d, t = est.size() | |||||
| S = self.stft(labels) | |||||
| Sr = S[:, :self.feat_dim, :] | |||||
| Si = S[:, self.feat_dim:, :] | |||||
| Y = self.stft(noisy) | |||||
| Yr = Y[:, :self.feat_dim, :] | |||||
| Yi = Y[:, self.feat_dim:, :] | |||||
| Y_pow = Yr**2 + Yi**2 | |||||
| gth_mask = torch.cat([(Sr * Yr + Si * Yi) / (Y_pow + 1e-8), | |||||
| (Si * Yr - Sr * Yi) / (Y_pow + 1e-8)], 1) | |||||
| gth_mask[gth_mask > 2] = 1 | |||||
| gth_mask[gth_mask < -2] = -1 | |||||
| amp_loss = F.mse_loss(gth_mask[:, :self.feat_dim, :], | |||||
| cmp_mask[:, :self.feat_dim, :]) * d | |||||
| phase_loss = F.mse_loss(gth_mask[:, self.feat_dim:, :], | |||||
| cmp_mask[:, self.feat_dim:, :]) * d | |||||
| return amp_loss, phase_loss, SiSNR_loss | |||||
| def l2_norm(s1, s2): | |||||
| norm = torch.sum(s1 * s2, -1, keepdim=True) | |||||
| return norm | |||||
| def si_snr(s1, s2, eps=1e-8): | |||||
| s1_s2_norm = l2_norm(s1, s2) | |||||
| s2_s2_norm = l2_norm(s2, s2) | |||||
| s_target = s1_s2_norm / (s2_s2_norm + eps) * s2 | |||||
e_noise = s1 - s_target
target_norm = l2_norm(s_target, s_target)
noise_norm = l2_norm(e_noise, e_noise)
| snr = 10 * torch.log10((target_norm) / (noise_norm + eps) + eps) | |||||
| return torch.mean(snr) | |||||
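A quick numerical sanity check of si_snr (values arbitrary): a perfect estimate scores very high, a mildly noisy one lands around 20 dB.

import torch

target = torch.randn(4, 16000)
assert si_snr(target, target) > 60.0     # near-perfect reconstruction
noisy = target + 0.1 * torch.randn(4, 16000)
print(si_snr(noisy, target))             # roughly 20 dB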
| @@ -0,0 +1,26 @@ | |||||
| import torch | |||||
| from torch import nn | |||||
| class SELayer(nn.Module): | |||||
| def __init__(self, channel, reduction=16): | |||||
| super(SELayer, self).__init__() | |||||
| self.avg_pool = nn.AdaptiveAvgPool2d(1) | |||||
| self.fc_r = nn.Sequential( | |||||
| nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), | |||||
| nn.Linear(channel // reduction, channel), nn.Sigmoid()) | |||||
| self.fc_i = nn.Sequential( | |||||
| nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), | |||||
| nn.Linear(channel // reduction, channel), nn.Sigmoid()) | |||||
| def forward(self, x): | |||||
| b, c, _, _, _ = x.size() | |||||
| x_r = self.avg_pool(x[:, :, :, :, 0]).view(b, c) | |||||
| x_i = self.avg_pool(x[:, :, :, :, 1]).view(b, c) | |||||
| y_r = self.fc_r(x_r).view(b, c, 1, 1, 1) - self.fc_i(x_i).view( | |||||
| b, c, 1, 1, 1) | |||||
| y_i = self.fc_r(x_i).view(b, c, 1, 1, 1) + self.fc_i(x_r).view( | |||||
| b, c, 1, 1, 1) | |||||
| y = torch.cat([y_r, y_i], 4) | |||||
| return x * y | |||||
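The layer pools each complex channel, builds a two-component gate with the complex-product sign pattern, and rescales the input; a shape sketch:

import torch

se = SELayer(channel=64, reduction=16)
x = torch.randn(2, 64, 32, 100, 2)   # [batch, channel, D, T, 2]
assert se(x).shape == x.shape        # gate broadcasts over D and T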
| @@ -0,0 +1,269 @@ | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| from . import complex_nn | |||||
| from .se_module_complex import SELayer | |||||
| class Encoder(nn.Module): | |||||
| def __init__(self, | |||||
| in_channels, | |||||
| out_channels, | |||||
| kernel_size, | |||||
| stride, | |||||
| padding=None, | |||||
| complex=False, | |||||
| padding_mode='zeros'): | |||||
| super().__init__() | |||||
| if padding is None: | |||||
| padding = [(i - 1) // 2 for i in kernel_size] # 'SAME' padding | |||||
| if complex: | |||||
| conv = complex_nn.ComplexConv2d | |||||
| bn = complex_nn.ComplexBatchNorm2d | |||||
| else: | |||||
| conv = nn.Conv2d | |||||
| bn = nn.BatchNorm2d | |||||
| self.conv = conv( | |||||
| in_channels, | |||||
| out_channels, | |||||
| kernel_size=kernel_size, | |||||
| stride=stride, | |||||
| padding=padding, | |||||
| padding_mode=padding_mode) | |||||
| self.bn = bn(out_channels) | |||||
| self.relu = nn.LeakyReLU(inplace=True) | |||||
| def forward(self, x): | |||||
| x = self.conv(x) | |||||
| x = self.bn(x) | |||||
| x = self.relu(x) | |||||
| return x | |||||
| class Decoder(nn.Module): | |||||
| def __init__(self, | |||||
| in_channels, | |||||
| out_channels, | |||||
| kernel_size, | |||||
| stride, | |||||
| padding=(0, 0), | |||||
| complex=False): | |||||
| super().__init__() | |||||
| if complex: | |||||
| tconv = complex_nn.ComplexConvTranspose2d | |||||
| bn = complex_nn.ComplexBatchNorm2d | |||||
| else: | |||||
| tconv = nn.ConvTranspose2d | |||||
| bn = nn.BatchNorm2d | |||||
| self.transconv = tconv( | |||||
| in_channels, | |||||
| out_channels, | |||||
| kernel_size=kernel_size, | |||||
| stride=stride, | |||||
| padding=padding) | |||||
| self.bn = bn(out_channels) | |||||
| self.relu = nn.LeakyReLU(inplace=True) | |||||
| def forward(self, x): | |||||
| x = self.transconv(x) | |||||
| x = self.bn(x) | |||||
| x = self.relu(x) | |||||
| return x | |||||
| class UNet(nn.Module): | |||||
| def __init__(self, | |||||
| input_channels=1, | |||||
| complex=False, | |||||
| model_complexity=45, | |||||
| model_depth=20, | |||||
| padding_mode='zeros'): | |||||
| super().__init__() | |||||
| if complex: | |||||
| model_complexity = int(model_complexity // 1.414) | |||||
| self.set_size( | |||||
| model_complexity=model_complexity, | |||||
| input_channels=input_channels, | |||||
| model_depth=model_depth) | |||||
| self.encoders = [] | |||||
| self.model_length = model_depth // 2 | |||||
| self.fsmn = complex_nn.ComplexUniDeepFsmn(128, 128, 128) | |||||
| self.se_layers_enc = [] | |||||
| self.fsmn_enc = [] | |||||
| for i in range(self.model_length): | |||||
| fsmn_enc = complex_nn.ComplexUniDeepFsmn_L1(128, 128, 128) | |||||
| self.add_module('fsmn_enc{}'.format(i), fsmn_enc) | |||||
| self.fsmn_enc.append(fsmn_enc) | |||||
| module = Encoder( | |||||
| self.enc_channels[i], | |||||
| self.enc_channels[i + 1], | |||||
| kernel_size=self.enc_kernel_sizes[i], | |||||
| stride=self.enc_strides[i], | |||||
| padding=self.enc_paddings[i], | |||||
| complex=complex, | |||||
| padding_mode=padding_mode) | |||||
| self.add_module('encoder{}'.format(i), module) | |||||
| self.encoders.append(module) | |||||
| se_layer_enc = SELayer(self.enc_channels[i + 1], 8) | |||||
| self.add_module('se_layer_enc{}'.format(i), se_layer_enc) | |||||
| self.se_layers_enc.append(se_layer_enc) | |||||
| self.decoders = [] | |||||
| self.fsmn_dec = [] | |||||
| self.se_layers_dec = [] | |||||
| for i in range(self.model_length): | |||||
| fsmn_dec = complex_nn.ComplexUniDeepFsmn_L1(128, 128, 128) | |||||
| self.add_module('fsmn_dec{}'.format(i), fsmn_dec) | |||||
| self.fsmn_dec.append(fsmn_dec) | |||||
| module = Decoder( | |||||
| self.dec_channels[i] * 2, | |||||
| self.dec_channels[i + 1], | |||||
| kernel_size=self.dec_kernel_sizes[i], | |||||
| stride=self.dec_strides[i], | |||||
| padding=self.dec_paddings[i], | |||||
| complex=complex) | |||||
| self.add_module('decoder{}'.format(i), module) | |||||
| self.decoders.append(module) | |||||
| if i < self.model_length - 1: | |||||
| se_layer_dec = SELayer(self.dec_channels[i + 1], 8) | |||||
| self.add_module('se_layer_dec{}'.format(i), se_layer_dec) | |||||
| self.se_layers_dec.append(se_layer_dec) | |||||
| if complex: | |||||
| conv = complex_nn.ComplexConv2d | |||||
| else: | |||||
| conv = nn.Conv2d | |||||
| linear = conv(self.dec_channels[-1], 1, 1) | |||||
| self.add_module('linear', linear) | |||||
| self.complex = complex | |||||
| self.padding_mode = padding_mode | |||||
| self.decoders = nn.ModuleList(self.decoders) | |||||
| self.encoders = nn.ModuleList(self.encoders) | |||||
| self.se_layers_enc = nn.ModuleList(self.se_layers_enc) | |||||
| self.se_layers_dec = nn.ModuleList(self.se_layers_dec) | |||||
| self.fsmn_enc = nn.ModuleList(self.fsmn_enc) | |||||
| self.fsmn_dec = nn.ModuleList(self.fsmn_dec) | |||||
| def forward(self, inputs): | |||||
| x = inputs | |||||
| # go down | |||||
| xs = [] | |||||
| xs_se = [] | |||||
| xs_se.append(x) | |||||
| for i, encoder in enumerate(self.encoders): | |||||
| xs.append(x) | |||||
| if i > 0: | |||||
| x = self.fsmn_enc[i](x) | |||||
| x = encoder(x) | |||||
| xs_se.append(self.se_layers_enc[i](x)) | |||||
| # xs : x0=input x1 ... x9 | |||||
| x = self.fsmn(x) | |||||
| p = x | |||||
| for i, decoder in enumerate(self.decoders): | |||||
| p = decoder(p) | |||||
| if i < self.model_length - 1: | |||||
| p = self.fsmn_dec[i](p) | |||||
| if i == self.model_length - 1: | |||||
| break | |||||
| if i < self.model_length - 2: | |||||
| p = self.se_layers_dec[i](p) | |||||
| p = torch.cat([p, xs_se[self.model_length - 1 - i]], dim=1) | |||||
| # cmp_spec: [12, 1, 513, 64, 2] | |||||
| cmp_spec = self.linear(p) | |||||
| return cmp_spec | |||||
| def set_size(self, model_complexity, model_depth=20, input_channels=1): | |||||
| if model_depth == 14: | |||||
| self.enc_channels = [ | |||||
| input_channels, 128, 128, 128, 128, 128, 128, 128 | |||||
| ] | |||||
| self.enc_kernel_sizes = [(5, 2), (5, 2), (5, 2), (5, 2), (5, 2), | |||||
| (5, 2), (2, 2)] | |||||
| self.enc_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), | |||||
| (2, 1)] | |||||
| self.enc_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), | |||||
| (0, 1), (0, 1)] | |||||
| self.dec_channels = [64, 128, 128, 128, 128, 128, 128, 1] | |||||
| self.dec_kernel_sizes = [(2, 2), (5, 2), (5, 2), (5, 2), (6, 2), | |||||
| (5, 2), (5, 2)] | |||||
| self.dec_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), | |||||
| (2, 1)] | |||||
| self.dec_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), | |||||
| (0, 1), (0, 1)] | |||||
| elif model_depth == 10: | |||||
| self.enc_channels = [ | |||||
| input_channels, | |||||
| 16, | |||||
| 32, | |||||
| 64, | |||||
| 128, | |||||
| 256, | |||||
| ] | |||||
| self.enc_kernel_sizes = [(3, 3), (3, 3), (3, 3), (3, 3), (3, 3)] | |||||
| self.enc_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1)] | |||||
| self.enc_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1)] | |||||
| self.dec_channels = [128, 128, 64, 32, 16, 1] | |||||
| self.dec_kernel_sizes = [(3, 3), (3, 3), (3, 3), (4, 3), (3, 3)] | |||||
| self.dec_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1)] | |||||
| self.dec_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1)] | |||||
| elif model_depth == 20: | |||||
| self.enc_channels = [ | |||||
| input_channels, model_complexity, model_complexity, | |||||
| model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2, 128 | |||||
| ] | |||||
| self.enc_kernel_sizes = [(7, 1), (1, 7), (6, 4), (7, 5), (5, 3), | |||||
| (5, 3), (5, 3), (5, 3), (5, 3), (5, 3)] | |||||
| self.enc_strides = [(1, 1), (1, 1), (2, 2), (2, 1), (2, 2), (2, 1), | |||||
| (2, 2), (2, 1), (2, 2), (2, 1)] | |||||
| self.enc_paddings = [ | |||||
| (3, 0), | |||||
| (0, 3), | |||||
| None, # (0, 2), | |||||
| None, | |||||
| None, # (3,1), | |||||
| None, # (3,1), | |||||
| None, # (1,2), | |||||
| None, | |||||
| None, | |||||
| None | |||||
| ] | |||||
| self.dec_channels = [ | |||||
| 0, model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2, model_complexity * 2, | |||||
| model_complexity * 2 | |||||
| ] | |||||
| self.dec_kernel_sizes = [(4, 3), (4, 2), (4, 3), (4, 2), (4, 3), | |||||
| (4, 2), (6, 3), (7, 4), (1, 7), (7, 1)] | |||||
| self.dec_strides = [(2, 1), (2, 2), (2, 1), (2, 2), (2, 1), (2, 2), | |||||
| (2, 1), (2, 2), (1, 1), (1, 1)] | |||||
| self.dec_paddings = [(1, 1), (1, 0), (1, 1), (1, 0), (1, 1), | |||||
| (1, 0), (2, 1), (2, 1), (0, 3), (3, 0)] | |||||
| else: | |||||
| raise ValueError('Unknown model depth : {}'.format(model_depth)) | |||||
| @@ -0,0 +1,430 @@ | |||||
| import math | |||||
| import torch | |||||
| import torch.nn as nn | |||||
| from .splat import SplAtConv2d | |||||
| __all__ = ['ResNet', 'Bottleneck'] | |||||
| class DropBlock2D(object): | |||||
| def __init__(self, *args, **kwargs): | |||||
| raise NotImplementedError | |||||
| class GlobalAvgPool2d(nn.Module): | |||||
| def __init__(self): | |||||
| """Global average pooling over the input's spatial dimensions""" | |||||
| super(GlobalAvgPool2d, self).__init__() | |||||
| def forward(self, inputs): | |||||
| return nn.functional.adaptive_avg_pool2d(inputs, | |||||
| 1).view(inputs.size(0), -1) | |||||
| class Bottleneck(nn.Module): | |||||
| expansion = 4 | |||||
| def __init__(self, | |||||
| inplanes, | |||||
| planes, | |||||
| stride=1, | |||||
| downsample=None, | |||||
| radix=1, | |||||
| cardinality=1, | |||||
| bottleneck_width=64, | |||||
| avd=False, | |||||
| avd_first=False, | |||||
| dilation=1, | |||||
| is_first=False, | |||||
| rectified_conv=False, | |||||
| rectify_avg=False, | |||||
| norm_layer=None, | |||||
| dropblock_prob=0.0, | |||||
| last_gamma=False): | |||||
| super(Bottleneck, self).__init__() | |||||
| group_width = int(planes * (bottleneck_width / 64.)) * cardinality | |||||
| self.conv1 = nn.Conv2d( | |||||
| inplanes, group_width, kernel_size=1, bias=False) | |||||
| self.bn1 = norm_layer(group_width) | |||||
| self.dropblock_prob = dropblock_prob | |||||
| self.radix = radix | |||||
| self.avd = avd and (stride > 1 or is_first) | |||||
| self.avd_first = avd_first | |||||
| if self.avd: | |||||
| self.avd_layer = nn.AvgPool2d(3, stride, padding=1) | |||||
| stride = 1 | |||||
| if dropblock_prob > 0.0: | |||||
| self.dropblock1 = DropBlock2D(dropblock_prob, 3) | |||||
| if radix == 1: | |||||
| self.dropblock2 = DropBlock2D(dropblock_prob, 3) | |||||
| self.dropblock3 = DropBlock2D(dropblock_prob, 3) | |||||
| if radix >= 1: | |||||
| self.conv2 = SplAtConv2d( | |||||
| group_width, | |||||
| group_width, | |||||
| kernel_size=3, | |||||
| stride=stride, | |||||
| padding=dilation, | |||||
| dilation=dilation, | |||||
| groups=cardinality, | |||||
| bias=False, | |||||
| radix=radix, | |||||
| rectify=rectified_conv, | |||||
| rectify_avg=rectify_avg, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob) | |||||
| elif rectified_conv: | |||||
| from rfconv import RFConv2d | |||||
| self.conv2 = RFConv2d( | |||||
| group_width, | |||||
| group_width, | |||||
| kernel_size=3, | |||||
| stride=stride, | |||||
| padding=dilation, | |||||
| dilation=dilation, | |||||
| groups=cardinality, | |||||
| bias=False, | |||||
| average_mode=rectify_avg) | |||||
| self.bn2 = norm_layer(group_width) | |||||
| else: | |||||
| self.conv2 = nn.Conv2d( | |||||
| group_width, | |||||
| group_width, | |||||
| kernel_size=3, | |||||
| stride=stride, | |||||
| padding=dilation, | |||||
| dilation=dilation, | |||||
| groups=cardinality, | |||||
| bias=False) | |||||
| self.bn2 = norm_layer(group_width) | |||||
| self.conv3 = nn.Conv2d( | |||||
| group_width, planes * 4, kernel_size=1, bias=False) | |||||
| self.bn3 = norm_layer(planes * 4) | |||||
| if last_gamma: | |||||
| from torch.nn.init import zeros_ | |||||
| zeros_(self.bn3.weight) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.downsample = downsample | |||||
| self.dilation = dilation | |||||
| self.stride = stride | |||||
| def forward(self, x): | |||||
| residual = x | |||||
| out = self.conv1(x) | |||||
| out = self.bn1(out) | |||||
| if self.dropblock_prob > 0.0: | |||||
| out = self.dropblock1(out) | |||||
| out = self.relu(out) | |||||
| if self.avd and self.avd_first: | |||||
| out = self.avd_layer(out) | |||||
| out = self.conv2(out) | |||||
| if self.radix == 0: | |||||
| out = self.bn2(out) | |||||
| if self.dropblock_prob > 0.0: | |||||
| out = self.dropblock2(out) | |||||
| out = self.relu(out) | |||||
| if self.avd and not self.avd_first: | |||||
| out = self.avd_layer(out) | |||||
| out = self.conv3(out) | |||||
| out = self.bn3(out) | |||||
| if self.dropblock_prob > 0.0: | |||||
| out = self.dropblock3(out) | |||||
| if self.downsample is not None: | |||||
| residual = self.downsample(x) | |||||
| out += residual | |||||
| out = self.relu(out) | |||||
| return out | |||||
| class ResNet(nn.Module): | |||||
| def __init__(self, | |||||
| block, | |||||
| layers, | |||||
| radix=1, | |||||
| groups=1, | |||||
| bottleneck_width=64, | |||||
| num_classes=1000, | |||||
| dilated=False, | |||||
| dilation=1, | |||||
| deep_stem=False, | |||||
| stem_width=64, | |||||
| avg_down=False, | |||||
| rectified_conv=False, | |||||
| rectify_avg=False, | |||||
| avd=False, | |||||
| avd_first=False, | |||||
| final_drop=0.0, | |||||
| dropblock_prob=0, | |||||
| last_gamma=False, | |||||
| norm_layer=nn.BatchNorm2d): | |||||
| self.cardinality = groups | |||||
| self.bottleneck_width = bottleneck_width | |||||
| # ResNet-D params | |||||
| self.inplanes = stem_width * 2 if deep_stem else 64 | |||||
| self.avg_down = avg_down | |||||
| self.last_gamma = last_gamma | |||||
| # ResNeSt params | |||||
| self.radix = radix | |||||
| self.avd = avd | |||||
| self.avd_first = avd_first | |||||
| super(ResNet, self).__init__() | |||||
| self.rectified_conv = rectified_conv | |||||
| self.rectify_avg = rectify_avg | |||||
| if rectified_conv: | |||||
| from rfconv import RFConv2d | |||||
| conv_layer = RFConv2d | |||||
| else: | |||||
| conv_layer = nn.Conv2d | |||||
| conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {} | |||||
| if deep_stem: | |||||
| self.conv1 = nn.Sequential( | |||||
| conv_layer( | |||||
| 3, | |||||
| stem_width, | |||||
| kernel_size=3, | |||||
| stride=2, | |||||
| padding=1, | |||||
| bias=False, | |||||
| **conv_kwargs), | |||||
| norm_layer(stem_width), | |||||
| nn.ReLU(inplace=True), | |||||
| conv_layer( | |||||
| stem_width, | |||||
| stem_width, | |||||
| kernel_size=3, | |||||
| stride=1, | |||||
| padding=1, | |||||
| bias=False, | |||||
| **conv_kwargs), | |||||
| norm_layer(stem_width), | |||||
| nn.ReLU(inplace=True), | |||||
| conv_layer( | |||||
| stem_width, | |||||
| stem_width * 2, | |||||
| kernel_size=3, | |||||
| stride=1, | |||||
| padding=1, | |||||
| bias=False, | |||||
| **conv_kwargs), | |||||
| ) | |||||
| else: | |||||
| self.conv1 = conv_layer( | |||||
| 3, | |||||
| 64, | |||||
| kernel_size=7, | |||||
| stride=2, | |||||
| padding=3, | |||||
| bias=False, | |||||
| **conv_kwargs) | |||||
| self.bn1 = norm_layer(self.inplanes) | |||||
| self.relu = nn.ReLU(inplace=True) | |||||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
| self.layer1 = self._make_layer( | |||||
| block, 64, layers[0], norm_layer=norm_layer, is_first=False) | |||||
| self.layer2 = self._make_layer( | |||||
| block, 128, layers[1], stride=2, norm_layer=norm_layer) | |||||
| if dilated or dilation == 4: | |||||
| self.layer3 = self._make_layer( | |||||
| block, | |||||
| 256, | |||||
| layers[2], | |||||
| stride=1, | |||||
| dilation=2, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob) | |||||
| self.layer4 = self._make_layer( | |||||
| block, | |||||
| 512, | |||||
| layers[3], | |||||
| stride=1, | |||||
| dilation=4, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob) | |||||
| elif dilation == 2: | |||||
| self.layer3 = self._make_layer( | |||||
| block, | |||||
| 256, | |||||
| layers[2], | |||||
| stride=2, | |||||
| dilation=1, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob) | |||||
| self.layer4 = self._make_layer( | |||||
| block, | |||||
| 512, | |||||
| layers[3], | |||||
| stride=1, | |||||
| dilation=2, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob) | |||||
| else: | |||||
| self.layer3 = self._make_layer( | |||||
| block, | |||||
| 256, | |||||
| layers[2], | |||||
| stride=2, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob) | |||||
| self.layer4 = self._make_layer( | |||||
| block, | |||||
| 512, | |||||
| layers[3], | |||||
| stride=2, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob) | |||||
| self.avgpool = GlobalAvgPool2d() | |||||
| self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None | |||||
| self.fc = nn.Linear(512 * block.expansion, num_classes) | |||||
| for m in self.modules(): | |||||
| if isinstance(m, nn.Conv2d): | |||||
| n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
| m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
| elif isinstance(m, norm_layer): | |||||
| m.weight.data.fill_(1) | |||||
| m.bias.data.zero_() | |||||
| def _make_layer(self, | |||||
| block, | |||||
| planes, | |||||
| blocks, | |||||
| stride=1, | |||||
| dilation=1, | |||||
| norm_layer=None, | |||||
| dropblock_prob=0.0, | |||||
| is_first=True): | |||||
| downsample = None | |||||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||||
| down_layers = [] | |||||
| if self.avg_down: | |||||
| if dilation == 1: | |||||
| down_layers.append( | |||||
| nn.AvgPool2d( | |||||
| kernel_size=stride, | |||||
| stride=stride, | |||||
| ceil_mode=True, | |||||
| count_include_pad=False)) | |||||
| else: | |||||
| down_layers.append( | |||||
| nn.AvgPool2d( | |||||
| kernel_size=1, | |||||
| stride=1, | |||||
| ceil_mode=True, | |||||
| count_include_pad=False)) | |||||
| down_layers.append( | |||||
| nn.Conv2d( | |||||
| self.inplanes, | |||||
| planes * block.expansion, | |||||
| kernel_size=1, | |||||
| stride=1, | |||||
| bias=False)) | |||||
| else: | |||||
| down_layers.append( | |||||
| nn.Conv2d( | |||||
| self.inplanes, | |||||
| planes * block.expansion, | |||||
| kernel_size=1, | |||||
| stride=stride, | |||||
| bias=False)) | |||||
| down_layers.append(norm_layer(planes * block.expansion)) | |||||
| downsample = nn.Sequential(*down_layers) | |||||
| layers = [] | |||||
| if dilation == 1 or dilation == 2: | |||||
| layers.append( | |||||
| block( | |||||
| self.inplanes, | |||||
| planes, | |||||
| stride, | |||||
| downsample=downsample, | |||||
| radix=self.radix, | |||||
| cardinality=self.cardinality, | |||||
| bottleneck_width=self.bottleneck_width, | |||||
| avd=self.avd, | |||||
| avd_first=self.avd_first, | |||||
| dilation=1, | |||||
| is_first=is_first, | |||||
| rectified_conv=self.rectified_conv, | |||||
| rectify_avg=self.rectify_avg, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob, | |||||
| last_gamma=self.last_gamma)) | |||||
| elif dilation == 4: | |||||
| layers.append( | |||||
| block( | |||||
| self.inplanes, | |||||
| planes, | |||||
| stride, | |||||
| downsample=downsample, | |||||
| radix=self.radix, | |||||
| cardinality=self.cardinality, | |||||
| bottleneck_width=self.bottleneck_width, | |||||
| avd=self.avd, | |||||
| avd_first=self.avd_first, | |||||
| dilation=2, | |||||
| is_first=is_first, | |||||
| rectified_conv=self.rectified_conv, | |||||
| rectify_avg=self.rectify_avg, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob, | |||||
| last_gamma=self.last_gamma)) | |||||
| else: | |||||
| raise RuntimeError('=> unknown dilation size: {}'.format(dilation)) | |||||
| self.inplanes = planes * block.expansion | |||||
| for i in range(1, blocks): | |||||
| layers.append( | |||||
| block( | |||||
| self.inplanes, | |||||
| planes, | |||||
| radix=self.radix, | |||||
| cardinality=self.cardinality, | |||||
| bottleneck_width=self.bottleneck_width, | |||||
| avd=self.avd, | |||||
| avd_first=self.avd_first, | |||||
| dilation=dilation, | |||||
| rectified_conv=self.rectified_conv, | |||||
| rectify_avg=self.rectify_avg, | |||||
| norm_layer=norm_layer, | |||||
| dropblock_prob=dropblock_prob, | |||||
| last_gamma=self.last_gamma)) | |||||
| return nn.Sequential(*layers) | |||||
| def forward(self, x): | |||||
| x = self.conv1(x) | |||||
| x = self.bn1(x) | |||||
| x = self.relu(x) | |||||
| x = self.maxpool(x) | |||||
| x = self.layer1(x) | |||||
| x = self.layer2(x) | |||||
| x = self.layer3(x) | |||||
| x = self.layer4(x) | |||||
| x = self.avgpool(x) | |||||
| x = torch.flatten(x, 1) | |||||
| if self.drop: | |||||
| x = self.drop(x) | |||||
| x = self.fc(x) | |||||
| return x | |||||
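For orientation, a minimal sketch of how this constructor is parameterized in practice; the argument values mirror the resnest101() helper that appears later in this diff (in AnimalRecogPipeline), and the input size is illustrative:

import torch

# ResNeSt-101-style configuration of the ResNet class above.
model = ResNet(
    Bottleneck, [3, 4, 23, 3],
    radix=2,            # two split-attention branches per cardinal group
    groups=1,
    bottleneck_width=64,
    deep_stem=True,     # three 3x3 convs replace the single 7x7 stem
    stem_width=64,
    avg_down=True,      # AvgPool2d before the 1x1 conv in shortcut downsampling
    avd=True,           # average-pool downsampling inside the bottleneck
    avd_first=False,
    num_classes=8288)   # matches the animal-recognition head used below
logits = model(torch.randn(1, 3, 224, 224))  # -> torch.Size([1, 8288])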
| @@ -0,0 +1,125 @@ | |||||
| """Split-Attention""" | |||||
| import torch | |||||
| import torch.nn.functional as F | |||||
| from torch import nn | |||||
| from torch.nn import BatchNorm2d, Conv2d, Linear, Module, ReLU | |||||
| from torch.nn.modules.utils import _pair | |||||
| __all__ = ['SplAtConv2d'] | |||||
| class SplAtConv2d(Module): | |||||
| """Split-Attention Conv2d | |||||
| """ | |||||
| def __init__(self, | |||||
| in_channels, | |||||
| channels, | |||||
| kernel_size, | |||||
| stride=(1, 1), | |||||
| padding=(0, 0), | |||||
| dilation=(1, 1), | |||||
| groups=1, | |||||
| bias=True, | |||||
| radix=2, | |||||
| reduction_factor=4, | |||||
| rectify=False, | |||||
| rectify_avg=False, | |||||
| norm_layer=None, | |||||
| dropblock_prob=0.0, | |||||
| **kwargs): | |||||
| super(SplAtConv2d, self).__init__() | |||||
| padding = _pair(padding) | |||||
| self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) | |||||
| self.rectify_avg = rectify_avg | |||||
| inter_channels = max(in_channels * radix // reduction_factor, 32) | |||||
| self.radix = radix | |||||
| self.cardinality = groups | |||||
| self.channels = channels | |||||
| self.dropblock_prob = dropblock_prob | |||||
| if self.rectify: | |||||
| from rfconv import RFConv2d | |||||
| self.conv = RFConv2d( | |||||
| in_channels, | |||||
| channels * radix, | |||||
| kernel_size, | |||||
| stride, | |||||
| padding, | |||||
| dilation, | |||||
| groups=groups * radix, | |||||
| bias=bias, | |||||
| average_mode=rectify_avg, | |||||
| **kwargs) | |||||
| else: | |||||
| self.conv = Conv2d( | |||||
| in_channels, | |||||
| channels * radix, | |||||
| kernel_size, | |||||
| stride, | |||||
| padding, | |||||
| dilation, | |||||
| groups=groups * radix, | |||||
| bias=bias, | |||||
| **kwargs) | |||||
| self.use_bn = norm_layer is not None | |||||
| if self.use_bn: | |||||
| self.bn0 = norm_layer(channels * radix) | |||||
| self.relu = ReLU(inplace=True) | |||||
| self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) | |||||
| if self.use_bn: | |||||
| self.bn1 = norm_layer(inter_channels) | |||||
| self.fc2 = Conv2d( | |||||
| inter_channels, channels * radix, 1, groups=self.cardinality) | |||||
if dropblock_prob > 0.0:
    # DropBlock2D is neither defined nor imported in this module, so the
    # original assignment would raise a NameError; fail fast instead.
    raise NotImplementedError(
        'dropblock_prob > 0.0 is not supported by this SplAtConv2d port')
| self.rsoftmax = rSoftMax(radix, groups) | |||||
| def forward(self, x): | |||||
| x = self.conv(x) | |||||
| if self.use_bn: | |||||
| x = self.bn0(x) | |||||
| if self.dropblock_prob > 0.0: | |||||
| x = self.dropblock(x) | |||||
| x = self.relu(x) | |||||
| batch, rchannel = x.shape[:2] | |||||
| if self.radix > 1: | |||||
| splited = torch.split(x, rchannel // self.radix, dim=1) | |||||
| gap = sum(splited) | |||||
| else: | |||||
| gap = x | |||||
| gap = F.adaptive_avg_pool2d(gap, 1) | |||||
| gap = self.fc1(gap) | |||||
| if self.use_bn: | |||||
| gap = self.bn1(gap) | |||||
| gap = self.relu(gap) | |||||
| atten = self.fc2(gap) | |||||
| atten = self.rsoftmax(atten).view(batch, -1, 1, 1) | |||||
| if self.radix > 1: | |||||
| attens = torch.split(atten, rchannel // self.radix, dim=1) | |||||
| out = sum([att * split for (att, split) in zip(attens, splited)]) | |||||
| else: | |||||
| out = atten * x | |||||
| return out.contiguous() | |||||
| class rSoftMax(nn.Module): | |||||
| def __init__(self, radix, cardinality): | |||||
| super().__init__() | |||||
| self.radix = radix | |||||
| self.cardinality = cardinality | |||||
| def forward(self, x): | |||||
| batch = x.size(0) | |||||
| if self.radix > 1: | |||||
| x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) | |||||
| x = F.softmax(x, dim=1) | |||||
| x = x.reshape(batch, -1) | |||||
| else: | |||||
| x = torch.sigmoid(x) | |||||
| return x | |||||
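A shape walk-through of the split-attention block, as a hedged sketch (values illustrative): the grouped conv expands to channels * radix feature maps, the radix splits are summed and squeezed through fc1/fc2, rSoftMax normalizes the attention over the radix dimension within each cardinal group, and the re-weighted splits are fused back down to channels maps:

import torch
from torch import nn

conv = SplAtConv2d(64, 64, kernel_size=3, padding=1,
                   radix=2, norm_layer=nn.BatchNorm2d)
y = conv(torch.randn(2, 64, 32, 32))
print(y.shape)  # torch.Size([2, 64, 32, 32]) - radix splits are re-fused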
| @@ -1,2 +1,4 @@ | |||||
| from .clip.clip_model import CLIPForMultiModalEmbedding | from .clip.clip_model import CLIPForMultiModalEmbedding | ||||
| from .image_captioning_model import OfaForImageCaptioning | from .image_captioning_model import OfaForImageCaptioning | ||||
| from .mplug_for_visual_question_answering import \ | |||||
| MPlugForVisualQuestionAnswering | |||||
| @@ -0,0 +1,46 @@ | |||||
| from typing import Dict | |||||
| from ...metainfo import Models | |||||
| from ...utils.constant import Tasks | |||||
| from ..base import Model, Tensor | |||||
| from ..builder import MODELS | |||||
| __all__ = ['MPlugForVisualQuestionAnswering'] | |||||
| @MODELS.register_module( | |||||
| Tasks.visual_question_answering, module_name=Models.mplug) | |||||
| class MPlugForVisualQuestionAnswering(Model): | |||||
| def __init__(self, model_dir: str, *args, **kwargs): | |||||
| """initialize the mplug model from the `model_dir` path. | |||||
| Args: | |||||
| model_dir (str): the model path. | |||||
| """ | |||||
| super().__init__(model_dir, *args, **kwargs) | |||||
| from sofa.models.mplug import MPlugForVisualQuestionAnswering | |||||
| self.model = MPlugForVisualQuestionAnswering.from_pretrained(model_dir) | |||||
| self.tokenizer = self.model.tokenizer | |||||
| def train(self): | |||||
| return self.model.train() | |||||
| def eval(self): | |||||
| return self.model.eval() | |||||
| def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||||
| """return the result by the model | |||||
| Args: | |||||
| input (Dict[str, Tensor]): the preprocessed data | |||||
| Returns: | |||||
| Dict[str, Tensor]: results | |||||
| Example: | |||||
| { | |||||
| 'predictions': Tensor([[1377, 4959, 2785, 6392...])]), | |||||
| } | |||||
| """ | |||||
| return self.model(**input)[0] | |||||
| @@ -5,6 +5,7 @@ from .sbert_for_nli import * # noqa F403 | |||||
| from .sbert_for_sentence_similarity import * # noqa F403 | from .sbert_for_sentence_similarity import * # noqa F403 | ||||
| from .sbert_for_sentiment_classification import * # noqa F403 | from .sbert_for_sentiment_classification import * # noqa F403 | ||||
| from .sbert_for_token_classification import * # noqa F403 | from .sbert_for_token_classification import * # noqa F403 | ||||
| from .sbert_for_zero_shot_classification import * # noqa F403 | |||||
| from .space.dialog_intent_prediction_model import * # noqa F403 | from .space.dialog_intent_prediction_model import * # noqa F403 | ||||
| from .space.dialog_modeling_model import * # noqa F403 | from .space.dialog_modeling_model import * # noqa F403 | ||||
| from .space.dialog_state_tracking_model import * # noqa F403 | from .space.dialog_state_tracking_model import * # noqa F403 | ||||
| @@ -7,7 +7,7 @@ from ...utils.constant import Tasks | |||||
| from ..base import Model, Tensor | from ..base import Model, Tensor | ||||
| from ..builder import MODELS | from ..builder import MODELS | ||||
| __all__ = ['StructBertForMaskedLM', 'VecoForMaskedLM'] | |||||
| __all__ = ['BertForMaskedLM', 'StructBertForMaskedLM', 'VecoForMaskedLM'] | |||||
| class MaskedLanguageModelBase(Model): | class MaskedLanguageModelBase(Model): | ||||
| @@ -61,3 +61,11 @@ class VecoForMaskedLM(MaskedLanguageModelBase): | |||||
| def build_model(self): | def build_model(self): | ||||
| from sofa import VecoForMaskedLM | from sofa import VecoForMaskedLM | ||||
| return VecoForMaskedLM.from_pretrained(self.model_dir) | return VecoForMaskedLM.from_pretrained(self.model_dir) | ||||
| @MODELS.register_module(Tasks.fill_mask, module_name=Models.bert) | |||||
| class BertForMaskedLM(MaskedLanguageModelBase): | |||||
| def build_model(self): | |||||
| from transformers import BertForMaskedLM | |||||
| return BertForMaskedLM.from_pretrained(self.model_dir) | |||||
| @@ -0,0 +1,50 @@ | |||||
| from typing import Any, Dict | |||||
| import numpy as np | |||||
| from modelscope.utils.constant import Tasks | |||||
| from ...metainfo import Models | |||||
| from ..base import Model | |||||
| from ..builder import MODELS | |||||
| __all__ = ['SbertForZeroShotClassification'] | |||||
| @MODELS.register_module( | |||||
| Tasks.zero_shot_classification, module_name=Models.structbert) | |||||
| class SbertForZeroShotClassification(Model): | |||||
| def __init__(self, model_dir: str, *args, **kwargs): | |||||
| """initialize the zero shot classification model from the `model_dir` path. | |||||
| Args: | |||||
| model_dir (str): the model path. | |||||
| """ | |||||
| super().__init__(model_dir, *args, **kwargs) | |||||
| from sofa import SbertForSequenceClassification | |||||
| self.model = SbertForSequenceClassification.from_pretrained(model_dir) | |||||
| def train(self): | |||||
| return self.model.train() | |||||
| def eval(self): | |||||
| return self.model.eval() | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]: | |||||
| """return the result by the model | |||||
| Args: | |||||
| input (Dict[str, Any]): the preprocessed data | |||||
| Returns: | |||||
| Dict[str, np.ndarray]: results | |||||
| Example: | |||||
| { | |||||
| 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value | |||||
| } | |||||
| """ | |||||
| outputs = self.model(**input) | |||||
| logits = outputs['logits'].numpy() | |||||
| res = {'logits': logits} | |||||
| return res | |||||
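Downstream, the zero-shot pipeline is expected to turn these raw logits into label scores. A hedged sketch of the usual NLI-style recipe (an assumption about the pipeline's postprocessing, not code from this diff): score each candidate label's hypothesis against the sentence, then softmax the entailment logits across labels:

import numpy as np

def rank_labels(entailment_logits: np.ndarray) -> np.ndarray:
    # one entailment logit per candidate label -> normalized probabilities
    e = np.exp(entailment_logits - entailment_logits.max())
    return e / e.sum()

print(rank_labels(np.array([1.50, -0.54, 0.20])))  # highest logit wins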
| @@ -6,11 +6,11 @@ from ....utils.nlp.space.utils_dst import batch_to_device | |||||
| from ...base import Model, Tensor | from ...base import Model, Tensor | ||||
| from ...builder import MODELS | from ...builder import MODELS | ||||
| __all__ = ['DialogStateTrackingModel'] | |||||
| __all__ = ['SpaceForDialogStateTrackingModel'] | |||||
| @MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space') | @MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space') | ||||
| class DialogStateTrackingModel(Model): | |||||
| class SpaceForDialogStateTrackingModel(Model): | |||||
| def __init__(self, model_dir: str, *args, **kwargs): | def __init__(self, model_dir: str, *args, **kwargs): | ||||
| """initialize the test generation model from the `model_dir` path. | """initialize the test generation model from the `model_dir` path. | ||||
| @@ -19,4 +19,4 @@ DOWNLOADED_DATASETS_PATH = Path( | |||||
| os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH)) | os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH)) | ||||
| MS_HUB_ENDPOINT = os.environ.get('MS_HUB_ENDPOINT', | MS_HUB_ENDPOINT = os.environ.get('MS_HUB_ENDPOINT', | ||||
| 'http://101.201.119.157:31752') | |||||
| 'http://123.57.189.90:31752') | |||||
| @@ -3,7 +3,7 @@ from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional, | |||||
| Sequence, Union) | Sequence, Union) | ||||
| import numpy as np | import numpy as np | ||||
| from datasets import Dataset | |||||
| from datasets import Dataset, DatasetDict | |||||
| from datasets import load_dataset as hf_load_dataset | from datasets import load_dataset as hf_load_dataset | ||||
| from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE | from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE | ||||
| from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES | from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES | ||||
| @@ -12,7 +12,7 @@ from datasets.utils.file_utils import (is_relative_path, | |||||
| from modelscope.msdatasets.config import MS_DATASETS_CACHE | from modelscope.msdatasets.config import MS_DATASETS_CACHE | ||||
| from modelscope.msdatasets.utils.ms_api import MsApi | from modelscope.msdatasets.utils.ms_api import MsApi | ||||
| from modelscope.utils.constant import Hubs | |||||
| from modelscope.utils.constant import DownloadMode, Hubs | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| logger = get_logger() | logger = get_logger() | ||||
| @@ -34,6 +34,10 @@ class MsDataset: | |||||
| def __init__(self, hf_ds: Dataset, target: Optional[str] = None): | def __init__(self, hf_ds: Dataset, target: Optional[str] = None): | ||||
| self._hf_ds = hf_ds | self._hf_ds = hf_ds | ||||
| if target is not None and target not in self._hf_ds.features: | |||||
raise TypeError(
    f'"target" must be a column of the dataset '
    f'({list(self._hf_ds.features.keys())}), but got {target}')
| self.target = target | self.target = target | ||||
| def __iter__(self): | def __iter__(self): | ||||
| @@ -48,17 +52,23 @@ class MsDataset: | |||||
| @classmethod | @classmethod | ||||
| def from_hf_dataset(cls, | def from_hf_dataset(cls, | ||||
| hf_ds: Dataset, | |||||
| hf_ds: Union[Dataset, DatasetDict], | |||||
| target: str = None) -> Union[dict, 'MsDataset']: | target: str = None) -> Union[dict, 'MsDataset']: | ||||
| if isinstance(hf_ds, Dataset): | if isinstance(hf_ds, Dataset): | ||||
| return cls(hf_ds, target) | return cls(hf_ds, target) | ||||
| if len(hf_ds.keys()) == 1: | |||||
| return cls(next(iter(hf_ds.values())), target) | |||||
| return {k: cls(v, target) for k, v in hf_ds.items()} | |||||
| elif isinstance(hf_ds, DatasetDict): | |||||
| if len(hf_ds.keys()) == 1: | |||||
| return cls(next(iter(hf_ds.values())), target) | |||||
| return {k: cls(v, target) for k, v in hf_ds.items()} | |||||
| else: | |||||
| raise TypeError( | |||||
| f'"hf_ds" must be a Dataset or DatasetDict, but got {type(hf_ds)}' | |||||
| ) | |||||
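# Sketch: a multi-split DatasetDict maps to a dict of MsDataset objects
# (one per split), while a single-split DatasetDict collapses to one
# MsDataset. 'imdb' below is only illustrative:
#
#   from datasets import load_dataset
#   splits = MsDataset.from_hf_dataset(load_dataset('imdb'))
#   # -> {'train': <MsDataset>, 'test': <MsDataset>, 'unsupervised': <MsDataset>}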
| @staticmethod | @staticmethod | ||||
| def load( | def load( | ||||
| dataset_name: Union[str, list], | dataset_name: Union[str, list], | ||||
| namespace: Optional[str] = None, | |||||
| target: Optional[str] = None, | target: Optional[str] = None, | ||||
| version: Optional[str] = None, | version: Optional[str] = None, | ||||
| hub: Optional[Hubs] = Hubs.modelscope, | hub: Optional[Hubs] = Hubs.modelscope, | ||||
| @@ -67,23 +77,32 @@ class MsDataset: | |||||
| data_dir: Optional[str] = None, | data_dir: Optional[str] = None, | ||||
| data_files: Optional[Union[str, Sequence[str], | data_files: Optional[Union[str, Sequence[str], | ||||
| Mapping[str, Union[str, | Mapping[str, Union[str, | ||||
| Sequence[str]]]]] = None | |||||
| Sequence[str]]]]] = None, | |||||
| download_mode: Optional[DownloadMode] = DownloadMode. | |||||
| REUSE_DATASET_IF_EXISTS | |||||
| ) -> Union[dict, 'MsDataset']: | ) -> Union[dict, 'MsDataset']: | ||||
| """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. | """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. | ||||
| Args: | Args: | ||||
| dataset_name (str): Path or name of the dataset. | dataset_name (str): Path or name of the dataset. | ||||
namespace (str, optional): Namespace of the dataset. It must not be None when loading a remote
    dataset from Hubs.modelscope.
target (str, optional): Name of the column to output.
version (str, optional): Version of the dataset script to load.
subset_name (str, optional): Defining the subset_name of the dataset.
data_dir (str, optional): Defining the data_dir of the dataset configuration.
| data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s). | data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s). | ||||
| split (str, optional): Which split of the data to load. | split (str, optional): Which split of the data to load. | ||||
| hub (Hubs, optional): When loading from a remote hub, where it is from | |||||
hub (Hubs or str, optional): When loading from a remote hub, which hub to load from.
    Defaults to Hubs.modelscope.
download_mode (DownloadMode or str, optional): How to treat pre-existing downloads.
    Defaults to DownloadMode.REUSE_DATASET_IF_EXISTS.
| Returns: | Returns: | ||||
| MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset. | MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset. | ||||
| """ | """ | ||||
| download_mode = DownloadMode(download_mode | |||||
| or DownloadMode.REUSE_DATASET_IF_EXISTS) | |||||
| hub = Hubs(hub or Hubs.modelscope) | |||||
| if hub == Hubs.huggingface: | if hub == Hubs.huggingface: | ||||
| dataset = hf_load_dataset( | dataset = hf_load_dataset( | ||||
| dataset_name, | dataset_name, | ||||
| @@ -91,21 +110,25 @@ class MsDataset: | |||||
| revision=version, | revision=version, | ||||
| split=split, | split=split, | ||||
| data_dir=data_dir, | data_dir=data_dir, | ||||
| data_files=data_files) | |||||
| data_files=data_files, | |||||
| download_mode=download_mode.value) | |||||
| return MsDataset.from_hf_dataset(dataset, target=target) | return MsDataset.from_hf_dataset(dataset, target=target) | ||||
| else: | |||||
| elif hub == Hubs.modelscope: | |||||
| return MsDataset._load_ms_dataset( | return MsDataset._load_ms_dataset( | ||||
| dataset_name, | dataset_name, | ||||
| namespace=namespace, | |||||
| target=target, | target=target, | ||||
| subset_name=subset_name, | subset_name=subset_name, | ||||
| version=version, | version=version, | ||||
| split=split, | split=split, | ||||
| data_dir=data_dir, | data_dir=data_dir, | ||||
| data_files=data_files) | |||||
| data_files=data_files, | |||||
| download_mode=download_mode) | |||||
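# Sketch of the new call shape enabled by this change (dataset name and
# namespace are illustrative, not guaranteed to exist):
#
#   from modelscope.utils.constant import DownloadMode
#   ds = MsDataset.load('squad',
#                       namespace='damo',
#                       split='train',
#                       download_mode=DownloadMode.FORCE_REDOWNLOAD)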
| @staticmethod | @staticmethod | ||||
| def _load_ms_dataset( | def _load_ms_dataset( | ||||
| dataset_name: Union[str, list], | dataset_name: Union[str, list], | ||||
| namespace: Optional[str] = None, | |||||
| target: Optional[str] = None, | target: Optional[str] = None, | ||||
| version: Optional[str] = None, | version: Optional[str] = None, | ||||
| subset_name: Optional[str] = None, | subset_name: Optional[str] = None, | ||||
| @@ -113,17 +136,19 @@ class MsDataset: | |||||
| data_dir: Optional[str] = None, | data_dir: Optional[str] = None, | ||||
| data_files: Optional[Union[str, Sequence[str], | data_files: Optional[Union[str, Sequence[str], | ||||
| Mapping[str, Union[str, | Mapping[str, Union[str, | ||||
| Sequence[str]]]]] = None | |||||
| Sequence[str]]]]] = None, | |||||
| download_mode: Optional[DownloadMode] = None | |||||
| ) -> Union[dict, 'MsDataset']: | ) -> Union[dict, 'MsDataset']: | ||||
| if isinstance(dataset_name, str): | if isinstance(dataset_name, str): | ||||
| use_hf = False | use_hf = False | ||||
| if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \ | if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \ | ||||
| (os.path.isfile(dataset_name) and dataset_name.endswith('.py')): | (os.path.isfile(dataset_name) and dataset_name.endswith('.py')): | ||||
| use_hf = True | use_hf = True | ||||
| elif is_relative_path(dataset_name): | |||||
| elif is_relative_path(dataset_name) and dataset_name.count( | |||||
| '/') == 0: | |||||
| ms_api = MsApi() | ms_api = MsApi() | ||||
| dataset_scripts = ms_api.fetch_dataset_scripts( | dataset_scripts = ms_api.fetch_dataset_scripts( | ||||
| dataset_name, version) | |||||
| dataset_name, namespace, download_mode, version) | |||||
| if 'py' in dataset_scripts: # dataset copied from hf datasets | if 'py' in dataset_scripts: # dataset copied from hf datasets | ||||
| dataset_name = dataset_scripts['py'][0] | dataset_name = dataset_scripts['py'][0] | ||||
| use_hf = True | use_hf = True | ||||
| @@ -140,7 +165,8 @@ class MsDataset: | |||||
| split=split, | split=split, | ||||
| data_dir=data_dir, | data_dir=data_dir, | ||||
| data_files=data_files, | data_files=data_files, | ||||
| cache_dir=MS_DATASETS_CACHE) | |||||
| cache_dir=MS_DATASETS_CACHE, | |||||
| download_mode=download_mode.value) | |||||
| else: | else: | ||||
| # TODO load from ms datahub | # TODO load from ms datahub | ||||
| raise NotImplementedError( | raise NotImplementedError( | ||||
| @@ -1,11 +1,14 @@ | |||||
| import os | import os | ||||
| import shutil | |||||
| from collections import defaultdict | from collections import defaultdict | ||||
| from typing import Optional | from typing import Optional | ||||
| import requests | import requests | ||||
| from modelscope.hub.errors import NotExistError, datahub_raise_on_error | |||||
| from modelscope.msdatasets.config import (DOWNLOADED_DATASETS_PATH, | from modelscope.msdatasets.config import (DOWNLOADED_DATASETS_PATH, | ||||
| MS_HUB_ENDPOINT) | MS_HUB_ENDPOINT) | ||||
| from modelscope.utils.constant import DownloadMode | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| logger = get_logger() | logger = get_logger() | ||||
| @@ -27,23 +30,38 @@ class MsApi: | |||||
| def fetch_dataset_scripts(self, | def fetch_dataset_scripts(self, | ||||
| dataset_name: str, | dataset_name: str, | ||||
| version: Optional[str] = 'master', | |||||
| force_download=False): | |||||
| datahub_url = f'{self.endpoint}/api/v1/datasets?Query={dataset_name}' | |||||
| r = requests.get(datahub_url) | |||||
| r.raise_for_status() | |||||
| dataset_list = r.json()['Data'] | |||||
| if len(dataset_list) == 0: | |||||
| return None | |||||
| dataset_id = dataset_list[0]['Id'] | |||||
| namespace: str, | |||||
| download_mode: Optional[DownloadMode], | |||||
| version: Optional[str] = 'master'): | |||||
| if namespace is None: | |||||
raise ValueError(
    f'Dataset from Hubs.modelscope should have a valid "namespace", but got {namespace}'
)
| version = version or 'master' | version = version or 'master' | ||||
| datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}' | |||||
| r = requests.get(datahub_url) | |||||
| r.raise_for_status() | |||||
| file_list = r.json()['Data']['Files'] | |||||
| cache_dir = os.path.join(DOWNLOADED_DATASETS_PATH, dataset_name, | cache_dir = os.path.join(DOWNLOADED_DATASETS_PATH, dataset_name, | ||||
| version) | |||||
| namespace, version) | |||||
| download_mode = DownloadMode(download_mode | |||||
| or DownloadMode.REUSE_DATASET_IF_EXISTS) | |||||
| if download_mode == DownloadMode.FORCE_REDOWNLOAD and os.path.exists( | |||||
| cache_dir): | |||||
| shutil.rmtree(cache_dir) | |||||
| os.makedirs(cache_dir, exist_ok=True) | os.makedirs(cache_dir, exist_ok=True) | ||||
| datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}' | |||||
| r = requests.get(datahub_url) | |||||
| resp = r.json() | |||||
| datahub_raise_on_error(datahub_url, resp) | |||||
| dataset_id = resp['Data']['Id'] | |||||
| datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}' | |||||
| r = requests.get(datahub_url) | |||||
| resp = r.json() | |||||
| datahub_raise_on_error(datahub_url, resp) | |||||
| file_list = resp['Data'] | |||||
| if file_list is None: | |||||
raise NotExistError(
    f'The modelscope dataset [dataset_name = {dataset_name}, namespace = {namespace}, '
    f'version = {version}] does not exist')
| file_list = file_list['Files'] | |||||
| local_paths = defaultdict(list) | local_paths = defaultdict(list) | ||||
| for file_info in file_list: | for file_info in file_list: | ||||
| file_path = file_info['Path'] | file_path = file_info['Path'] | ||||
| @@ -54,7 +72,7 @@ class MsApi: | |||||
| r.raise_for_status() | r.raise_for_status() | ||||
| content = r.json()['Data']['Content'] | content = r.json()['Data']['Content'] | ||||
| local_path = os.path.join(cache_dir, file_path) | local_path = os.path.join(cache_dir, file_path) | ||||
| if os.path.exists(local_path) and not force_download: | |||||
| if os.path.exists(local_path): | |||||
| logger.warning( | logger.warning( | ||||
| f"Reusing dataset {dataset_name}'s python file ({local_path})" | f"Reusing dataset {dataset_name}'s python file ({local_path})" | ||||
| ) | ) | ||||
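Net effect on the script cache, inferred from the code above (layout illustrative): fetched dataset scripts now land under

    ${DOWNLOADED_DATASETS_PATH}/<dataset_name>/<namespace>/<version>/<script>

DownloadMode.FORCE_REDOWNLOAD removes that directory wholesale before re-fetching, while the default REUSE_DATASET_IF_EXISTS keeps and reuses any file already present.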
| @@ -1,4 +1,7 @@ | |||||
| # from .audio import LinearAECPipeline | |||||
| from .audio import LinearAECPipeline | |||||
| from .audio.ans_pipeline import ANSPipeline | |||||
| from .base import Pipeline | from .base import Pipeline | ||||
| from .builder import pipeline | from .builder import pipeline | ||||
| from .cv import * # noqa F403 | |||||
| from .multi_modal import * # noqa F403 | |||||
| from .nlp import * # noqa F403 | from .nlp import * # noqa F403 | ||||
| @@ -0,0 +1,117 @@ | |||||
| import os.path | |||||
| from typing import Any, Dict | |||||
| import librosa | |||||
| import numpy as np | |||||
| import soundfile as sf | |||||
| import torch | |||||
| from modelscope.metainfo import Pipelines | |||||
| from modelscope.utils.constant import Tasks | |||||
| from ..base import Input, Pipeline | |||||
| from ..builder import PIPELINES | |||||
def audio_norm(x):
    # First pass: scale the whole signal so its RMS sits at -25 dBFS.
    rms = (x**2).mean()**0.5
    scalar = 10**(-25 / 20) / rms
    x = x * scalar
    # Second pass: recompute the RMS over only the above-average-power
    # samples, so silence does not bias the level, and rescale again.
    pow_x = x**2
    avg_pow_x = pow_x.mean()
    rmsx = pow_x[pow_x > avg_pow_x].mean()**0.5
    scalarx = 10**(-25 / 20) / rmsx
    x = x * scalarx
    return x
| @PIPELINES.register_module( | |||||
| Tasks.speech_signal_process, | |||||
| module_name=Pipelines.speech_frcrn_ans_cirm_16k) | |||||
| class ANSPipeline(Pipeline): | |||||
| r"""ANS (Acoustic Noise Suppression) Inference Pipeline . | |||||
| When invoke the class with pipeline.__call__(), it accept only one parameter: | |||||
| inputs(str): the path of wav file | |||||
| """ | |||||
| SAMPLE_RATE = 16000 | |||||
| def __init__(self, model): | |||||
| r""" | |||||
| Args: | |||||
| model: model id on modelscope hub. | |||||
| """ | |||||
| super().__init__(model=model) | |||||
| self.device = torch.device( | |||||
| 'cuda' if torch.cuda.is_available() else 'cpu') | |||||
| self.model = self.model.to(self.device) | |||||
| self.model.eval() | |||||
| def preprocess(self, inputs: Input) -> Dict[str, Any]: | |||||
assert isinstance(inputs, str) and os.path.exists(inputs) and os.path.isfile(inputs), \
    f'Input file does not exist: {inputs}'
| data1, fs = sf.read(inputs) | |||||
| data1 = audio_norm(data1) | |||||
| if fs != self.SAMPLE_RATE: | |||||
| data1 = librosa.resample(data1, fs, self.SAMPLE_RATE) | |||||
| if len(data1.shape) > 1: | |||||
| data1 = data1[:, 0] | |||||
| data = data1.astype(np.float32) | |||||
| inputs = np.reshape(data, [1, data.shape[0]]) | |||||
| return {'ndarray': inputs, 'nsamples': data.shape[0]} | |||||
| def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||||
| ndarray = inputs['ndarray'] | |||||
| nsamples = inputs['nsamples'] | |||||
| decode_do_segement = False | |||||
| window = 16000 | |||||
| stride = int(window * 0.75) | |||||
| print('inputs:{}'.format(ndarray.shape)) | |||||
| b, t = ndarray.shape # size() | |||||
| if t > window * 120: | |||||
| decode_do_segement = True | |||||
| if t < window: | |||||
| ndarray = np.concatenate( | |||||
| [ndarray, np.zeros((ndarray.shape[0], window - t))], 1) | |||||
| elif t < window + stride: | |||||
| padding = window + stride - t | |||||
| print('padding: {}'.format(padding)) | |||||
| ndarray = np.concatenate( | |||||
| [ndarray, np.zeros((ndarray.shape[0], padding))], 1) | |||||
| else: | |||||
| if (t - window) % stride != 0: | |||||
| padding = t - (t - window) // stride * stride | |||||
| print('padding: {}'.format(padding)) | |||||
| ndarray = np.concatenate( | |||||
| [ndarray, np.zeros((ndarray.shape[0], padding))], 1) | |||||
| print('inputs after padding:{}'.format(ndarray.shape)) | |||||
| with torch.no_grad(): | |||||
| ndarray = torch.from_numpy(np.float32(ndarray)).to(self.device) | |||||
| b, t = ndarray.shape | |||||
| if decode_do_segement: | |||||
| outputs = np.zeros(t) | |||||
| give_up_length = (window - stride) // 2 | |||||
| current_idx = 0 | |||||
| while current_idx + window <= t: | |||||
| print('current_idx: {}'.format(current_idx)) | |||||
| tmp_input = ndarray[:, current_idx:current_idx + window] | |||||
| tmp_output = self.model( | |||||
| tmp_input, )['wav_l2'][0].cpu().numpy() | |||||
| end_index = current_idx + window - give_up_length | |||||
| if current_idx == 0: | |||||
| outputs[current_idx: | |||||
| end_index] = tmp_output[:-give_up_length] | |||||
| else: | |||||
| outputs[current_idx | |||||
| + give_up_length:end_index] = tmp_output[ | |||||
| give_up_length:-give_up_length] | |||||
| current_idx += stride | |||||
| else: | |||||
| outputs = self.model(ndarray)['wav_l2'][0].cpu().numpy() | |||||
| return {'output_pcm': outputs[:nsamples]} | |||||
| def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | |||||
| if 'output_path' in kwargs.keys(): | |||||
| sf.write(kwargs['output_path'], inputs['output_pcm'], | |||||
| self.SAMPLE_RATE) | |||||
| return inputs | |||||
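The segmented decode above stitches overlapping 16000-sample windows with a 12000-sample stride and discards (window - stride) / 2 = 2000 samples on each interior chunk edge, where the model has the least context. A standalone sketch of the index bookkeeping (signal length illustrative):

window, stride = 16000, 12000
give_up = (window - stride) // 2  # 2000 samples trimmed per interior edge
t = 4 * window
spans, idx = [], 0
while idx + window <= t:
    start = idx if idx == 0 else idx + give_up
    spans.append((start, idx + window - give_up))
    idx += stride
print(spans)  # contiguous cover of [0, t - give_up)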
| @@ -74,33 +74,57 @@ class Pipeline(ABC): | |||||
| self.preprocessor = preprocessor | self.preprocessor = preprocessor | ||||
| def __call__(self, input: Union[Input, List[Input]], *args, | def __call__(self, input: Union[Input, List[Input]], *args, | ||||
| **post_kwargs) -> Union[Dict[str, Any], Generator]: | |||||
| **kwargs) -> Union[Dict[str, Any], Generator]: | |||||
| # model provider should leave it as it is | # model provider should leave it as it is | ||||
| # modelscope library developer will handle this function | # modelscope library developer will handle this function | ||||
| # simple showcase, need to support iterator type for both tensorflow and pytorch | # simple showcase, need to support iterator type for both tensorflow and pytorch | ||||
| # input_dict = self._handle_input(input) | # input_dict = self._handle_input(input) | ||||
| # sanitize the parameters | |||||
| preprocess_params, forward_params, postprocess_params = self._sanitize_parameters( | |||||
| **kwargs) | |||||
| kwargs['preprocess_params'] = preprocess_params | |||||
| kwargs['forward_params'] = forward_params | |||||
| kwargs['postprocess_params'] = postprocess_params | |||||
| if isinstance(input, list): | if isinstance(input, list): | ||||
| output = [] | output = [] | ||||
| for ele in input: | for ele in input: | ||||
| output.append(self._process_single(ele, *args, **post_kwargs)) | |||||
| output.append(self._process_single(ele, *args, **kwargs)) | |||||
| elif isinstance(input, MsDataset): | elif isinstance(input, MsDataset): | ||||
| return self._process_iterator(input, *args, **post_kwargs) | |||||
| return self._process_iterator(input, *args, **kwargs) | |||||
| else: | else: | ||||
| output = self._process_single(input, *args, **post_kwargs) | |||||
| output = self._process_single(input, *args, **kwargs) | |||||
| return output | return output | ||||
| def _process_iterator(self, input: Input, *args, **post_kwargs): | |||||
| def _sanitize_parameters(self, **pipeline_parameters): | |||||
| """ | |||||
This method splits the keyword args passed to '__call__' (or
'_process_single') into preprocess params, forward params and
postprocess params. Subclasses may override it; the default
implementation forwards everything to postprocess.

Default Returns:
    Dict[str, Any]: preprocess_params = {}
    Dict[str, Any]: forward_params = {}
    Dict[str, Any]: postprocess_params = pipeline_parameters
| """ | |||||
| return {}, {}, pipeline_parameters | |||||
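# Sketch of a subclass override (parameter names illustrative): route
# 'top_k' to postprocess and 'max_length' to forward, and leave whatever
# remains for the preprocessor.
#
#   def _sanitize_parameters(self, **kwargs):
#       postprocess_params, forward_params = {}, {}
#       if 'top_k' in kwargs:
#           postprocess_params['top_k'] = kwargs.pop('top_k')
#       if 'max_length' in kwargs:
#           forward_params['max_length'] = kwargs.pop('max_length')
#       return kwargs, forward_params, postprocess_params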
| def _process_iterator(self, input: Input, *args, **kwargs): | |||||
| for ele in input: | for ele in input: | ||||
| yield self._process_single(ele, *args, **post_kwargs) | |||||
| yield self._process_single(ele, *args, **kwargs) | |||||
| def _process_single(self, input: Input, *args, **kwargs) -> Dict[str, Any]: | |||||
| preprocess_params = kwargs.get('preprocess_params') | |||||
| forward_params = kwargs.get('forward_params') | |||||
| postprocess_params = kwargs.get('postprocess_params') | |||||
| def _process_single(self, input: Input, *args, | |||||
| **post_kwargs) -> Dict[str, Any]: | |||||
| out = self.preprocess(input) | |||||
| out = self.forward(out) | |||||
| out = self.postprocess(out, **post_kwargs) | |||||
| out = self.preprocess(input, **preprocess_params) | |||||
| out = self.forward(out, **forward_params) | |||||
| out = self.postprocess(out, **postprocess_params) | |||||
| self._check_output(out) | self._check_output(out) | ||||
| return out | return out | ||||
| @@ -120,20 +144,21 @@ class Pipeline(ABC): | |||||
| raise ValueError(f'expected output keys are {output_keys}, ' | raise ValueError(f'expected output keys are {output_keys}, ' | ||||
| f'those {missing_keys} are missing') | f'those {missing_keys} are missing') | ||||
| def preprocess(self, inputs: Input) -> Dict[str, Any]: | |||||
| def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]: | |||||
| """ Provide default implementation based on preprocess_cfg and user can reimplement it | """ Provide default implementation based on preprocess_cfg and user can reimplement it | ||||
| """ | """ | ||||
| assert self.preprocessor is not None, 'preprocess method should be implemented' | assert self.preprocessor is not None, 'preprocess method should be implemented' | ||||
| assert not isinstance(self.preprocessor, List),\ | assert not isinstance(self.preprocessor, List),\ | ||||
| 'default implementation does not support using multiple preprocessors.' | 'default implementation does not support using multiple preprocessors.' | ||||
| return self.preprocessor(inputs) | |||||
| return self.preprocessor(inputs, **preprocess_params) | |||||
| def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||||
| def forward(self, inputs: Dict[str, Any], | |||||
| **forward_params) -> Dict[str, Any]: | |||||
| """ Provide default implementation using self.model and user can reimplement it | """ Provide default implementation using self.model and user can reimplement it | ||||
| """ | """ | ||||
| assert self.model is not None, 'forward method should be implemented' | assert self.model is not None, 'forward method should be implemented' | ||||
| assert not self.has_multiple_models, 'default implementation does not support multiple models in a pipeline.' | assert not self.has_multiple_models, 'default implementation does not support multiple models in a pipeline.' | ||||
| return self.model(inputs) | |||||
| return self.model(inputs, **forward_params) | |||||
| @abstractmethod | @abstractmethod | ||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||
| @@ -33,6 +33,9 @@ DEFAULT_MODEL_FOR_PIPELINE = { | |||||
| 'damo/bert-base-sst2'), | 'damo/bert-base-sst2'), | ||||
| Tasks.text_generation: (Pipelines.text_generation, | Tasks.text_generation: (Pipelines.text_generation, | ||||
| 'damo/nlp_palm2.0_text-generation_chinese-base'), | 'damo/nlp_palm2.0_text-generation_chinese-base'), | ||||
| Tasks.zero_shot_classification: | |||||
| (Pipelines.zero_shot_classification, | |||||
| 'damo/nlp_structbert_zero-shot-classification_chinese-base'), | |||||
| Tasks.image_captioning: (Pipelines.image_caption, | Tasks.image_captioning: (Pipelines.image_caption, | ||||
| 'damo/ofa_image-caption_coco_large_en'), | 'damo/ofa_image-caption_coco_large_en'), | ||||
| Tasks.image_generation: | Tasks.image_generation: | ||||
| @@ -45,7 +48,10 @@ DEFAULT_MODEL_FOR_PIPELINE = { | |||||
| 'damo/cv_TAdaConv_action-recognition'), | 'damo/cv_TAdaConv_action-recognition'), | ||||
| Tasks.multi_modal_embedding: | Tasks.multi_modal_embedding: | ||||
| (Pipelines.multi_modal_embedding, | (Pipelines.multi_modal_embedding, | ||||
| 'damo/multi-modal_clip-vit-large-patch14-chinese_multi-modal-embedding') | |||||
| 'damo/multi-modal_clip-vit-large-patch14-chinese_multi-modal-embedding'), | |||||
| Tasks.visual_question_answering: | |||||
| (Pipelines.visual_question_answering, | |||||
| 'damo/mplug_visual-question-answering_coco_large_en'), | |||||
| } | } | ||||
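With the two entries added above, task-only construction now resolves these checkpoints automatically, e.g.:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Each call falls back to the DEFAULT_MODEL_FOR_PIPELINE entry for its task.
vqa = pipeline(Tasks.visual_question_answering)  # damo/mplug_..._coco_large_en
zsc = pipeline(Tasks.zero_shot_classification)   # damo/nlp_structbert_..._chinese-base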
| @@ -1,4 +1,5 @@ | |||||
| from .action_recognition_pipeline import ActionRecognitionPipeline | from .action_recognition_pipeline import ActionRecognitionPipeline | ||||
| from .animal_recog_pipeline import AnimalRecogPipeline | |||||
| from .image_cartoon_pipeline import ImageCartoonPipeline | from .image_cartoon_pipeline import ImageCartoonPipeline | ||||
| from .image_matting_pipeline import ImageMattingPipeline | from .image_matting_pipeline import ImageMattingPipeline | ||||
| from .ocr_detection_pipeline import OCRDetectionPipeline | from .ocr_detection_pipeline import OCRDetectionPipeline | ||||
| @@ -0,0 +1,127 @@ | |||||
| import os.path as osp | |||||
| import tempfile | |||||
| from typing import Any, Dict | |||||
| import cv2 | |||||
| import numpy as np | |||||
| import torch | |||||
| from PIL import Image | |||||
| from torchvision import transforms | |||||
| from modelscope.fileio import File | |||||
| from modelscope.hub.snapshot_download import snapshot_download | |||||
| from modelscope.metainfo import Pipelines | |||||
| from modelscope.models.cv.animal_recognition import resnet | |||||
| from modelscope.pipelines.base import Input | |||||
| from modelscope.preprocessors import load_image | |||||
| from modelscope.utils.constant import ModelFile, Tasks | |||||
| from modelscope.utils.logger import get_logger | |||||
| from ..base import Pipeline | |||||
| from ..builder import PIPELINES | |||||
| logger = get_logger() | |||||
| @PIPELINES.register_module( | |||||
| Tasks.image_classification, module_name=Pipelines.animal_recognation) | |||||
| class AnimalRecogPipeline(Pipeline): | |||||
| def __init__(self, model: str): | |||||
| super().__init__(model=model) | |||||
| def resnest101(**kwargs): | |||||
| model = resnet.ResNet( | |||||
| resnet.Bottleneck, [3, 4, 23, 3], | |||||
| radix=2, | |||||
| groups=1, | |||||
| bottleneck_width=64, | |||||
| deep_stem=True, | |||||
| stem_width=64, | |||||
| avg_down=True, | |||||
| avd=True, | |||||
| avd_first=False, | |||||
| **kwargs) | |||||
| return model | |||||
| def filter_param(src_params, own_state): | |||||
| copied_keys = [] | |||||
| for name, param in src_params.items(): | |||||
| if 'module.' == name[0:7]: | |||||
| name = name[7:] | |||||
| if '.module.' not in list(own_state.keys())[0]: | |||||
| name = name.replace('.module.', '.') | |||||
| if (name in own_state) and (own_state[name].shape | |||||
| == param.shape): | |||||
| own_state[name].copy_(param) | |||||
| copied_keys.append(name) | |||||
| def load_pretrained(model, src_params): | |||||
| if 'state_dict' in src_params: | |||||
| src_params = src_params['state_dict'] | |||||
| own_state = model.state_dict() | |||||
| filter_param(src_params, own_state) | |||||
| model.load_state_dict(own_state) | |||||
| self.model = resnest101(num_classes=8288) | |||||
if osp.exists(model):
    local_model_dir = model
else:
    local_model_dir = snapshot_download(model)
| self.local_path = local_model_dir | |||||
| src_params = torch.load( | |||||
| osp.join(local_model_dir, 'pytorch_model.pt'), 'cpu') | |||||
| load_pretrained(self.model, src_params) | |||||
| logger.info('load model done') | |||||
| def preprocess(self, input: Input) -> Dict[str, Any]: | |||||
| if isinstance(input, str): | |||||
| img = load_image(input) | |||||
elif isinstance(input, Image.Image):  # the file imports Image, not PIL
    img = input.convert('RGB')
elif isinstance(input, np.ndarray):
    if len(input.shape) == 2:
        # promote grayscale to 3 channels before the BGR -> RGB flip
        input = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
    img = input[:, :, ::-1]
| img = Image.fromarray(img.astype('uint8')).convert('RGB') | |||||
| else: | |||||
raise TypeError(f'input should be either str, PIL.Image '
                f'or np.ndarray, but got {type(input)}')
| normalize = transforms.Normalize( | |||||
| mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) | |||||
| test_transforms = transforms.Compose([ | |||||
| transforms.Resize(256), | |||||
| transforms.CenterCrop(224), | |||||
| transforms.ToTensor(), normalize | |||||
| ]) | |||||
| img = test_transforms(img) | |||||
| result = {'img': img} | |||||
| return result | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | |||||
| def set_phase(model, is_train): | |||||
| if is_train: | |||||
| model.train() | |||||
| else: | |||||
| model.eval() | |||||
| is_train = False | |||||
| set_phase(self.model, is_train) | |||||
| img = input['img'] | |||||
| input_img = torch.unsqueeze(img, 0) | |||||
| outputs = self.model(input_img) | |||||
| return {'outputs': outputs} | |||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||||
| label_mapping_path = osp.join(self.local_path, 'label_mapping.txt') | |||||
| with open(label_mapping_path, 'r') as f: | |||||
| label_mapping = f.readlines() | |||||
| score = torch.max(inputs['outputs']) | |||||
| inputs = { | |||||
| 'scores': score.item(), | |||||
| 'labels': label_mapping[inputs['outputs'].argmax()].split('\t')[1] | |||||
| } | |||||
| return inputs | |||||
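End-to-end usage, as a hedged sketch (the hub model id is a placeholder, since this diff does not name one):

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

recognizer = pipeline(Tasks.image_classification,
                      model='<animal-recognition model id>')
print(recognizer('test.jpg'))  # {'scores': ..., 'labels': ...}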
| @@ -8,7 +8,6 @@ import cv2 | |||||
| import numpy as np | import numpy as np | ||||
| import PIL | import PIL | ||||
| import tensorflow as tf | import tensorflow as tf | ||||
| import tf_slim as slim | |||||
| from modelscope.metainfo import Pipelines | from modelscope.metainfo import Pipelines | ||||
| from modelscope.pipelines.base import Input | from modelscope.pipelines.base import Input | ||||
| @@ -19,6 +18,11 @@ from ..base import Pipeline | |||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils | from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils | ||||
| if tf.__version__ >= '2.0': | |||||
| import tf_slim as slim | |||||
| else: | |||||
| from tensorflow.contrib import slim | |||||
| if tf.__version__ >= '2.0': | if tf.__version__ >= '2.0': | ||||
| tf = tf.compat.v1 | tf = tf.compat.v1 | ||||
| tf.compat.v1.disable_eager_execution() | tf.compat.v1.disable_eager_execution() | ||||
| @@ -44,6 +48,7 @@ class OCRDetectionPipeline(Pipeline): | |||||
| def __init__(self, model: str): | def __init__(self, model: str): | ||||
| super().__init__(model=model) | super().__init__(model=model) | ||||
| tf.reset_default_graph() | |||||
| model_path = osp.join( | model_path = osp.join( | ||||
| osp.join(self.model, ModelFile.TF_CHECKPOINT_FOLDER), | osp.join(self.model, ModelFile.TF_CHECKPOINT_FOLDER), | ||||
| 'checkpoint-80000') | 'checkpoint-80000') | ||||
| @@ -51,51 +56,56 @@ class OCRDetectionPipeline(Pipeline): | |||||
| config = tf.ConfigProto(allow_soft_placement=True) | config = tf.ConfigProto(allow_soft_placement=True) | ||||
| config.gpu_options.allow_growth = True | config.gpu_options.allow_growth = True | ||||
| self._session = tf.Session(config=config) | self._session = tf.Session(config=config) | ||||
| global_step = tf.get_variable( | |||||
| 'global_step', [], | |||||
| initializer=tf.constant_initializer(0), | |||||
| dtype=tf.int64, | |||||
| trainable=False) | |||||
| variable_averages = tf.train.ExponentialMovingAverage( | |||||
| 0.997, global_step) | |||||
| self.input_images = tf.placeholder( | self.input_images = tf.placeholder( | ||||
| tf.float32, shape=[1, 1024, 1024, 3], name='input_images') | tf.float32, shape=[1, 1024, 1024, 3], name='input_images') | ||||
| self.output = {} | self.output = {} | ||||
| # detector | |||||
| detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector() | |||||
| all_maps = detector.build_model(self.input_images, is_training=False) | |||||
| # decode local predictions | |||||
| all_nodes, all_links, all_reg = [], [], [] | |||||
| for i, maps in enumerate(all_maps): | |||||
| cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2] | |||||
| reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE) | |||||
| cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2])) | |||||
| lnk_prob_pos = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, :2]) | |||||
| lnk_prob_mut = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, 2:]) | |||||
| lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1) | |||||
| all_nodes.append(cls_prob) | |||||
| all_links.append(lnk_prob) | |||||
| all_reg.append(reg_maps) | |||||
| # decode segments and links | |||||
| image_size = tf.shape(self.input_images)[1:3] | |||||
| segments, group_indices, segment_counts, _ = ops.decode_segments_links_python( | |||||
| image_size, | |||||
| all_nodes, | |||||
| all_links, | |||||
| all_reg, | |||||
| anchor_sizes=list(detector.anchor_sizes)) | |||||
| # combine segments | |||||
| combined_rboxes, combined_counts = ops.combine_segments_python( | |||||
| segments, group_indices, segment_counts) | |||||
| self.output['combined_rboxes'] = combined_rboxes | |||||
| self.output['combined_counts'] = combined_counts | |||||
| with tf.variable_scope('', reuse=tf.AUTO_REUSE): | |||||
| global_step = tf.get_variable( | |||||
| 'global_step', [], | |||||
| initializer=tf.constant_initializer(0), | |||||
| dtype=tf.int64, | |||||
| trainable=False) | |||||
| variable_averages = tf.train.ExponentialMovingAverage( | |||||
| 0.997, global_step) | |||||
| # detector | |||||
| detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector() | |||||
| all_maps = detector.build_model( | |||||
| self.input_images, is_training=False) | |||||
| # decode local predictions | |||||
| all_nodes, all_links, all_reg = [], [], [] | |||||
| for i, maps in enumerate(all_maps): | |||||
| cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2] | |||||
| reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE) | |||||
| cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2])) | |||||
| lnk_prob_pos = tf.nn.softmax( | |||||
| tf.reshape(lnk_maps, [-1, 4])[:, :2]) | |||||
| lnk_prob_mut = tf.nn.softmax( | |||||
| tf.reshape(lnk_maps, [-1, 4])[:, 2:]) | |||||
| lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1) | |||||
| all_nodes.append(cls_prob) | |||||
| all_links.append(lnk_prob) | |||||
| all_reg.append(reg_maps) | |||||
| # decode segments and links | |||||
| image_size = tf.shape(self.input_images)[1:3] | |||||
| segments, group_indices, segment_counts, _ = ops.decode_segments_links_python( | |||||
| image_size, | |||||
| all_nodes, | |||||
| all_links, | |||||
| all_reg, | |||||
| anchor_sizes=list(detector.anchor_sizes)) | |||||
| # combine segments | |||||
| combined_rboxes, combined_counts = ops.combine_segments_python( | |||||
| segments, group_indices, segment_counts) | |||||
| self.output['combined_rboxes'] = combined_rboxes | |||||
| self.output['combined_counts'] = combined_counts | |||||
| with self._session.as_default() as sess: | with self._session.as_default() as sess: | ||||
| logger.info(f'loading model from {model_path}') | logger.info(f'loading model from {model_path}') | ||||
| @@ -1,8 +1,12 @@ | |||||
| import tensorflow as tf | import tensorflow as tf | ||||
| import tf_slim as slim | |||||
| from . import ops, resnet18_v1, resnet_utils | from . import ops, resnet18_v1, resnet_utils | ||||
| if tf.__version__ >= '2.0': | |||||
| import tf_slim as slim | |||||
| else: | |||||
| from tensorflow.contrib import slim | |||||
| if tf.__version__ >= '2.0': | if tf.__version__ >= '2.0': | ||||
| tf = tf.compat.v1 | tf = tf.compat.v1 | ||||
| @@ -30,10 +30,14 @@ ResNet-101 for semantic segmentation into 21 classes: | |||||
| output_stride=16) | output_stride=16) | ||||
| """ | """ | ||||
| import tensorflow as tf | import tensorflow as tf | ||||
| import tf_slim as slim | |||||
| from . import resnet_utils | from . import resnet_utils | ||||
| if tf.__version__ >= '2.0': | |||||
| import tf_slim as slim | |||||
| else: | |||||
| from tensorflow.contrib import slim | |||||
| if tf.__version__ >= '2.0': | if tf.__version__ >= '2.0': | ||||
| tf = tf.compat.v1 | tf = tf.compat.v1 | ||||
| @@ -19,7 +19,11 @@ implementation is more memory efficient. | |||||
| import collections | import collections | ||||
| import tensorflow as tf | import tensorflow as tf | ||||
| import tf_slim as slim | |||||
| if tf.__version__ >= '2.0': | |||||
| import tf_slim as slim | |||||
| else: | |||||
| from tensorflow.contrib import slim | |||||
| if tf.__version__ >= '2.0': | if tf.__version__ >= '2.0': | ||||
| tf = tf.compat.v1 | tf = tf.compat.v1 | ||||
| @@ -1,2 +1,3 @@ | |||||
| from .image_captioning_pipeline import ImageCaptionPipeline | from .image_captioning_pipeline import ImageCaptionPipeline | ||||
| from .multi_modal_embedding_pipeline import MultiModalEmbeddingPipeline | from .multi_modal_embedding_pipeline import MultiModalEmbeddingPipeline | ||||
| from .visual_question_answering_pipeline import VisualQuestionAnsweringPipeline | |||||
| @@ -0,0 +1,65 @@ | |||||
| from typing import Any, Dict, Optional, Union | |||||
| import torch | |||||
| from ...metainfo import Pipelines | |||||
| from ...models import Model | |||||
| from ...models.multi_modal import MPlugForVisualQuestionAnswering | |||||
| from ...preprocessors import MPlugVisualQuestionAnsweringPreprocessor | |||||
| from ...utils.constant import Tasks | |||||
| from ..base import Pipeline, Tensor | |||||
| from ..builder import PIPELINES | |||||
| __all__ = ['VisualQuestionAnsweringPipeline'] | |||||
| @PIPELINES.register_module( | |||||
| Tasks.visual_question_answering, | |||||
| module_name=Pipelines.visual_question_answering) | |||||
| class VisualQuestionAnsweringPipeline(Pipeline): | |||||
| def __init__(self, | |||||
| model: Union[MPlugForVisualQuestionAnswering, str], | |||||
| preprocessor: Optional[ | |||||
| MPlugVisualQuestionAnsweringPreprocessor] = None, | |||||
| **kwargs): | |||||
| """use `model` and `preprocessor` to create a visual question answering pipeline for prediction | |||||
| Args: | |||||
| model (MPlugForVisualQuestionAnswering): a model instance | |||||
| preprocessor (MPlugVisualQuestionAnsweringPreprocessor): a preprocessor instance | |||||
| """ | |||||
| model = model if isinstance( | |||||
| model, | |||||
| MPlugForVisualQuestionAnswering) else Model.from_pretrained(model) | |||||
| if preprocessor is None: | |||||
| preprocessor = MPlugVisualQuestionAnsweringPreprocessor( | |||||
| model.model_dir) | |||||
| model.eval() | |||||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||||
| self.tokenizer = model.tokenizer | |||||
| def forward(self, inputs: Dict[str, Any], | |||||
| **forward_params) -> Dict[str, Any]: | |||||
| with torch.no_grad(): | |||||
| return super().forward(inputs, **forward_params) | |||||
| def postprocess(self, inputs: Dict[str, Tensor], | |||||
| **postprocess_params) -> Dict[str, str]: | |||||
| """process the prediction results | |||||
| Args: | |||||
| inputs (Dict[str, Any]): the model outputs returned by forward() |||||
| Returns: | |||||
| Dict[str, str]: the prediction results | |||||
| """ | |||||
| replace_tokens_bert = (('[unused0]', ''), ('[PAD]', ''), | |||||
| ('[unused1]', ''), (r' +', ' '), ('[SEP]', ''), | |||||
| ('[unused2]', ''), ('[CLS]', ''), ('[UNK]', '')) | |||||
| pred_string = self.tokenizer.decode(inputs[0][0]) | |||||
| for _old, _new in replace_tokens_bert: | |||||
| pred_string = pred_string.replace(_old, _new) | |||||
| pred_string = pred_string.strip() |||||
| return {'answer': pred_string} | |||||
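Review note: end to end, the new pipeline can be driven through the pipeline() factory; a usage sketch with the model id and inputs taken from the test file added later in this change:

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    pipeline_vqa = pipeline(
        Tasks.visual_question_answering,
        model='damo/mplug_visual-question-answering_coco_large_en')
    result = pipeline_vqa({
        'image': 'data/test/images/image_mplug_vqa.jpg',
        'question': 'What is the woman doing?',
    })
    print(result['answer'])  # postprocess() returns {'answer': <decoded string>}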
| @@ -1,7 +1,7 @@ | |||||
| from typing import Any, Dict | from typing import Any, Dict | ||||
| from ...metainfo import Pipelines | from ...metainfo import Pipelines | ||||
| from ...models.nlp import DialogStateTrackingModel | |||||
| from ...models import SpaceForDialogStateTrackingModel | |||||
| from ...preprocessors import DialogStateTrackingPreprocessor | from ...preprocessors import DialogStateTrackingPreprocessor | ||||
| from ...utils.constant import Tasks | from ...utils.constant import Tasks | ||||
| from ..base import Pipeline | from ..base import Pipeline | ||||
| @@ -14,7 +14,7 @@ __all__ = ['DialogStateTrackingPipeline'] | |||||
| Tasks.dialog_state_tracking, module_name=Pipelines.dialog_state_tracking) | Tasks.dialog_state_tracking, module_name=Pipelines.dialog_state_tracking) | ||||
| class DialogStateTrackingPipeline(Pipeline): | class DialogStateTrackingPipeline(Pipeline): | ||||
| def __init__(self, model: DialogStateTrackingModel, | |||||
| def __init__(self, model: SpaceForDialogStateTrackingModel, | |||||
| preprocessor: DialogStateTrackingPreprocessor, **kwargs): | preprocessor: DialogStateTrackingPreprocessor, **kwargs): | ||||
| """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction | """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction | ||||
| @@ -52,7 +52,7 @@ class DialogStateTrackingPipeline(Pipeline): | |||||
| _outputs[5], unique_ids, input_ids_unmasked, | _outputs[5], unique_ids, input_ids_unmasked, | ||||
| values, inform, prefix, ds) | values, inform, prefix, ds) | ||||
| return ds | |||||
| return {'dialog_states': ds} | |||||
| def predict_and_format(config, tokenizer, features, per_slot_class_logits, | def predict_and_format(config, tokenizer, features, per_slot_class_logits, | ||||
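Review note: returning {'dialog_states': ds} instead of the bare ds matches the new TASK_OUTPUTS entry for Tasks.dialog_state_tracking below, so callers now index the result; in the spirit of the updated test (variable names as set up there):

    result = dst_pipeline({'utter': utter, 'history_states': history_states})
    ds = result['dialog_states']  # previously the pipeline returned ds directly
    history_states.extend([ds, {}])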
| @@ -1,3 +1,4 @@ | |||||
| import os | |||||
| from typing import Any, Dict, Optional, Union | from typing import Any, Dict, Optional, Union | ||||
| import torch | import torch | ||||
| @@ -6,11 +7,13 @@ from ...metainfo import Pipelines | |||||
| from ...models import Model | from ...models import Model | ||||
| from ...models.nlp.masked_language_model import MaskedLanguageModelBase | from ...models.nlp.masked_language_model import MaskedLanguageModelBase | ||||
| from ...preprocessors import FillMaskPreprocessor | from ...preprocessors import FillMaskPreprocessor | ||||
| from ...utils.constant import Tasks | |||||
| from ...utils.config import Config | |||||
| from ...utils.constant import ModelFile, Tasks | |||||
| from ..base import Pipeline, Tensor | from ..base import Pipeline, Tensor | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| __all__ = ['FillMaskPipeline'] | __all__ = ['FillMaskPipeline'] | ||||
| _type_map = {'veco': 'roberta', 'sbert': 'bert'} | |||||
| @PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask) | @PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask) | ||||
| @@ -29,7 +32,6 @@ class FillMaskPipeline(Pipeline): | |||||
| """ | """ | ||||
| fill_mask_model = model if isinstance( | fill_mask_model = model if isinstance( | ||||
| model, MaskedLanguageModelBase) else Model.from_pretrained(model) | model, MaskedLanguageModelBase) else Model.from_pretrained(model) | ||||
| assert fill_mask_model.config is not None | |||||
| if preprocessor is None: | if preprocessor is None: | ||||
| preprocessor = FillMaskPreprocessor( | preprocessor = FillMaskPreprocessor( | ||||
| @@ -41,11 +43,13 @@ class FillMaskPipeline(Pipeline): | |||||
| model=fill_mask_model, preprocessor=preprocessor, **kwargs) | model=fill_mask_model, preprocessor=preprocessor, **kwargs) | ||||
| self.preprocessor = preprocessor | self.preprocessor = preprocessor | ||||
| self.config = Config.from_file( | |||||
| os.path.join(fill_mask_model.model_dir, ModelFile.CONFIGURATION)) | |||||
| self.tokenizer = preprocessor.tokenizer | self.tokenizer = preprocessor.tokenizer | ||||
| self.mask_id = {'veco': 250001, 'sbert': 103} | |||||
| self.mask_id = {'roberta': 250001, 'bert': 103} | |||||
| self.rep_map = { | self.rep_map = { | ||||
| 'sbert': { | |||||
| 'bert': { | |||||
| '[unused0]': '', | '[unused0]': '', | ||||
| '[PAD]': '', | '[PAD]': '', | ||||
| '[unused1]': '', | '[unused1]': '', | ||||
| @@ -55,7 +59,7 @@ class FillMaskPipeline(Pipeline): | |||||
| '[CLS]': '', | '[CLS]': '', | ||||
| '[UNK]': '' | '[UNK]': '' | ||||
| }, | }, | ||||
| 'veco': { | |||||
| 'roberta': { | |||||
| r' +': ' ', | r' +': ' ', | ||||
| '<mask>': '<q>', | '<mask>': '<q>', | ||||
| '<pad>': '', | '<pad>': '', | ||||
| @@ -84,7 +88,9 @@ class FillMaskPipeline(Pipeline): | |||||
| input_ids = inputs['input_ids'].detach().numpy() | input_ids = inputs['input_ids'].detach().numpy() | ||||
| pred_ids = np.argmax(logits, axis=-1) | pred_ids = np.argmax(logits, axis=-1) | ||||
| model_type = self.model.config.model_type | model_type = self.model.config.model_type | ||||
| rst_ids = np.where(input_ids == self.mask_id[model_type], pred_ids, | |||||
| process_type = model_type if model_type in self.mask_id else _type_map[ | |||||
| model_type] | |||||
| rst_ids = np.where(input_ids == self.mask_id[process_type], pred_ids, | |||||
| input_ids) | input_ids) | ||||
| def rep_tokens(string, rep_map): | def rep_tokens(string, rep_map): | ||||
| @@ -94,14 +100,12 @@ class FillMaskPipeline(Pipeline): | |||||
| pred_strings = [] | pred_strings = [] | ||||
| for ids in rst_ids: # batch | for ids in rst_ids: # batch | ||||
| # TODO vocab size is not stable | |||||
| if self.model.config.vocab_size == 21128: # zh bert | |||||
| if 'language' in self.config.model and self.config.model.language == 'zh': | |||||
| pred_string = self.tokenizer.convert_ids_to_tokens(ids) | pred_string = self.tokenizer.convert_ids_to_tokens(ids) | ||||
| pred_string = ''.join(pred_string) | pred_string = ''.join(pred_string) | ||||
| else: | else: | ||||
| pred_string = self.tokenizer.decode(ids) | pred_string = self.tokenizer.decode(ids) | ||||
| pred_string = rep_tokens(pred_string, self.rep_map[model_type]) | |||||
| pred_string = rep_tokens(pred_string, self.rep_map[process_type]) | |||||
| pred_strings.append(pred_string) | pred_strings.append(pred_string) | ||||
| return {'text': pred_strings} | return {'text': pred_strings} | ||||
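Review note: the _type_map indirection lets a checkpoint report either a ModelScope-specific model_type ('veco', 'sbert') or a plain HuggingFace one ('roberta', 'bert') and still resolve to the right mask token id and replacement table. The lookup in isolation:

    _type_map = {'veco': 'roberta', 'sbert': 'bert'}
    mask_id = {'roberta': 250001, 'bert': 103}

    def resolve(model_type: str) -> str:
        # pass known types through; map ModelScope aliases; unknown types
        # raise KeyError, surfacing unsupported checkpoints early
        return model_type if model_type in mask_id else _type_map[model_type]

    assert resolve('sbert') == 'bert'
    assert mask_id[resolve('veco')] == 250001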
| @@ -153,5 +153,43 @@ TASK_OUTPUTS = { | |||||
| # { | # { | ||||
| # "image": np.ndarray with shape [height, width, 3] | # "image": np.ndarray with shape [height, width, 3] | ||||
| # } | # } | ||||
| Tasks.text_to_image_synthesis: ['image'] | |||||
| Tasks.text_to_image_synthesis: ['image'], | |||||
| Tasks.dialog_modeling: [], | |||||
| Tasks.dialog_intent_prediction: [], | |||||
| # { | |||||
| # "dialog_states": { | |||||
| # "taxi-leaveAt": "none", | |||||
| # "taxi-destination": "none", | |||||
| # "taxi-departure": "none", | |||||
| # "taxi-arriveBy": "none", | |||||
| # "restaurant-book_people": "none", | |||||
| # "restaurant-book_day": "none", | |||||
| # "restaurant-book_time": "none", | |||||
| # "restaurant-food": "none", | |||||
| # "restaurant-pricerange": "none", | |||||
| # "restaurant-name": "none", | |||||
| # "restaurant-area": "none", | |||||
| # "hotel-book_people": "none", | |||||
| # "hotel-book_day": "none", | |||||
| # "hotel-book_stay": "none", | |||||
| # "hotel-name": "none", | |||||
| # "hotel-area": "none", | |||||
| # "hotel-parking": "none", | |||||
| # "hotel-pricerange": "cheap", | |||||
| # "hotel-stars": "none", | |||||
| # "hotel-internet": "none", | |||||
| # "hotel-type": "true", | |||||
| # "attraction-type": "none", | |||||
| # "attraction-name": "none", | |||||
| # "attraction-area": "none", | |||||
| # "train-book_people": "none", | |||||
| # "train-leaveAt": "none", | |||||
| # "train-destination": "none", | |||||
| # "train-day": "none", | |||||
| # "train-arriveBy": "none", | |||||
| # "train-departure": "none" | |||||
| # } | |||||
| # } | |||||
| Tasks.dialog_state_tracking: ['dialog_states'] | |||||
| } | } | ||||
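Review note: with the new entries, checking a pipeline's output shape reduces to a key lookup in TASK_OUTPUTS; a sketch (the import path of TASK_OUTPUTS is assumed here):

    from modelscope.outputs import TASK_OUTPUTS  # import path assumed
    from modelscope.utils.constant import Tasks

    expected = TASK_OUTPUTS[Tasks.dialog_state_tracking]  # ['dialog_states']
    result = {'dialog_states': {'hotel-pricerange': 'cheap'}}
    assert all(key in result for key in expected)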
| @@ -6,7 +6,7 @@ from .base import Preprocessor | |||||
| from .common import Compose | from .common import Compose | ||||
| from .image import LoadImage, load_image | from .image import LoadImage, load_image | ||||
| from .kws import WavToLists | from .kws import WavToLists | ||||
| from .multi_modal import OfaImageCaptionPreprocessor | |||||
| from .multi_modal import * # noqa F403 | |||||
| from .nlp import * # noqa F403 | from .nlp import * # noqa F403 | ||||
| from .space.dialog_intent_prediction_preprocessor import * # noqa F403 | from .space.dialog_intent_prediction_preprocessor import * # noqa F403 | ||||
| from .space.dialog_modeling_preprocessor import * # noqa F403 | from .space.dialog_modeling_preprocessor import * # noqa F403 | ||||
| @@ -16,6 +16,7 @@ from .image import load_image | |||||
| __all__ = [ | __all__ = [ | ||||
| 'OfaImageCaptionPreprocessor', | 'OfaImageCaptionPreprocessor', | ||||
| 'MPlugVisualQuestionAnsweringPreprocessor', | |||||
| ] | ] | ||||
| @@ -110,3 +111,47 @@ class OfaImageCaptionPreprocessor(Preprocessor): | |||||
| } | } | ||||
| } | } | ||||
| return sample | return sample | ||||
| @PREPROCESSORS.register_module( | |||||
| Fields.multi_modal, | |||||
| module_name=Preprocessors.mplug_visual_question_answering) | |||||
| class MPlugVisualQuestionAnsweringPreprocessor(Preprocessor): | |||||
| def __init__(self, model_dir: str, *args, **kwargs): | |||||
| """preprocess the data via 'bert-base-uncased' tokenizer and configuration | |||||
| """ | |||||
| super().__init__(*args, **kwargs) | |||||
| # tokenizer | |||||
| from transformers import AutoTokenizer | |||||
| self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') | |||||
| # load configuration | |||||
| from sofa.models.mplug import CONFIG_NAME, MPlugConfig | |||||
| config = MPlugConfig.from_yaml_file(osp.join(model_dir, CONFIG_NAME)) | |||||
| # Initialize transform | |||||
| from torchvision import transforms | |||||
| mean = (0.48145466, 0.4578275, 0.40821073) | |||||
| std = (0.26862954, 0.26130258, 0.27577711) | |||||
| self.patch_resize_transform = transforms.Compose([ | |||||
| transforms.Resize((config.image_res, config.image_res), | |||||
| interpolation=Image.BICUBIC), | |||||
| transforms.ToTensor(), | |||||
| transforms.Normalize(mean=mean, std=std), | |||||
| ]) | |||||
| def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: | |||||
| image, question = data['image'], data['question'] | |||||
| image = Image.open(image).convert('RGB') if isinstance(image, | |||||
| str) else image | |||||
| image = self.patch_resize_transform(image) | |||||
| image = torch.stack([image], dim=0) | |||||
| question = self.tokenizer([question.lower()], | |||||
| padding='longest', | |||||
| return_tensors='pt') | |||||
| return {'image': image, 'question': question, 'train': False} | |||||
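Review note: standalone, the preprocessor turns an image path plus a question string into model-ready tensors; a sketch, with model_dir obtained via snapshot_download as in the tests below:

    preprocessor = MPlugVisualQuestionAnsweringPreprocessor(model_dir)
    inputs = preprocessor({
        'image': 'data/test/images/image_mplug_vqa.jpg',
        'question': 'What is the woman doing?',
    })
    # inputs['image']: float tensor of shape [1, 3, image_res, image_res]
    # inputs['question']: tokenizer output with input_ids / attention_mask
    # inputs['train']: False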
| @@ -326,14 +326,17 @@ class FillMaskPreprocessor(Preprocessor): | |||||
| model_dir (str): model path | model_dir (str): model path | ||||
| """ | """ | ||||
| super().__init__(*args, **kwargs) | super().__init__(*args, **kwargs) | ||||
| from sofa.utils.backend import AutoTokenizer | |||||
| self.model_dir = model_dir | self.model_dir = model_dir | ||||
| self.first_sequence: str = kwargs.pop('first_sequence', | self.first_sequence: str = kwargs.pop('first_sequence', | ||||
| 'first_sequence') | 'first_sequence') | ||||
| self.sequence_length = kwargs.pop('sequence_length', 128) | self.sequence_length = kwargs.pop('sequence_length', 128) | ||||
| self.tokenizer = AutoTokenizer.from_pretrained( | |||||
| model_dir, use_fast=False) | |||||
| try: | |||||
| from transformers import AutoTokenizer | |||||
| self.tokenizer = AutoTokenizer.from_pretrained(model_dir) | |||||
| except KeyError: | |||||
| from sofa.utils.backend import AutoTokenizer | |||||
| self.tokenizer = AutoTokenizer.from_pretrained( | |||||
| model_dir, use_fast=False) | |||||
| @type_assert(object, str) | @type_assert(object, str) | ||||
| def __call__(self, data: str) -> Dict[str, Any]: | def __call__(self, data: str) -> Dict[str, Any]: | ||||
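Review note: the tokenizer is now resolved with a fallback: standard checkpoints load through transformers.AutoTokenizer, and only model types unknown to transformers, which raise KeyError from its config mapping, fall back to the sofa backend. The guard in isolation:

    try:
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
    except KeyError:
        # model_type is missing from transformers' mapping -> sofa backend
        from sofa.utils.backend import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False)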
| @@ -1,4 +1,5 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | # Copyright (c) Alibaba, Inc. and its affiliates. | ||||
| import enum | |||||
| class Fields(object): | class Fields(object): | ||||
| @@ -52,6 +53,7 @@ class Tasks(object): | |||||
| fill_mask = 'fill-mask' | fill_mask = 'fill-mask' | ||||
| summarization = 'summarization' | summarization = 'summarization' | ||||
| question_answering = 'question-answering' | question_answering = 'question-answering' | ||||
| zero_shot_classification = 'zero-shot-classification' | |||||
| # audio tasks | # audio tasks | ||||
| auto_speech_recognition = 'auto-speech-recognition' | auto_speech_recognition = 'auto-speech-recognition' | ||||
| @@ -64,6 +66,7 @@ class Tasks(object): | |||||
| visual_grounding = 'visual-grounding' | visual_grounding = 'visual-grounding' | ||||
| text_to_image_synthesis = 'text-to-image-synthesis' | text_to_image_synthesis = 'text-to-image-synthesis' | ||||
| multi_modal_embedding = 'multi-modal-embedding' | multi_modal_embedding = 'multi-modal-embedding' | ||||
| visual_question_answering = 'visual-question-answering' | |||||
| class InputFields(object): | class InputFields(object): | ||||
| @@ -74,13 +77,20 @@ class InputFields(object): | |||||
| audio = 'audio' | audio = 'audio' | ||||
| class Hubs(object): | |||||
| class Hubs(enum.Enum): | |||||
| """ Source from which an entity (such as a Dataset or Model) is stored | """ Source from which an entity (such as a Dataset or Model) is stored | ||||
| """ | """ | ||||
| modelscope = 'modelscope' | modelscope = 'modelscope' | ||||
| huggingface = 'huggingface' | huggingface = 'huggingface' | ||||
| class DownloadMode(enum.Enum): | |||||
| """ How to treat existing datasets | |||||
| """ | |||||
| REUSE_DATASET_IF_EXISTS = 'reuse_dataset_if_exists' | |||||
| FORCE_REDOWNLOAD = 'force_redownload' | |||||
| class ModelFile(object): | class ModelFile(object): | ||||
| CONFIGURATION = 'configuration.json' | CONFIGURATION = 'configuration.json' | ||||
| README = 'README.md' | README = 'README.md' | ||||
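Review note: turning Hubs into an enum.Enum (and adding DownloadMode) gives dataset loading typed flags instead of bare strings; a sketch based on the dataset tests below (the MsDataset import path and the download_mode parameter name are assumptions):

    from modelscope.msdatasets import MsDataset  # import path assumed
    from modelscope.utils.constant import DownloadMode, Hubs

    ds = MsDataset.load(
        'squad',
        namespace='damotest',
        split='train',
        hub=Hubs.modelscope,  # enum member rather than the string 'modelscope'
        download_mode=DownloadMode.FORCE_REDOWNLOAD,  # parameter name assumed
    )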
| @@ -31,9 +31,10 @@ def create_model_if_not_exist( | |||||
| else: | else: | ||||
| api.create_model( | api.create_model( | ||||
| model_id=model_id, | model_id=model_id, | ||||
| chinese_name=chinese_name, | |||||
| visibility=visibility, | visibility=visibility, | ||||
| license=license) | |||||
| license=license, | |||||
| chinese_name=chinese_name, | |||||
| ) | |||||
| print(f'model {model_id} successfully created.') | print(f'model {model_id} successfully created.') | ||||
| return True | return True | ||||
| @@ -1 +1 @@ | |||||
| __version__ = '0.1.1' | |||||
| __version__ = '0.2.1' | |||||
| @@ -16,6 +16,7 @@ protobuf>3,<=3.20 | |||||
| ptflops | ptflops | ||||
| PyWavelets>=1.0.0 | PyWavelets>=1.0.0 | ||||
| scikit-learn | scikit-learn | ||||
| SoundFile>0.10 | |||||
| sox | sox | ||||
| tensorboard | tensorboard | ||||
| tensorflow==1.15.* | tensorflow==1.15.* | ||||
| @@ -1,5 +1,3 @@ | |||||
| # https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz | |||||
| http://ait-public.oss-cn-hangzhou-zmf.aliyuncs.com/jizhu/en_core_web_sm-2.3.1.tar.gz | |||||
| https://alinlp.alibaba-inc.com/pypi/sofa-1.0.3-py3-none-any.whl | |||||
| https://alinlp.alibaba-inc.com/pypi/sofa-1.0.5-py3-none-any.whl | |||||
| https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz | |||||
| spacy>=2.3.5 | spacy>=2.3.5 | ||||
| # python -m spacy download en_core_web_sm | |||||
| @@ -3,6 +3,7 @@ import os | |||||
| import tempfile | import tempfile | ||||
| import unittest | import unittest | ||||
| import uuid | import uuid | ||||
| from shutil import rmtree | |||||
| from modelscope.hub.api import HubApi, ModelScopeConfig | from modelscope.hub.api import HubApi, ModelScopeConfig | ||||
| from modelscope.hub.constants import Licenses, ModelVisibility | from modelscope.hub.constants import Licenses, ModelVisibility | ||||
| @@ -23,7 +24,6 @@ download_model_file_name = 'test.bin' | |||||
| class HubOperationTest(unittest.TestCase): | class HubOperationTest(unittest.TestCase): | ||||
| def setUp(self): | def setUp(self): | ||||
| self.old_cwd = os.getcwd() | |||||
| self.api = HubApi() | self.api = HubApi() | ||||
| # note this is temporary before official account management is ready | # note this is temporary before official account management is ready | ||||
| self.api.login(USER_NAME, PASSWORD) | self.api.login(USER_NAME, PASSWORD) | ||||
| @@ -31,19 +31,18 @@ class HubOperationTest(unittest.TestCase): | |||||
| self.model_id = '%s/%s' % (model_org, self.model_name) | self.model_id = '%s/%s' % (model_org, self.model_name) | ||||
| self.api.create_model( | self.api.create_model( | ||||
| model_id=self.model_id, | model_id=self.model_id, | ||||
| chinese_name=model_chinese_name, | |||||
| visibility=ModelVisibility.PUBLIC, | visibility=ModelVisibility.PUBLIC, | ||||
| license=Licenses.APACHE_V2) | |||||
| license=Licenses.APACHE_V2, | |||||
| chinese_name=model_chinese_name, | |||||
| ) | |||||
| temporary_dir = tempfile.mkdtemp() | temporary_dir = tempfile.mkdtemp() | ||||
| self.model_dir = os.path.join(temporary_dir, self.model_name) | self.model_dir = os.path.join(temporary_dir, self.model_name) | ||||
| repo = Repository(self.model_dir, clone_from=self.model_id) | repo = Repository(self.model_dir, clone_from=self.model_id) | ||||
| os.chdir(self.model_dir) | |||||
| os.system("echo 'testtest'>%s" | os.system("echo 'testtest'>%s" | ||||
| % os.path.join(self.model_dir, 'test.bin')) | |||||
| repo.push('add model', all_files=True) | |||||
| % os.path.join(self.model_dir, download_model_file_name)) | |||||
| repo.push('add model') | |||||
| def tearDown(self): | def tearDown(self): | ||||
| os.chdir(self.old_cwd) | |||||
| self.api.delete_model(model_id=self.model_id) | self.api.delete_model(model_id=self.model_id) | ||||
| def test_model_repo_creation(self): | def test_model_repo_creation(self): | ||||
| @@ -79,6 +78,35 @@ class HubOperationTest(unittest.TestCase): | |||||
| mdtime2 = os.path.getmtime(downloaded_file_path) | mdtime2 = os.path.getmtime(downloaded_file_path) | ||||
| assert mdtime1 == mdtime2 | assert mdtime1 == mdtime2 | ||||
| def test_download_public_without_login(self): | |||||
| rmtree(ModelScopeConfig.path_credential) | |||||
| snapshot_path = snapshot_download(model_id=self.model_id) | |||||
| downloaded_file_path = os.path.join(snapshot_path, | |||||
| download_model_file_name) | |||||
| assert os.path.exists(downloaded_file_path) | |||||
| temporary_dir = tempfile.mkdtemp() | |||||
| downloaded_file = model_file_download( | |||||
| model_id=self.model_id, | |||||
| file_path=download_model_file_name, | |||||
| cache_dir=temporary_dir) | |||||
| assert os.path.exists(downloaded_file) | |||||
| self.api.login(USER_NAME, PASSWORD) | |||||
| def test_snapshot_delete_download_cache_file(self): | |||||
| snapshot_path = snapshot_download(model_id=self.model_id) | |||||
| downloaded_file_path = os.path.join(snapshot_path, | |||||
| download_model_file_name) | |||||
| assert os.path.exists(downloaded_file_path) | |||||
| os.remove(downloaded_file_path) | |||||
| # download again in cache | |||||
| file_download_path = model_file_download( | |||||
| model_id=self.model_id, file_path='README.md') | |||||
| assert os.path.exists(file_download_path) | |||||
| # deleted file need download again | |||||
| file_download_path = model_file_download( | |||||
| model_id=self.model_id, file_path=download_model_file_name) | |||||
| assert os.path.exists(file_download_path) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| unittest.main() | unittest.main() | ||||
| @@ -0,0 +1,85 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||||
| import os | |||||
| import tempfile | |||||
| import unittest | |||||
| import uuid | |||||
| from requests.exceptions import HTTPError | |||||
| from modelscope.hub.api import HubApi | |||||
| from modelscope.hub.constants import Licenses, ModelVisibility | |||||
| from modelscope.hub.errors import GitError | |||||
| from modelscope.hub.file_download import model_file_download | |||||
| from modelscope.hub.repository import Repository | |||||
| from modelscope.hub.snapshot_download import snapshot_download | |||||
| from modelscope.utils.constant import ModelFile | |||||
| USER_NAME = 'maasadmin' | |||||
| PASSWORD = '12345678' | |||||
| USER_NAME2 = 'sdkdev' | |||||
| model_chinese_name = '达摩卡通化模型' | |||||
| model_org = 'unittest' | |||||
| class HubPrivateFileDownloadTest(unittest.TestCase): | |||||
| def setUp(self): | |||||
| self.old_cwd = os.getcwd() | |||||
| self.api = HubApi() | |||||
| # note this is temporary before official account management is ready | |||||
| self.token, _ = self.api.login(USER_NAME, PASSWORD) | |||||
| self.model_name = uuid.uuid4().hex | |||||
| self.model_id = '%s/%s' % (model_org, self.model_name) | |||||
| self.api.create_model( | |||||
| model_id=self.model_id, | |||||
| visibility=ModelVisibility.PRIVATE, # 1-private, 5-public | |||||
| license=Licenses.APACHE_V2, | |||||
| chinese_name=model_chinese_name, | |||||
| ) | |||||
| def tearDown(self): | |||||
| os.chdir(self.old_cwd) | |||||
| self.api.delete_model(model_id=self.model_id) | |||||
| def test_snapshot_download_private_model(self): | |||||
| snapshot_path = snapshot_download(self.model_id) | |||||
| assert os.path.exists(os.path.join(snapshot_path, ModelFile.README)) | |||||
| def test_snapshot_download_private_model_no_permission(self): | |||||
| self.token, _ = self.api.login(USER_NAME2, PASSWORD) | |||||
| with self.assertRaises(HTTPError): | |||||
| snapshot_download(self.model_id) | |||||
| self.api.login(USER_NAME, PASSWORD) | |||||
| def test_download_file_private_model(self): | |||||
| file_path = model_file_download(self.model_id, ModelFile.README) | |||||
| assert os.path.exists(file_path) | |||||
| def test_download_file_private_model_no_permission(self): | |||||
| self.token, _ = self.api.login(USER_NAME2, PASSWORD) | |||||
| with self.assertRaises(HTTPError): | |||||
| model_file_download(self.model_id, ModelFile.README) | |||||
| self.api.login(USER_NAME, PASSWORD) | |||||
| def test_snapshot_download_local_only(self): | |||||
| with self.assertRaises(ValueError): | |||||
| snapshot_download(self.model_id, local_files_only=True) | |||||
| snapshot_path = snapshot_download(self.model_id) | |||||
| assert os.path.exists(os.path.join(snapshot_path, ModelFile.README)) | |||||
| snapshot_path = snapshot_download(self.model_id, local_files_only=True) | |||||
| assert os.path.exists(snapshot_path) | |||||
| def test_file_download_local_only(self): | |||||
| with self.assertRaises(ValueError): | |||||
| model_file_download( | |||||
| self.model_id, ModelFile.README, local_files_only=True) | |||||
| file_path = model_file_download(self.model_id, ModelFile.README) | |||||
| assert os.path.exists(file_path) | |||||
| file_path = model_file_download( | |||||
| self.model_id, ModelFile.README, local_files_only=True) | |||||
| assert os.path.exists(file_path) | |||||
| if __name__ == '__main__': | |||||
| unittest.main() | |||||
| @@ -5,6 +5,7 @@ import unittest | |||||
| import uuid | import uuid | ||||
| from modelscope.hub.api import HubApi | from modelscope.hub.api import HubApi | ||||
| from modelscope.hub.constants import Licenses, ModelVisibility | |||||
| from modelscope.hub.errors import GitError | from modelscope.hub.errors import GitError | ||||
| from modelscope.hub.repository import Repository | from modelscope.hub.repository import Repository | ||||
| @@ -16,9 +17,6 @@ model_chinese_name = '达摩卡通化模型' | |||||
| model_org = 'unittest' | model_org = 'unittest' | ||||
| DEFAULT_GIT_PATH = 'git' | DEFAULT_GIT_PATH = 'git' | ||||
| sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx' | |||||
| download_model_file_name = 'mnist-12.onnx' | |||||
| class HubPrivateRepositoryTest(unittest.TestCase): | class HubPrivateRepositoryTest(unittest.TestCase): | ||||
| @@ -31,9 +29,10 @@ class HubPrivateRepositoryTest(unittest.TestCase): | |||||
| self.model_id = '%s/%s' % (model_org, self.model_name) | self.model_id = '%s/%s' % (model_org, self.model_name) | ||||
| self.api.create_model( | self.api.create_model( | ||||
| model_id=self.model_id, | model_id=self.model_id, | ||||
| visibility=ModelVisibility.PRIVATE, # 1-private, 5-public | |||||
| license=Licenses.APACHE_V2, | |||||
| chinese_name=model_chinese_name, | chinese_name=model_chinese_name, | ||||
| visibility=1, # 1-private, 5-public | |||||
| license='apache-2.0') | |||||
| ) | |||||
| def tearDown(self): | def tearDown(self): | ||||
| self.api.login(USER_NAME, PASSWORD) | self.api.login(USER_NAME, PASSWORD) | ||||
| @@ -2,7 +2,6 @@ | |||||
| import os | import os | ||||
| import shutil | import shutil | ||||
| import tempfile | import tempfile | ||||
| import time | |||||
| import unittest | import unittest | ||||
| import uuid | import uuid | ||||
| from os.path import expanduser | from os.path import expanduser | ||||
| @@ -10,6 +9,7 @@ from os.path import expanduser | |||||
| from requests import delete | from requests import delete | ||||
| from modelscope.hub.api import HubApi | from modelscope.hub.api import HubApi | ||||
| from modelscope.hub.constants import Licenses, ModelVisibility | |||||
| from modelscope.hub.errors import NotExistError | from modelscope.hub.errors import NotExistError | ||||
| from modelscope.hub.file_download import model_file_download | from modelscope.hub.file_download import model_file_download | ||||
| from modelscope.hub.repository import Repository | from modelscope.hub.repository import Repository | ||||
| @@ -55,9 +55,10 @@ class HubRepositoryTest(unittest.TestCase): | |||||
| self.model_id = '%s/%s' % (model_org, self.model_name) | self.model_id = '%s/%s' % (model_org, self.model_name) | ||||
| self.api.create_model( | self.api.create_model( | ||||
| model_id=self.model_id, | model_id=self.model_id, | ||||
| visibility=ModelVisibility.PUBLIC, # 1-private, 5-public | |||||
| license=Licenses.APACHE_V2, | |||||
| chinese_name=model_chinese_name, | chinese_name=model_chinese_name, | ||||
| visibility=5, # 1-private, 5-public | |||||
| license='apache-2.0') | |||||
| ) | |||||
| temporary_dir = tempfile.mkdtemp() | temporary_dir = tempfile.mkdtemp() | ||||
| self.model_dir = os.path.join(temporary_dir, self.model_name) | self.model_dir = os.path.join(temporary_dir, self.model_name) | ||||
| @@ -81,27 +82,12 @@ class HubRepositoryTest(unittest.TestCase): | |||||
| os.chdir(self.model_dir) | os.chdir(self.model_dir) | ||||
| os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py')) | os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py')) | ||||
| os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py')) | os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py')) | ||||
| repo.push('test', all_files=True) | |||||
| repo.push('test') | |||||
| add1 = model_file_download(self.model_id, 'add1.py') | add1 = model_file_download(self.model_id, 'add1.py') | ||||
| assert os.path.exists(add1) | assert os.path.exists(add1) | ||||
| add2 = model_file_download(self.model_id, 'add2.py') | add2 = model_file_download(self.model_id, 'add2.py') | ||||
| assert os.path.exists(add2) | assert os.path.exists(add2) | ||||
| def test_push_files(self): | |||||
| repo = Repository(self.model_dir, clone_from=self.model_id) | |||||
| assert os.path.exists(os.path.join(self.model_dir, 'README.md')) | |||||
| os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py')) | |||||
| os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py')) | |||||
| os.system("echo '333'>%s" % os.path.join(self.model_dir, 'add3.py')) | |||||
| repo.push('test', files=['add1.py', 'add2.py'], all_files=False) | |||||
| add1 = model_file_download(self.model_id, 'add1.py') | |||||
| assert os.path.exists(add1) | |||||
| add2 = model_file_download(self.model_id, 'add2.py') | |||||
| assert os.path.exists(add2) | |||||
| with self.assertRaises(NotExistError) as cm: | |||||
| model_file_download(self.model_id, 'add3.py') | |||||
| print(cm.exception) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| unittest.main() | unittest.main() | ||||
| @@ -32,11 +32,12 @@ class ImgPreprocessor(Preprocessor): | |||||
| class MsDatasetTest(unittest.TestCase): | class MsDatasetTest(unittest.TestCase): | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_ds_basic(self): | def test_ds_basic(self): | ||||
| ms_ds_full = MsDataset.load('squad') | |||||
| ms_ds_full = MsDataset.load('squad', namespace='damotest') | |||||
| ms_ds_full_hf = hfdata.load_dataset('squad') | ms_ds_full_hf = hfdata.load_dataset('squad') | ||||
| ms_ds_train = MsDataset.load('squad', split='train') | |||||
| ms_ds_train = MsDataset.load( | |||||
| 'squad', namespace='damotest', split='train') | |||||
| ms_ds_train_hf = hfdata.load_dataset('squad', split='train') | ms_ds_train_hf = hfdata.load_dataset('squad', split='train') | ||||
| ms_image_train = MsDataset.from_hf_dataset( | ms_image_train = MsDataset.from_hf_dataset( | ||||
| hfdata.load_dataset('beans', split='train')) | hfdata.load_dataset('beans', split='train')) | ||||
| @@ -48,7 +49,7 @@ class MsDatasetTest(unittest.TestCase): | |||||
| print(next(iter(ms_ds_train))) | print(next(iter(ms_ds_train))) | ||||
| print(next(iter(ms_image_train))) | print(next(iter(ms_image_train))) | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| @require_torch | @require_torch | ||||
| def test_to_torch_dataset_text(self): | def test_to_torch_dataset_text(self): | ||||
| model_id = 'damo/bert-base-sst2' | model_id = 'damo/bert-base-sst2' | ||||
| @@ -57,13 +58,14 @@ class MsDatasetTest(unittest.TestCase): | |||||
| nlp_model.model_dir, | nlp_model.model_dir, | ||||
| first_sequence='context', | first_sequence='context', | ||||
| second_sequence=None) | second_sequence=None) | ||||
| ms_ds_train = MsDataset.load('squad', split='train') | |||||
| ms_ds_train = MsDataset.load( | |||||
| 'squad', namespace='damotest', split='train') | |||||
| pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor) | pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor) | ||||
| import torch | import torch | ||||
| dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) | dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) | ||||
| print(next(iter(dataloader))) | print(next(iter(dataloader))) | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| @require_tf | @require_tf | ||||
| def test_to_tf_dataset_text(self): | def test_to_tf_dataset_text(self): | ||||
| import tensorflow as tf | import tensorflow as tf | ||||
| @@ -74,7 +76,8 @@ class MsDatasetTest(unittest.TestCase): | |||||
| nlp_model.model_dir, | nlp_model.model_dir, | ||||
| first_sequence='context', | first_sequence='context', | ||||
| second_sequence=None) | second_sequence=None) | ||||
| ms_ds_train = MsDataset.load('squad', split='train') | |||||
| ms_ds_train = MsDataset.load( | |||||
| 'squad', namespace='damotest', split='train') | |||||
| tf_dataset = ms_ds_train.to_tf_dataset( | tf_dataset = ms_ds_train.to_tf_dataset( | ||||
| batch_size=5, | batch_size=5, | ||||
| shuffle=True, | shuffle=True, | ||||
| @@ -85,8 +88,8 @@ class MsDatasetTest(unittest.TestCase): | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| @require_torch | @require_torch | ||||
| def test_to_torch_dataset_img(self): | def test_to_torch_dataset_img(self): | ||||
| ms_image_train = MsDataset.from_hf_dataset( | |||||
| hfdata.load_dataset('beans', split='train')) | |||||
| ms_image_train = MsDataset.load( | |||||
| 'beans', namespace='damotest', split='train') | |||||
| pt_dataset = ms_image_train.to_torch_dataset( | pt_dataset = ms_image_train.to_torch_dataset( | ||||
| preprocessors=ImgPreprocessor( | preprocessors=ImgPreprocessor( | ||||
| image_path='image_file_path', label='labels')) | image_path='image_file_path', label='labels')) | ||||
| @@ -99,7 +102,8 @@ class MsDatasetTest(unittest.TestCase): | |||||
| def test_to_tf_dataset_img(self): | def test_to_tf_dataset_img(self): | ||||
| import tensorflow as tf | import tensorflow as tf | ||||
| tf.compat.v1.enable_eager_execution() | tf.compat.v1.enable_eager_execution() | ||||
| ms_image_train = MsDataset.load('beans', split='train') | |||||
| ms_image_train = MsDataset.load( | |||||
| 'beans', namespace='damotest', split='train') | |||||
| tf_dataset = ms_image_train.to_tf_dataset( | tf_dataset = ms_image_train.to_tf_dataset( | ||||
| batch_size=5, | batch_size=5, | ||||
| shuffle=True, | shuffle=True, | ||||
| @@ -5,8 +5,7 @@ import tempfile | |||||
| import unittest | import unittest | ||||
| from modelscope.hub.snapshot_download import snapshot_download | from modelscope.hub.snapshot_download import snapshot_download | ||||
| from modelscope.models import Model | |||||
| from modelscope.models.nlp import DialogStateTrackingModel | |||||
| from modelscope.models import Model, SpaceForDialogStateTrackingModel | |||||
| from modelscope.pipelines import DialogStateTrackingPipeline, pipeline | from modelscope.pipelines import DialogStateTrackingPipeline, pipeline | ||||
| from modelscope.preprocessors import DialogStateTrackingPreprocessor | from modelscope.preprocessors import DialogStateTrackingPreprocessor | ||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| @@ -41,7 +40,7 @@ class DialogStateTrackingTest(unittest.TestCase): | |||||
| cache_path = '/Users/yangliu/Space/maas_model/nlp_space_dialog-state-tracking' | cache_path = '/Users/yangliu/Space/maas_model/nlp_space_dialog-state-tracking' | ||||
| # cache_path = snapshot_download(self.model_id) | # cache_path = snapshot_download(self.model_id) | ||||
| model = DialogStateTrackingModel(cache_path) | |||||
| model = SpaceForDialogStateTrackingModel(cache_path) | |||||
| preprocessor = DialogStateTrackingPreprocessor(model_dir=cache_path) | preprocessor = DialogStateTrackingPreprocessor(model_dir=cache_path) | ||||
| pipelines = [ | pipelines = [ | ||||
| DialogStateTrackingPipeline( | DialogStateTrackingPipeline( | ||||
| @@ -55,17 +54,18 @@ class DialogStateTrackingTest(unittest.TestCase): | |||||
| history_states = [{}] | history_states = [{}] | ||||
| utter = {} | utter = {} | ||||
| pipelines_len = len(pipelines) | pipelines_len = len(pipelines) | ||||
| import json | |||||
| for step, item in enumerate(self.test_case): | for step, item in enumerate(self.test_case): | ||||
| utter.update(item) | utter.update(item) | ||||
| ds = pipelines[step % pipelines_len]({ | |||||
| result = pipelines[step % pipelines_len]({ | |||||
| 'utter': | 'utter': | ||||
| utter, | utter, | ||||
| 'history_states': | 'history_states': | ||||
| history_states | history_states | ||||
| }) | }) | ||||
| print(ds) | |||||
| print(json.dumps(result)) | |||||
| history_states.extend([ds, {}]) | |||||
| history_states.extend([result['dialog_states'], {}]) | |||||
| @unittest.skip('test with snapshot_download') | @unittest.skip('test with snapshot_download') | ||||
| def test_run_with_model_from_modelhub(self): | def test_run_with_model_from_modelhub(self): | ||||
| @@ -0,0 +1,20 @@ | |||||
| import unittest | |||||
| from modelscope.pipelines import pipeline | |||||
| from modelscope.utils.constant import Tasks | |||||
| from modelscope.utils.test_utils import test_level | |||||
| class MultiModalFeatureTest(unittest.TestCase): | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run(self): | |||||
| animal_recog = pipeline( | |||||
| Tasks.image_classification, | |||||
| model='damo/cv_resnest101_animal_recognation') | |||||
| result = animal_recog('data/test/images/image1.jpg') | |||||
| print(result) | |||||
| if __name__ == '__main__': | |||||
| unittest.main() | |||||
| @@ -3,7 +3,8 @@ import unittest | |||||
| from modelscope.hub.snapshot_download import snapshot_download | from modelscope.hub.snapshot_download import snapshot_download | ||||
| from modelscope.models import Model | from modelscope.models import Model | ||||
| from modelscope.models.nlp import StructBertForMaskedLM, VecoForMaskedLM | |||||
| from modelscope.models.nlp import (BertForMaskedLM, StructBertForMaskedLM, | |||||
| VecoForMaskedLM) | |||||
| from modelscope.pipelines import FillMaskPipeline, pipeline | from modelscope.pipelines import FillMaskPipeline, pipeline | ||||
| from modelscope.preprocessors import FillMaskPreprocessor | from modelscope.preprocessors import FillMaskPreprocessor | ||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| @@ -16,6 +17,7 @@ class FillMaskTest(unittest.TestCase): | |||||
| 'en': 'damo/nlp_structbert_fill-mask_english-large' | 'en': 'damo/nlp_structbert_fill-mask_english-large' | ||||
| } | } | ||||
| model_id_veco = 'damo/nlp_veco_fill-mask-large' | model_id_veco = 'damo/nlp_veco_fill-mask-large' | ||||
| model_id_bert = 'damo/nlp_bert_fill-mask_chinese-base' | |||||
| ori_texts = { | ori_texts = { | ||||
| 'zh': | 'zh': | ||||
| @@ -69,6 +71,20 @@ class FillMaskTest(unittest.TestCase): | |||||
| f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n' | f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n' | ||||
| ) | ) | ||||
| # zh bert | |||||
| language = 'zh' | |||||
| model_dir = snapshot_download(self.model_id_bert) | |||||
| preprocessor = FillMaskPreprocessor( | |||||
| model_dir, first_sequence='sentence', second_sequence=None) | |||||
| model = BertForMaskedLM(model_dir) | |||||
| pipeline1 = FillMaskPipeline(model, preprocessor) | |||||
| pipeline2 = pipeline( | |||||
| Tasks.fill_mask, model=model, preprocessor=preprocessor) | |||||
| ori_text = self.ori_texts[language] | |||||
| test_input = self.test_inputs[language] | |||||
| print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline1: ' | |||||
| f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n') | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| def test_run_with_model_from_modelhub(self): | def test_run_with_model_from_modelhub(self): | ||||
| # sbert | # sbert | ||||
| @@ -97,6 +113,18 @@ class FillMaskTest(unittest.TestCase): | |||||
| print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' | print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' | ||||
| f'{pipeline_ins(test_input)}\n') | f'{pipeline_ins(test_input)}\n') | ||||
| # zh bert | |||||
| model = Model.from_pretrained(self.model_id_bert) | |||||
| preprocessor = FillMaskPreprocessor( | |||||
| model.model_dir, first_sequence='sentence', second_sequence=None) | |||||
| pipeline_ins = pipeline( | |||||
| Tasks.fill_mask, model=model, preprocessor=preprocessor) | |||||
| language = 'zh' | |||||
| ori_text = self.ori_texts[language] | |||||
| test_input = self.test_inputs[language] | |||||
| print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' | |||||
| f'{pipeline_ins(test_input)}\n') | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| def test_run_with_model_name(self): | def test_run_with_model_name(self): | ||||
| # veco | # veco | ||||
| @@ -115,6 +143,12 @@ class FillMaskTest(unittest.TestCase): | |||||
| f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' | f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' | ||||
| f'{pipeline_ins(self.test_inputs[language])}\n') | f'{pipeline_ins(self.test_inputs[language])}\n') | ||||
| # bert | |||||
| pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_bert) | |||||
| print( | |||||
| f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' | |||||
| f'{pipeline_ins(self.test_inputs[language])}\n') | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| def test_run_with_default_model(self): | def test_run_with_default_model(self): | ||||
| pipeline_ins = pipeline(task=Tasks.fill_mask) | pipeline_ins = pipeline(task=Tasks.fill_mask) | ||||
| @@ -62,7 +62,8 @@ class ImageMattingTest(unittest.TestCase): | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| def test_run_with_modelscope_dataset(self): | def test_run_with_modelscope_dataset(self): | ||||
| dataset = MsDataset.load('beans', split='train', target='image') | |||||
| dataset = MsDataset.load( | |||||
| 'beans', namespace='damotest', split='train', target='image') | |||||
| img_matting = pipeline(Tasks.image_matting, model=self.model_id) | img_matting = pipeline(Tasks.image_matting, model=self.model_id) | ||||
| result = img_matting(dataset) | result = img_matting(dataset) | ||||
| for i in range(10): | for i in range(10): | ||||
| @@ -27,6 +27,11 @@ class OCRDetectionTest(unittest.TestCase): | |||||
| print('ocr detection results: ') | print('ocr detection results: ') | ||||
| print(result) | print(result) | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run_with_model_from_modelhub(self): | |||||
| ocr_detection = pipeline(Tasks.ocr_detection, model=self.model_id) | |||||
| self.pipeline_inference(ocr_detection, self.test_image) | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| def test_run_modelhub_default_model(self): | def test_run_modelhub_default_model(self): | ||||
| ocr_detection = pipeline(Tasks.ocr_detection) | ocr_detection = pipeline(Tasks.ocr_detection) | ||||
| @@ -17,6 +17,9 @@ AEC_LIB_URL = 'http://isv-data.oss-cn-hangzhou.aliyuncs.com/ics%2FMaaS%2FAEC%2Fl | |||||
| '?Expires=1664085465&OSSAccessKeyId=LTAIxjQyZNde90zh&Signature=Y7gelmGEsQAJRK4yyHSYMrdWizk%3D' | '?Expires=1664085465&OSSAccessKeyId=LTAIxjQyZNde90zh&Signature=Y7gelmGEsQAJRK4yyHSYMrdWizk%3D' | ||||
| AEC_LIB_FILE = 'libmitaec_pyio.so' | AEC_LIB_FILE = 'libmitaec_pyio.so' | ||||
| NOISE_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ANS/sample_audio/speech_with_noise.wav' | |||||
| NOISE_SPEECH_FILE = 'speech_with_noise.wav' | |||||
| def download(remote_path, local_path): | def download(remote_path, local_path): | ||||
| local_dir = os.path.dirname(local_path) | local_dir = os.path.dirname(local_path) | ||||
| @@ -30,23 +33,40 @@ def download(remote_path, local_path): | |||||
| class SpeechSignalProcessTest(unittest.TestCase): | class SpeechSignalProcessTest(unittest.TestCase): | ||||
| def setUp(self) -> None: | def setUp(self) -> None: | ||||
| self.model_id = 'damo/speech_dfsmn_aec_psm_16k' | |||||
| pass | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_aec(self): | |||||
| # A temporary hack to provide c++ lib. Download it first. | # A temporary hack to provide c++ lib. Download it first. | ||||
| download(AEC_LIB_URL, AEC_LIB_FILE) | download(AEC_LIB_URL, AEC_LIB_FILE) | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run(self): | |||||
| # Download audio files | |||||
| download(NEAREND_MIC_URL, NEAREND_MIC_FILE) | download(NEAREND_MIC_URL, NEAREND_MIC_FILE) | ||||
| download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE) | download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE) | ||||
| model_id = 'damo/speech_dfsmn_aec_psm_16k' | |||||
| input = { | input = { | ||||
| 'nearend_mic': NEAREND_MIC_FILE, | 'nearend_mic': NEAREND_MIC_FILE, | ||||
| 'farend_speech': FAREND_SPEECH_FILE | 'farend_speech': FAREND_SPEECH_FILE | ||||
| } | } | ||||
| aec = pipeline( | aec = pipeline( | ||||
| Tasks.speech_signal_process, | Tasks.speech_signal_process, | ||||
| model=self.model_id, | |||||
| model=model_id, | |||||
| pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k) | pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k) | ||||
| aec(input, output_path='output.wav') | |||||
| output_path = os.path.abspath('output.wav') | |||||
| aec(input, output_path=output_path) | |||||
| print(f'Processed audio saved to {output_path}') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_ans(self): | |||||
| # Download audio files | |||||
| download(NOISE_SPEECH_URL, NOISE_SPEECH_FILE) | |||||
| model_id = 'damo/speech_frcrn_ans_cirm_16k' | |||||
| ans = pipeline( | |||||
| Tasks.speech_signal_process, | |||||
| model=model_id, | |||||
| pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k) | |||||
| output_path = os.path.abspath('output.wav') | |||||
| ans(NOISE_SPEECH_FILE, output_path=output_path) | |||||
| print(f'Processed audio saved to {output_path}') | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -87,12 +87,16 @@ class SequenceClassificationTest(unittest.TestCase): | |||||
| result = text_classification(dataset) | result = text_classification(dataset) | ||||
| self.printDataset(result) | self.printDataset(result) | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run_with_modelscope_dataset(self): | def test_run_with_modelscope_dataset(self): | ||||
| text_classification = pipeline(task=Tasks.text_classification) | text_classification = pipeline(task=Tasks.text_classification) | ||||
| # loaded from modelscope dataset | # loaded from modelscope dataset | ||||
| dataset = MsDataset.load( | dataset = MsDataset.load( | ||||
| 'squad', split='train', target='context', hub=Hubs.modelscope) | |||||
| 'squad', | |||||
| namespace='damotest', | |||||
| split='train', | |||||
| target='context', | |||||
| hub=Hubs.modelscope) | |||||
| result = text_classification(dataset) | result = text_classification(dataset) | ||||
| self.printDataset(result) | self.printDataset(result) | ||||
| @@ -0,0 +1,60 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||||
| import unittest | |||||
| from modelscope.hub.snapshot_download import snapshot_download | |||||
| from modelscope.models import Model | |||||
| from modelscope.models.multi_modal import MPlugForVisualQuestionAnswering | |||||
| from modelscope.pipelines import VisualQuestionAnsweringPipeline, pipeline | |||||
| from modelscope.preprocessors import MPlugVisualQuestionAnsweringPreprocessor | |||||
| from modelscope.utils.constant import Tasks | |||||
| from modelscope.utils.test_utils import test_level | |||||
| class VisualQuestionAnsweringTest(unittest.TestCase): | |||||
| model_id = 'damo/mplug_visual-question-answering_coco_large_en' | |||||
| input_vqa = { | |||||
| 'image': 'data/test/images/image_mplug_vqa.jpg', | |||||
| 'question': 'What is the woman doing?', | |||||
| } | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run(self): | |||||
| cache_path = snapshot_download(self.model_id) | |||||
| preprocessor = MPlugVisualQuestionAnsweringPreprocessor(cache_path) | |||||
| model = MPlugForVisualQuestionAnswering(cache_path) | |||||
| pipeline1 = VisualQuestionAnsweringPipeline( | |||||
| model, preprocessor=preprocessor) | |||||
| pipeline2 = pipeline( | |||||
| Tasks.visual_question_answering, | |||||
| model=model, | |||||
| preprocessor=preprocessor) | |||||
| print(f"question: {self.input_vqa['question']}") | |||||
| print(f"pipeline1: {pipeline1(self.input_vqa)['answer']}") | |||||
| print(f"pipeline2: {pipeline2(self.input_vqa)['answer']}") | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_with_model_from_modelhub(self): | |||||
| model = Model.from_pretrained(self.model_id) | |||||
| preprocessor = MPlugVisualQuestionAnsweringPreprocessor( | |||||
| model.model_dir) | |||||
| pipeline_vqa = pipeline( | |||||
| task=Tasks.visual_question_answering, | |||||
| model=model, | |||||
| preprocessor=preprocessor) | |||||
| print(pipeline_vqa(self.input_vqa)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_with_model_name(self): | |||||
| pipeline_vqa = pipeline( | |||||
| Tasks.visual_question_answering, model=self.model_id) | |||||
| print(pipeline_vqa(self.input_vqa)) | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_with_default_model(self): | |||||
| pipeline_vqa = pipeline(task=Tasks.visual_question_answering) | |||||
| print(pipeline_vqa(self.input_vqa)) | |||||
| if __name__ == '__main__': | |||||
| unittest.main() | |||||
| @@ -0,0 +1,64 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||||
| import unittest | |||||
| from modelscope.hub.snapshot_download import snapshot_download | |||||
| from modelscope.models import Model | |||||
| from modelscope.models.nlp import SbertForZeroShotClassification | |||||
| from modelscope.pipelines import ZeroShotClassificationPipeline, pipeline | |||||
| from modelscope.preprocessors import ZeroShotClassificationPreprocessor | |||||
| from modelscope.utils.constant import Tasks | |||||
| from modelscope.utils.test_utils import test_level | |||||
| class ZeroShotClassificationTest(unittest.TestCase): | |||||
| model_id = 'damo/nlp_structbert_zero-shot-classification_chinese-base' | |||||
| sentence = '全新突破 解放军运20版空中加油机曝光' | |||||
| labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事'] | |||||
| template = '这篇文章的标题是{}' | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_with_direct_file_download(self): | |||||
| cache_path = snapshot_download(self.model_id) | |||||
| tokenizer = ZeroShotClassificationPreprocessor(cache_path) | |||||
| model = SbertForZeroShotClassification(cache_path, tokenizer=tokenizer) | |||||
| pipeline1 = ZeroShotClassificationPipeline( | |||||
| model, preprocessor=tokenizer) | |||||
| pipeline2 = pipeline( | |||||
| Tasks.zero_shot_classification, | |||||
| model=model, | |||||
| preprocessor=tokenizer) | |||||
| print( | |||||
| f'sentence: {self.sentence}\n' | |||||
| f'pipeline1:{pipeline1(input=self.sentence,candidate_labels=self.labels)}' | |||||
| ) | |||||
| print() | |||||
| print( | |||||
| f'sentence: {self.sentence}\n' | |||||
| f'pipeline2: {pipeline2(self.sentence,candidate_labels=self.labels,hypothesis_template=self.template)}' | |||||
| ) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_with_model_from_modelhub(self): | |||||
| model = Model.from_pretrained(self.model_id) | |||||
| tokenizer = ZeroShotClassificationPreprocessor(model.model_dir) | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.zero_shot_classification, | |||||
| model=model, | |||||
| preprocessor=tokenizer) | |||||
| print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_with_model_name(self): | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.zero_shot_classification, model=self.model_id) | |||||
| print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_with_default_model(self): | |||||
| pipeline_ins = pipeline(task=Tasks.zero_shot_classification) | |||||
| print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) | |||||
| if __name__ == '__main__': | |||||
| unittest.main() | |||||