
merge feat/nlp

master
ly119399 3 years ago
commit 4cfc4b43a0
76 changed files with 2762 additions and 227 deletions
1. +3 -0 data/test/images/image_mplug_vqa.jpg
2. +61 -23 modelscope/hub/api.py
3. +19 -0 modelscope/hub/errors.py
4. +9 -7 modelscope/hub/file_download.py
5. +8 -0 modelscope/hub/git.py
6. +8 -4 modelscope/hub/repository.py
7. +7 -9 modelscope/hub/snapshot_download.py
8. +6 -2 modelscope/hub/utils/caching.py
9. +9 -0 modelscope/metainfo.py
10. +5 -2 modelscope/models/__init__.py
11. +0 -0 modelscope/models/audio/aec/__init__.py
12. +0 -0 modelscope/models/audio/aec/layers/__init__.py
13. +0 -0 modelscope/models/audio/aec/layers/activations.py
14. +0 -0 modelscope/models/audio/aec/layers/affine_transform.py
15. +0 -0 modelscope/models/audio/aec/layers/deep_fsmn.py
16. +0 -0 modelscope/models/audio/aec/layers/layer_base.py
17. +0 -0 modelscope/models/audio/aec/layers/uni_deep_fsmn.py
18. +0 -0 modelscope/models/audio/aec/network/__init__.py
19. +0 -0 modelscope/models/audio/aec/network/loss.py
20. +0 -0 modelscope/models/audio/aec/network/modulation_loss.py
21. +0 -0 modelscope/models/audio/aec/network/se_net.py
22. +0 -0 modelscope/models/audio/ans/__init__.py
23. +248 -0 modelscope/models/audio/ans/complex_nn.py
24. +112 -0 modelscope/models/audio/ans/conv_stft.py
25. +309 -0 modelscope/models/audio/ans/frcrn.py
26. +26 -0 modelscope/models/audio/ans/se_module_complex.py
27. +269 -0 modelscope/models/audio/ans/unet.py
28. +0 -0 modelscope/models/cv/animal_recognition/__init__.py
29. +430 -0 modelscope/models/cv/animal_recognition/resnet.py
30. +125 -0 modelscope/models/cv/animal_recognition/splat.py
31. +2 -0 modelscope/models/multi_modal/__init__.py
32. +46 -0 modelscope/models/multi_modal/mplug_for_visual_question_answering.py
33. +1 -0 modelscope/models/nlp/__init__.py
34. +9 -1 modelscope/models/nlp/masked_language_model.py
35. +50 -0 modelscope/models/nlp/sbert_for_zero_shot_classification.py
36. +2 -2 modelscope/models/nlp/space/dialog_state_tracking_model.py
37. +1 -1 modelscope/msdatasets/config.py
38. +41 -15 modelscope/msdatasets/ms_dataset.py
39. +33 -15 modelscope/msdatasets/utils/ms_api.py
40. +4 -1 modelscope/pipelines/__init__.py
41. +117 -0 modelscope/pipelines/audio/ans_pipeline.py
42. +40 -15 modelscope/pipelines/base.py
43. +7 -1 modelscope/pipelines/builder.py
44. +1 -0 modelscope/pipelines/cv/__init__.py
45. +127 -0 modelscope/pipelines/cv/animal_recog_pipeline.py
46. +52 -42 modelscope/pipelines/cv/ocr_detection_pipeline.py
47. +5 -1 modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py
48. +5 -1 modelscope/pipelines/cv/ocr_utils/resnet18_v1.py
49. +5 -1 modelscope/pipelines/cv/ocr_utils/resnet_utils.py
50. +1 -0 modelscope/pipelines/multi_modal/__init__.py
51. +65 -0 modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py
52. +3 -3 modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py
53. +14 -10 modelscope/pipelines/nlp/fill_mask_pipeline.py
54. +39 -1 modelscope/pipelines/outputs.py
55. +1 -1 modelscope/preprocessors/__init__.py
56. +45 -0 modelscope/preprocessors/multi_modal.py
57. +7 -4 modelscope/preprocessors/nlp.py
58. +11 -1 modelscope/utils/constant.py
59. +3 -2 modelscope/utils/hub.py
60. +1 -1 modelscope/version.py
61. +1 -0 requirements/audio.txt
62. +2 -4 requirements/nlp.txt
63. +35 -7 tests/hub/test_hub_operation.py
64. +85 -0 tests/hub/test_hub_private_files.py
65. +4 -5 tests/hub/test_hub_private_repository.py
66. +5 -19 tests/hub/test_hub_repository.py
67. +14 -10 tests/msdatasets/test_ms_dataset.py
68. +6 -6 tests/pipelines/nlp/test_dialog_state_tracking.py
69. +20 -0 tests/pipelines/test_animal_recognation.py
70. +35 -1 tests/pipelines/test_fill_mask.py
71. +2 -1 tests/pipelines/test_image_matting.py
72. +5 -0 tests/pipelines/test_ocr_detection.py
73. +26 -6 tests/pipelines/test_speech_signal_process.py
74. +6 -2 tests/pipelines/test_text_classification.py
75. +60 -0 tests/pipelines/test_visual_question_answering.py
76. +64 -0 tests/pipelines/test_zero_shot_classification.py

+3 -0 data/test/images/image_mplug_vqa.jpg

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b37b706885849037b5fa7fa44a3b78a6375f768d95ce46bfcb8e7329d038a692
size 181725

+61 -23 modelscope/hub/api.py

@@ -9,7 +9,7 @@ import requests

 from modelscope.utils.logger import get_logger
 from .constants import MODELSCOPE_URL_SCHEME
-from .errors import NotExistError, is_ok, raise_on_error
+from .errors import InvalidParameter, NotExistError, is_ok, raise_on_error
 from .utils.utils import (get_endpoint, get_gitlab_domain,
                           model_id_to_group_owner_name)

@@ -61,17 +61,21 @@ class HubApi:

         return d['Data']['AccessToken'], cookies

-    def create_model(self, model_id: str, chinese_name: str, visibility: int,
-                     license: str) -> str:
+    def create_model(
+            self,
+            model_id: str,
+            visibility: int,
+            license: str,
+            chinese_name: Optional[str] = None,
+    ) -> str:
         """
         Create model repo at ModelScopeHub

         Args:
             model_id (`str`): The model id
-            chinese_name (`str`): chinese name of the model
-            visibility (`int`): visibility of the model (1-private, 3-internal, 5-public)
-            license (`str`): license of the model, candidates can be found at: TBA
+            visibility (`int`): visibility of the model (1-private, 5-public), default public.
+            license (`str`): license of the model, default none.
+            chinese_name (`str`, *optional*): chinese name of the model
         Returns:
             name of the model created

@@ -79,6 +83,8 @@ class HubApi:
             model_id = {owner}/{name}
         </Tip>
         """
+        if model_id is None:
+            raise InvalidParameter('model_id is required!')
         cookies = ModelScopeConfig.get_cookies()
         if cookies is None:
             raise ValueError('Token does not exist, please login first.')
@@ -151,11 +157,33 @@ class HubApi:
         else:
             r.raise_for_status()

+    def _check_cookie(self,
+                      use_cookies: Union[bool,
+                                         CookieJar] = False) -> CookieJar:
+        cookies = None
+        if isinstance(use_cookies, CookieJar):
+            cookies = use_cookies
+        elif use_cookies:
+            cookies = ModelScopeConfig.get_cookies()
+            if cookies is None:
+                raise ValueError('Token does not exist, please login first.')
+        return cookies
+
     def get_model_branches_and_tags(
             self,
             model_id: str,
+            use_cookies: Union[bool, CookieJar] = False
     ) -> Tuple[List[str], List[str]]:
-        cookies = ModelScopeConfig.get_cookies()
+        """Get the model's branches and tags.
+
+        Args:
+            model_id (str): The model id.
+            use_cookies (Union[bool, CookieJar], optional): If a CookieJar is passed,
+                it is used directly; if True, cookies are loaded from the local
+                config. Defaults to False.
+        Returns:
+            Tuple[List[str], List[str]]: The branch list and the tag list.
+        """
+        cookies = self._check_cookie(use_cookies)

         path = f'{self.endpoint}/api/v1/models/{model_id}/revisions'
         r = requests.get(path, cookies=cookies)
@@ -169,23 +197,33 @@ class HubApi:
         ] if info['RevisionMap']['Tags'] else []
         return branches, tags

-    def get_model_files(
-            self,
-            model_id: str,
-            revision: Optional[str] = 'master',
-            root: Optional[str] = None,
-            recursive: Optional[str] = False,
-            use_cookies: Union[bool, CookieJar] = False) -> List[dict]:
-
-        cookies = None
-        if isinstance(use_cookies, CookieJar):
-            cookies = use_cookies
-        elif use_cookies:
-            cookies = ModelScopeConfig.get_cookies()
-            if cookies is None:
-                raise ValueError('Token does not exist, please login first.')
-
-        path = f'{self.endpoint}/api/v1/models/{model_id}/repo/files?Revision={revision}&Recursive={recursive}'
+    def get_model_files(self,
+                        model_id: str,
+                        revision: Optional[str] = 'master',
+                        root: Optional[str] = None,
+                        recursive: Optional[str] = False,
+                        use_cookies: Union[bool, CookieJar] = False,
+                        is_snapshot: Optional[bool] = True) -> List[dict]:
+        """List the model's files.
+
+        Args:
+            model_id (str): The model id.
+            revision (Optional[str], optional): The branch or tag name. Defaults to 'master'.
+            root (Optional[str], optional): The root path. Defaults to None.
+            recursive (Optional[str], optional): Whether to list files recursively. Defaults to False.
+            use_cookies (Union[bool, CookieJar], optional): If a CookieJar is passed,
+                it is used directly; if True, cookies are loaded from the local
+                config. Defaults to False.
+            is_snapshot (Optional[bool], optional): True when called from snapshot_download,
+                otherwise False.
+
+        Raises:
+            ValueError: If use_cookies is True but there is no local cookie.
+
+        Returns:
+            List[dict]: The model file list.
+        """
+        path = '%s/api/v1/models/%s/repo/files?Revision=%s&Recursive=%s&Snapshot=%s' % (
+            self.endpoint, model_id, revision, recursive, is_snapshot)
+        cookies = self._check_cookie(use_cookies)
         if root is not None:
             path = path + f'&Root={root}'
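A minimal usage sketch of the updated API (not part of this diff; the token, model id, and the login call are assumptions based on the surrounding code):

from modelscope.hub.api import HubApi

api = HubApi()
api.login('my-access-token')  # assumed login flow; caches cookies locally
api.create_model(
    model_id='my-org/my-model',   # required: raises InvalidParameter if None
    visibility=5,                 # 1 = private, 5 = public
    license='Apache License 2.0',
    chinese_name=None)            # now optional and moved to the end
# use_cookies=True loads the locally cached cookies; a CookieJar also works
files = api.get_model_files(
    'my-org/my-model', revision='master', recursive=True,
    use_cookies=True, is_snapshot=False)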




+19 -0 modelscope/hub/errors.py

@@ -10,6 +10,10 @@ class GitError(Exception):
     pass


+class InvalidParameter(Exception):
+    pass
+
+
 def is_ok(rsp):
     """ Check the request is ok

@@ -32,3 +36,18 @@ def raise_on_error(rsp):
         return True
     else:
         raise RequestError(rsp['Message'])
+
+
+# TODO: use raise_on_error instead once modelhub and datahub responses share a uniform structure.
+def datahub_raise_on_error(url, rsp):
+    """Raise an exception if the datahub response indicates an error.
+
+    Args:
+        url (str): The request url.
+        rsp (dict): The server response.
+    """
+    if rsp.get('Code') == 200:
+        return True
+    else:
+        raise RequestError(
+            f"Url = {url}, Status = {rsp.get('status')}, error = {rsp.get('error')}, message = {rsp.get('message')}"
+        )
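A quick sketch of the new datahub helper's behavior (the url and response dicts below are placeholders):

from modelscope.hub.errors import RequestError, datahub_raise_on_error

datahub_raise_on_error('https://example.com/api', {'Code': 200})  # returns True
try:
    datahub_raise_on_error('https://example.com/api',
                           {'Code': 400, 'error': 'bad request'})
except RequestError as err:
    print(err)  # message carries the url, status, error and message fields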

+9 -7 modelscope/hub/file_download.py

@@ -7,6 +7,7 @@ import tempfile
 import time
 from functools import partial
 from hashlib import sha256
+from http.cookiejar import CookieJar
 from pathlib import Path
 from typing import BinaryIO, Dict, Optional, Union
 from uuid import uuid4
@@ -107,7 +108,9 @@ def model_file_download(

     _api = HubApi()
     headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
-    branches, tags = _api.get_model_branches_and_tags(model_id)
+    cookies = ModelScopeConfig.get_cookies()
+    branches, tags = _api.get_model_branches_and_tags(
+        model_id, use_cookies=False if cookies is None else cookies)
     file_to_download_info = None
     is_commit_id = False
     if revision in branches or revision in tags:  # The revision is a version or tag,
@@ -117,18 +120,19 @@ def model_file_download(
             model_id=model_id,
             revision=revision,
             recursive=True,
-        )
+            use_cookies=False if cookies is None else cookies,
+            is_snapshot=False)

         for model_file in model_files:
             if model_file['Type'] == 'tree':
                 continue

             if model_file['Path'] == file_path:
+                model_file['Branch'] = revision
                 if cache.exists(model_file):
                     return cache.get_file_by_info(model_file)
                 else:
                     file_to_download_info = model_file
+                break

         if file_to_download_info is None:
             raise NotExistError('The file path: %s not exist in: %s' %
@@ -141,8 +145,6 @@ def model_file_download(
             return cached_file_path  # the file is in cache.
         is_commit_id = True
     # we need to download again
-    # TODO: skip using JWT for authorization, use cookie instead
-    cookies = ModelScopeConfig.get_cookies()
     url_to_download = get_file_download_url(model_id, file_path, revision)
     file_to_download_info = {
         'Path': file_path,
@@ -202,7 +204,7 @@ def http_get_file(
         url: str,
         local_dir: str,
         file_name: str,
-        cookies: Dict[str, str],
+        cookies: CookieJar,
         headers: Optional[Dict[str, str]] = None,
 ):
     """
@@ -217,7 +219,7 @@ def http_get_file(
             local directory where the downloaded file is stored
         file_name(`str`):
             name of the file stored in `local_dir`
-        cookies(`Dict[str, str]`):
+        cookies(`CookieJar`):
             cookies used to authenticate the user, which is needed for downloading private repos
         headers(`Optional[Dict[str, str]] = None`):
             http headers to carry necessary info when requesting the remote file
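A single-file download sketch against the updated flow (the model id and file path are placeholders; cached cookies, if any, are now picked up automatically):

from modelscope.hub.file_download import model_file_download

local_path = model_file_download(
    model_id='my-org/my-model',
    file_path='config.json',
    revision='master')
print(local_path)  # a repeated call is served from the file cache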


+8 -0 modelscope/hub/git.py

@@ -70,6 +70,14 @@ class GitCommandWrapper(metaclass=Singleton):
         except GitError:
             return False

+    def git_lfs_install(self, repo_dir):
+        cmd = ['git', '-C', repo_dir, 'lfs', 'install']
+        try:
+            self._run_git_command(*cmd)
+            return True
+        except GitError:
+            return False
+
     def clone(self,
               repo_base_dir: str,
               token: str,
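The new wrapper method is equivalent to running `git -C <repo_dir> lfs install` and swallowing GitError; a short sketch (the repo path is a placeholder):

from modelscope.hub.git import GitCommandWrapper

git = GitCommandWrapper()
ok = git.git_lfs_install('/tmp/my-model')  # True once the LFS hooks are installed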


+8 -4 modelscope/hub/repository.py

@@ -1,7 +1,7 @@
 import os
 from typing import List, Optional

-from modelscope.hub.errors import GitError
+from modelscope.hub.errors import GitError, InvalidParameter
 from modelscope.utils.logger import get_logger
 from .api import ModelScopeConfig
 from .constants import MODELSCOPE_URL_SCHEME
@@ -49,6 +49,8 @@ class Repository:
         git_wrapper = GitCommandWrapper()
         if not git_wrapper.is_lfs_installed():
             logger.error('git lfs is not installed, please install.')
+        else:
+            git_wrapper.git_lfs_install(self.model_dir)  # init repo lfs

         self.git_wrapper = GitCommandWrapper(git_path)
         os.makedirs(self.model_dir, exist_ok=True)
@@ -74,8 +76,6 @@ class Repository:

     def push(self,
              commit_message: str,
-             files: List[str] = list(),
-             all_files: bool = False,
              branch: Optional[str] = 'master',
              force: bool = False):
         """Push local to remote, this method will do.
@@ -86,8 +86,12 @@ class Repository:
             commit_message (str): commit message
             revision (Optional[str], optional): which branch to push. Defaults to 'master'.
         """
+        if commit_message is None:
+            msg = 'commit_message must be provided!'
+            raise InvalidParameter(msg)
         url = self.git_wrapper.get_repo_remote_url(self.model_dir)
-        self.git_wrapper.add(self.model_dir, files, all_files)
+        self.git_wrapper.pull(self.model_dir)
+        self.git_wrapper.add(self.model_dir, all_files=True)
         self.git_wrapper.commit(self.model_dir, commit_message)
         self.git_wrapper.push(
             repo_dir=self.model_dir,
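With the files/all_files parameters gone, push now always pulls, stages everything, commits, and pushes; a sketch (the directory and model id are placeholders, and the clone_from keyword is assumed from the class constructor):

from modelscope.hub.repository import Repository

repo = Repository(model_dir='/tmp/my-model', clone_from='my-org/my-model')
repo.push('add new weights')  # commit_message is required: InvalidParameter if None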


+7 -9 modelscope/hub/snapshot_download.py

@@ -20,8 +20,7 @@ def snapshot_download(model_id: str,
                       revision: Optional[str] = 'master',
                       cache_dir: Union[str, Path, None] = None,
                       user_agent: Optional[Union[Dict, str]] = None,
-                      local_files_only: Optional[bool] = False,
-                      private: Optional[bool] = False) -> str:
+                      local_files_only: Optional[bool] = False) -> str:
     """Download all files of a repo.
     Downloads a whole snapshot of a repo's files at the specified revision. This
     is useful when you want all files from a repo, because you don't know which
@@ -79,8 +78,10 @@ def snapshot_download(model_id: str,
         # make headers
         headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
         _api = HubApi()
+        cookies = ModelScopeConfig.get_cookies()
         # get file list from model repo
-        branches, tags = _api.get_model_branches_and_tags(model_id)
+        branches, tags = _api.get_model_branches_and_tags(
+            model_id, use_cookies=False if cookies is None else cookies)
         if revision not in branches and revision not in tags:
             raise NotExistError('The specified branch or tag : %s not exist!'
                                 % revision)
@@ -89,11 +90,8 @@ def snapshot_download(model_id: str,
             model_id=model_id,
             revision=revision,
             recursive=True,
-            use_cookies=private)
-
-        cookies = None
-        if private:
-            cookies = ModelScopeConfig.get_cookies()
+            use_cookies=False if cookies is None else cookies,
+            is_snapshot=True)

         for model_file in model_files:
             if model_file['Type'] == 'tree':
@@ -116,7 +114,7 @@ def snapshot_download(model_id: str,
                 local_dir=tempfile.gettempdir(),
                 file_name=model_file['Name'],
                 headers=headers,
-                cookies=None if cookies is None else cookies.get_dict())
+                cookies=cookies)
             # put file to cache
             cache.put_file(
                 model_file,
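A snapshot sketch under the new signature (the model id is a placeholder; with the private flag removed, cached cookies are used whenever they exist):

from modelscope.hub.snapshot_download import snapshot_download

model_dir = snapshot_download('my-org/my-model', revision='master')
print(model_dir)  # local directory holding every file of the repo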


+6 -2 modelscope/hub/utils/caching.py

@@ -101,8 +101,9 @@ class FileSystemCache(object):
         Args:
             key (dict): The cache key.
         """
-        self.cached_files.remove(key)
-        self.save_cached_files()
+        if key in self.cached_files:
+            self.cached_files.remove(key)
+            self.save_cached_files()

     def exists(self, key):
         for cache_file in self.cached_files:
@@ -204,6 +205,7 @@ class ModelFileSystemCache(FileSystemCache):
                 return orig_path
             else:
                 self.remove_key(cached_file)
+                break

         return None

@@ -230,6 +232,7 @@ class ModelFileSystemCache(FileSystemCache):
                     cached_key['Revision'].startswith(key['Revision'])
                     or key['Revision'].startswith(cached_key['Revision'])):
                 is_exists = True
+                break
         file_path = os.path.join(self.cache_root_location,
                                  model_file_info['Path'])
         if is_exists:
@@ -253,6 +256,7 @@ class ModelFileSystemCache(FileSystemCache):
                                          cached_file['Path'])
                 if os.path.exists(file_path):
                     os.remove(file_path)
+                break

     def put_file(self, model_file_info, model_file_location):
         """Put the model at model_file_location into the cache: the file is first downloaded to /tmp, then moved into the cache.


+9 -0 modelscope/metainfo.py

@@ -21,11 +21,13 @@ class Models(object):
     sambert_hifi_16k = 'sambert-hifi-16k'
     generic_tts_frontend = 'generic-tts-frontend'
     hifigan16k = 'hifigan16k'
+    speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
     kws_kwsbp = 'kws-kwsbp'

     # multi-modal models
     ofa = 'ofa'
     clip = 'clip-multi-modal-embedding'
+    mplug = 'mplug'


 class Pipelines(object):
@@ -43,6 +45,7 @@ class Pipelines(object):
     person_image_cartoon = 'unet-person-image-cartoon'
     ocr_detection = 'resnet18-ocr-detection'
     action_recognition = 'TAdaConv_action-recognition'
+    animal_recognation = 'resnet101-animal_recog'

     # nlp tasks
     sentence_similarity = 'sentence-similarity'
@@ -55,15 +58,18 @@ class Pipelines(object):
     dialog_intent_prediction = 'dialog-intent-prediction'
     dialog_modeling = 'dialog-modeling'
     dialog_state_tracking = 'dialog-state-tracking'
+    zero_shot_classification = 'zero-shot-classification'

     # audio tasks
     sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts'
     speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k'
+    speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
     kws_kwsbp = 'kws-kwsbp'

     # multi-modal tasks
     image_caption = 'image-caption'
     multi_modal_embedding = 'multi-modal-embedding'
+    visual_question_answering = 'visual-question-answering'


 class Trainers(object):
@@ -99,6 +105,8 @@ class Preprocessors(object):
     token_cls_tokenizer = 'token-cls-tokenizer'
     nli_tokenizer = 'nli-tokenizer'
     sen_cls_tokenizer = 'sen-cls-tokenizer'
+    sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer'
+    zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'

     # audio preprocessor
     linear_aec_fbank = 'linear-aec-fbank'
@@ -107,3 +115,4 @@ class Preprocessors(object):

     # multi-modal
     ofa_image_caption = 'ofa-image-caption'
+    mplug_visual_question_answering = 'mplug-visual-question-answering'

+5 -2 modelscope/models/__init__.py

@@ -1,12 +1,15 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.

+from .audio.ans.frcrn import FRCRNModel
 from .audio.kws import GenericKeyWordSpotting
 from .audio.tts.am import SambertNetHifi16k
 from .audio.tts.vocoder import Hifigan16k
 from .base import Model
 from .builder import MODELS, build_model
 from .multi_modal import OfaForImageCaptioning
-from .nlp import (BertForSequenceClassification, SbertForNLI,
+from .nlp import (BertForMaskedLM, BertForSequenceClassification, SbertForNLI,
                   SbertForSentenceSimilarity, SbertForSentimentClassification,
-                  SbertForTokenClassification, StructBertForMaskedLM,
+                  SbertForTokenClassification, SpaceForDialogIntentModel,
+                  SpaceForDialogModelingModel,
+                  SpaceForDialogStateTrackingModel, StructBertForMaskedLM,
                   VecoForMaskedLM)

modelscope/models/audio/layers/__init__.py → modelscope/models/audio/aec/__init__.py

modelscope/models/audio/network/__init__.py → modelscope/models/audio/aec/layers/__init__.py

modelscope/models/audio/layers/activations.py → modelscope/models/audio/aec/layers/activations.py

modelscope/models/audio/layers/affine_transform.py → modelscope/models/audio/aec/layers/affine_transform.py

modelscope/models/audio/layers/deep_fsmn.py → modelscope/models/audio/aec/layers/deep_fsmn.py

modelscope/models/audio/layers/layer_base.py → modelscope/models/audio/aec/layers/layer_base.py

modelscope/models/audio/layers/uni_deep_fsmn.py → modelscope/models/audio/aec/layers/uni_deep_fsmn.py

+0 -0 modelscope/models/audio/aec/network/__init__.py

modelscope/models/audio/network/loss.py → modelscope/models/audio/aec/network/loss.py

modelscope/models/audio/network/modulation_loss.py → modelscope/models/audio/aec/network/modulation_loss.py

modelscope/models/audio/network/se_net.py → modelscope/models/audio/aec/network/se_net.py

+0 -0 modelscope/models/audio/ans/__init__.py


+248 -0 modelscope/models/audio/ans/complex_nn.py

@@ -0,0 +1,248 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class UniDeepFsmn(nn.Module):

def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None):
super(UniDeepFsmn, self).__init__()

self.input_dim = input_dim
self.output_dim = output_dim

if lorder is None:
return

self.lorder = lorder
self.hidden_size = hidden_size

self.linear = nn.Linear(input_dim, hidden_size)

self.project = nn.Linear(hidden_size, output_dim, bias=False)

self.conv1 = nn.Conv2d(
output_dim,
output_dim, [lorder, 1], [1, 1],
groups=output_dim,
bias=False)

def forward(self, input):
r"""

Args:
    input: tensor with shape batch (b) x sequence (T) x feature (h)

Returns:
    tensor with shape batch (b) x sequence (T) x feature (h)
"""
f1 = F.relu(self.linear(input))

p1 = self.project(f1)

x = torch.unsqueeze(p1, 1)
# x: batch (b) x channel (c) x sequence(T) x feature (h)
x_per = x.permute(0, 3, 2, 1)
# x_per: batch (b) x feature (h) x sequence(T) x channel (c)
y = F.pad(x_per, [0, 0, self.lorder - 1, 0])

out = x_per + self.conv1(y)

out1 = out.permute(0, 3, 2, 1)
# out1: batch (b) x channel (c) x sequence(T) x feature (h)
return input + out1.squeeze()


class ComplexUniDeepFsmn(nn.Module):

def __init__(self, nIn, nHidden=128, nOut=128):
super(ComplexUniDeepFsmn, self).__init__()

self.fsmn_re_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden)
self.fsmn_im_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden)
self.fsmn_re_L2 = UniDeepFsmn(nHidden, nOut, 20, nHidden)
self.fsmn_im_L2 = UniDeepFsmn(nHidden, nOut, 20, nHidden)

def forward(self, x):
r"""

Args:
x: tensor with shape [batch, channel, feature, sequence, 2], e.g. [6, 256, 1, 106, 2]

Returns:
[batch, feature, sequence, 2], eg: [6, 99, 1024, 2]
"""
#
b, c, h, T, d = x.size()
x = torch.reshape(x, (b, c * h, T, d))
# x: [b,h,T,2], [6, 256, 106, 2]
x = torch.transpose(x, 1, 2)
# x: [b,T,h,2], [6, 106, 256, 2]

real_L1 = self.fsmn_re_L1(x[..., 0]) - self.fsmn_im_L1(x[..., 1])
imaginary_L1 = self.fsmn_re_L1(x[..., 1]) + self.fsmn_im_L1(x[..., 0])
# GRU output: [99, 6, 128]
real = self.fsmn_re_L2(real_L1) - self.fsmn_im_L2(imaginary_L1)
imaginary = self.fsmn_re_L2(imaginary_L1) + self.fsmn_im_L2(real_L1)
# output: [b,T,h,2], [99, 6, 1024, 2]
output = torch.stack((real, imaginary), dim=-1)

# output: [b,h,T,2], [6, 99, 1024, 2]
output = torch.transpose(output, 1, 2)
output = torch.reshape(output, (b, c, h, T, d))

return output


class ComplexUniDeepFsmn_L1(nn.Module):

def __init__(self, nIn, nHidden=128, nOut=128):
super(ComplexUniDeepFsmn_L1, self).__init__()
self.fsmn_re_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden)
self.fsmn_im_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden)

def forward(self, x):
r"""

Args:
x: tensor with shape [batch, channel, feature, sequence, 2], e.g. [6, 256, 1, 106, 2]
"""
b, c, h, T, d = x.size()
# x : [b,T,h,c,2]
x = torch.transpose(x, 1, 3)
x = torch.reshape(x, (b * T, h, c, d))

real = self.fsmn_re_L1(x[..., 0]) - self.fsmn_im_L1(x[..., 1])
imaginary = self.fsmn_re_L1(x[..., 1]) + self.fsmn_im_L1(x[..., 0])
# output: [b*T,h,c,2], [6*106, h, 256, 2]
output = torch.stack((real, imaginary), dim=-1)

output = torch.reshape(output, (b, T, h, c, d))
output = torch.transpose(output, 1, 3)
return output


class ComplexConv2d(nn.Module):
# https://github.com/litcoderr/ComplexCNN/blob/master/complexcnn/modules.py
def __init__(self,
in_channel,
out_channel,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
**kwargs):
super().__init__()

# Model components
self.conv_re = nn.Conv2d(
in_channel,
out_channel,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias,
**kwargs)
self.conv_im = nn.Conv2d(
in_channel,
out_channel,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias,
**kwargs)

def forward(self, x):
r"""

Args:
x: tensor with shape [batch, channel, axis1, axis2, 2]
"""
real = self.conv_re(x[..., 0]) - self.conv_im(x[..., 1])
imaginary = self.conv_re(x[..., 1]) + self.conv_im(x[..., 0])
output = torch.stack((real, imaginary), dim=-1)
return output


class ComplexConvTranspose2d(nn.Module):

def __init__(self,
in_channel,
out_channel,
kernel_size,
stride=1,
padding=0,
output_padding=0,
dilation=1,
groups=1,
bias=True,
**kwargs):
super().__init__()

# Model components
self.tconv_re = nn.ConvTranspose2d(
in_channel,
out_channel,
kernel_size=kernel_size,
stride=stride,
padding=padding,
output_padding=output_padding,
groups=groups,
bias=bias,
dilation=dilation,
**kwargs)
self.tconv_im = nn.ConvTranspose2d(
in_channel,
out_channel,
kernel_size=kernel_size,
stride=stride,
padding=padding,
output_padding=output_padding,
groups=groups,
bias=bias,
dilation=dilation,
**kwargs)

def forward(self, x):  # shape of x: [batch, channel, axis1, axis2, 2]
real = self.tconv_re(x[..., 0]) - self.tconv_im(x[..., 1])
imaginary = self.tconv_re(x[..., 1]) + self.tconv_im(x[..., 0])
output = torch.stack((real, imaginary), dim=-1)
return output


class ComplexBatchNorm2d(nn.Module):

def __init__(self,
num_features,
eps=1e-5,
momentum=0.1,
affine=True,
track_running_stats=True,
**kwargs):
super().__init__()
self.bn_re = nn.BatchNorm2d(
num_features=num_features,
momentum=momentum,
affine=affine,
eps=eps,
track_running_stats=track_running_stats,
**kwargs)
self.bn_im = nn.BatchNorm2d(
num_features=num_features,
momentum=momentum,
affine=affine,
eps=eps,
track_running_stats=track_running_stats,
**kwargs)

def forward(self, x):
real = self.bn_re(x[..., 0])
imag = self.bn_im(x[..., 1])
output = torch.stack((real, imag), dim=-1)
return output
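A shape check for the complex convolution above (assumption: the trailing dimension of size 2 holds the real and imaginary parts):

import torch

from modelscope.models.audio.ans.complex_nn import ComplexConv2d

conv = ComplexConv2d(1, 4, kernel_size=3, padding=1)
x = torch.randn(2, 1, 16, 10, 2)  # [batch, channel, axis1, axis2, 2]
y = conv(x)  # real/imag parts mixed per the complex product rule
print(y.shape)  # torch.Size([2, 4, 16, 10, 2])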

+112 -0 modelscope/models/audio/ans/conv_stft.py

@@ -0,0 +1,112 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.signal import get_window


def init_kernels(win_len, win_inc, fft_len, win_type=None, invers=False):
if win_type == 'None' or win_type is None:
window = np.ones(win_len)
else:
window = get_window(win_type, win_len, fftbins=True)**0.5

N = fft_len
fourier_basis = np.fft.rfft(np.eye(N))[:win_len]
real_kernel = np.real(fourier_basis)
imag_kernel = np.imag(fourier_basis)
kernel = np.concatenate([real_kernel, imag_kernel], 1).T

if invers:
kernel = np.linalg.pinv(kernel).T

kernel = kernel * window
kernel = kernel[:, None, :]
return torch.from_numpy(kernel.astype(np.float32)), torch.from_numpy(
window[None, :, None].astype(np.float32))


class ConvSTFT(nn.Module):

def __init__(self,
win_len,
win_inc,
fft_len=None,
win_type='hamming',
feature_type='real',
fix=True):
super(ConvSTFT, self).__init__()

if fft_len is None:
self.fft_len = int(2**np.ceil(np.log2(win_len)))  # np.int was removed in NumPy 1.24
else:
self.fft_len = fft_len

kernel, _ = init_kernels(win_len, win_inc, self.fft_len, win_type)
self.weight = nn.Parameter(kernel, requires_grad=(not fix))
self.feature_type = feature_type
self.stride = win_inc
self.win_len = win_len
self.dim = self.fft_len

def forward(self, inputs):
if inputs.dim() == 2:
inputs = torch.unsqueeze(inputs, 1)

outputs = F.conv1d(inputs, self.weight, stride=self.stride)

if self.feature_type == 'complex':
return outputs
else:
dim = self.dim // 2 + 1
real = outputs[:, :dim, :]
imag = outputs[:, dim:, :]
mags = torch.sqrt(real**2 + imag**2)
phase = torch.atan2(imag, real)
return mags, phase


class ConviSTFT(nn.Module):

def __init__(self,
win_len,
win_inc,
fft_len=None,
win_type='hamming',
feature_type='real',
fix=True):
super(ConviSTFT, self).__init__()
if fft_len is None:
self.fft_len = int(2**np.ceil(np.log2(win_len)))  # np.int was removed in NumPy 1.24
else:
self.fft_len = fft_len
kernel, window = init_kernels(
win_len, win_inc, self.fft_len, win_type, invers=True)
self.weight = nn.Parameter(kernel, requires_grad=(not fix))
self.feature_type = feature_type
self.win_type = win_type
self.win_len = win_len
self.win_inc = win_inc
self.stride = win_inc
self.dim = self.fft_len
self.register_buffer('window', window)
self.register_buffer('enframe', torch.eye(win_len)[:, None, :])

def forward(self, inputs, phase=None):
"""
Args:
inputs : [B, N+2, T] (complex spec) or [B, N//2+1, T] (mags)
phase: [B, N//2+1, T] (if not none)
"""

if phase is not None:
real = inputs * torch.cos(phase)
imag = inputs * torch.sin(phase)
inputs = torch.cat([real, imag], 1)
outputs = F.conv_transpose1d(inputs, self.weight, stride=self.stride)

# this is from torch-stft: https://github.com/pseeth/torch-stft
t = self.window.repeat(1, 1, inputs.size(-1))**2
coff = F.conv_transpose1d(t, self.enframe, stride=self.stride)
outputs = outputs / (coff + 1e-8)
return outputs
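An analysis/synthesis round trip with the two modules (the 400/100/512 framing mirrors the FRCRN defaults below; the input signal is random):

import torch

from modelscope.models.audio.ans.conv_stft import ConviSTFT, ConvSTFT

stft = ConvSTFT(400, 100, 512, win_type='hanning', feature_type='complex')
istft = ConviSTFT(400, 100, 512, win_type='hanning', feature_type='complex')
wav = torch.randn(1, 16000)  # one second of audio at 16 kHz
spec = stft(wav)  # [1, 514, T]: real parts stacked over imaginary parts
recon = istft(spec)  # approximately reconstructs the waveform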

+309 -0 modelscope/models/audio/ans/frcrn.py

@@ -0,0 +1,309 @@
import os
from typing import Dict

import torch
import torch.nn as nn
import torch.nn.functional as F

from modelscope.metainfo import Models
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
from ...base import Model, Tensor
from .conv_stft import ConviSTFT, ConvSTFT
from .unet import UNet


class FTB(nn.Module):

def __init__(self, input_dim=257, in_channel=9, r_channel=5):

super(FTB, self).__init__()
self.in_channel = in_channel
self.conv1 = nn.Sequential(
nn.Conv2d(in_channel, r_channel, kernel_size=[1, 1]),
nn.BatchNorm2d(r_channel), nn.ReLU())

self.conv1d = nn.Sequential(
nn.Conv1d(
r_channel * input_dim, in_channel, kernel_size=9, padding=4),
nn.BatchNorm1d(in_channel), nn.ReLU())
self.freq_fc = nn.Linear(input_dim, input_dim, bias=False)

self.conv2 = nn.Sequential(
nn.Conv2d(in_channel * 2, in_channel, kernel_size=[1, 1]),
nn.BatchNorm2d(in_channel), nn.ReLU())

def forward(self, inputs):
'''
inputs should be [Batch, Ca, Dim, Time]
'''
# T-F attention
conv1_out = self.conv1(inputs)
B, C, D, T = conv1_out.size()
reshape1_out = torch.reshape(conv1_out, [B, C * D, T])
conv1d_out = self.conv1d(reshape1_out)
conv1d_out = torch.reshape(conv1d_out, [B, self.in_channel, 1, T])

# now is also [B,C,D,T]
att_out = conv1d_out * inputs

# transpose to [B,C,T,D]
att_out = torch.transpose(att_out, 2, 3)
freqfc_out = self.freq_fc(att_out)
att_out = torch.transpose(freqfc_out, 2, 3)

cat_out = torch.cat([att_out, inputs], 1)
outputs = self.conv2(cat_out)
return outputs


@MODELS.register_module(
Tasks.speech_signal_process, module_name=Models.speech_frcrn_ans_cirm_16k)
class FRCRNModel(Model):
r""" A decorator of FRCRN for integrating into modelscope framework """

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the frcrn model from the `model_dir` path.

Args:
model_dir (str): the model path.
"""
super().__init__(model_dir, *args, **kwargs)
self._model = FRCRN(*args, **kwargs)
model_bin_file = os.path.join(model_dir,
ModelFile.TORCH_MODEL_BIN_FILE)
if os.path.exists(model_bin_file):
checkpoint = torch.load(model_bin_file)
self._model.load_state_dict(checkpoint, strict=False)

def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
output = self._model.forward(input)
return {
'spec_l1': output[0],
'wav_l1': output[1],
'mask_l1': output[2],
'spec_l2': output[3],
'wav_l2': output[4],
'mask_l2': output[5]
}

def to(self, *args, **kwargs):
self._model = self._model.to(*args, **kwargs)
return self

def eval(self):
self._model = self._model.train(False)
return self


class FRCRN(nn.Module):
r""" Frequency Recurrent CRN """

def __init__(self,
complex,
model_complexity,
model_depth,
log_amp,
padding_mode,
win_len=400,
win_inc=100,
fft_len=512,
win_type='hanning'):
r"""
Args:
complex: Whether to use complex networks.
model_complexity: define the model complexity with the number of layers
model_depth: Only two options are available : 10, 20
log_amp: Whether to use log amplitude to estimate signals
padding_mode: Encoder's convolution filter. 'zeros', 'reflect'
win_len: length of window used for defining one frame of sample points
win_inc: length of window shifting (equivalent to hop_size)
fft_len: number of Short Time Fourier Transform (STFT) points
win_type: windowing type used in STFT, eg. 'hanning', 'hamming'
"""
super().__init__()
self.feat_dim = fft_len // 2 + 1

self.win_len = win_len
self.win_inc = win_inc
self.fft_len = fft_len
self.win_type = win_type

fix = True
self.stft = ConvSTFT(
self.win_len,
self.win_inc,
self.fft_len,
self.win_type,
feature_type='complex',
fix=fix)
self.istft = ConviSTFT(
self.win_len,
self.win_inc,
self.fft_len,
self.win_type,
feature_type='complex',
fix=fix)
self.unet = UNet(
1,
complex=complex,
model_complexity=model_complexity,
model_depth=model_depth,
padding_mode=padding_mode)
self.unet2 = UNet(
1,
complex=complex,
model_complexity=model_complexity,
model_depth=model_depth,
padding_mode=padding_mode)

def forward(self, inputs):
out_list = []
# [B, D*2, T]
cmp_spec = self.stft(inputs)
# [B, 1, D*2, T]
cmp_spec = torch.unsqueeze(cmp_spec, 1)

# to [B, 2, D, T] real_part/imag_part
cmp_spec = torch.cat([
cmp_spec[:, :, :self.feat_dim, :],
cmp_spec[:, :, self.feat_dim:, :],
], 1)

# [B, 2, D, T]
cmp_spec = torch.unsqueeze(cmp_spec, 4)
# [B, 1, D, T, 2]
cmp_spec = torch.transpose(cmp_spec, 1, 4)
unet1_out = self.unet(cmp_spec)
cmp_mask1 = torch.tanh(unet1_out)
unet2_out = self.unet2(unet1_out)
cmp_mask2 = torch.tanh(unet2_out)
est_spec, est_wav, est_mask = self.apply_mask(cmp_spec, cmp_mask1)
out_list.append(est_spec)
out_list.append(est_wav)
out_list.append(est_mask)
cmp_mask2 = cmp_mask2 + cmp_mask1
est_spec, est_wav, est_mask = self.apply_mask(cmp_spec, cmp_mask2)
out_list.append(est_spec)
out_list.append(est_wav)
out_list.append(est_mask)
return out_list

def apply_mask(self, cmp_spec, cmp_mask):
est_spec = torch.cat([
cmp_spec[:, :, :, :, 0] * cmp_mask[:, :, :, :, 0]
- cmp_spec[:, :, :, :, 1] * cmp_mask[:, :, :, :, 1],
cmp_spec[:, :, :, :, 0] * cmp_mask[:, :, :, :, 1]
+ cmp_spec[:, :, :, :, 1] * cmp_mask[:, :, :, :, 0]
], 1)
est_spec = torch.cat([est_spec[:, 0, :, :], est_spec[:, 1, :, :]], 1)
cmp_mask = torch.squeeze(cmp_mask, 1)
cmp_mask = torch.cat([cmp_mask[:, :, :, 0], cmp_mask[:, :, :, 1]], 1)

est_wav = self.istft(est_spec)
est_wav = torch.squeeze(est_wav, 1)
return est_spec, est_wav, cmp_mask

def get_params(self, weight_decay=0.0):
# add L2 penalty
weights, biases = [], []
for name, param in self.named_parameters():
if 'bias' in name:
biases += [param]
else:
weights += [param]
params = [{
'params': weights,
'weight_decay': weight_decay,
}, {
'params': biases,
'weight_decay': 0.0,
}]
return params

def loss(self, noisy, labels, out_list, mode='Mix'):
if mode == 'SiSNR':
count = 0
while count < len(out_list):
est_spec = out_list[count]
count = count + 1
est_wav = out_list[count]
count = count + 1
est_mask = out_list[count]
count = count + 1
if count != 3:
loss = self.loss_1layer(noisy, est_spec, est_wav, labels,
est_mask, mode)
return loss

elif mode == 'Mix':
count = 0
while count < len(out_list):
est_spec = out_list[count]
count = count + 1
est_wav = out_list[count]
count = count + 1
est_mask = out_list[count]
count = count + 1
if count != 3:
amp_loss, phase_loss, SiSNR_loss = self.loss_1layer(
noisy, est_spec, est_wav, labels, est_mask, mode)
loss = amp_loss + phase_loss + SiSNR_loss
return loss, amp_loss, phase_loss

def loss_1layer(self, noisy, est, est_wav, labels, cmp_mask, mode='Mix'):
r""" Compute the loss by mode
mode == 'Mix'
est: [B, F*2, T]
labels: [B, F*2,T]
mode == 'SiSNR'
est: [B, T]
labels: [B, T]
"""
if mode == 'SiSNR':
if labels.dim() == 3:
labels = torch.squeeze(labels, 1)
if est_wav.dim() == 3:
est_wav = torch.squeeze(est_wav, 1)
return -si_snr(est_wav, labels)
elif mode == 'Mix':

if labels.dim() == 3:
labels = torch.squeeze(labels, 1)
if est_wav.dim() == 3:
est_wav = torch.squeeze(est_wav, 1)
SiSNR_loss = -si_snr(est_wav, labels)

b, d, t = est.size()
S = self.stft(labels)
Sr = S[:, :self.feat_dim, :]
Si = S[:, self.feat_dim:, :]
Y = self.stft(noisy)
Yr = Y[:, :self.feat_dim, :]
Yi = Y[:, self.feat_dim:, :]
Y_pow = Yr**2 + Yi**2
gth_mask = torch.cat([(Sr * Yr + Si * Yi) / (Y_pow + 1e-8),
(Si * Yr - Sr * Yi) / (Y_pow + 1e-8)], 1)
gth_mask[gth_mask > 2] = 1
gth_mask[gth_mask < -2] = -1
amp_loss = F.mse_loss(gth_mask[:, :self.feat_dim, :],
cmp_mask[:, :self.feat_dim, :]) * d
phase_loss = F.mse_loss(gth_mask[:, self.feat_dim:, :],
cmp_mask[:, self.feat_dim:, :]) * d
return amp_loss, phase_loss, SiSNR_loss


def l2_norm(s1, s2):
norm = torch.sum(s1 * s2, -1, keepdim=True)
return norm


def si_snr(s1, s2, eps=1e-8):
s1_s2_norm = l2_norm(s1, s2)
s2_s2_norm = l2_norm(s2, s2)
s_target = s1_s2_norm / (s2_s2_norm + eps) * s2
e_noise = s1 - s_target
target_norm = l2_norm(s_target, s_target)
noise_norm = l2_norm(e_noise, e_noise)
snr = 10 * torch.log10((target_norm) / (noise_norm + eps) + eps)
return torch.mean(snr)
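For inference the registered model is meant to be driven through the pipeline API; a hedged sketch (the hub model id 'damo/speech_frcrn_ans_cirm_16k' and the input file are assumptions, not defined in this diff):

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

ans = pipeline(Tasks.speech_signal_process,
               model='damo/speech_frcrn_ans_cirm_16k')
result = ans('noisy_speech.wav')  # placeholder path; result holds the enhanced audio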

+26 -0 modelscope/models/audio/ans/se_module_complex.py

@@ -0,0 +1,26 @@
import torch
from torch import nn


class SELayer(nn.Module):

def __init__(self, channel, reduction=16):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc_r = nn.Sequential(
nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel), nn.Sigmoid())
self.fc_i = nn.Sequential(
nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel), nn.Sigmoid())

def forward(self, x):
b, c, _, _, _ = x.size()
x_r = self.avg_pool(x[:, :, :, :, 0]).view(b, c)
x_i = self.avg_pool(x[:, :, :, :, 1]).view(b, c)
y_r = self.fc_r(x_r).view(b, c, 1, 1, 1) - self.fc_i(x_i).view(
b, c, 1, 1, 1)
y_i = self.fc_r(x_i).view(b, c, 1, 1, 1) + self.fc_i(x_r).view(
b, c, 1, 1, 1)
y = torch.cat([y_r, y_i], 4)
return x * y
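A shape check for the complex squeeze-and-excitation layer (the [B, C, D, T, 2] input layout is assumed from the UNet below):

import torch

from modelscope.models.audio.ans.se_module_complex import SELayer

se = SELayer(channel=32, reduction=8)
x = torch.randn(2, 32, 16, 10, 2)
y = se(x)  # same shape; channels reweighted separately for real/imag parts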

+269 -0 modelscope/models/audio/ans/unet.py

@@ -0,0 +1,269 @@
import torch
import torch.nn as nn

from . import complex_nn
from .se_module_complex import SELayer


class Encoder(nn.Module):

def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding=None,
complex=False,
padding_mode='zeros'):
super().__init__()
if padding is None:
padding = [(i - 1) // 2 for i in kernel_size] # 'SAME' padding

if complex:
conv = complex_nn.ComplexConv2d
bn = complex_nn.ComplexBatchNorm2d
else:
conv = nn.Conv2d
bn = nn.BatchNorm2d

self.conv = conv(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
padding_mode=padding_mode)
self.bn = bn(out_channels)
self.relu = nn.LeakyReLU(inplace=True)

def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x


class Decoder(nn.Module):

def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding=(0, 0),
complex=False):
super().__init__()
if complex:
tconv = complex_nn.ComplexConvTranspose2d
bn = complex_nn.ComplexBatchNorm2d
else:
tconv = nn.ConvTranspose2d
bn = nn.BatchNorm2d

self.transconv = tconv(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding)
self.bn = bn(out_channels)
self.relu = nn.LeakyReLU(inplace=True)

def forward(self, x):
x = self.transconv(x)
x = self.bn(x)
x = self.relu(x)
return x


class UNet(nn.Module):

def __init__(self,
input_channels=1,
complex=False,
model_complexity=45,
model_depth=20,
padding_mode='zeros'):
super().__init__()

if complex:
model_complexity = int(model_complexity // 1.414)

self.set_size(
model_complexity=model_complexity,
input_channels=input_channels,
model_depth=model_depth)
self.encoders = []
self.model_length = model_depth // 2
self.fsmn = complex_nn.ComplexUniDeepFsmn(128, 128, 128)
self.se_layers_enc = []
self.fsmn_enc = []
for i in range(self.model_length):
fsmn_enc = complex_nn.ComplexUniDeepFsmn_L1(128, 128, 128)
self.add_module('fsmn_enc{}'.format(i), fsmn_enc)
self.fsmn_enc.append(fsmn_enc)
module = Encoder(
self.enc_channels[i],
self.enc_channels[i + 1],
kernel_size=self.enc_kernel_sizes[i],
stride=self.enc_strides[i],
padding=self.enc_paddings[i],
complex=complex,
padding_mode=padding_mode)
self.add_module('encoder{}'.format(i), module)
self.encoders.append(module)
se_layer_enc = SELayer(self.enc_channels[i + 1], 8)
self.add_module('se_layer_enc{}'.format(i), se_layer_enc)
self.se_layers_enc.append(se_layer_enc)
self.decoders = []
self.fsmn_dec = []
self.se_layers_dec = []
for i in range(self.model_length):
fsmn_dec = complex_nn.ComplexUniDeepFsmn_L1(128, 128, 128)
self.add_module('fsmn_dec{}'.format(i), fsmn_dec)
self.fsmn_dec.append(fsmn_dec)
module = Decoder(
self.dec_channels[i] * 2,
self.dec_channels[i + 1],
kernel_size=self.dec_kernel_sizes[i],
stride=self.dec_strides[i],
padding=self.dec_paddings[i],
complex=complex)
self.add_module('decoder{}'.format(i), module)
self.decoders.append(module)
if i < self.model_length - 1:
se_layer_dec = SELayer(self.dec_channels[i + 1], 8)
self.add_module('se_layer_dec{}'.format(i), se_layer_dec)
self.se_layers_dec.append(se_layer_dec)
if complex:
conv = complex_nn.ComplexConv2d
else:
conv = nn.Conv2d

linear = conv(self.dec_channels[-1], 1, 1)

self.add_module('linear', linear)
self.complex = complex
self.padding_mode = padding_mode

self.decoders = nn.ModuleList(self.decoders)
self.encoders = nn.ModuleList(self.encoders)
self.se_layers_enc = nn.ModuleList(self.se_layers_enc)
self.se_layers_dec = nn.ModuleList(self.se_layers_dec)
self.fsmn_enc = nn.ModuleList(self.fsmn_enc)
self.fsmn_dec = nn.ModuleList(self.fsmn_dec)

def forward(self, inputs):
x = inputs
# go down
xs = []
xs_se = []
xs_se.append(x)
for i, encoder in enumerate(self.encoders):
xs.append(x)
if i > 0:
x = self.fsmn_enc[i](x)
x = encoder(x)
xs_se.append(self.se_layers_enc[i](x))
# xs : x0=input x1 ... x9
x = self.fsmn(x)

p = x
for i, decoder in enumerate(self.decoders):
p = decoder(p)
if i < self.model_length - 1:
p = self.fsmn_dec[i](p)
if i == self.model_length - 1:
break
if i < self.model_length - 2:
p = self.se_layers_dec[i](p)
p = torch.cat([p, xs_se[self.model_length - 1 - i]], dim=1)

# cmp_spec: [12, 1, 513, 64, 2]
cmp_spec = self.linear(p)
return cmp_spec

def set_size(self, model_complexity, model_depth=20, input_channels=1):

if model_depth == 14:
self.enc_channels = [
input_channels, 128, 128, 128, 128, 128, 128, 128
]
self.enc_kernel_sizes = [(5, 2), (5, 2), (5, 2), (5, 2), (5, 2),
(5, 2), (2, 2)]
self.enc_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1),
(2, 1)]
self.enc_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1),
(0, 1), (0, 1)]
self.dec_channels = [64, 128, 128, 128, 128, 128, 128, 1]
self.dec_kernel_sizes = [(2, 2), (5, 2), (5, 2), (5, 2), (6, 2),
(5, 2), (5, 2)]
self.dec_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1),
(2, 1)]
self.dec_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1),
(0, 1), (0, 1)]

elif model_depth == 10:
self.enc_channels = [
input_channels,
16,
32,
64,
128,
256,
]
self.enc_kernel_sizes = [(3, 3), (3, 3), (3, 3), (3, 3), (3, 3)]
self.enc_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1)]
self.enc_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]
self.dec_channels = [128, 128, 64, 32, 16, 1]
self.dec_kernel_sizes = [(3, 3), (3, 3), (3, 3), (4, 3), (3, 3)]
self.dec_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1)]
self.dec_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]

elif model_depth == 20:
self.enc_channels = [
input_channels, model_complexity, model_complexity,
model_complexity * 2, model_complexity * 2,
model_complexity * 2, model_complexity * 2,
model_complexity * 2, model_complexity * 2,
model_complexity * 2, 128
]

self.enc_kernel_sizes = [(7, 1), (1, 7), (6, 4), (7, 5), (5, 3),
(5, 3), (5, 3), (5, 3), (5, 3), (5, 3)]

self.enc_strides = [(1, 1), (1, 1), (2, 2), (2, 1), (2, 2), (2, 1),
(2, 2), (2, 1), (2, 2), (2, 1)]

self.enc_paddings = [
(3, 0),
(0, 3),
None, # (0, 2),
None,
None, # (3,1),
None, # (3,1),
None, # (1,2),
None,
None,
None
]

self.dec_channels = [
0, model_complexity * 2, model_complexity * 2,
model_complexity * 2, model_complexity * 2,
model_complexity * 2, model_complexity * 2,
model_complexity * 2, model_complexity * 2,
model_complexity * 2, model_complexity * 2,
model_complexity * 2
]

self.dec_kernel_sizes = [(4, 3), (4, 2), (4, 3), (4, 2), (4, 3),
(4, 2), (6, 3), (7, 4), (1, 7), (7, 1)]

self.dec_strides = [(2, 1), (2, 2), (2, 1), (2, 2), (2, 1), (2, 2),
(2, 1), (2, 2), (1, 1), (1, 1)]

self.dec_paddings = [(1, 1), (1, 0), (1, 1), (1, 0), (1, 1),
(1, 0), (2, 1), (2, 1), (0, 3), (3, 0)]
else:
raise ValueError('Unknown model depth : {}'.format(model_depth))
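A minimal check of one complex Encoder block (the kernel, stride, and tensor sizes here are illustrative, not from a real config):

import torch

from modelscope.models.audio.ans.unet import Encoder

enc = Encoder(1, 8, kernel_size=(3, 3), stride=(1, 1), complex=True)
x = torch.randn(2, 1, 16, 10, 2)  # [batch, channel, freq, time, 2]
y = enc(x)  # 'SAME' padding keeps freq/time dims: [2, 8, 16, 10, 2]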

+0 -0 modelscope/models/cv/animal_recognition/__init__.py


+430 -0 modelscope/models/cv/animal_recognition/resnet.py

@@ -0,0 +1,430 @@
import math

import torch
import torch.nn as nn

from .splat import SplAtConv2d

__all__ = ['ResNet', 'Bottleneck']


class DropBlock2D(object):

def __init__(self, *args, **kwargs):
raise NotImplementedError


class GlobalAvgPool2d(nn.Module):

def __init__(self):
"""Global average pooling over the input's spatial dimensions"""
super(GlobalAvgPool2d, self).__init__()

def forward(self, inputs):
return nn.functional.adaptive_avg_pool2d(inputs,
1).view(inputs.size(0), -1)


class Bottleneck(nn.Module):
expansion = 4

def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
radix=1,
cardinality=1,
bottleneck_width=64,
avd=False,
avd_first=False,
dilation=1,
is_first=False,
rectified_conv=False,
rectify_avg=False,
norm_layer=None,
dropblock_prob=0.0,
last_gamma=False):
super(Bottleneck, self).__init__()
group_width = int(planes * (bottleneck_width / 64.)) * cardinality
self.conv1 = nn.Conv2d(
inplanes, group_width, kernel_size=1, bias=False)
self.bn1 = norm_layer(group_width)
self.dropblock_prob = dropblock_prob
self.radix = radix
self.avd = avd and (stride > 1 or is_first)
self.avd_first = avd_first

if self.avd:
self.avd_layer = nn.AvgPool2d(3, stride, padding=1)
stride = 1

if dropblock_prob > 0.0:
self.dropblock1 = DropBlock2D(dropblock_prob, 3)
if radix == 1:
self.dropblock2 = DropBlock2D(dropblock_prob, 3)
self.dropblock3 = DropBlock2D(dropblock_prob, 3)

if radix >= 1:
self.conv2 = SplAtConv2d(
group_width,
group_width,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=cardinality,
bias=False,
radix=radix,
rectify=rectified_conv,
rectify_avg=rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
elif rectified_conv:
from rfconv import RFConv2d
self.conv2 = RFConv2d(
group_width,
group_width,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=cardinality,
bias=False,
average_mode=rectify_avg)
self.bn2 = norm_layer(group_width)
else:
self.conv2 = nn.Conv2d(
group_width,
group_width,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=cardinality,
bias=False)
self.bn2 = norm_layer(group_width)

self.conv3 = nn.Conv2d(
group_width, planes * 4, kernel_size=1, bias=False)
self.bn3 = norm_layer(planes * 4)

if last_gamma:
from torch.nn.init import zeros_
zeros_(self.bn3.weight)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.dilation = dilation
self.stride = stride

def forward(self, x):
residual = x

out = self.conv1(x)
out = self.bn1(out)
if self.dropblock_prob > 0.0:
out = self.dropblock1(out)
out = self.relu(out)

if self.avd and self.avd_first:
out = self.avd_layer(out)

out = self.conv2(out)
if self.radix == 0:
out = self.bn2(out)
if self.dropblock_prob > 0.0:
out = self.dropblock2(out)
out = self.relu(out)

if self.avd and not self.avd_first:
out = self.avd_layer(out)

out = self.conv3(out)
out = self.bn3(out)
if self.dropblock_prob > 0.0:
out = self.dropblock3(out)

if self.downsample is not None:
residual = self.downsample(x)

out += residual
out = self.relu(out)

return out


class ResNet(nn.Module):

def __init__(self,
block,
layers,
radix=1,
groups=1,
bottleneck_width=64,
num_classes=1000,
dilated=False,
dilation=1,
deep_stem=False,
stem_width=64,
avg_down=False,
rectified_conv=False,
rectify_avg=False,
avd=False,
avd_first=False,
final_drop=0.0,
dropblock_prob=0,
last_gamma=False,
norm_layer=nn.BatchNorm2d):
self.cardinality = groups
self.bottleneck_width = bottleneck_width
# ResNet-D params
self.inplanes = stem_width * 2 if deep_stem else 64
self.avg_down = avg_down
self.last_gamma = last_gamma
# ResNeSt params
self.radix = radix
self.avd = avd
self.avd_first = avd_first

super(ResNet, self).__init__()
self.rectified_conv = rectified_conv
self.rectify_avg = rectify_avg
if rectified_conv:
from rfconv import RFConv2d
conv_layer = RFConv2d
else:
conv_layer = nn.Conv2d
conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}
if deep_stem:
self.conv1 = nn.Sequential(
conv_layer(
3,
stem_width,
kernel_size=3,
stride=2,
padding=1,
bias=False,
**conv_kwargs),
norm_layer(stem_width),
nn.ReLU(inplace=True),
conv_layer(
stem_width,
stem_width,
kernel_size=3,
stride=1,
padding=1,
bias=False,
**conv_kwargs),
norm_layer(stem_width),
nn.ReLU(inplace=True),
conv_layer(
stem_width,
stem_width * 2,
kernel_size=3,
stride=1,
padding=1,
bias=False,
**conv_kwargs),
)
else:
self.conv1 = conv_layer(
3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
**conv_kwargs)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(
block, 64, layers[0], norm_layer=norm_layer, is_first=False)
self.layer2 = self._make_layer(
block, 128, layers[1], stride=2, norm_layer=norm_layer)
if dilated or dilation == 4:
self.layer3 = self._make_layer(
block,
256,
layers[2],
stride=1,
dilation=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(
block,
512,
layers[3],
stride=1,
dilation=4,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
elif dilation == 2:
self.layer3 = self._make_layer(
block,
256,
layers[2],
stride=2,
dilation=1,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(
block,
512,
layers[3],
stride=1,
dilation=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
else:
self.layer3 = self._make_layer(
block,
256,
layers[2],
stride=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(
block,
512,
layers[3],
stride=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.avgpool = GlobalAvgPool2d()
self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None
self.fc = nn.Linear(512 * block.expansion, num_classes)

for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, norm_layer):
m.weight.data.fill_(1)
m.bias.data.zero_()

def _make_layer(self,
block,
planes,
blocks,
stride=1,
dilation=1,
norm_layer=None,
dropblock_prob=0.0,
is_first=True):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
down_layers = []
if self.avg_down:
if dilation == 1:
down_layers.append(
nn.AvgPool2d(
kernel_size=stride,
stride=stride,
ceil_mode=True,
count_include_pad=False))
else:
down_layers.append(
nn.AvgPool2d(
kernel_size=1,
stride=1,
ceil_mode=True,
count_include_pad=False))
down_layers.append(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=1,
bias=False))
else:
down_layers.append(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False))
down_layers.append(norm_layer(planes * block.expansion))
downsample = nn.Sequential(*down_layers)

layers = []
if dilation == 1 or dilation == 2:
layers.append(
block(
self.inplanes,
planes,
stride,
downsample=downsample,
radix=self.radix,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd,
avd_first=self.avd_first,
dilation=1,
is_first=is_first,
rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
elif dilation == 4:
layers.append(
block(
self.inplanes,
planes,
stride,
downsample=downsample,
radix=self.radix,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd,
avd_first=self.avd_first,
dilation=2,
is_first=is_first,
rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
else:
raise RuntimeError('=> unknown dilation size: {}'.format(dilation))

self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(
block(
self.inplanes,
planes,
radix=self.radix,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd,
avd_first=self.avd_first,
dilation=dilation,
rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))

return nn.Sequential(*layers)

def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)

x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)

x = self.avgpool(x)
x = torch.flatten(x, 1)
if self.drop:
x = self.drop(x)
x = self.fc(x)

return x
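
A rough usage sketch of the class above (not shown in the diff itself): the animal-recognition pipeline later in this commit builds this network with ResNeSt-101 settings and an 8288-class head; `Bottleneck` is the block class defined earlier in this file.

import torch
from modelscope.models.cv.animal_recognition import resnet

# ResNeSt-101 configuration, mirroring AnimalRecogPipeline below
net = resnet.ResNet(
    resnet.Bottleneck, [3, 4, 23, 3],
    radix=2, groups=1, bottleneck_width=64,
    deep_stem=True, stem_width=64, avg_down=True,
    avd=True, avd_first=False, num_classes=8288)
net.eval()
with torch.no_grad():
    logits = net(torch.randn(1, 3, 224, 224))  # -> torch.Size([1, 8288])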

+ 125
- 0
modelscope/models/cv/animal_recognition/splat.py View File

@@ -0,0 +1,125 @@
"""Split-Attention"""

import torch
import torch.nn.functional as F
from torch import nn
from torch.nn import BatchNorm2d, Conv2d, Linear, Module, ReLU
from torch.nn.modules.utils import _pair

__all__ = ['SplAtConv2d']


class SplAtConv2d(Module):
"""Split-Attention Conv2d
"""

def __init__(self,
in_channels,
channels,
kernel_size,
stride=(1, 1),
padding=(0, 0),
dilation=(1, 1),
groups=1,
bias=True,
radix=2,
reduction_factor=4,
rectify=False,
rectify_avg=False,
norm_layer=None,
dropblock_prob=0.0,
**kwargs):
super(SplAtConv2d, self).__init__()
padding = _pair(padding)
self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
self.rectify_avg = rectify_avg
inter_channels = max(in_channels * radix // reduction_factor, 32)
self.radix = radix
self.cardinality = groups
self.channels = channels
self.dropblock_prob = dropblock_prob
if self.rectify:
from rfconv import RFConv2d
self.conv = RFConv2d(
in_channels,
channels * radix,
kernel_size,
stride,
padding,
dilation,
groups=groups * radix,
bias=bias,
average_mode=rectify_avg,
**kwargs)
else:
self.conv = Conv2d(
in_channels,
channels * radix,
kernel_size,
stride,
padding,
dilation,
groups=groups * radix,
bias=bias,
**kwargs)
self.use_bn = norm_layer is not None
if self.use_bn:
self.bn0 = norm_layer(channels * radix)
self.relu = ReLU(inplace=True)
self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
if self.use_bn:
self.bn1 = norm_layer(inter_channels)
self.fc2 = Conv2d(
inter_channels, channels * radix, 1, groups=self.cardinality)
if dropblock_prob > 0.0:
self.dropblock = DropBlock2D(dropblock_prob, 3)
self.rsoftmax = rSoftMax(radix, groups)

def forward(self, x):
x = self.conv(x)
if self.use_bn:
x = self.bn0(x)
if self.dropblock_prob > 0.0:
x = self.dropblock(x)
x = self.relu(x)

batch, rchannel = x.shape[:2]
if self.radix > 1:
splited = torch.split(x, rchannel // self.radix, dim=1)
gap = sum(splited)
else:
gap = x
gap = F.adaptive_avg_pool2d(gap, 1)
gap = self.fc1(gap)

if self.use_bn:
gap = self.bn1(gap)
gap = self.relu(gap)

atten = self.fc2(gap)
atten = self.rsoftmax(atten).view(batch, -1, 1, 1)

if self.radix > 1:
attens = torch.split(atten, rchannel // self.radix, dim=1)
out = sum([att * split for (att, split) in zip(attens, splited)])
else:
out = atten * x
return out.contiguous()


class rSoftMax(nn.Module):

def __init__(self, radix, cardinality):
super().__init__()
self.radix = radix
self.cardinality = cardinality

def forward(self, x):
batch = x.size(0)
if self.radix > 1:
x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2)
x = F.softmax(x, dim=1)
x = x.reshape(batch, -1)
else:
x = torch.sigmoid(x)
return x
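
A quick shape check for the split-attention block, sketched under one constraint: keep dropblock_prob at its default 0.0, since DropBlock2D is referenced above but never defined or imported in this file.

import torch
from torch import nn
from modelscope.models.cv.animal_recognition.splat import SplAtConv2d

# radix=2: the conv emits 2*128 channels; rSoftMax reweights the two splits
conv = SplAtConv2d(64, 128, kernel_size=3, padding=1,
                   radix=2, norm_layer=nn.BatchNorm2d)
out = conv(torch.randn(2, 64, 56, 56))
print(out.shape)  # torch.Size([2, 128, 56, 56])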

+ 2
- 0
modelscope/models/multi_modal/__init__.py View File

@@ -1,2 +1,4 @@
from .clip.clip_model import CLIPForMultiModalEmbedding
from .image_captioning_model import OfaForImageCaptioning
from .mplug_for_visual_question_answering import \
MPlugForVisualQuestionAnswering

+ 46
- 0
modelscope/models/multi_modal/mplug_for_visual_question_answering.py View File

@@ -0,0 +1,46 @@
from typing import Dict

from ...metainfo import Models
from ...utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS

__all__ = ['MPlugForVisualQuestionAnswering']


@MODELS.register_module(
Tasks.visual_question_answering, module_name=Models.mplug)
class MPlugForVisualQuestionAnswering(Model):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the mplug model from the `model_dir` path.
Args:
model_dir (str): the model path.
"""

super().__init__(model_dir, *args, **kwargs)
from sofa.models.mplug import MPlugForVisualQuestionAnswering
self.model = MPlugForVisualQuestionAnswering.from_pretrained(model_dir)
self.tokenizer = self.model.tokenizer

def train(self):
return self.model.train()

def eval(self):
return self.model.eval()

def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
"""return the result by the model

Args:
input (Dict[str, Tensor]): the preprocessed data

Returns:
Dict[str, Tensor]: results
Example:
{
'predictions': Tensor([[1377, 4959, 2785, 6392...])]),
}
"""

return self.model(**input)[0]

+ 1
- 0
modelscope/models/nlp/__init__.py View File

@@ -5,6 +5,7 @@ from .sbert_for_nli import * # noqa F403
from .sbert_for_sentence_similarity import * # noqa F403
from .sbert_for_sentiment_classification import * # noqa F403
from .sbert_for_token_classification import * # noqa F403
from .sbert_for_zero_shot_classification import * # noqa F403
from .space.dialog_intent_prediction_model import * # noqa F403
from .space.dialog_modeling_model import * # noqa F403
from .space.dialog_state_tracking_model import * # noqa F403

+ 9
- 1
modelscope/models/nlp/masked_language_model.py View File

@@ -7,7 +7,7 @@ from ...utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS

__all__ = ['StructBertForMaskedLM', 'VecoForMaskedLM']
__all__ = ['BertForMaskedLM', 'StructBertForMaskedLM', 'VecoForMaskedLM']


class MaskedLanguageModelBase(Model):
@@ -61,3 +61,11 @@ class VecoForMaskedLM(MaskedLanguageModelBase):
    def build_model(self):
        from sofa import VecoForMaskedLM
        return VecoForMaskedLM.from_pretrained(self.model_dir)


@MODELS.register_module(Tasks.fill_mask, module_name=Models.bert)
class BertForMaskedLM(MaskedLanguageModelBase):

def build_model(self):
from transformers import BertForMaskedLM
return BertForMaskedLM.from_pretrained(self.model_dir)

+ 50
- 0
modelscope/models/nlp/sbert_for_zero_shot_classification.py View File

@@ -0,0 +1,50 @@
from typing import Any, Dict

import numpy as np

from modelscope.utils.constant import Tasks
from ...metainfo import Models
from ..base import Model
from ..builder import MODELS

__all__ = ['SbertForZeroShotClassification']


@MODELS.register_module(
Tasks.zero_shot_classification, module_name=Models.structbert)
class SbertForZeroShotClassification(Model):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the zero shot classification model from the `model_dir` path.

Args:
model_dir (str): the model path.
"""

super().__init__(model_dir, *args, **kwargs)
from sofa import SbertForSequenceClassification
self.model = SbertForSequenceClassification.from_pretrained(model_dir)

def train(self):
return self.model.train()

def eval(self):
return self.model.eval()

def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
"""return the result by the model

Args:
input (Dict[str, Any]): the preprocessed data

Returns:
Dict[str, np.ndarray]: results
Example:
{
'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value
}
"""
outputs = self.model(**input)
logits = outputs['logits'].numpy()
res = {'logits': logits}
return res
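
The model stops at raw logits; mapping them onto candidate labels is left to the zero-shot pipeline, which is not part of this diff. For the example output above, a plain softmax gives the per-class probabilities:

import numpy as np

logits = np.array([[-0.53860897, 1.5029076]], dtype=np.float32)
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
print(probs.round(3))  # [[0.115 0.885]]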

+ 2
- 2
modelscope/models/nlp/space/dialog_state_tracking_model.py View File

@@ -6,11 +6,11 @@ from ....utils.nlp.space.utils_dst import batch_to_device
from ...base import Model, Tensor
from ...builder import MODELS

__all__ = ['DialogStateTrackingModel']
__all__ = ['SpaceForDialogStateTrackingModel']


@MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space')
class DialogStateTrackingModel(Model):
class SpaceForDialogStateTrackingModel(Model):

    def __init__(self, model_dir: str, *args, **kwargs):
        """initialize the text generation model from the `model_dir` path.


+ 1
- 1
modelscope/msdatasets/config.py View File

@@ -19,4 +19,4 @@ DOWNLOADED_DATASETS_PATH = Path(
    os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH))

MS_HUB_ENDPOINT = os.environ.get('MS_HUB_ENDPOINT',
                                 'http://101.201.119.157:31752')
                                 'http://123.57.189.90:31752')

+ 41
- 15
modelscope/msdatasets/ms_dataset.py View File

@@ -3,7 +3,7 @@ from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional,
                    Sequence, Union)

import numpy as np
from datasets import Dataset
from datasets import Dataset, DatasetDict
from datasets import load_dataset as hf_load_dataset
from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE
from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES
@@ -12,7 +12,7 @@ from datasets.utils.file_utils import (is_relative_path,

from modelscope.msdatasets.config import MS_DATASETS_CACHE
from modelscope.msdatasets.utils.ms_api import MsApi
from modelscope.utils.constant import Hubs
from modelscope.utils.constant import DownloadMode, Hubs
from modelscope.utils.logger import get_logger

logger = get_logger()
@@ -34,6 +34,10 @@ class MsDataset:

    def __init__(self, hf_ds: Dataset, target: Optional[str] = None):
        self._hf_ds = hf_ds
        if target is not None and target not in self._hf_ds.features:
            raise TypeError(
                f'"target" must be a column of the dataset ({list(self._hf_ds.features.keys())}), but got {target}'
            )
        self.target = target

    def __iter__(self):
@@ -48,17 +52,23 @@ class MsDataset:

    @classmethod
    def from_hf_dataset(cls,
                        hf_ds: Dataset,
                        hf_ds: Union[Dataset, DatasetDict],
                        target: str = None) -> Union[dict, 'MsDataset']:
        if isinstance(hf_ds, Dataset):
            return cls(hf_ds, target)
        if len(hf_ds.keys()) == 1:
            return cls(next(iter(hf_ds.values())), target)
        return {k: cls(v, target) for k, v in hf_ds.items()}
        elif isinstance(hf_ds, DatasetDict):
            if len(hf_ds.keys()) == 1:
                return cls(next(iter(hf_ds.values())), target)
            return {k: cls(v, target) for k, v in hf_ds.items()}
        else:
            raise TypeError(
                f'"hf_ds" must be a Dataset or DatasetDict, but got {type(hf_ds)}'
            )

    @staticmethod
    def load(
        dataset_name: Union[str, list],
        namespace: Optional[str] = None,
        target: Optional[str] = None,
        version: Optional[str] = None,
        hub: Optional[Hubs] = Hubs.modelscope,
@@ -67,23 +77,32 @@ class MsDataset:
        data_dir: Optional[str] = None,
        data_files: Optional[Union[str, Sequence[str],
                                   Mapping[str, Union[str,
                                                      Sequence[str]]]]] = None
                                                      Sequence[str]]]]] = None,
        download_mode: Optional[DownloadMode] = DownloadMode.
        REUSE_DATASET_IF_EXISTS
    ) -> Union[dict, 'MsDataset']:
        """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
        Args:

            dataset_name (str): Path or name of the dataset.
            namespace (str, optional): Namespace of the dataset. It should not be None if you load a remote dataset
                from Hubs.modelscope.
            target (str, optional): Name of the column to output.
            version (str, optional): Version of the dataset script to load:
            subset_name (str, optional): Defining the subset_name of the dataset.
            data_dir (str, optional): Defining the data_dir of the dataset configuration. I
            data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s).
            split (str, optional): Which split of the data to load.
            hub (Hubs, optional): When loading from a remote hub, where it is from
            hub (Hubs or str, optional): When loading from a remote hub, where it is from. default Hubs.modelscope
            download_mode (DownloadMode or str, optional): How to treat existing datasets. default
                DownloadMode.REUSE_DATASET_IF_EXISTS

        Returns:
            MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset.
        """
        download_mode = DownloadMode(download_mode
                                     or DownloadMode.REUSE_DATASET_IF_EXISTS)
        hub = Hubs(hub or Hubs.modelscope)
        if hub == Hubs.huggingface:
            dataset = hf_load_dataset(
                dataset_name,
@@ -91,21 +110,25 @@ class MsDataset:
                revision=version,
                split=split,
                data_dir=data_dir,
                data_files=data_files)
                data_files=data_files,
                download_mode=download_mode.value)
            return MsDataset.from_hf_dataset(dataset, target=target)
        else:
        elif hub == Hubs.modelscope:
            return MsDataset._load_ms_dataset(
                dataset_name,
                namespace=namespace,
                target=target,
                subset_name=subset_name,
                version=version,
                split=split,
                data_dir=data_dir,
                data_files=data_files)
                data_files=data_files,
                download_mode=download_mode)

    @staticmethod
    def _load_ms_dataset(
        dataset_name: Union[str, list],
        namespace: Optional[str] = None,
        target: Optional[str] = None,
        version: Optional[str] = None,
        subset_name: Optional[str] = None,
@@ -113,17 +136,19 @@ class MsDataset:
        data_dir: Optional[str] = None,
        data_files: Optional[Union[str, Sequence[str],
                                   Mapping[str, Union[str,
                                                      Sequence[str]]]]] = None
                                                      Sequence[str]]]]] = None,
        download_mode: Optional[DownloadMode] = None
    ) -> Union[dict, 'MsDataset']:
        if isinstance(dataset_name, str):
            use_hf = False
            if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \
                    (os.path.isfile(dataset_name) and dataset_name.endswith('.py')):
                use_hf = True
            elif is_relative_path(dataset_name):
            elif is_relative_path(dataset_name) and dataset_name.count(
                    '/') == 0:
                ms_api = MsApi()
                dataset_scripts = ms_api.fetch_dataset_scripts(
                    dataset_name, version)
                    dataset_name, namespace, download_mode, version)
                if 'py' in dataset_scripts: # dataset copied from hf datasets
                    dataset_name = dataset_scripts['py'][0]
                    use_hf = True
@@ -140,7 +165,8 @@ class MsDataset:
                split=split,
                data_dir=data_dir,
                data_files=data_files,
                cache_dir=MS_DATASETS_CACHE)
                cache_dir=MS_DATASETS_CACHE,
                download_mode=download_mode.value)
        else:
            # TODO load from ms datahub
            raise NotImplementedError(


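A sketch of the extended loading API; the dataset name and namespace below are placeholders, and `MsDataset` is imported from the module shown above.

from modelscope.msdatasets.ms_dataset import MsDataset
from modelscope.utils.constant import DownloadMode, Hubs

ds = MsDataset.load(
    'some_dataset',              # placeholder dataset name
    namespace='some_namespace',  # now required for ModelScope-hub datasets
    split='train',
    hub=Hubs.modelscope,
    download_mode=DownloadMode.FORCE_REDOWNLOAD)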
+ 33
- 15
modelscope/msdatasets/utils/ms_api.py View File

@@ -1,11 +1,14 @@
import os
import shutil
from collections import defaultdict
from typing import Optional

import requests

from modelscope.hub.errors import NotExistError, datahub_raise_on_error
from modelscope.msdatasets.config import (DOWNLOADED_DATASETS_PATH,
                                          MS_HUB_ENDPOINT)
from modelscope.utils.constant import DownloadMode
from modelscope.utils.logger import get_logger

logger = get_logger()
@@ -27,23 +30,38 @@ class MsApi:

    def fetch_dataset_scripts(self,
                              dataset_name: str,
                              version: Optional[str] = 'master',
                              force_download=False):
        datahub_url = f'{self.endpoint}/api/v1/datasets?Query={dataset_name}'
        r = requests.get(datahub_url)
        r.raise_for_status()
        dataset_list = r.json()['Data']
        if len(dataset_list) == 0:
            return None
        dataset_id = dataset_list[0]['Id']
                              namespace: str,
                              download_mode: Optional[DownloadMode],
                              version: Optional[str] = 'master'):
        if namespace is None:
            raise ValueError(
                f'Dataset from Hubs.modelscope should have a valid "namespace", but got {namespace}'
            )
        version = version or 'master'
        datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}'
        r = requests.get(datahub_url)
        r.raise_for_status()
        file_list = r.json()['Data']['Files']
        cache_dir = os.path.join(DOWNLOADED_DATASETS_PATH, dataset_name,
                                 version)
                                 namespace, version)
        download_mode = DownloadMode(download_mode
                                     or DownloadMode.REUSE_DATASET_IF_EXISTS)
        if download_mode == DownloadMode.FORCE_REDOWNLOAD and os.path.exists(
                cache_dir):
            shutil.rmtree(cache_dir)
        os.makedirs(cache_dir, exist_ok=True)
        datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}'
        r = requests.get(datahub_url)
        resp = r.json()
        datahub_raise_on_error(datahub_url, resp)
        dataset_id = resp['Data']['Id']
        datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}'
        r = requests.get(datahub_url)
        resp = r.json()
        datahub_raise_on_error(datahub_url, resp)
        file_list = resp['Data']
        if file_list is None:
            raise NotExistError(
                f'The modelscope dataset [dataset_name = {dataset_name}, namespace = {namespace}, '
                f'version = {version}] does not exist')

        file_list = file_list['Files']
        local_paths = defaultdict(list)
        for file_info in file_list:
            file_path = file_info['Path']
@@ -54,7 +72,7 @@ class MsApi:
            r.raise_for_status()
            content = r.json()['Data']['Content']
            local_path = os.path.join(cache_dir, file_path)
            if os.path.exists(local_path) and not force_download:
            if os.path.exists(local_path):
                logger.warning(
                    f"Reusing dataset {dataset_name}'s python file ({local_path})"
                )


+ 4
- 1
modelscope/pipelines/__init__.py View File

@@ -1,4 +1,7 @@
# from .audio import LinearAECPipeline
from .audio import LinearAECPipeline
from .audio.ans_pipeline import ANSPipeline
from .base import Pipeline
from .builder import pipeline
from .cv import * # noqa F403
from .multi_modal import * # noqa F403
from .nlp import * # noqa F403

+ 117
- 0
modelscope/pipelines/audio/ans_pipeline.py View File

@@ -0,0 +1,117 @@
import os.path
from typing import Any, Dict

import librosa
import numpy as np
import soundfile as sf
import torch

from modelscope.metainfo import Pipelines
from modelscope.utils.constant import Tasks
from ..base import Input, Pipeline
from ..builder import PIPELINES


def audio_norm(x):
rms = (x**2).mean()**0.5
scalar = 10**(-25 / 20) / rms
x = x * scalar
pow_x = x**2
avg_pow_x = pow_x.mean()
rmsx = pow_x[pow_x > avg_pow_x].mean()**0.5
scalarx = 10**(-25 / 20) / rmsx
x = x * scalarx
return x


@PIPELINES.register_module(
Tasks.speech_signal_process,
module_name=Pipelines.speech_frcrn_ans_cirm_16k)
class ANSPipeline(Pipeline):
r"""ANS (Acoustic Noise Suppression) Inference Pipeline .

When invoke the class with pipeline.__call__(), it accept only one parameter:
inputs(str): the path of wav file
"""
SAMPLE_RATE = 16000

def __init__(self, model):
r"""
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model)
self.device = torch.device(
'cuda' if torch.cuda.is_available() else 'cpu')
self.model = self.model.to(self.device)
self.model.eval()

def preprocess(self, inputs: Input) -> Dict[str, Any]:
assert isinstance(inputs, str) and os.path.exists(inputs) and os.path.isfile(inputs), \
            f'Input file does not exist: {inputs}'
data1, fs = sf.read(inputs)
data1 = audio_norm(data1)
if fs != self.SAMPLE_RATE:
data1 = librosa.resample(data1, fs, self.SAMPLE_RATE)
if len(data1.shape) > 1:
data1 = data1[:, 0]
data = data1.astype(np.float32)
inputs = np.reshape(data, [1, data.shape[0]])
return {'ndarray': inputs, 'nsamples': data.shape[0]}

def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
ndarray = inputs['ndarray']
nsamples = inputs['nsamples']
decode_do_segement = False
window = 16000
stride = int(window * 0.75)
print('inputs:{}'.format(ndarray.shape))
b, t = ndarray.shape # size()
if t > window * 120:
decode_do_segement = True

if t < window:
ndarray = np.concatenate(
[ndarray, np.zeros((ndarray.shape[0], window - t))], 1)
elif t < window + stride:
padding = window + stride - t
print('padding: {}'.format(padding))
ndarray = np.concatenate(
[ndarray, np.zeros((ndarray.shape[0], padding))], 1)
else:
if (t - window) % stride != 0:
padding = t - (t - window) // stride * stride
print('padding: {}'.format(padding))
ndarray = np.concatenate(
[ndarray, np.zeros((ndarray.shape[0], padding))], 1)
print('inputs after padding:{}'.format(ndarray.shape))
with torch.no_grad():
ndarray = torch.from_numpy(np.float32(ndarray)).to(self.device)
b, t = ndarray.shape
if decode_do_segement:
outputs = np.zeros(t)
give_up_length = (window - stride) // 2
current_idx = 0
while current_idx + window <= t:
print('current_idx: {}'.format(current_idx))
tmp_input = ndarray[:, current_idx:current_idx + window]
tmp_output = self.model(
tmp_input, )['wav_l2'][0].cpu().numpy()
end_index = current_idx + window - give_up_length
if current_idx == 0:
outputs[current_idx:
end_index] = tmp_output[:-give_up_length]
else:
outputs[current_idx
+ give_up_length:end_index] = tmp_output[
give_up_length:-give_up_length]
current_idx += stride
else:
outputs = self.model(ndarray)['wav_l2'][0].cpu().numpy()
return {'output_pcm': outputs[:nsamples]}

def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
if 'output_path' in kwargs.keys():
sf.write(kwargs['output_path'], inputs['output_pcm'],
self.SAMPLE_RATE)
return inputs
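
Two details worth noting in forward(): inputs longer than 120 windows (two minutes at 16 kHz) are decoded in one-second windows with a 0.75 s hop, and give_up_length = (window - stride) // 2 = 2000 samples are discarded at each segment edge so only the well-conditioned center of each window is kept. A usage sketch; the hub model id below is an assumption, any model registered under this pipeline name works:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

ans = pipeline(
    Tasks.speech_signal_process,
    model='damo/speech_frcrn_ans_cirm_16k')  # assumed model id
# output_path is the optional kwarg consumed by postprocess()
result = ans('noisy_speech.wav', output_path='denoised.wav')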

+ 40
- 15
modelscope/pipelines/base.py View File

@@ -74,33 +74,57 @@ class Pipeline(ABC):
        self.preprocessor = preprocessor

    def __call__(self, input: Union[Input, List[Input]], *args,
                 **post_kwargs) -> Union[Dict[str, Any], Generator]:
                 **kwargs) -> Union[Dict[str, Any], Generator]:
        # model provider should leave it as it is
        # modelscope library developer will handle this function

        # simple showcase, need to support iterator type for both tensorflow and pytorch
        # input_dict = self._handle_input(input)

        # sanitize the parameters
        preprocess_params, forward_params, postprocess_params = self._sanitize_parameters(
            **kwargs)
        kwargs['preprocess_params'] = preprocess_params
        kwargs['forward_params'] = forward_params
        kwargs['postprocess_params'] = postprocess_params

        if isinstance(input, list):
            output = []
            for ele in input:
                output.append(self._process_single(ele, *args, **post_kwargs))
                output.append(self._process_single(ele, *args, **kwargs))

        elif isinstance(input, MsDataset):
            return self._process_iterator(input, *args, **post_kwargs)
            return self._process_iterator(input, *args, **kwargs)

        else:
            output = self._process_single(input, *args, **post_kwargs)
            output = self._process_single(input, *args, **kwargs)
        return output

    def _process_iterator(self, input: Input, *args, **post_kwargs):
    def _sanitize_parameters(self, **pipeline_parameters):
        """
        this method should sanitize the keyword args to preprocessor params,
        forward params and postprocess params on '__call__' or '_process_single' method
        considered to be a normal classmethod with default implementation / output

        Default Returns:
            Dict[str, str]: preprocess_params = {}
            Dict[str, str]: forward_params = {}
            Dict[str, str]: postprocess_params = pipeline_parameters
        """
        return {}, {}, pipeline_parameters

    def _process_iterator(self, input: Input, *args, **kwargs):
        for ele in input:
            yield self._process_single(ele, *args, **post_kwargs)
            yield self._process_single(ele, *args, **kwargs)

    def _process_single(self, input: Input, *args, **kwargs) -> Dict[str, Any]:
        preprocess_params = kwargs.get('preprocess_params')
        forward_params = kwargs.get('forward_params')
        postprocess_params = kwargs.get('postprocess_params')

    def _process_single(self, input: Input, *args,
                        **post_kwargs) -> Dict[str, Any]:
        out = self.preprocess(input)
        out = self.forward(out)
        out = self.postprocess(out, **post_kwargs)
        out = self.preprocess(input, **preprocess_params)
        out = self.forward(out, **forward_params)
        out = self.postprocess(out, **postprocess_params)
        self._check_output(out)
        return out

@@ -120,20 +144,21 @@ class Pipeline(ABC):
        raise ValueError(f'expected output keys are {output_keys}, '
                         f'those {missing_keys} are missing')

    def preprocess(self, inputs: Input) -> Dict[str, Any]:
    def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]:
        """ Provide default implementation based on preprocess_cfg and user can reimplement it
        """
        assert self.preprocessor is not None, 'preprocess method should be implemented'
        assert not isinstance(self.preprocessor, List),\
            'default implementation does not support using multiple preprocessors.'
        return self.preprocessor(inputs)
        return self.preprocessor(inputs, **preprocess_params)

    def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
    def forward(self, inputs: Dict[str, Any],
                **forward_params) -> Dict[str, Any]:
        """ Provide default implementation using self.model and user can reimplement it
        """
        assert self.model is not None, 'forward method should be implemented'
        assert not self.has_multiple_models, 'default implementation does not support multiple models in a pipeline.'
        return self.model(inputs)
        return self.model(inputs, **forward_params)

    @abstractmethod
    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:


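With the new hook, pipeline-level keyword arguments are split once in __call__ and then routed to preprocess/forward/postprocess instead of everything landing in postprocess. A subclass sketch; the parameter names here are illustrative only:

# MyPipeline subclasses the Pipeline base class defined above
class MyPipeline(Pipeline):

    def _sanitize_parameters(self, **pipeline_parameters):
        preprocess_params, postprocess_params = {}, {}
        if 'resize_to' in pipeline_parameters:
            preprocess_params['resize_to'] = pipeline_parameters.pop('resize_to')
        if 'top_k' in pipeline_parameters:
            postprocess_params['top_k'] = pipeline_parameters.pop('top_k')
        return preprocess_params, {}, postprocess_params

# my_pipeline(img, resize_to=256, top_k=5) then reaches preprocess() and
# postprocess() with the matching params.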
+ 7
- 1
modelscope/pipelines/builder.py View File

@@ -33,6 +33,9 @@ DEFAULT_MODEL_FOR_PIPELINE = {
                             'damo/bert-base-sst2'),
    Tasks.text_generation: (Pipelines.text_generation,
                            'damo/nlp_palm2.0_text-generation_chinese-base'),
    Tasks.zero_shot_classification:
    (Pipelines.zero_shot_classification,
     'damo/nlp_structbert_zero-shot-classification_chinese-base'),
    Tasks.image_captioning: (Pipelines.image_caption,
                             'damo/ofa_image-caption_coco_large_en'),
    Tasks.image_generation:
@@ -45,7 +48,10 @@ DEFAULT_MODEL_FOR_PIPELINE = {
                            'damo/cv_TAdaConv_action-recognition'),
    Tasks.multi_modal_embedding:
    (Pipelines.multi_modal_embedding,
     'damo/multi-modal_clip-vit-large-patch14-chinese_multi-modal-embedding')
     'damo/multi-modal_clip-vit-large-patch14-chinese_multi-modal-embedding'),
    Tasks.visual_question_answering:
    (Pipelines.visual_question_answering,
     'damo/mplug_visual-question-answering_coco_large_en'),
}






+ 1
- 0
modelscope/pipelines/cv/__init__.py View File

@@ -1,4 +1,5 @@
from .action_recognition_pipeline import ActionRecognitionPipeline
from .animal_recog_pipeline import AnimalRecogPipeline
from .image_cartoon_pipeline import ImageCartoonPipeline
from .image_matting_pipeline import ImageMattingPipeline
from .ocr_detection_pipeline import OCRDetectionPipeline

+ 127
- 0
modelscope/pipelines/cv/animal_recog_pipeline.py View File

@@ -0,0 +1,127 @@
import os.path as osp
import tempfile
from typing import Any, Dict

import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import transforms

from modelscope.fileio import File
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Pipelines
from modelscope.models.cv.animal_recognition import resnet
from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
from ..base import Pipeline
from ..builder import PIPELINES

logger = get_logger()


@PIPELINES.register_module(
Tasks.image_classification, module_name=Pipelines.animal_recognation)
class AnimalRecogPipeline(Pipeline):

def __init__(self, model: str):
super().__init__(model=model)
import torch

def resnest101(**kwargs):
model = resnet.ResNet(
resnet.Bottleneck, [3, 4, 23, 3],
radix=2,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=64,
avg_down=True,
avd=True,
avd_first=False,
**kwargs)
return model

def filter_param(src_params, own_state):
copied_keys = []
for name, param in src_params.items():
if 'module.' == name[0:7]:
name = name[7:]
if '.module.' not in list(own_state.keys())[0]:
name = name.replace('.module.', '.')
if (name in own_state) and (own_state[name].shape
== param.shape):
own_state[name].copy_(param)
copied_keys.append(name)

def load_pretrained(model, src_params):
if 'state_dict' in src_params:
src_params = src_params['state_dict']
own_state = model.state_dict()
filter_param(src_params, own_state)
model.load_state_dict(own_state)

self.model = resnest101(num_classes=8288)
local_model_dir = model
if osp.exists(model):
local_model_dir = model
else:
local_model_dir = snapshot_download(model)
self.local_path = local_model_dir
src_params = torch.load(
osp.join(local_model_dir, 'pytorch_model.pt'), 'cpu')
load_pretrained(self.model, src_params)
logger.info('load model done')

def preprocess(self, input: Input) -> Dict[str, Any]:
if isinstance(input, str):
img = load_image(input)
        elif isinstance(input, Image.Image):
img = input.convert('RGB')
elif isinstance(input, np.ndarray):
if len(input.shape) == 2:
                input = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
img = input[:, :, ::-1]
img = Image.fromarray(img.astype('uint8')).convert('RGB')
else:
raise TypeError(f'input should be either str, PIL.Image,'
f' np.array, but got {type(input)}')

normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
test_transforms = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(), normalize
])
img = test_transforms(img)
result = {'img': img}
return result

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:

def set_phase(model, is_train):
if is_train:
model.train()
else:
model.eval()

is_train = False
set_phase(self.model, is_train)
img = input['img']
input_img = torch.unsqueeze(img, 0)
outputs = self.model(input_img)
return {'outputs': outputs}

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
label_mapping_path = osp.join(self.local_path, 'label_mapping.txt')
with open(label_mapping_path, 'r') as f:
label_mapping = f.readlines()
score = torch.max(inputs['outputs'])
inputs = {
'scores': score.item(),
'labels': label_mapping[inputs['outputs'].argmax()].split('\t')[1]
}
return inputs
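
End-to-end usage sketch; the model id below is a placeholder for whatever checkpoint is registered on the hub for this pipeline, and the output keys match postprocess() above:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

recognize = pipeline(
    Tasks.image_classification,
    model='damo/cv_resnest101_animal_recognition')  # placeholder id
print(recognize('some_animal.jpg'))  # -> {'scores': ..., 'labels': ...}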

+ 52
- 42
modelscope/pipelines/cv/ocr_detection_pipeline.py View File

@@ -8,7 +8,6 @@ import cv2
import numpy as np
import PIL
import tensorflow as tf
import tf_slim as slim

from modelscope.metainfo import Pipelines
from modelscope.pipelines.base import Input
@@ -19,6 +18,11 @@ from ..base import Pipeline
from ..builder import PIPELINES
from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils

if tf.__version__ >= '2.0':
    import tf_slim as slim
else:
    from tensorflow.contrib import slim

if tf.__version__ >= '2.0':
    tf = tf.compat.v1
    tf.compat.v1.disable_eager_execution()
@@ -44,6 +48,7 @@ class OCRDetectionPipeline(Pipeline):

    def __init__(self, model: str):
        super().__init__(model=model)
        tf.reset_default_graph()
        model_path = osp.join(
            osp.join(self.model, ModelFile.TF_CHECKPOINT_FOLDER),
            'checkpoint-80000')
@@ -51,51 +56,56 @@ class OCRDetectionPipeline(Pipeline):
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        self._session = tf.Session(config=config)
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            dtype=tf.int64,
            trainable=False)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        self.input_images = tf.placeholder(
            tf.float32, shape=[1, 1024, 1024, 3], name='input_images')
        self.output = {}

        # detector
        detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector()
        all_maps = detector.build_model(self.input_images, is_training=False)

        # decode local predictions
        all_nodes, all_links, all_reg = [], [], []
        for i, maps in enumerate(all_maps):
            cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2]
            reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE)

            cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2]))

            lnk_prob_pos = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, :2])
            lnk_prob_mut = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, 2:])
            lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1)

            all_nodes.append(cls_prob)
            all_links.append(lnk_prob)
            all_reg.append(reg_maps)

        # decode segments and links
        image_size = tf.shape(self.input_images)[1:3]
        segments, group_indices, segment_counts, _ = ops.decode_segments_links_python(
            image_size,
            all_nodes,
            all_links,
            all_reg,
            anchor_sizes=list(detector.anchor_sizes))

        # combine segments
        combined_rboxes, combined_counts = ops.combine_segments_python(
            segments, group_indices, segment_counts)
        self.output['combined_rboxes'] = combined_rboxes
        self.output['combined_counts'] = combined_counts
        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                dtype=tf.int64,
                trainable=False)
            variable_averages = tf.train.ExponentialMovingAverage(
                0.997, global_step)

            # detector
            detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector()
            all_maps = detector.build_model(
                self.input_images, is_training=False)

            # decode local predictions
            all_nodes, all_links, all_reg = [], [], []
            for i, maps in enumerate(all_maps):
                cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2]
                reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE)

                cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2]))

                lnk_prob_pos = tf.nn.softmax(
                    tf.reshape(lnk_maps, [-1, 4])[:, :2])
                lnk_prob_mut = tf.nn.softmax(
                    tf.reshape(lnk_maps, [-1, 4])[:, 2:])
                lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1)

                all_nodes.append(cls_prob)
                all_links.append(lnk_prob)
                all_reg.append(reg_maps)

            # decode segments and links
            image_size = tf.shape(self.input_images)[1:3]
            segments, group_indices, segment_counts, _ = ops.decode_segments_links_python(
                image_size,
                all_nodes,
                all_links,
                all_reg,
                anchor_sizes=list(detector.anchor_sizes))

            # combine segments
            combined_rboxes, combined_counts = ops.combine_segments_python(
                segments, group_indices, segment_counts)
            self.output['combined_rboxes'] = combined_rboxes
            self.output['combined_counts'] = combined_counts

        with self._session.as_default() as sess:
            logger.info(f'loading model from {model_path}')

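The motivation for tf.reset_default_graph() plus the '' scope with reuse=tf.AUTO_REUSE: in TF1 graph mode, building the same tf.get_variable twice in one default graph raises "Variable global_step already exists". Wrapping construction this way makes the pipeline re-instantiable within a single process. A minimal illustration, assuming the same TF1-compat setup as the file above:

import tensorflow as tf
if tf.__version__ >= '2.0':
    tf = tf.compat.v1
    tf.compat.v1.disable_eager_execution()

tf.reset_default_graph()
with tf.variable_scope('', reuse=tf.AUTO_REUSE):
    v1 = tf.get_variable('global_step', [], dtype=tf.int64,
                         initializer=tf.constant_initializer(0),
                         trainable=False)
    v2 = tf.get_variable('global_step', [], dtype=tf.int64,
                         initializer=tf.constant_initializer(0),
                         trainable=False)
assert v1 is v2  # AUTO_REUSE hands back the existing variable instead of raising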

+ 5
- 1
modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py View File

@@ -1,8 +1,12 @@
import tensorflow as tf
import tf_slim as slim

from . import ops, resnet18_v1, resnet_utils

if tf.__version__ >= '2.0':
    import tf_slim as slim
else:
    from tensorflow.contrib import slim

if tf.__version__ >= '2.0':
    tf = tf.compat.v1




+ 5
- 1
modelscope/pipelines/cv/ocr_utils/resnet18_v1.py View File

@@ -30,10 +30,14 @@ ResNet-101 for semantic segmentation into 21 classes:
        output_stride=16)
"""
import tensorflow as tf
import tf_slim as slim

from . import resnet_utils

if tf.__version__ >= '2.0':
    import tf_slim as slim
else:
    from tensorflow.contrib import slim

if tf.__version__ >= '2.0':
    tf = tf.compat.v1




+ 5
- 1
modelscope/pipelines/cv/ocr_utils/resnet_utils.py View File

@@ -19,7 +19,11 @@ implementation is more memory efficient.
import collections

import tensorflow as tf
import tf_slim as slim

if tf.__version__ >= '2.0':
    import tf_slim as slim
else:
    from tensorflow.contrib import slim

if tf.__version__ >= '2.0':
    tf = tf.compat.v1


+ 1
- 0
modelscope/pipelines/multi_modal/__init__.py View File

@@ -1,2 +1,3 @@
from .image_captioning_pipeline import ImageCaptionPipeline
from .multi_modal_embedding_pipeline import MultiModalEmbeddingPipeline
from .visual_question_answering_pipeline import VisualQuestionAnsweringPipeline

+ 65
- 0
modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py View File

@@ -0,0 +1,65 @@
from typing import Any, Dict, Optional, Union

import torch

from ...metainfo import Pipelines
from ...models import Model
from ...models.multi_modal import MPlugForVisualQuestionAnswering
from ...preprocessors import MPlugVisualQuestionAnsweringPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline, Tensor
from ..builder import PIPELINES

__all__ = ['VisualQuestionAnsweringPipeline']


@PIPELINES.register_module(
Tasks.visual_question_answering,
module_name=Pipelines.visual_question_answering)
class VisualQuestionAnsweringPipeline(Pipeline):

def __init__(self,
model: Union[MPlugForVisualQuestionAnswering, str],
preprocessor: Optional[
MPlugVisualQuestionAnsweringPreprocessor] = None,
**kwargs):
"""use `model` and `preprocessor` to create a visual question answering pipeline for prediction

Args:
model (MPlugForVisualQuestionAnswering): a model instance
preprocessor (MPlugVisualQuestionAnsweringPreprocessor): a preprocessor instance
"""
model = model if isinstance(
model,
MPlugForVisualQuestionAnswering) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = MPlugVisualQuestionAnsweringPreprocessor(
model.model_dir)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.tokenizer = model.tokenizer

def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self, inputs: Dict[str, Tensor],
**postprocess_params) -> Dict[str, str]:
"""process the prediction results

Args:
            inputs (Dict[str, Tensor]): the model prediction outputs

Returns:
Dict[str, str]: the prediction results
"""
replace_tokens_bert = (('[unused0]', ''), ('[PAD]', ''),
('[unused1]', ''), (r' +', ' '), ('[SEP]', ''),
('[unused2]', ''), ('[CLS]', ''), ('[UNK]', ''))

pred_string = self.tokenizer.decode(inputs[0][0])
for _old, _new in replace_tokens_bert:
pred_string = pred_string.replace(_old, _new)
        pred_string = pred_string.strip()
return {'answer': pred_string}
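
Putting the pieces together, using the default model registered for this task earlier in builder.py; the image path and question below are placeholders:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

vqa = pipeline(
    Tasks.visual_question_answering,
    model='damo/mplug_visual-question-answering_coco_large_en')
result = vqa({'image': 'path/to/image.jpg',
              'question': 'What is the dog doing?'})
print(result['answer'])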

+ 3
- 3
modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py View File

@@ -1,7 +1,7 @@
from typing import Any, Dict

from ...metainfo import Pipelines
from ...models.nlp import DialogStateTrackingModel
from ...models import SpaceForDialogStateTrackingModel
from ...preprocessors import DialogStateTrackingPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline
@@ -14,7 +14,7 @@ __all__ = ['DialogStateTrackingPipeline']
    Tasks.dialog_state_tracking, module_name=Pipelines.dialog_state_tracking)
class DialogStateTrackingPipeline(Pipeline):

    def __init__(self, model: DialogStateTrackingModel,
    def __init__(self, model: SpaceForDialogStateTrackingModel,
                 preprocessor: DialogStateTrackingPreprocessor, **kwargs):
        """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction


@@ -52,7 +52,7 @@ class DialogStateTrackingPipeline(Pipeline):
            _outputs[5], unique_ids, input_ids_unmasked,
            values, inform, prefix, ds)

        return ds
        return {'dialog_states': ds}


def predict_and_format(config, tokenizer, features, per_slot_class_logits,


+ 14
- 10
modelscope/pipelines/nlp/fill_mask_pipeline.py View File

@@ -1,3 +1,4 @@
import os
from typing import Any, Dict, Optional, Union

import torch
@@ -6,11 +7,13 @@ from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp.masked_language_model import MaskedLanguageModelBase
from ...preprocessors import FillMaskPreprocessor
from ...utils.constant import Tasks
from ...utils.config import Config
from ...utils.constant import ModelFile, Tasks
from ..base import Pipeline, Tensor
from ..builder import PIPELINES

__all__ = ['FillMaskPipeline']
_type_map = {'veco': 'roberta', 'sbert': 'bert'}


@PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask)
@@ -29,7 +32,6 @@ class FillMaskPipeline(Pipeline):
        """
        fill_mask_model = model if isinstance(
            model, MaskedLanguageModelBase) else Model.from_pretrained(model)
        assert fill_mask_model.config is not None

        if preprocessor is None:
            preprocessor = FillMaskPreprocessor(
@@ -41,11 +43,13 @@ class FillMaskPipeline(Pipeline):
            model=fill_mask_model, preprocessor=preprocessor, **kwargs)

        self.preprocessor = preprocessor
        self.config = Config.from_file(
            os.path.join(fill_mask_model.model_dir, ModelFile.CONFIGURATION))
        self.tokenizer = preprocessor.tokenizer
        self.mask_id = {'veco': 250001, 'sbert': 103}
        self.mask_id = {'roberta': 250001, 'bert': 103}

        self.rep_map = {
            'sbert': {
            'bert': {
                '[unused0]': '',
                '[PAD]': '',
                '[unused1]': '',
@@ -55,7 +59,7 @@ class FillMaskPipeline(Pipeline):
                '[CLS]': '',
                '[UNK]': ''
            },
            'veco': {
            'roberta': {
                r' +': ' ',
                '<mask>': '<q>',
                '<pad>': '',
@@ -84,7 +88,9 @@ class FillMaskPipeline(Pipeline):
        input_ids = inputs['input_ids'].detach().numpy()
        pred_ids = np.argmax(logits, axis=-1)
        model_type = self.model.config.model_type
        rst_ids = np.where(input_ids == self.mask_id[model_type], pred_ids,
        process_type = model_type if model_type in self.mask_id else _type_map[
            model_type]
        rst_ids = np.where(input_ids == self.mask_id[process_type], pred_ids,
                           input_ids)

        def rep_tokens(string, rep_map):
@@ -94,14 +100,12 @@ class FillMaskPipeline(Pipeline):

        pred_strings = []
        for ids in rst_ids: # batch
            # TODO vocab size is not stable

            if self.model.config.vocab_size == 21128: # zh bert
            if 'language' in self.config.model and self.config.model.language == 'zh':
                pred_string = self.tokenizer.convert_ids_to_tokens(ids)
                pred_string = ''.join(pred_string)
            else:
                pred_string = self.tokenizer.decode(ids)
            pred_string = rep_tokens(pred_string, self.rep_map[model_type])
            pred_string = rep_tokens(pred_string, self.rep_map[process_type])
            pred_strings.append(pred_string)

        return {'text': pred_strings}

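The effect of the new _type_map indirection: checkpoints may report model_type 'veco' or 'sbert', but mask ids and replacement tables are keyed by the underlying architecture. A self-contained sketch of the resolution logic:

_type_map = {'veco': 'roberta', 'sbert': 'bert'}
mask_id = {'roberta': 250001, 'bert': 103}

for model_type in ('roberta', 'bert', 'veco', 'sbert'):
    process_type = (model_type if model_type in mask_id
                    else _type_map[model_type])
    print(model_type, '->', process_type, mask_id[process_type])
# roberta -> roberta 250001, bert -> bert 103,
# veco -> roberta 250001, sbert -> bert 103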
+ 39
- 1
modelscope/pipelines/outputs.py View File

@@ -153,5 +153,43 @@ TASK_OUTPUTS = {
    # {
    #   "image": np.ndarray with shape [height, width, 3]
    # }
Tasks.text_to_image_synthesis: ['image']
Tasks.text_to_image_synthesis: ['image'],
Tasks.dialog_modeling: [],
Tasks.dialog_intent_prediction: [],

# {
# "dialog_states": {
# "taxi-leaveAt": "none",
# "taxi-destination": "none",
# "taxi-departure": "none",
# "taxi-arriveBy": "none",
# "restaurant-book_people": "none",
# "restaurant-book_day": "none",
# "restaurant-book_time": "none",
# "restaurant-food": "none",
# "restaurant-pricerange": "none",
# "restaurant-name": "none",
# "restaurant-area": "none",
# "hotel-book_people": "none",
# "hotel-book_day": "none",
# "hotel-book_stay": "none",
# "hotel-name": "none",
# "hotel-area": "none",
# "hotel-parking": "none",
# "hotel-pricerange": "cheap",
# "hotel-stars": "none",
# "hotel-internet": "none",
# "hotel-type": "true",
# "attraction-type": "none",
# "attraction-name": "none",
# "attraction-area": "none",
# "train-book_people": "none",
# "train-leaveAt": "none",
# "train-destination": "none",
# "train-day": "none",
# "train-arriveBy": "none",
# "train-departure": "none"
# }
# }
Tasks.dialog_state_tracking: ['dialog_states']
}

+ 1
- 1
modelscope/preprocessors/__init__.py View File

@@ -6,7 +6,7 @@ from .base import Preprocessor
from .common import Compose
from .image import LoadImage, load_image
from .kws import WavToLists
from .multi_modal import OfaImageCaptionPreprocessor
from .multi_modal import * # noqa F403
from .nlp import * # noqa F403
from .space.dialog_intent_prediction_preprocessor import * # noqa F403
from .space.dialog_modeling_preprocessor import * # noqa F403


+ 45
- 0
modelscope/preprocessors/multi_modal.py View File

@@ -16,6 +16,7 @@ from .image import load_image


__all__ = [
    'OfaImageCaptionPreprocessor',
    'MPlugVisualQuestionAnsweringPreprocessor',
]




@@ -110,3 +111,47 @@ class OfaImageCaptionPreprocessor(Preprocessor):
        }
    }
    return sample


@PREPROCESSORS.register_module(
Fields.multi_modal,
module_name=Preprocessors.mplug_visual_question_answering)
class MPlugVisualQuestionAnsweringPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via 'bert-base-uncased' tokenizer and configuration

"""
super().__init__(*args, **kwargs)

# tokenizer
from transformers import AutoTokenizer
self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# load configuration
from sofa.models.mplug import CONFIG_NAME, MPlugConfig
config = MPlugConfig.from_yaml_file(osp.join(model_dir, CONFIG_NAME))

# Initialize transform
from torchvision import transforms
mean = (0.48145466, 0.4578275, 0.40821073)
std = (0.26862954, 0.26130258, 0.27577711)

self.patch_resize_transform = transforms.Compose([
transforms.Resize((config.image_res, config.image_res),
interpolation=Image.BICUBIC),
transforms.ToTensor(),
transforms.Normalize(mean=mean, std=std),
])

def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
image, question = data['image'], data['question']
image = Image.open(image).convert('RGB') if isinstance(image,
str) else image
image = self.patch_resize_transform(image)
image = torch.stack([image], dim=0)
question = self.tokenizer([question.lower()],
padding='longest',
return_tensors='pt')

return {'image': image, 'question': question, 'train': False}

+ 7
- 4
modelscope/preprocessors/nlp.py View File

@@ -326,14 +326,17 @@ class FillMaskPreprocessor(Preprocessor):
            model_dir (str): model path
        """
        super().__init__(*args, **kwargs)
        from sofa.utils.backend import AutoTokenizer
        self.model_dir = model_dir
        self.first_sequence: str = kwargs.pop('first_sequence',
                                              'first_sequence')
        self.sequence_length = kwargs.pop('sequence_length', 128)

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_dir, use_fast=False)
        try:
            from transformers import AutoTokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        except KeyError:
            from sofa.utils.backend import AutoTokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_dir, use_fast=False)

    @type_assert(object, str)
    def __call__(self, data: str) -> Dict[str, Any]:


+ 11
- 1
modelscope/utils/constant.py View File

@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import enum


class Fields(object):
@@ -52,6 +53,7 @@ class Tasks(object):
    fill_mask = 'fill-mask'
    summarization = 'summarization'
    question_answering = 'question-answering'
    zero_shot_classification = 'zero-shot-classification'

    # audio tasks
    auto_speech_recognition = 'auto-speech-recognition'
@@ -64,6 +66,7 @@ class Tasks(object):
    visual_grounding = 'visual-grounding'
    text_to_image_synthesis = 'text-to-image-synthesis'
    multi_modal_embedding = 'multi-modal-embedding'
visual_question_answering = 'visual-question-answering'


class InputFields(object):
@@ -74,13 +77,20 @@ class InputFields(object):
    audio = 'audio'


class Hubs(object):
class Hubs(enum.Enum):
""" Source from which an entity (such as a Dataset or Model) is stored """ Source from which an entity (such as a Dataset or Model) is stored
""" """
modelscope = 'modelscope' modelscope = 'modelscope'
huggingface = 'huggingface' huggingface = 'huggingface'




class DownloadMode(enum.Enum):
""" How to treat existing datasets
"""
REUSE_DATASET_IF_EXISTS = 'reuse_dataset_if_exists'
FORCE_REDOWNLOAD = 'force_redownload'


class ModelFile(object): class ModelFile(object):
CONFIGURATION = 'configuration.json' CONFIGURATION = 'configuration.json'
README = 'README.md' README = 'README.md'
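
A small sketch of how the new enums are consumed. hub=Hubs.modelscope appears in the updated tests later in this commit; the download_mode parameter name and the MsDataset import path are assumptions based on the enum's docstring and the package layout.

from modelscope.msdatasets import MsDataset
from modelscope.utils.constant import DownloadMode, Hubs

ds = MsDataset.load(
    'squad',
    namespace='damotest',
    split='train',
    hub=Hubs.modelscope,
    download_mode=DownloadMode.FORCE_REDOWNLOAD)  # assumed parameter name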


+3 -2 modelscope/utils/hub.py

@@ -31,9 +31,10 @@ def create_model_if_not_exist(
     else:
         api.create_model(
             model_id=model_id,
-            chinese_name=chinese_name,
             visibility=visibility,
-            license=license)
+            license=license,
+            chinese_name=chinese_name,
+        )
         print(f'model {model_id} successfully created.')
         return True
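
The call-site shape after this reorder, as the updated hub tests below issue it (a sketch; credentials, org, and names are placeholders, not values from the diff):

from modelscope.hub.api import HubApi
from modelscope.hub.constants import Licenses, ModelVisibility

api = HubApi()
api.login('user_name', 'password')  # placeholder credentials
api.create_model(
    model_id='unittest/my_model',
    visibility=ModelVisibility.PUBLIC,  # 1-private, 5-public
    license=Licenses.APACHE_V2,
    chinese_name='测试模型',  # now the trailing keyword argument
)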




+1 -1 modelscope/version.py

@@ -1 +1 @@
-__version__ = '0.1.1'
+__version__ = '0.2.1'

+1 -0 requirements/audio.txt

@@ -16,6 +16,7 @@ protobuf>3,<=3.20
 ptflops
 PyWavelets>=1.0.0
 scikit-learn
+SoundFile>0.10
 sox
 tensorboard
 tensorflow==1.15.*


+2 -4 requirements/nlp.txt

@@ -1,5 +1,3 @@
-# https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
-http://ait-public.oss-cn-hangzhou-zmf.aliyuncs.com/jizhu/en_core_web_sm-2.3.1.tar.gz
-https://alinlp.alibaba-inc.com/pypi/sofa-1.0.3-py3-none-any.whl
+https://alinlp.alibaba-inc.com/pypi/sofa-1.0.5-py3-none-any.whl
+https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
 spacy>=2.3.5
-# python -m spacy download en_core_web_sm

+35 -7 tests/hub/test_hub_operation.py

@@ -3,6 +3,7 @@ import os
 import tempfile
 import unittest
 import uuid
+from shutil import rmtree

 from modelscope.hub.api import HubApi, ModelScopeConfig
 from modelscope.hub.constants import Licenses, ModelVisibility
@@ -23,7 +24,6 @@ download_model_file_name = 'test.bin'
 class HubOperationTest(unittest.TestCase):

     def setUp(self):
-        self.old_cwd = os.getcwd()
         self.api = HubApi()
         # note this is temporary before official account management is ready
         self.api.login(USER_NAME, PASSWORD)
@@ -31,19 +31,18 @@ class HubOperationTest(unittest.TestCase):
         self.model_id = '%s/%s' % (model_org, self.model_name)
         self.api.create_model(
             model_id=self.model_id,
-            chinese_name=model_chinese_name,
             visibility=ModelVisibility.PUBLIC,
-            license=Licenses.APACHE_V2)
+            license=Licenses.APACHE_V2,
+            chinese_name=model_chinese_name,
+        )
         temporary_dir = tempfile.mkdtemp()
         self.model_dir = os.path.join(temporary_dir, self.model_name)
         repo = Repository(self.model_dir, clone_from=self.model_id)
-        os.chdir(self.model_dir)
         os.system("echo 'testtest'>%s"
-                  % os.path.join(self.model_dir, 'test.bin'))
-        repo.push('add model', all_files=True)
+                  % os.path.join(self.model_dir, download_model_file_name))
+        repo.push('add model')

     def tearDown(self):
-        os.chdir(self.old_cwd)
         self.api.delete_model(model_id=self.model_id)

     def test_model_repo_creation(self):
@@ -79,6 +78,35 @@ class HubOperationTest(unittest.TestCase):
         mdtime2 = os.path.getmtime(downloaded_file_path)
         assert mdtime1 == mdtime2

+    def test_download_public_without_login(self):
+        rmtree(ModelScopeConfig.path_credential)
+        snapshot_path = snapshot_download(model_id=self.model_id)
+        downloaded_file_path = os.path.join(snapshot_path,
+                                            download_model_file_name)
+        assert os.path.exists(downloaded_file_path)
+        temporary_dir = tempfile.mkdtemp()
+        downloaded_file = model_file_download(
+            model_id=self.model_id,
+            file_path=download_model_file_name,
+            cache_dir=temporary_dir)
+        assert os.path.exists(downloaded_file)
+        self.api.login(USER_NAME, PASSWORD)
+
+    def test_snapshot_delete_download_cache_file(self):
+        snapshot_path = snapshot_download(model_id=self.model_id)
+        downloaded_file_path = os.path.join(snapshot_path,
+                                            download_model_file_name)
+        assert os.path.exists(downloaded_file_path)
+        os.remove(downloaded_file_path)
+        # download again in cache
+        file_download_path = model_file_download(
+            model_id=self.model_id, file_path='README.md')
+        assert os.path.exists(file_download_path)
+        # deleted file need download again
+        file_download_path = model_file_download(
+            model_id=self.model_id, file_path=download_model_file_name)
+        assert os.path.exists(file_download_path)


 if __name__ == '__main__':
     unittest.main()

+85 -0 tests/hub/test_hub_private_files.py

@@ -0,0 +1,85 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import tempfile
import unittest
import uuid

from requests.exceptions import HTTPError

from modelscope.hub.api import HubApi
from modelscope.hub.constants import Licenses, ModelVisibility
from modelscope.hub.errors import GitError
from modelscope.hub.file_download import model_file_download
from modelscope.hub.repository import Repository
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.utils.constant import ModelFile

USER_NAME = 'maasadmin'
PASSWORD = '12345678'
USER_NAME2 = 'sdkdev'

model_chinese_name = '达摩卡通化模型'
model_org = 'unittest'


class HubPrivateFileDownloadTest(unittest.TestCase):

def setUp(self):
self.old_cwd = os.getcwd()
self.api = HubApi()
# note this is temporary before official account management is ready
self.token, _ = self.api.login(USER_NAME, PASSWORD)
self.model_name = uuid.uuid4().hex
self.model_id = '%s/%s' % (model_org, self.model_name)
self.api.create_model(
model_id=self.model_id,
visibility=ModelVisibility.PRIVATE, # 1-private, 5-public
license=Licenses.APACHE_V2,
chinese_name=model_chinese_name,
)

def tearDown(self):
os.chdir(self.old_cwd)
self.api.delete_model(model_id=self.model_id)

def test_snapshot_download_private_model(self):
snapshot_path = snapshot_download(self.model_id)
assert os.path.exists(os.path.join(snapshot_path, ModelFile.README))

def test_snapshot_download_private_model_no_permission(self):
self.token, _ = self.api.login(USER_NAME2, PASSWORD)
with self.assertRaises(HTTPError):
snapshot_download(self.model_id)
self.api.login(USER_NAME, PASSWORD)

def test_download_file_private_model(self):
file_path = model_file_download(self.model_id, ModelFile.README)
assert os.path.exists(file_path)

def test_download_file_private_model_no_permission(self):
self.token, _ = self.api.login(USER_NAME2, PASSWORD)
with self.assertRaises(HTTPError):
model_file_download(self.model_id, ModelFile.README)
self.api.login(USER_NAME, PASSWORD)

def test_snapshot_download_local_only(self):
with self.assertRaises(ValueError):
snapshot_download(self.model_id, local_files_only=True)
snapshot_path = snapshot_download(self.model_id)
assert os.path.exists(os.path.join(snapshot_path, ModelFile.README))
snapshot_path = snapshot_download(self.model_id, local_files_only=True)
assert os.path.exists(snapshot_path)

def test_file_download_local_only(self):
with self.assertRaises(ValueError):
model_file_download(
self.model_id, ModelFile.README, local_files_only=True)
file_path = model_file_download(self.model_id, ModelFile.README)
assert os.path.exists(file_path)
file_path = model_file_download(
self.model_id, ModelFile.README, local_files_only=True)
assert os.path.exists(file_path)


if __name__ == '__main__':
unittest.main()

+4 -5 tests/hub/test_hub_private_repository.py

@@ -5,6 +5,7 @@ import unittest
 import uuid

 from modelscope.hub.api import HubApi
+from modelscope.hub.constants import Licenses, ModelVisibility
 from modelscope.hub.errors import GitError
 from modelscope.hub.repository import Repository

@@ -16,9 +17,6 @@ model_chinese_name = '达摩卡通化模型'
 model_org = 'unittest'
 DEFAULT_GIT_PATH = 'git'

-sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx'
-download_model_file_name = 'mnist-12.onnx'


 class HubPrivateRepositoryTest(unittest.TestCase):


@@ -31,9 +29,10 @@ class HubPrivateRepositoryTest(unittest.TestCase):
         self.model_id = '%s/%s' % (model_org, self.model_name)
         self.api.create_model(
             model_id=self.model_id,
+            visibility=ModelVisibility.PRIVATE,  # 1-private, 5-public
+            license=Licenses.APACHE_V2,
             chinese_name=model_chinese_name,
-            visibility=1,  # 1-private, 5-public
-            license='apache-2.0')
+        )

     def tearDown(self):
         self.api.login(USER_NAME, PASSWORD)


+5 -19 tests/hub/test_hub_repository.py

@@ -2,7 +2,6 @@
 import os
 import shutil
 import tempfile
-import time
 import unittest
 import uuid
 from os.path import expanduser
@@ -10,6 +9,7 @@ from os.path import expanduser
 from requests import delete

 from modelscope.hub.api import HubApi
+from modelscope.hub.constants import Licenses, ModelVisibility
 from modelscope.hub.errors import NotExistError
 from modelscope.hub.file_download import model_file_download
 from modelscope.hub.repository import Repository
@@ -55,9 +55,10 @@ class HubRepositoryTest(unittest.TestCase):
         self.model_id = '%s/%s' % (model_org, self.model_name)
         self.api.create_model(
             model_id=self.model_id,
+            visibility=ModelVisibility.PUBLIC,  # 1-private, 5-public
+            license=Licenses.APACHE_V2,
             chinese_name=model_chinese_name,
-            visibility=5,  # 1-private, 5-public
-            license='apache-2.0')
+        )
         temporary_dir = tempfile.mkdtemp()
         self.model_dir = os.path.join(temporary_dir, self.model_name)


@@ -81,27 +82,12 @@ class HubRepositoryTest(unittest.TestCase):
         os.chdir(self.model_dir)
         os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py'))
         os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py'))
-        repo.push('test', all_files=True)
+        repo.push('test')
         add1 = model_file_download(self.model_id, 'add1.py')
         assert os.path.exists(add1)
         add2 = model_file_download(self.model_id, 'add2.py')
         assert os.path.exists(add2)

-    def test_push_files(self):
-        repo = Repository(self.model_dir, clone_from=self.model_id)
-        assert os.path.exists(os.path.join(self.model_dir, 'README.md'))
-        os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py'))
-        os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py'))
-        os.system("echo '333'>%s" % os.path.join(self.model_dir, 'add3.py'))
-        repo.push('test', files=['add1.py', 'add2.py'], all_files=False)
-        add1 = model_file_download(self.model_id, 'add1.py')
-        assert os.path.exists(add1)
-        add2 = model_file_download(self.model_id, 'add2.py')
-        assert os.path.exists(add2)
-        with self.assertRaises(NotExistError) as cm:
-            model_file_download(self.model_id, 'add3.py')
-        print(cm.exception)

 if __name__ == '__main__':
     unittest.main()

+14 -10 tests/msdatasets/test_ms_dataset.py

@@ -32,11 +32,12 @@ class ImgPreprocessor(Preprocessor):

 class MsDatasetTest(unittest.TestCase):

-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_ds_basic(self):
-        ms_ds_full = MsDataset.load('squad')
+        ms_ds_full = MsDataset.load('squad', namespace='damotest')
         ms_ds_full_hf = hfdata.load_dataset('squad')
-        ms_ds_train = MsDataset.load('squad', split='train')
+        ms_ds_train = MsDataset.load(
+            'squad', namespace='damotest', split='train')
         ms_ds_train_hf = hfdata.load_dataset('squad', split='train')
         ms_image_train = MsDataset.from_hf_dataset(
             hfdata.load_dataset('beans', split='train'))
@@ -48,7 +49,7 @@ class MsDatasetTest(unittest.TestCase):
         print(next(iter(ms_ds_train)))
         print(next(iter(ms_image_train)))

-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     @require_torch
     def test_to_torch_dataset_text(self):
         model_id = 'damo/bert-base-sst2'
@@ -57,13 +58,14 @@ class MsDatasetTest(unittest.TestCase):
             nlp_model.model_dir,
             first_sequence='context',
             second_sequence=None)
-        ms_ds_train = MsDataset.load('squad', split='train')
+        ms_ds_train = MsDataset.load(
+            'squad', namespace='damotest', split='train')
         pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
         import torch
         dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
         print(next(iter(dataloader)))

-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     @require_tf
     def test_to_tf_dataset_text(self):
         import tensorflow as tf
@@ -74,7 +76,8 @@ class MsDatasetTest(unittest.TestCase):
             nlp_model.model_dir,
             first_sequence='context',
             second_sequence=None)
-        ms_ds_train = MsDataset.load('squad', split='train')
+        ms_ds_train = MsDataset.load(
+            'squad', namespace='damotest', split='train')
         tf_dataset = ms_ds_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,
@@ -85,8 +88,8 @@ class MsDatasetTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     @require_torch
     def test_to_torch_dataset_img(self):
-        ms_image_train = MsDataset.from_hf_dataset(
-            hfdata.load_dataset('beans', split='train'))
+        ms_image_train = MsDataset.load(
+            'beans', namespace='damotest', split='train')
         pt_dataset = ms_image_train.to_torch_dataset(
             preprocessors=ImgPreprocessor(
                 image_path='image_file_path', label='labels'))
@@ -99,7 +102,8 @@ class MsDatasetTest(unittest.TestCase):
     def test_to_tf_dataset_img(self):
         import tensorflow as tf
         tf.compat.v1.enable_eager_execution()
-        ms_image_train = MsDataset.load('beans', split='train')
+        ms_image_train = MsDataset.load(
+            'beans', namespace='damotest', split='train')
         tf_dataset = ms_image_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,


+6 -6 tests/pipelines/nlp/test_dialog_state_tracking.py

@@ -5,8 +5,7 @@ import tempfile
 import unittest

 from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.models import Model
-from modelscope.models.nlp import DialogStateTrackingModel
+from modelscope.models import Model, SpaceForDialogStateTrackingModel
 from modelscope.pipelines import DialogStateTrackingPipeline, pipeline
 from modelscope.preprocessors import DialogStateTrackingPreprocessor
 from modelscope.utils.constant import Tasks
@@ -41,7 +40,7 @@ class DialogStateTrackingTest(unittest.TestCase):
         cache_path = '/Users/yangliu/Space/maas_model/nlp_space_dialog-state-tracking'
         # cache_path = snapshot_download(self.model_id)

-        model = DialogStateTrackingModel(cache_path)
+        model = SpaceForDialogStateTrackingModel(cache_path)
         preprocessor = DialogStateTrackingPreprocessor(model_dir=cache_path)
         pipelines = [
             DialogStateTrackingPipeline(
@@ -55,17 +54,18 @@ class DialogStateTrackingTest(unittest.TestCase):
         history_states = [{}]
         utter = {}
         pipelines_len = len(pipelines)
+        import json
         for step, item in enumerate(self.test_case):
             utter.update(item)
-            ds = pipelines[step % pipelines_len]({
+            result = pipelines[step % pipelines_len]({
                 'utter':
                 utter,
                 'history_states':
                 history_states
             })
-            print(ds)
+            print(json.dumps(result))

-            history_states.extend([ds, {}])
+            history_states.extend([result['dialog_states'], {}])

     @unittest.skip('test with snapshot_download')
     def test_run_with_model_from_modelhub(self):


+20 -0 tests/pipelines/test_animal_recognation.py

@@ -0,0 +1,20 @@
import unittest

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class MultiModalFeatureTest(unittest.TestCase):

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run(self):
animal_recog = pipeline(
Tasks.image_classification,
model='damo/cv_resnest101_animal_recognation')
result = animal_recog('data/test/images/image1.jpg')
print(result)


if __name__ == '__main__':
unittest.main()

+35 -1 tests/pipelines/test_fill_mask.py

@@ -3,7 +3,8 @@ import unittest

 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models import Model
-from modelscope.models.nlp import StructBertForMaskedLM, VecoForMaskedLM
+from modelscope.models.nlp import (BertForMaskedLM, StructBertForMaskedLM,
+                                   VecoForMaskedLM)
 from modelscope.pipelines import FillMaskPipeline, pipeline
 from modelscope.preprocessors import FillMaskPreprocessor
 from modelscope.utils.constant import Tasks
@@ -16,6 +17,7 @@ class FillMaskTest(unittest.TestCase):
         'en': 'damo/nlp_structbert_fill-mask_english-large'
     }
     model_id_veco = 'damo/nlp_veco_fill-mask-large'
+    model_id_bert = 'damo/nlp_bert_fill-mask_chinese-base'

     ori_texts = {
         'zh':
@@ -69,6 +71,20 @@ class FillMaskTest(unittest.TestCase):
             f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n'
         )

+        # zh bert
+        language = 'zh'
+        model_dir = snapshot_download(self.model_id_bert)
+        preprocessor = FillMaskPreprocessor(
+            model_dir, first_sequence='sentence', second_sequence=None)
+        model = BertForMaskedLM(model_dir)
+        pipeline1 = FillMaskPipeline(model, preprocessor)
+        pipeline2 = pipeline(
+            Tasks.fill_mask, model=model, preprocessor=preprocessor)
+        ori_text = self.ori_texts[language]
+        test_input = self.test_inputs[language]
+        print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline1: '
+              f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n')
+
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_with_model_from_modelhub(self):
         # sbert
@@ -97,6 +113,18 @@ class FillMaskTest(unittest.TestCase):
         print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
               f'{pipeline_ins(test_input)}\n')

+        # zh bert
+        model = Model.from_pretrained(self.model_id_bert)
+        preprocessor = FillMaskPreprocessor(
+            model.model_dir, first_sequence='sentence', second_sequence=None)
+        pipeline_ins = pipeline(
+            Tasks.fill_mask, model=model, preprocessor=preprocessor)
+        language = 'zh'
+        ori_text = self.ori_texts[language]
+        test_input = self.test_inputs[language]
+        print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
+              f'{pipeline_ins(test_input)}\n')
+
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_with_model_name(self):
         # veco
@@ -115,6 +143,12 @@ class FillMaskTest(unittest.TestCase):
             f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: '
             f'{pipeline_ins(self.test_inputs[language])}\n')

+        # bert
+        pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_bert)
+        print(
+            f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: '
+            f'{pipeline_ins(self.test_inputs[language])}\n')
+
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_default_model(self):
         pipeline_ins = pipeline(task=Tasks.fill_mask)


+2 -1 tests/pipelines/test_image_matting.py

@@ -62,7 +62,8 @@ class ImageMattingTest(unittest.TestCase):

     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_modelscope_dataset(self):
-        dataset = MsDataset.load('beans', split='train', target='image')
+        dataset = MsDataset.load(
+            'beans', namespace='damotest', split='train', target='image')
         img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         result = img_matting(dataset)
         for i in range(10):


+5 -0 tests/pipelines/test_ocr_detection.py

@@ -27,6 +27,11 @@ class OCRDetectionTest(unittest.TestCase):
         print('ocr detection results: ')
         print(result)

+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_run_with_model_from_modelhub(self):
+        ocr_detection = pipeline(Tasks.ocr_detection, model=self.model_id)
+        self.pipeline_inference(ocr_detection, self.test_image)
+
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_modelhub_default_model(self):
         ocr_detection = pipeline(Tasks.ocr_detection)


+26 -6 tests/pipelines/test_speech_signal_process.py

@@ -17,6 +17,9 @@ AEC_LIB_URL = 'http://isv-data.oss-cn-hangzhou.aliyuncs.com/ics%2FMaaS%2FAEC%2Fl
     '?Expires=1664085465&OSSAccessKeyId=LTAIxjQyZNde90zh&Signature=Y7gelmGEsQAJRK4yyHSYMrdWizk%3D'
 AEC_LIB_FILE = 'libmitaec_pyio.so'

+NOISE_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ANS/sample_audio/speech_with_noise.wav'
+NOISE_SPEECH_FILE = 'speech_with_noise.wav'
+

 def download(remote_path, local_path):
     local_dir = os.path.dirname(local_path)
@@ -30,23 +33,40 @@ def download(remote_path, local_path):
 class SpeechSignalProcessTest(unittest.TestCase):

     def setUp(self) -> None:
-        self.model_id = 'damo/speech_dfsmn_aec_psm_16k'
+        pass
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_aec(self):
         # A temporary hack to provide c++ lib. Download it first.
         download(AEC_LIB_URL, AEC_LIB_FILE)
-
-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
-    def test_run(self):
-        # Download audio files
         download(NEAREND_MIC_URL, NEAREND_MIC_FILE)
         download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE)
+        model_id = 'damo/speech_dfsmn_aec_psm_16k'
         input = {
             'nearend_mic': NEAREND_MIC_FILE,
             'farend_speech': FAREND_SPEECH_FILE
         }
         aec = pipeline(
             Tasks.speech_signal_process,
-            model=self.model_id,
+            model=model_id,
             pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
-        aec(input, output_path='output.wav')
+        output_path = os.path.abspath('output.wav')
+        aec(input, output_path=output_path)
+        print(f'Processed audio saved to {output_path}')
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_ans(self):
+        # Download audio files
+        download(NOISE_SPEECH_URL, NOISE_SPEECH_FILE)
+        model_id = 'damo/speech_frcrn_ans_cirm_16k'
+        ans = pipeline(
+            Tasks.speech_signal_process,
+            model=model_id,
+            pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k)
+        output_path = os.path.abspath('output.wav')
+        ans(NOISE_SPEECH_FILE, output_path=output_path)
+        print(f'Processed audio saved to {output_path}')




if __name__ == '__main__': if __name__ == '__main__':


+6 -2 tests/pipelines/test_text_classification.py

@@ -87,12 +87,16 @@ class SequenceClassificationTest(unittest.TestCase):
         result = text_classification(dataset)
         self.printDataset(result)

-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_modelscope_dataset(self):
         text_classification = pipeline(task=Tasks.text_classification)
         # loaded from modelscope dataset
         dataset = MsDataset.load(
-            'squad', split='train', target='context', hub=Hubs.modelscope)
+            'squad',
+            namespace='damotest',
+            split='train',
+            target='context',
+            hub=Hubs.modelscope)
         result = text_classification(dataset)
         self.printDataset(result)




+60 -0 tests/pipelines/test_visual_question_answering.py

@@ -0,0 +1,60 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import unittest

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.multi_modal import MPlugForVisualQuestionAnswering
from modelscope.pipelines import VisualQuestionAnsweringPipeline, pipeline
from modelscope.preprocessors import MPlugVisualQuestionAnsweringPreprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class VisualQuestionAnsweringTest(unittest.TestCase):
model_id = 'damo/mplug_visual-question-answering_coco_large_en'
input_vqa = {
'image': 'data/test/images/image_mplug_vqa.jpg',
'question': 'What is the woman doing?',
}

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run(self):
cache_path = snapshot_download(self.model_id)
preprocessor = MPlugVisualQuestionAnsweringPreprocessor(cache_path)
model = MPlugForVisualQuestionAnswering(cache_path)
pipeline1 = VisualQuestionAnsweringPipeline(
model, preprocessor=preprocessor)
pipeline2 = pipeline(
Tasks.visual_question_answering,
model=model,
preprocessor=preprocessor)
print(f"question: {self.input_vqa['question']}")
print(f"pipeline1: {pipeline1(self.input_vqa)['answer']}")
print(f"pipeline2: {pipeline2(self.input_vqa)['answer']}")

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_model_from_modelhub(self):
model = Model.from_pretrained(self.model_id)
preprocessor = MPlugVisualQuestionAnsweringPreprocessor(
model.model_dir)
pipeline_vqa = pipeline(
task=Tasks.visual_question_answering,
model=model,
preprocessor=preprocessor)
print(pipeline_vqa(self.input_vqa))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name(self):
pipeline_vqa = pipeline(
Tasks.visual_question_answering, model=self.model_id)
print(pipeline_vqa(self.input_vqa))

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_default_model(self):
pipeline_vqa = pipeline(task=Tasks.visual_question_answering)
print(pipeline_vqa(self.input_vqa))


if __name__ == '__main__':
unittest.main()

+64 -0 tests/pipelines/test_zero_shot_classification.py

@@ -0,0 +1,64 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import SbertForZeroShotClassification
from modelscope.pipelines import ZeroShotClassificationPipeline, pipeline
from modelscope.preprocessors import ZeroShotClassificationPreprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class ZeroShotClassificationTest(unittest.TestCase):
model_id = 'damo/nlp_structbert_zero-shot-classification_chinese-base'
sentence = '全新突破 解放军运20版空中加油机曝光'
labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事']
template = '这篇文章的标题是{}'

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_direct_file_download(self):
cache_path = snapshot_download(self.model_id)
tokenizer = ZeroShotClassificationPreprocessor(cache_path)
model = SbertForZeroShotClassification(cache_path, tokenizer=tokenizer)
pipeline1 = ZeroShotClassificationPipeline(
model, preprocessor=tokenizer)
pipeline2 = pipeline(
Tasks.zero_shot_classification,
model=model,
preprocessor=tokenizer)

print(
f'sentence: {self.sentence}\n'
f'pipeline1:{pipeline1(input=self.sentence,candidate_labels=self.labels)}'
)
print()
print(
f'sentence: {self.sentence}\n'
f'pipeline2: {pipeline2(self.sentence,candidate_labels=self.labels,hypothesis_template=self.template)}'
)

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_from_modelhub(self):
model = Model.from_pretrained(self.model_id)
tokenizer = ZeroShotClassificationPreprocessor(model.model_dir)
pipeline_ins = pipeline(
task=Tasks.zero_shot_classification,
model=model,
preprocessor=tokenizer)
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name(self):
pipeline_ins = pipeline(
task=Tasks.zero_shot_classification, model=self.model_id)
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_default_model(self):
pipeline_ins = pipeline(task=Tasks.zero_shot_classification)
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))


if __name__ == '__main__':
unittest.main()
