
merge nlp

master · ly119399 · 3 years ago · commit 80461600f7
100 changed files with 6228 additions and 492 deletions
  1. +3 -0 data/test/images/image_captioning.png
  2. +3 -0 data/test/images/ocr_detection.jpg
  3. +3 -0 data/test/videos/action_recognition_test_video.mp4
  4. +0 -0 modelscope/hub/__init__.py
  5. +265 -0 modelscope/hub/api.py
  6. +8 -0 modelscope/hub/constants.py
  7. +30 -0 modelscope/hub/errors.py
  8. +254 -0 modelscope/hub/file_download.py
  9. +82 -0 modelscope/hub/git.py
  10. +173 -0 modelscope/hub/repository.py
  11. +125 -0 modelscope/hub/snapshot_download.py
  12. +0 -0 modelscope/hub/utils/__init__.py
  13. +40 -0 modelscope/hub/utils/_subprocess.py
  14. +294 -0 modelscope/hub/utils/caching.py
  15. +39 -0 modelscope/hub/utils/utils.py
  16. +104 -0 modelscope/metainfo.py
  17. +7 -3 modelscope/models/__init__.py
  18. +4 -2 modelscope/models/audio/tts/am/sambert_hifi_16k.py
  19. +3 -3 modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py
  20. +2 -1 modelscope/models/audio/tts/vocoder/hifigan16k.py
  21. +1 -1 modelscope/models/audio/tts/vocoder/models/models.py
  22. +16 -11 modelscope/models/base.py
  23. +0 -0 modelscope/models/cv/action_recognition/__init__.py
  24. +91 -0 modelscope/models/cv/action_recognition/models.py
  25. +472 -0 modelscope/models/cv/action_recognition/tada_convnext.py
  26. +1 -0 modelscope/models/multi_model/__init__.py
  27. +80 -0 modelscope/models/multi_model/image_captioning_model.py
  28. +3 -0 modelscope/models/nlp/__init__.py
  29. +2 -2 modelscope/models/nlp/bert_for_sequence_classification.py
  30. +63 -0 modelscope/models/nlp/masked_language_model.py
  31. +9 -3 modelscope/models/nlp/palm_for_text_generation.py
  32. +23 -0 modelscope/models/nlp/sbert_for_nli.py
  33. +8 -71 modelscope/models/nlp/sbert_for_sentence_similarity.py
  34. +24 -0 modelscope/models/nlp/sbert_for_sentiment_classification.py
  35. +71 -0 modelscope/models/nlp/sbert_for_sequence_classification.py
  36. +18 -11 modelscope/models/nlp/sbert_for_token_classification.py
  37. +5 -7 modelscope/models/nlp/space/dialog_intent_prediction_model.py
  38. +5 -6 modelscope/models/nlp/space/dialog_modeling_model.py
  39. +1 -1 modelscope/models/nlp/space/dialog_state_tracking.py
  40. +1 -2 modelscope/models/nlp/space/model/gen_unified_transformer.py
  41. +1 -1 modelscope/models/nlp/space/model/intent_unified_transformer.py
  42. +3 -4 modelscope/models/nlp/space/model/unified_transformer.py
  43. +2 -3 modelscope/models/nlp/space/modules/transformer_block.py
  44. +1 -4 modelscope/pipelines/__init__.py
  45. +3 -1 modelscope/pipelines/audio/linear_aec_pipeline.py
  46. +2 -1 modelscope/pipelines/audio/text_to_speech_pipeline.py
  47. +8 -14 modelscope/pipelines/base.py
  48. +49 -23 modelscope/pipelines/builder.py
  49. +2 -0 modelscope/pipelines/cv/__init__.py
  50. +65 -0 modelscope/pipelines/cv/action_recognition_pipeline.py
  51. +2 -1 modelscope/pipelines/cv/image_cartoon_pipeline.py
  52. +2 -1 modelscope/pipelines/cv/image_matting_pipeline.py
  53. +168 -0 modelscope/pipelines/cv/ocr_detection_pipeline.py
  54. +0 -0 modelscope/pipelines/cv/ocr_utils/__init__.py
  55. +158 -0 modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py
  56. +1098 -0 modelscope/pipelines/cv/ocr_utils/ops.py
  57. +432 -0 modelscope/pipelines/cv/ocr_utils/resnet18_v1.py
  58. +231 -0 modelscope/pipelines/cv/ocr_utils/resnet_utils.py
  59. +108 -0 modelscope/pipelines/cv/ocr_utils/utils.py
  60. +1 -1 modelscope/pipelines/multi_modal/__init__.py
  61. +35 -0 modelscope/pipelines/multi_modal/image_captioning_pipeline.py
  62. +6 -3 modelscope/pipelines/nlp/__init__.py
  63. +9 -7 modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
  64. +4 -3 modelscope/pipelines/nlp/dialog_modeling_pipeline.py
  65. +45 -0 modelscope/pipelines/nlp/dialog_state_tracking.py
  66. +107 -0 modelscope/pipelines/nlp/fill_mask_pipeline.py
  67. +72 -0 modelscope/pipelines/nlp/nli_pipeline.py
  68. +19 -9 modelscope/pipelines/nlp/sentence_similarity_pipeline.py
  69. +77 -0 modelscope/pipelines/nlp/sentiment_classification_pipeline.py
  70. +2 -1 modelscope/pipelines/nlp/sequence_classification_pipeline.py
  71. +0 -46 modelscope/pipelines/nlp/space/dialog_state_tracking.py
  72. +18 -7 modelscope/pipelines/nlp/text_generation_pipeline.py
  73. +19 -9 modelscope/pipelines/nlp/word_segmentation_pipeline.py
  74. +33 -0 modelscope/pipelines/outputs.py
  75. +44 -12 modelscope/pipelines/util.py
  76. +4 -4 modelscope/preprocessors/__init__.py
  77. +2 -1 modelscope/preprocessors/image.py
  78. +41 -50 modelscope/preprocessors/multi_modal.py
  79. +223 -21 modelscope/preprocessors/nlp.py
  80. +4 -5 modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
  81. +6 -8 modelscope/preprocessors/space/dialog_modeling_preprocessor.py
  82. +31 -29 modelscope/preprocessors/space/fields/dst_processors.py
  83. +4 -4 modelscope/preprocessors/space/fields/gen_field.py
  84. +4 -4 modelscope/preprocessors/space/fields/intent_field.py
  85. +3 -4 modelscope/preprocessors/text_to_speech.py
  86. +232 -0 modelscope/preprocessors/video.py
  87. +22 -0 modelscope/pydatasets/config.py
  88. +323 -58 modelscope/pydatasets/py_dataset.py
  89. +0 -0 modelscope/pydatasets/utils/__init__.py
  90. +66 -0 modelscope/pydatasets/utils/ms_api.py
  91. +1 -1 modelscope/trainers/nlp/space/trainers/gen_trainer.py
  92. +1 -3 modelscope/trainers/nlp/space/trainers/intent_trainer.py
  93. +5 -2 modelscope/utils/constant.py
  94. +61 -8 modelscope/utils/hub.py
  95. +1 -1 modelscope/utils/registry.py
  96. +15 -0 modelscope/utils/test_utils.py
  97. +11 -11 requirements/audio.txt
  98. +2 -0 requirements/cv.txt
  99. +3 -2 requirements/nlp.txt
  100. +4 -1 requirements/runtime.txt

+3 -0 data/test/images/image_captioning.png

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:af83a94899a6d23339c3ecc5c4c58c57c835af57b531a2f4c50461184f820141
size 603621

+3 -0 data/test/images/ocr_detection.jpg

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5c8435db5583400be5d11a2c17910c96133b462c8a99ccaf0e19f4aac34e0a94
size 141149

+3 -0 data/test/videos/action_recognition_test_video.mp4

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:24dc4237b1197321ee8486bb983fa01fd47e2b4afdb3c2df24229e5f2bd20119
size 1475924

modelscope/pipelines/nlp/space/__init__.py → modelscope/hub/__init__.py


+265 -0 modelscope/hub/api.py

@@ -0,0 +1,265 @@
import os
import pickle
import subprocess
from http.cookiejar import CookieJar
from os.path import expanduser
from typing import List, Optional, Tuple, Union

import requests

from modelscope.utils.logger import get_logger
from .constants import LOGGER_NAME
from .errors import NotExistError, is_ok, raise_on_error
from .utils.utils import get_endpoint, model_id_to_group_owner_name

logger = get_logger()


class HubApi:

def __init__(self, endpoint=None):
self.endpoint = endpoint if endpoint is not None else get_endpoint()

def login(
self,
user_name: str,
password: str,
) -> Tuple[str, CookieJar]:
"""
Login with username and password

Args:
user_name(`str`): user name on ModelScope
password(`str`): password

Returns:
cookies: to authenticate yourself to ModelScope open-api
gitlab token: to access private repos

<Tip>
You only have to login once within 30 days.
</Tip>

TODO: handle cookies expire

"""
path = f'{self.endpoint}/api/v1/login'
r = requests.post(
path, json={
'username': user_name,
'password': password
})
r.raise_for_status()
d = r.json()
raise_on_error(d)

token = d['Data']['AccessToken']
cookies = r.cookies

# save token and cookie
ModelScopeConfig.save_token(token)
ModelScopeConfig.save_cookies(cookies)
ModelScopeConfig.write_to_git_credential(user_name, password)

return d['Data']['AccessToken'], cookies

def create_model(self, model_id: str, chinese_name: str, visibility: int,
license: str) -> str:
"""
Create model repo at ModelScopeHub

Args:
model_id:(`str`): The model id
chinese_name(`str`): chinese name of the model
visibility(`int`): visibility of the model(1-private, 3-internal, 5-public)
license(`str`): license of the model, candidates can be found at: TBA

Returns:
name of the model created

<Tip>
model_id = {owner}/{name}
</Tip>
"""
cookies = ModelScopeConfig.get_cookies()
if cookies is None:
raise ValueError('Token does not exist, please login first.')

path = f'{self.endpoint}/api/v1/models'
owner_or_group, name = model_id_to_group_owner_name(model_id)
r = requests.post(
path,
json={
'Path': owner_or_group,
'Name': name,
'ChineseName': chinese_name,
'Visibility': visibility,
'License': license
},
cookies=cookies)
r.raise_for_status()
raise_on_error(r.json())
d = r.json()
return d['Data']['Name']

def delete_model(self, model_id):
"""_summary_

Args:
model_id (str): The model id.
<Tip>
model_id = {owner}/{name}
</Tip>
"""
cookies = ModelScopeConfig.get_cookies()
path = f'{self.endpoint}/api/v1/models/{model_id}'

r = requests.delete(path, cookies=cookies)
r.raise_for_status()
raise_on_error(r.json())

def get_model_url(self, model_id):
return f'{self.endpoint}/api/v1/models/{model_id}.git'

def get_model(
self,
model_id: str,
revision: str = 'master',
) -> str:
"""
Get model information at modelscope_hub

Args:
model_id(`str`): The model id.
revision(`str`): revision of model
Returns:
The model details information.
Raises:
NotExistError: If the model does not exist.
<Tip>
model_id = {owner}/{name}
</Tip>
"""
cookies = ModelScopeConfig.get_cookies()
owner_or_group, name = model_id_to_group_owner_name(model_id)
path = f'{self.endpoint}/api/v1/models/{owner_or_group}/{name}?Revision={revision}'

r = requests.get(path, cookies=cookies)
if r.status_code == 200:
if is_ok(r.json()):
return r.json()['Data']
else:
raise NotExistError(r.json()['Message'])
else:
r.raise_for_status()

def get_model_branches_and_tags(
self,
model_id: str,
) -> Tuple[List[str], List[str]]:
cookies = ModelScopeConfig.get_cookies()

path = f'{self.endpoint}/api/v1/models/{model_id}/revisions'
r = requests.get(path, cookies=cookies)
r.raise_for_status()
d = r.json()
raise_on_error(d)
info = d['Data']
branches = [x['Revision'] for x in info['RevisionMap']['Branches']
] if info['RevisionMap']['Branches'] else []
tags = [x['Revision'] for x in info['RevisionMap']['Tags']
] if info['RevisionMap']['Tags'] else []
return branches, tags

def get_model_files(
self,
model_id: str,
revision: Optional[str] = 'master',
root: Optional[str] = None,
recursive: Optional[bool] = False,
use_cookies: Union[bool, CookieJar] = False) -> List[dict]:

cookies = None
if isinstance(use_cookies, CookieJar):
cookies = use_cookies
elif use_cookies:
cookies = ModelScopeConfig.get_cookies()
if cookies is None:
raise ValueError('Token does not exist, please login first.')

path = f'{self.endpoint}/api/v1/models/{model_id}/repo/files?Revision={revision}&Recursive={recursive}'
if root is not None:
path = path + f'&Root={root}'

r = requests.get(path, cookies=cookies)

r.raise_for_status()
d = r.json()
raise_on_error(d)

files = []
for file in d['Data']['Files']:
if file['Name'] == '.gitignore' or file['Name'] == '.gitattributes':
continue

files.append(file)
return files


class ModelScopeConfig:
path_credential = expanduser('~/.modelscope/credentials')
os.makedirs(path_credential, exist_ok=True)

@classmethod
def save_cookies(cls, cookies: CookieJar):
with open(os.path.join(cls.path_credential, 'cookies'), 'wb+') as f:
pickle.dump(cookies, f)

@classmethod
def get_cookies(cls):
try:
with open(os.path.join(cls.path_credential, 'cookies'), 'rb') as f:
return pickle.load(f)
except FileNotFoundError:
logger.warning(
"Auth token does not exist, you'll get an authentication"
' error when downloading private model files. Please login first.')

@classmethod
def save_token(cls, token: str):
with open(os.path.join(cls.path_credential, 'token'), 'w+') as f:
f.write(token)

@classmethod
def get_token(cls) -> Optional[str]:
"""
Get token or None if not existent.

Returns:
`str` or `None`: The token, `None` if it doesn't exist.

"""
token = None
try:
with open(os.path.join(cls.path_credential, 'token'), 'r') as f:
token = f.read()
except FileNotFoundError:
pass
return token

@staticmethod
def write_to_git_credential(username: str, password: str):
with subprocess.Popen(
'git credential-store store'.split(),
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
) as process:
input_username = f'username={username.lower()}'
input_password = f'password={password}'

process.stdin.write(
f'url={get_endpoint()}\n{input_username}\n{input_password}\n\n'
.encode('utf-8'))
process.stdin.flush()
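
A minimal usage sketch of HubApi (hedged: the user name, password, and model id below are placeholders, not real credentials):

from modelscope.hub.api import HubApi

api = HubApi()
# login once; token and cookies are persisted under ~/.modelscope/credentials
token, cookies = api.login(user_name='my-user', password='my-password')
# create a repo: model_id = {owner}/{name}; visibility: 1-private, 3-internal, 5-public
api.create_model(model_id='my-user/my-model', chinese_name='my-model',
                 visibility=5, license='Apache License 2.0')
model_info = api.get_model('my-user/my-model')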

+8 -0 modelscope/hub/constants.py

@@ -0,0 +1,8 @@
MODELSCOPE_URL_SCHEME = 'http://'
DEFAULT_MODELSCOPE_DOMAIN = '101.201.119.157:32330'
DEFAULT_MODELSCOPE_GITLAB_DOMAIN = '101.201.119.157:31102'

DEFAULT_MODELSCOPE_GROUP = 'damo'
MODEL_ID_SEPARATOR = '/'

LOGGER_NAME = 'ModelScopeHub'

+30 -0 modelscope/hub/errors.py

@@ -0,0 +1,30 @@
class NotExistError(Exception):
pass


class RequestError(Exception):
pass


def is_ok(rsp):
""" Check the request is ok

Args:
rsp (dict): The response body.
Failed: {'Code': 10010101004, 'Message': 'get model info failed, err: unauthorized permission',
'RequestId': '', 'Success': False}
Success: {'Code': 200, 'Data': {}, 'Message': 'success', 'RequestId': '', 'Success': True}
"""
return rsp['Code'] == 200 and rsp['Success']


def raise_on_error(rsp):
"""If response error, raise exception

Args:
rsp (_type_): The server response
"""
if rsp['Code'] == 200 and rsp['Success']:
return True
else:
raise RequestError(rsp['Message'])

+254 -0 modelscope/hub/file_download.py

@@ -0,0 +1,254 @@
import copy
import fnmatch
import logging
import os
import sys
import tempfile
import time
from functools import partial
from hashlib import sha256
from pathlib import Path
from typing import BinaryIO, Dict, Optional, Union
from uuid import uuid4

import json
import requests
from filelock import FileLock
from requests.exceptions import HTTPError
from tqdm import tqdm

from modelscope import __version__
from modelscope.utils.logger import get_logger
from .api import HubApi, ModelScopeConfig
from .constants import (DEFAULT_MODELSCOPE_GROUP, LOGGER_NAME,
MODEL_ID_SEPARATOR)
from .errors import NotExistError, RequestError, raise_on_error
from .utils.caching import ModelFileSystemCache
from .utils.utils import (get_cache_dir, get_endpoint,
model_id_to_group_owner_name)

SESSION_ID = uuid4().hex
logger = get_logger()


def model_file_download(
model_id: str,
file_path: str,
revision: Optional[str] = 'master',
cache_dir: Optional[str] = None,
user_agent: Union[Dict, str, None] = None,
local_files_only: Optional[bool] = False,
) -> Optional[str]: # pragma: no cover
"""
Download from a given URL and cache it if it's not already present in the
local cache.

Given a URL, this function looks for the corresponding file in the local
cache. If it's not there, download it. Then return the path to the cached
file.

Args:
model_id (`str`):
The model to which the file to be downloaded belongs.
file_path(`str`):
Path of the file to be downloaded, relative to the root of model repo
revision(`str`, *optional*):
revision of the model file to be downloaded.
Can be any of a branch, tag or commit hash, default to `master`
cache_dir (`str`, `Path`, *optional*):
Path to the folder where cached files are stored.
user_agent (`dict`, `str`, *optional*):
The user-agent info in the form of a dictionary or a string.
local_files_only (`bool`, *optional*, defaults to `False`):
If `True`, avoid downloading the file and return the path to the
local cached file if it exists.
if `False`, download the file anyway, even if it exists

Returns:
Local path (string) of file or if networking is off, last version of
file cached on disk.

<Tip>

Raises the following errors:

- [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
if `use_auth_token=True` and the token cannot be found.
- [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
if ETag cannot be determined.
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
if some parameter value is invalid

</Tip>
"""
if cache_dir is None:
cache_dir = get_cache_dir()
if isinstance(cache_dir, Path):
cache_dir = str(cache_dir)

group_or_owner, name = model_id_to_group_owner_name(model_id)

cache = ModelFileSystemCache(cache_dir, group_or_owner, name)

# if local_files_only is `True` and the file already exists in cached_path
# return the cached path
if local_files_only:
cached_file_path = cache.get_file_by_path(file_path)
if cached_file_path is not None:
logger.warning(
"File exists in local cache, but we're not sure it's up to date"
)
return cached_file_path
else:
raise ValueError(
'Cannot find the requested files in the cached path and outgoing'
' traffic has been disabled. To enable model look-ups and downloads'
" online, set 'local_files_only' to False.")

_api = HubApi()
headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
branches, tags = _api.get_model_branches_and_tags(model_id)
file_to_download_info = None
is_commit_id = False
if revision in branches or revision in tags: # The revision is a branch or tag;
# we need to confirm the version is up to date,
# so get the file list to check whether the latest version is cached; if so return it, otherwise download
model_files = _api.get_model_files(
model_id=model_id,
revision=revision,
recursive=True,
)

for model_file in model_files:
if model_file['Type'] == 'tree':
continue

if model_file['Path'] == file_path:
model_file['Branch'] = revision
if cache.exists(model_file):
return cache.get_file_by_info(model_file)
else:
file_to_download_info = model_file

if file_to_download_info is None:
raise NotExistError('The file path: %s does not exist in: %s' %
(file_path, model_id))
else: # the revision is commit id.
cached_file_path = cache.get_file_by_path_and_commit_id(
file_path, revision)
if cached_file_path is not None:
logger.info('The specified file is in cache, skip downloading!')
return cached_file_path # the file is in cache.
is_commit_id = True
# we need to download again
# TODO: skip using JWT for authorization, use cookie instead
cookies = ModelScopeConfig.get_cookies()
url_to_download = get_file_download_url(model_id, file_path, revision)
file_to_download_info = {
'Path': file_path,
'Revision':
revision if is_commit_id else file_to_download_info['Revision']
}
# Prevent parallel downloads of the same file with a lock.
lock_path = cache.get_root_location() + '.lock'

with FileLock(lock_path):
temp_file_name = next(tempfile._get_candidate_names())
http_get_file(
url_to_download,
cache_dir,
temp_file_name,
headers=headers,
cookies=None if cookies is None else cookies.get_dict())
return cache.put_file(file_to_download_info,
os.path.join(cache_dir, temp_file_name))


def http_user_agent(user_agent: Union[Dict, str, None] = None, ) -> str:
"""Formats a user-agent string with basic info about a request.

Args:
user_agent (`str`, `dict`, *optional*):
The user agent info in the form of a dictionary or a single string.

Returns:
The formatted user-agent string.
"""
ua = f'modelscope/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}'

if isinstance(user_agent, dict):
ua = '; '.join(f'{k}/{v}' for k, v in user_agent.items())
elif isinstance(user_agent, str):
ua = user_agent
return ua


def get_file_download_url(model_id: str, file_path: str, revision: str):
"""
Format file download url according to `model_id`, `revision` and `file_path`.
e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`,
the resulting download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md
"""
download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}'
return download_url_template.format(
endpoint=get_endpoint(),
model_id=model_id,
revision=revision,
file_path=file_path,
)


def http_get_file(
url: str,
local_dir: str,
file_name: str,
cookies: Dict[str, str],
headers: Optional[Dict[str, str]] = None,
):
"""
Download a remote file. Do not gobble up errors.
This method is shared by model_file_download and snapshot_download, whose behaviors differ slightly.
TODO: consolidate the two download paths to avoid duplicate code

Args:
url(`str`):
actual download url of the file
local_dir(`str`):
local directory where the downloaded file stores
file_name(`str`):
name of the file stored in `local_dir`
cookies(`Dict[str, str]`):
cookies used to authenticate the user, which is required for downloading private repos
headers(`Optional[Dict[str, str]] = None`):
http headers to carry necessary info when requesting the remote file

"""
temp_file_manager = partial(
tempfile.NamedTemporaryFile, mode='wb', dir=local_dir, delete=False)

with temp_file_manager() as temp_file:
logger.info('downloading %s to %s', url, temp_file.name)
headers = copy.deepcopy(headers)

r = requests.get(url, stream=True, headers=headers, cookies=cookies)
r.raise_for_status()

content_length = r.headers.get('Content-Length')
total = int(content_length) if content_length is not None else None

progress = tqdm(
unit='B',
unit_scale=True,
unit_divisor=1024,
total=total,
initial=0,
desc='Downloading',
)
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
progress.update(len(chunk))
temp_file.write(chunk)
progress.close()

logger.info('storing %s in cache at %s', url, local_dir)
os.replace(temp_file.name, os.path.join(local_dir, file_name))
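
A minimal usage sketch of model_file_download (hedged: the model id and file path are placeholders):

from modelscope.hub.file_download import model_file_download

# downloads README.md of the given model at branch `master` into the local cache
local_path = model_file_download(
    model_id='damo/my-model', file_path='README.md', revision='master')
print(local_path)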

+82 -0 modelscope/hub/git.py

@@ -0,0 +1,82 @@
from typing import Union

from modelscope.utils.logger import get_logger
from .constants import LOGGER_NAME
from .utils._subprocess import run_subprocess

logger = get_logger()


def git_clone(
local_dir: str,
repo_url: str,
):
# TODO: use "git clone" or "git lfs clone" according to git version
# TODO: print stderr when subprocess fails
run_subprocess(
f'git clone {repo_url}'.split(),
local_dir,
True,
)


def git_checkout(
local_dir: str,
revision: str,
):
run_subprocess(f'git checkout {revision}'.split(), local_dir)


def git_add(local_dir: str, ):
run_subprocess(
'git add .'.split(),
local_dir,
True,
)


def git_commit(local_dir: str, commit_message: str):
run_subprocess(
'git commit -v -m'.split() + [commit_message],
local_dir,
True,
)


def git_push(local_dir: str, branch: str):
# check current branch
cur_branch = git_current_branch(local_dir)
if cur_branch != branch:
logger.error(
"You're trying to push to a different branch, please double check")
return

run_subprocess(
f'git push origin {branch}'.split(),
local_dir,
True,
)


def git_current_branch(local_dir: str) -> Union[str, None]:
"""
Get current branch name

Args:
local_dir(`str`): local model repo directory

Returns:
The name of the branch you're currently on.
"""
try:
process = run_subprocess(
'git rev-parse --abbrev-ref HEAD'.split(),
local_dir,
True,
)

return str(process.stdout).strip()
except Exception as e:
raise e
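
A sketch of the intended commit-and-push flow with these helpers (hedged: the local path and commit message are placeholders, and local_dir is assumed to already be a git clone):

from modelscope.hub.git import git_add, git_commit, git_current_branch, git_push

local_dir = '/path/to/my-model'
git_add(local_dir)
git_commit(local_dir, 'add model weights')
git_push(local_dir, git_current_branch(local_dir))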

+173 -0 modelscope/hub/repository.py

@@ -0,0 +1,173 @@
import os
import subprocess
from pathlib import Path
from typing import Optional, Union

from modelscope.utils.logger import get_logger
from .api import ModelScopeConfig
from .constants import MODELSCOPE_URL_SCHEME
from .git import git_add, git_checkout, git_clone, git_commit, git_push
from .utils._subprocess import run_subprocess
from .utils.utils import get_gitlab_domain

logger = get_logger()


class Repository:

def __init__(
self,
local_dir: str,
clone_from: Optional[str] = None,
auth_token: Optional[str] = None,
private: Optional[bool] = False,
revision: Optional[str] = 'master',
):
"""
Instantiate a Repository object by cloning the remote ModelScopeHub repo
Args:
local_dir(`str`):
local directory to store the model files
clone_from(`Optional[str] = None`):
model id in ModelScope-hub from which git clone
You should ignore this parameter when `local_dir` is already a git repo
auth_token(`Optional[str]`):
token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter
as the token is already saved when you login the first time
private(`Optional[bool]`):
whether the model is private, default to False
revision(`Optional[str]`):
revision of the model you want to clone from. Can be any of a branch, tag or commit hash
"""
logger.info('Instantiating Repository object...')

# Create local directory if not exist
os.makedirs(local_dir, exist_ok=True)
self.local_dir = os.path.join(os.getcwd(), local_dir)

self.private = private

# Check git and git-lfs installation
self.check_git_versions()

# Retrieve auth token
if not private and isinstance(auth_token, str):
logger.warning(
'cloning a public repo with a token, which will be ignored')
self.token = None
else:
if isinstance(auth_token, str):
self.token = auth_token
else:
self.token = ModelScopeConfig.get_token()

if self.token is None:
raise EnvironmentError(
'Token does not exist, the clone will fail for private repo.'
'Please login first.')

# git clone
if clone_from is not None:
self.model_id = clone_from
logger.info('cloning model repo to %s ...', self.local_dir)
git_clone(self.local_dir, self.get_repo_url())
else:
if is_git_repo(self.local_dir):
logger.debug('[Repository] is a valid git repo')
else:
raise ValueError(
'If not specifying `clone_from`, you need to pass Repository a'
' valid git clone.')

# git checkout
if isinstance(revision, str) and revision != 'master':
git_checkout(self.local_dir, revision)

def push_to_hub(self,
commit_message: str,
revision: Optional[str] = 'master'):
"""
Push local changes to the hub

Args:
commit_message(`str`):
commit message describing the changes, it's mandatory
revision(`Optional[str]`):
remote branch you want to push to, default to `master`

<Tip>
The function complains when the local and remote branches differ, so please double-check before pushing
</Tip>

"""
git_add(self.local_dir)
git_commit(self.local_dir, commit_message)

logger.info('Pushing changes to repo...')
git_push(self.local_dir, revision)

# TODO: if git push fails, how to retry?

def check_git_versions(self):
"""
Checks that `git` and `git-lfs` can be run.

Raises:
`EnvironmentError`: if `git` or `git-lfs` are not installed.
"""
try:
git_version = run_subprocess('git --version'.split(),
self.local_dir).stdout.strip()
except FileNotFoundError:
raise EnvironmentError(
'Looks like you do not have git installed, please install.')

try:
lfs_version = run_subprocess('git-lfs --version'.split(),
self.local_dir).stdout.strip()
except FileNotFoundError:
raise EnvironmentError(
'Looks like you do not have git-lfs installed, please install.'
' You can install from https://git-lfs.github.com/.'
' Then run `git lfs install` (you only have to do this once).')
logger.info(git_version + '\n' + lfs_version)

def get_repo_url(self) -> str:
"""
Get the repo url to clone, according to whether the repo is private or not
"""
url = None

if self.private:
url = f'{MODELSCOPE_URL_SCHEME}oauth2:{self.token}@{get_gitlab_domain()}/{self.model_id}'
else:
url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{self.model_id}'

if not url:
raise ValueError(
'Empty repo url, please check clone_from parameter')

logger.debug('url to clone: %s', str(url))

return url


def is_git_repo(folder: Union[str, Path]) -> bool:
"""
Check if the folder is the root or part of a git repository

Args:
folder (`str`):
The folder in which to run the command.

Returns:
`bool`: `True` if the folder is the root of or part of a git repository, `False`
otherwise.
"""
folder_exists = os.path.exists(os.path.join(folder, '.git'))
git_branch = subprocess.run(
'git branch'.split(),
cwd=folder,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
return folder_exists and git_branch.returncode == 0
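
A minimal usage sketch of Repository (hedged: the model id and paths are placeholders):

from modelscope.hub.repository import Repository

repo = Repository(local_dir='my-model', clone_from='my-user/my-model')
# ... modify or add files under my-model/ ...
repo.push_to_hub(commit_message='update model files')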

+125 -0 modelscope/hub/snapshot_download.py

@@ -0,0 +1,125 @@
import os
import tempfile
from glob import glob
from pathlib import Path
from typing import Dict, Optional, Union

from modelscope.utils.logger import get_logger
from .api import HubApi, ModelScopeConfig
from .constants import DEFAULT_MODELSCOPE_GROUP, MODEL_ID_SEPARATOR
from .errors import NotExistError, RequestError, raise_on_error
from .file_download import (get_file_download_url, http_get_file,
http_user_agent)
from .utils.caching import ModelFileSystemCache
from .utils.utils import get_cache_dir, model_id_to_group_owner_name

logger = get_logger()


def snapshot_download(model_id: str,
revision: Optional[str] = 'master',
cache_dir: Union[str, Path, None] = None,
user_agent: Optional[Union[Dict, str]] = None,
local_files_only: Optional[bool] = False,
private: Optional[bool] = False) -> str:
"""Download all files of a repo.
Downloads a whole snapshot of a repo's files at the specified revision. This
is useful when you want all files from a repo, because you don't know which
ones you will need a priori. All files are nested inside a folder in order
to keep their actual filename relative to that folder.

An alternative would be to just clone a repo but this would require that the
user always has git and git-lfs installed, and properly configured.
Args:
model_id (`str`):
A user or an organization name and a repo name separated by a `/`.
revision (`str`, *optional*):
An optional Git revision id which can be a branch name, a tag, or a
commit hash. NOTE: currently only branch and tag names are supported
cache_dir (`str`, `Path`, *optional*):
Path to the folder where cached files are stored.
user_agent (`str`, `dict`, *optional*):
The user-agent info in the form of a dictionary or a string.
local_files_only (`bool`, *optional*, defaults to `False`):
If `True`, avoid downloading the file and return the path to the
local cached file if it exists.
Returns:
Local folder path (string) of repo snapshot

<Tip>
Raises the following errors:
- [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
if `use_auth_token=True` and the token cannot be found.
- [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
ETag cannot be determined.
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
if some parameter value is invalid
</Tip>
"""

if cache_dir is None:
cache_dir = get_cache_dir()
if isinstance(cache_dir, Path):
cache_dir = str(cache_dir)

group_or_owner, name = model_id_to_group_owner_name(model_id)

cache = ModelFileSystemCache(cache_dir, group_or_owner, name)
if local_files_only:
if len(cache.cached_files) == 0:
raise ValueError(
'Cannot find the requested files in the cached path and outgoing'
' traffic has been disabled. To enable model look-ups and downloads'
" online, set 'local_files_only' to False.")
logger.warning('We cannot confirm the cached files are for revision: %s'
% revision)
return cache.get_root_location(
) # we can not confirm the cached file is for snapshot 'revision'
else:
# make headers
headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
_api = HubApi()
# get file list from model repo
branches, tags = _api.get_model_branches_and_tags(model_id)
if revision not in branches and revision not in tags:
raise NotExistError('The specified branch or tag: %s does not exist!'
% revision)

model_files = _api.get_model_files(
model_id=model_id,
revision=revision,
recursive=True,
use_cookies=private)

cookies = None
if private:
cookies = ModelScopeConfig.get_cookies()

for model_file in model_files:
if model_file['Type'] == 'tree':
continue
# check whether model_file exists in the cache; if so, skip the download
if cache.exists(model_file):
logger.info(
'The specified file is in cache, skip downloading!')
continue

# get download url
url = get_file_download_url(
model_id=model_id,
file_path=model_file['Path'],
revision=revision)

# First download to /tmp
http_get_file(
url=url,
local_dir=tempfile.gettempdir(),
file_name=model_file['Name'],
headers=headers,
cookies=None if cookies is None else cookies.get_dict())
# put file to cache
cache.put_file(
model_file,
os.path.join(tempfile.gettempdir(), model_file['Name']))

return cache.get_root_location()
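
A minimal usage sketch of snapshot_download (hedged: the model id is a placeholder):

from modelscope.hub.snapshot_download import snapshot_download

model_dir = snapshot_download('damo/my-model', revision='master')
# model_dir now contains all files of the repo at that revision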

tests/pipelines/nlp/__init__.py → modelscope/hub/utils/__init__.py


+40 -0 modelscope/hub/utils/_subprocess.py

@@ -0,0 +1,40 @@
import subprocess
from typing import List


def run_subprocess(command: List[str],
folder: str,
check=True,
**kwargs) -> subprocess.CompletedProcess:
"""
Method to run subprocesses. Calling this will capture the `stderr` and `stdout`,
please call `subprocess.run` manually in case you would like for them not to
be captured.

Args:
command (`List[str]`):
The command to execute as a list of strings.
folder (`str`):
The folder in which to run the command.
check (`bool`, *optional*, defaults to `True`):
Setting `check` to `True` will raise a `subprocess.CalledProcessError`
when the subprocess has a non-zero exit code.
kwargs (`Dict[str]`):
Keyword arguments to be passed to the `subprocess.run` underlying command.

Returns:
`subprocess.CompletedProcess`: The completed process.
"""
if isinstance(command, str):
raise ValueError(
'`run_subprocess` should be called with a list of strings.')

return subprocess.run(
command,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
check=check,
encoding='utf-8',
cwd=folder,
**kwargs,
)
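
A minimal usage sketch of run_subprocess (hedged: the command and folder are illustrative):

from modelscope.hub.utils._subprocess import run_subprocess

result = run_subprocess('git --version'.split(), folder='.')
print(result.stdout.strip())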

+294 -0 modelscope/hub/utils/caching.py

@@ -0,0 +1,294 @@
import hashlib
import logging
import os
import pickle
import tempfile
import time
from shutil import move, rmtree

from modelscope.utils.logger import get_logger

logger = get_logger()


class FileSystemCache(object):
"""Local file cache."""

KEY_FILE_NAME = '.msc'

def __init__(
self,
cache_root_location: str,
**kwargs,
):
"""
Parameters
----------
cache_location: str
The root location to store files.
"""
os.makedirs(cache_root_location, exist_ok=True)
self.cache_root_location = cache_root_location
self.load_cache()

def get_root_location(self):
return self.cache_root_location

def load_cache(self):
"""Read set of stored blocks from file
Args:
owner(`str`): individual or group username at modelscope, can be empty for official models
name(`str`): name of the model
Returns:
The model details information.
Raises:
NotExistError: If the model is not exist, will throw NotExistError
TODO: Error based error code.
<Tip>
model_id = {owner}/{name}
</Tip>
"""
self.cached_files = []
cache_keys_file_path = os.path.join(self.cache_root_location,
FileSystemCache.KEY_FILE_NAME)
if os.path.exists(cache_keys_file_path):
with open(cache_keys_file_path, 'rb') as f:
self.cached_files = pickle.load(f)

def save_cached_files(self):
"""Save cache metadata."""
# save new meta to tmp and move to KEY_FILE_NAME
cache_keys_file_path = os.path.join(self.cache_root_location,
FileSystemCache.KEY_FILE_NAME)
# TODO: Sync file write
fd, fn = tempfile.mkstemp()
with open(fd, 'wb') as f:
pickle.dump(self.cached_files, f)
move(fn, cache_keys_file_path)

def get_file(self, key):
"""Check the key is in the cache, if exist, return the file, otherwise return None.
Args:
key(`str`): The cache key.
Returns:
If file exist, return the cached file location, otherwise None.
Raises:
None
<Tip>
model_id = {owner}/{name}
</Tip>
"""
pass

def put_file(self, key, location):
"""Put file to the cache,
Args:
key(`str`): The cache key
location(`str`): Location of the file, we will move the file to cache.
Returns:
The cached file path of the file.
Raises:
None
<Tip>
model_id = {owner}/{name}
</Tip>
"""
pass

def remove_key(self, key):
Remove a cache key from the index; the file itself is removed separately.

Args:
key (dict): The cache key.
"""
self.cached_files.remove(key)
self.save_cached_files()

def exists(self, key):
for cache_file in self.cached_files:
if cache_file == key:
return True

return False

def clear_cache(self):
Remove all files and metadata from the cache

In the case of multiple cache locations, this clears only the last one,
which is assumed to be the read/write one.
"""
rmtree(self.cache_root_location)
self.load_cache()

def hash_name(self, key):
return hashlib.sha256(key.encode()).hexdigest()


class ModelFileSystemCache(FileSystemCache):
"""Local cache file layout
cache_root/owner/model_name/|individual cached files
|.mk: file, The cache index file
Save only one version for each file.
"""

def __init__(self, cache_root, owner, name):
"""Put file to the cache
Args:
cache_root(`str`): The modelscope local cache root(default: ~/.modelscope/cache/models/)
owner(`str`): The model owner.
name('str'): The name of the model
branch('str'): The branch of model
tag('str'): The tag of model
Returns:
Raises:
None
<Tip>
model_id = {owner}/{name}
</Tip>
"""
super().__init__(os.path.join(cache_root, owner, name))

def get_file_by_path(self, file_path):
"""Retrieve the cache if there is file match the path.
Args:
file_path (str): The file path in the model.
Returns:
path: the full path of the file.
"""
for cached_file in self.cached_files:
if file_path == cached_file['Path']:
cached_file_path = os.path.join(self.cache_root_location,
cached_file['Path'])
if os.path.exists(cached_file_path):
return cached_file_path
else:
self.remove_key(cached_file)

return None

def get_file_by_path_and_commit_id(self, file_path, commit_id):
"""Retrieve the cache if there is file match the path.
Args:
file_path (str): The file path in the model.
commit_id (str): The commit id of the file
Returns:
path: the full path of the file.
"""
for cached_file in self.cached_files:
if file_path == cached_file['Path'] and \
(cached_file['Revision'].startswith(commit_id) or commit_id.startswith(cached_file['Revision'])):
cached_file_path = os.path.join(self.cache_root_location,
cached_file['Path'])
if os.path.exists(cached_file_path):
return cached_file_path
else:
self.remove_key(cached_file)

return None

def get_file_by_info(self, model_file_info):
"""Check if exist cache file.

Args:
model_file_info (ModelFileInfo): The file information of the file.

Returns:
_type_: _description_
"""
cache_key = self.__get_cache_key(model_file_info)
for cached_file in self.cached_files:
if cached_file == cache_key:
orig_path = os.path.join(self.cache_root_location,
cached_file['Path'])
if os.path.exists(orig_path):
return orig_path
else:
self.remove_key(cached_file)

return None

def __get_cache_key(self, model_file_info):
cache_key = {
'Path': model_file_info['Path'],
'Revision': model_file_info['Revision'], # commit id
}
return cache_key

def exists(self, model_file_info):
"""Check the file is cached or not.

Args:
model_file_info (CachedFileInfo): The cached file info

Returns:
bool: If exists return True otherwise False
"""
key = self.__get_cache_key(model_file_info)
is_exists = False
for cached_key in self.cached_files:
if cached_key['Path'] == key['Path'] and (
cached_key['Revision'].startswith(key['Revision'])
or key['Revision'].startswith(cached_key['Revision'])):
is_exists = True
file_path = os.path.join(self.cache_root_location,
model_file_info['Path'])
if is_exists:
if os.path.exists(file_path):
return True
else:
self.remove_key(
model_file_info) # someone may have manually deleted the file
return False

def remove_if_exists(self, model_file_info):
"""We in cache, remove it.

Args:
model_file_info (ModelFileInfo): The model file information from server.
"""
for cached_file in self.cached_files:
if cached_file['Path'] == model_file_info['Path']:
self.remove_key(cached_file)
file_path = os.path.join(self.cache_root_location,
cached_file['Path'])
if os.path.exists(file_path):
os.remove(file_path)

def put_file(self, model_file_info, model_file_location):
"""Put model on model_file_location to cache, the model first download to /tmp, and move to cache.

Args:
model_file_info (str): The file description returned by get_model_files
sample:
{
"CommitMessage": "add model\n",
"CommittedDate": 1654857567,
"CommitterName": "mulin.lyh",
"IsLFS": false,
"Mode": "100644",
"Name": "resnet18.pth",
"Path": "resnet18.pth",
"Revision": "09b68012b27de0048ba74003690a890af7aff192",
"Size": 46827520,
"Type": "blob"
}
model_file_location (str): The location of the temporary file.

Returns:
str: The location of the cached file.
"""
self.remove_if_exists(model_file_info) # backup old revision
cache_key = self.__get_cache_key(model_file_info)
cache_full_path = os.path.join(
self.cache_root_location,
cache_key['Path']) # Branch and Tag do not have same name.
cache_file_dir = os.path.dirname(cache_full_path)
if not os.path.exists(cache_file_dir):
os.makedirs(cache_file_dir, exist_ok=True)
# We can't make operation transaction
move(model_file_location, cache_full_path)
self.cached_files.append(cache_key)
self.save_cached_files()
return cache_full_path
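
A minimal usage sketch of ModelFileSystemCache, using the sample file description above (hedged: paths are placeholders, and the temporary file is assumed to exist):

from modelscope.hub.utils.caching import ModelFileSystemCache

cache = ModelFileSystemCache('/tmp/model_cache', 'damo', 'resnet18')
file_info = {
    'Path': 'resnet18.pth',
    'Revision': '09b68012b27de0048ba74003690a890af7aff192',
}
cached_path = cache.put_file(file_info, '/tmp/resnet18.pth')  # moves the temp file into the cache
assert cache.exists(file_info)
assert cache.get_file_by_path('resnet18.pth') == cached_path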

+39 -0 modelscope/hub/utils/utils.py

@@ -0,0 +1,39 @@
import os

from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN,
DEFAULT_MODELSCOPE_GITLAB_DOMAIN,
DEFAULT_MODELSCOPE_GROUP,
MODEL_ID_SEPARATOR,
MODELSCOPE_URL_SCHEME)


def model_id_to_group_owner_name(model_id):
if MODEL_ID_SEPARATOR in model_id:
group_or_owner = model_id.split(MODEL_ID_SEPARATOR)[0]
name = model_id.split(MODEL_ID_SEPARATOR)[1]
else:
group_or_owner = DEFAULT_MODELSCOPE_GROUP
name = model_id
return group_or_owner, name


def get_cache_dir():
"""
cache dir precedence:
function parameter > environment > ~/.cache/modelscope/hub
"""
default_cache_dir = os.path.expanduser(
os.path.join('~/.cache', 'modelscope'))
return os.getenv('MODELSCOPE_CACHE', os.path.join(default_cache_dir,
'hub'))


def get_endpoint():
modelscope_domain = os.getenv('MODELSCOPE_DOMAIN',
DEFAULT_MODELSCOPE_DOMAIN)
return MODELSCOPE_URL_SCHEME + modelscope_domain


def get_gitlab_domain():
return os.getenv('MODELSCOPE_GITLAB_DOMAIN',
DEFAULT_MODELSCOPE_GITLAB_DOMAIN)
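
The model id convention these helpers implement, in a short sketch:

from modelscope.hub.utils.utils import model_id_to_group_owner_name

assert model_id_to_group_owner_name('my-user/my-model') == ('my-user', 'my-model')
# a bare name falls back to the default group
assert model_id_to_group_owner_name('my-model') == ('damo', 'my-model')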

+104 -0 modelscope/metainfo.py

@@ -0,0 +1,104 @@
# Copyright (c) Alibaba, Inc. and its affiliates.


class Models(object):
""" Names for different models.

Holds the standard model name to use for identifying different models.
This should be used to register models.

Model name should only contain model info but not task info.
"""
# vision models

# nlp models
bert = 'bert'
palm = 'palm-v2'
structbert = 'structbert'
veco = 'veco'

# audio models
sambert_hifi_16k = 'sambert-hifi-16k'
generic_tts_frontend = 'generic-tts-frontend'
hifigan16k = 'hifigan16k'

# multi-modal models
ofa = 'ofa'


class Pipelines(object):
""" Names for different pipelines.

Holds the standard pipeline name to use for identifying different pipelines.
This should be used to register pipelines.

For a pipeline that supports different models and implements a common function, we
should use the task name as the pipeline name.
For a pipeline that supports only one model, we should use ${Model}-${Task} as its name.
"""
# vision tasks
image_matting = 'unet-image-matting'
person_image_cartoon = 'unet-person-image-cartoon'
ocr_detection = 'resnet18-ocr-detection'
action_recognition = 'TAdaConv_action-recognition'

# nlp tasks
sentence_similarity = 'sentence-similarity'
word_segmentation = 'word-segmentation'
text_generation = 'text-generation'
sentiment_analysis = 'sentiment-analysis'
sentiment_classification = 'sentiment-classification'
fill_mask = 'fill-mask'
nli = 'nli'
dialog_intent_prediction = 'dialog-intent-prediction'
dialog_modeling = 'dialog-modeling'
dialog_state_tracking = 'dialog_state_tracking'

# audio tasks
sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts'
speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k'

# multi-modal tasks
image_caption = 'image-caption'


class Trainers(object):
""" Names for different trainer.

Holds the standard trainer name to use for identifying different trainer.
This should be used to register trainers.

For a general Trainer, you can use easynlp-trainer/ofa-trainer/sofa-trainer.
For a model specific Trainer, you can use ${ModelName}-${Task}-trainer.
"""

default = 'Trainer'


class Preprocessors(object):
""" Names for different preprocessor.

Holds the standard preprocessor name to use for identifying different preprocessor.
This should be used to register preprocessors.

For a general preprocessor, just use the function name as the preprocessor name, such as
resize-image, random-crop.
For a model-specific preprocessor, use ${modelname}-${function}.
"""

# cv preprocessor
load_image = 'load-image'

# nlp preprocessor
bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer'
palm_text_gen_tokenizer = 'palm-text-gen-tokenizer'
token_cls_tokenizer = 'token-cls-tokenizer'
nli_tokenizer = 'nli-tokenizer'
sen_cls_tokenizer = 'sen-cls-tokenizer'

# audio preprocessor
linear_aec_fbank = 'linear-aec-fbank'
text_to_tacotron_symbols = 'text-to-tacotron-symbols'

# multi-modal
ofa_image_caption = 'ofa-image-caption'
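
How these constants are meant to be used when registering a component, in a hedged sketch (the model class below is illustrative, mirroring the register_module calls elsewhere in this commit):

from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks

@MODELS.register_module(Tasks.text_to_speech, module_name=Models.hifigan16k)
class MyVocoder(Model):  # hypothetical class, for illustration only
    ...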

+7 -3 modelscope/models/__init__.py

@@ -1,7 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from .audio.tts.am import SambertNetHifi16k
from .audio.tts.vocoder import Hifigan16k
# from .audio.tts.am import SambertNetHifi16k
# from .audio.tts.vocoder import Hifigan16k
from .base import Model
from .builder import MODELS, build_model
from .nlp import BertForSequenceClassification, SbertForSentenceSimilarity
# from .multi_model import OfaForImageCaptioning
from .nlp import (BertForSequenceClassification, SbertForNLI,
SbertForSentenceSimilarity, SbertForSentimentClassification,
SbertForTokenClassification, StructBertForMaskedLM,
VecoForMaskedLM)

+4 -2 modelscope/models/audio/tts/am/sambert_hifi_16k.py

@@ -6,6 +6,7 @@ import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MultiLabelBinarizer

from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
@@ -17,7 +18,7 @@ __all__ = ['SambertNetHifi16k']


def multi_label_symbol_to_sequence(my_classes, my_symbol):
one_hot = MultiLabelBinarizer(my_classes)
one_hot = MultiLabelBinarizer(classes=my_classes)
tokens = my_symbol.strip().split(' ')
sequences = []
for token in tokens:
@@ -26,7 +27,8 @@ def multi_label_symbol_to_sequence(my_classes, my_symbol):
return one_hot.fit_transform(sequences)


@MODELS.register_module(Tasks.text_to_speech, module_name=r'sambert_hifi_16k')
@MODELS.register_module(
Tasks.text_to_speech, module_name=Models.sambert_hifi_16k)
class SambertNetHifi16k(Model):

def __init__(self,


+3 -3 modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py

@@ -2,8 +2,7 @@ import os
import zipfile
from typing import Any, Dict, List

import ttsfrd

from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.audio.tts_exceptions import (
@@ -15,11 +14,12 @@ __all__ = ['GenericTtsFrontend']


@MODELS.register_module(
Tasks.text_to_speech, module_name=r'generic_tts_frontend')
Tasks.text_to_speech, module_name=Models.generic_tts_frontend)
class GenericTtsFrontend(Model):

def __init__(self, model_dir='.', lang_type='pinyin', *args, **kwargs):
super().__init__(model_dir, *args, **kwargs)
import ttsfrd
frontend = ttsfrd.TtsFrontendEngine()
zip_file = os.path.join(model_dir, 'resource.zip')
self._res_path = os.path.join(model_dir, 'resource')


+2 -1 modelscope/models/audio/tts/vocoder/hifigan16k.py

@@ -10,6 +10,7 @@ import numpy as np
import torch
from scipy.io.wavfile import write

from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.audio.tts_exceptions import \
@@ -36,7 +37,7 @@ class AttrDict(dict):
self.__dict__ = self


@MODELS.register_module(Tasks.text_to_speech, module_name=r'hifigan16k')
@MODELS.register_module(Tasks.text_to_speech, module_name=Models.hifigan16k)
class Hifigan16k(Model):

def __init__(self, model_dir, *args, **kwargs):


+1 -1 modelscope/models/audio/tts/vocoder/models/models.py

@@ -3,7 +3,6 @@ from distutils.version import LooseVersion
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_wavelets import DWT1DForward
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm

@@ -357,6 +356,7 @@ class MultiScaleDiscriminator(torch.nn.Module):
DiscriminatorS(),
DiscriminatorS(),
])
from pytorch_wavelets import DWT1DForward
self.meanpools = nn.ModuleList(
[DWT1DForward(wave='db3', J=1),
DWT1DForward(wave='db3', J=1)])


+16 -11 modelscope/models/base.py

@@ -4,12 +4,13 @@ import os.path as osp
from abc import ABC, abstractmethod
from typing import Dict, Union

from maas_hub.snapshot_download import snapshot_download

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.builder import build_model
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile
from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.logger import get_logger

logger = get_logger()

Tensor = Union['torch.Tensor', 'tf.Tensor']

@@ -47,21 +48,25 @@ class Model(ABC):
if osp.exists(model_name_or_path):
local_model_dir = model_name_or_path
else:
cache_path = get_model_cache_dir(model_name_or_path)
local_model_dir = cache_path if osp.exists(
cache_path) else snapshot_download(model_name_or_path)
# else:
# raise ValueError(
# 'Remote model repo {model_name_or_path} does not exists')

local_model_dir = snapshot_download(model_name_or_path)
logger.info(f'initialize model from {local_model_dir}')
cfg = Config.from_file(
osp.join(local_model_dir, ModelFile.CONFIGURATION))
task_name = cfg.task
model_cfg = cfg.model
assert hasattr(
cfg, 'pipeline'), 'pipeline config is missing from config file.'
pipeline_cfg = cfg.pipeline
# TODO @wenmeng.zwm: we may need to manually initialize the model after model building
if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'):
model_cfg.type = model_cfg.model_type

model_cfg.model_dir = local_model_dir

for k, v in kwargs.items():
setattr(model_cfg, k, v)
return build_model(model_cfg, task_name)
model = build_model(model_cfg, task_name)

# dynamically add pipeline info to model for pipeline inference
model.pipeline = pipeline_cfg
return model
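
A usage sketch of the updated model-loading path (hedged: the enclosing classmethod's name is not visible in this hunk, and the model id is a placeholder):

from modelscope.models import Model

# assumption: this hunk belongs to Model.from_pretrained
model = Model.from_pretrained('damo/my-model')
print(model.pipeline)  # pipeline config attached above for pipeline inference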

+0 -0 modelscope/models/cv/action_recognition/__init__.py


+91 -0 modelscope/models/cv/action_recognition/models.py

@@ -0,0 +1,91 @@
import torch
import torch.nn as nn

from .tada_convnext import TadaConvNeXt


class BaseVideoModel(nn.Module):
"""
Standard video model.
The model is divided into the backbone and the head, where the backbone
extracts features and the head performs classification.

The backbones can be defined in model/base/backbone.py or anywhere else
as long as the backbone is registered by the BACKBONE_REGISTRY.
The heads can be defined in model/module_zoo/heads/ or anywhere else
as long as the head is registered by the HEAD_REGISTRY.

The registries automatically find the registered modules and construct
the base video model.
"""

def __init__(self, cfg):
"""
Args:
cfg (Config): global config object.
"""
super(BaseVideoModel, self).__init__()
# the backbone is created according to meta-architectures
# defined in models/base/backbone.py
self.backbone = TadaConvNeXt(cfg)

# the head is created according to the heads
# defined in models/module_zoo/heads
self.head = BaseHead(cfg)

def forward(self, x):
x = self.backbone(x)
x = self.head(x)
return x


class BaseHead(nn.Module):
"""
Constructs base head.
"""

def __init__(
self,
cfg,
):
"""
Args:
cfg (Config): global config object.
"""
super(BaseHead, self).__init__()
self.cfg = cfg
dim = cfg.VIDEO.BACKBONE.NUM_OUT_FEATURES
num_classes = cfg.VIDEO.HEAD.NUM_CLASSES
dropout_rate = cfg.VIDEO.HEAD.DROPOUT_RATE
activation_func = cfg.VIDEO.HEAD.ACTIVATION
self._construct_head(dim, num_classes, dropout_rate, activation_func)

def _construct_head(self, dim, num_classes, dropout_rate, activation_func):
self.global_avg_pool = nn.AdaptiveAvgPool3d(1)

if dropout_rate > 0.0:
self.dropout = nn.Dropout(dropout_rate)

self.out = nn.Linear(dim, num_classes, bias=True)

if activation_func == 'softmax':
self.activation = nn.Softmax(dim=-1)
elif activation_func == 'sigmoid':
self.activation = nn.Sigmoid()
else:
raise NotImplementedError('{} is not supported as an activation '
'function.'.format(activation_func))

def forward(self, x):
if len(x.shape) == 5:
x = self.global_avg_pool(x)
# (N, C, T, H, W) -> (N, T, H, W, C).
x = x.permute((0, 2, 3, 4, 1))
if hasattr(self, 'dropout'):
out = self.dropout(x)
else:
out = x
out = self.out(out)
out = self.activation(out)
out = out.view(out.shape[0], -1)
return out, x.view(x.shape[0], -1)
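
A shape-check sketch for BaseHead (hedged: the cfg is mocked with SimpleNamespace; the real project config object differs):

import torch
from types import SimpleNamespace as NS
from modelscope.models.cv.action_recognition.models import BaseHead

cfg = NS(VIDEO=NS(
    BACKBONE=NS(NUM_OUT_FEATURES=768),
    HEAD=NS(NUM_CLASSES=400, DROPOUT_RATE=0.5, ACTIVATION='softmax')))
head = BaseHead(cfg)
logits, feats = head(torch.randn(2, 768, 4, 7, 7))  # (N, C, T, H, W)
print(logits.shape, feats.shape)  # torch.Size([2, 400]) torch.Size([2, 768])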

+472 -0 modelscope/models/cv/action_recognition/tada_convnext.py

@@ -0,0 +1,472 @@
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.utils import _pair, _triple


def drop_path(x, drop_prob: float = 0., training: bool = False):
"""
From https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py.
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
'survival rate' as the argument.
"""
if drop_prob == 0. or not training:
return x
keep_prob = 1 - drop_prob
shape = (x.shape[0], ) + (1, ) * (
x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
random_tensor = keep_prob + torch.rand(
shape, dtype=x.dtype, device=x.device)
random_tensor.floor_() # binarize
output = x.div(keep_prob) * random_tensor
return output


class DropPath(nn.Module):
"""
From https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py.
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
"""

def __init__(self, drop_prob=None):
super(DropPath, self).__init__()
self.drop_prob = drop_prob

def forward(self, x):
return drop_path(x, self.drop_prob, self.training)
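
# A small numeric sketch of what drop_path does per sample (hedged: illustrative only):
#   x = torch.ones(4, 3, 8, 8)
#   y = drop_path(x, drop_prob=0.5, training=True)
#   each sample in the batch is either zeroed out or rescaled by 1/keep_prob,
#   so y[:, 0, 0, 0] contains only 0.0 or 2.0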


class TadaConvNeXt(nn.Module):
r""" ConvNeXt
A PyTorch impl of : `A ConvNet for the 2020s` -
https://arxiv.org/pdf/2201.03545.pdf

Args:
in_chans (int): Number of input image channels. Default: 3
num_classes (int): Number of classes for classification head. Default: 1000
depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
drop_path_rate (float): Stochastic depth rate. Default: 0.
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
"""

def __init__(
self, cfg
# in_chans=3, num_classes=1000,
# depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
# layer_scale_init_value=1e-6, head_init_scale=1.,
):
super().__init__()
in_chans = cfg.VIDEO.BACKBONE.NUM_INPUT_CHANNELS
dims = cfg.VIDEO.BACKBONE.NUM_FILTERS
drop_path_rate = cfg.VIDEO.BACKBONE.DROP_PATH
depths = cfg.VIDEO.BACKBONE.DEPTH
layer_scale_init_value = cfg.VIDEO.BACKBONE.LARGE_SCALE_INIT_VALUE
stem_t_kernel_size = cfg.VIDEO.BACKBONE.STEM.T_KERNEL_SIZE if hasattr(
cfg.VIDEO.BACKBONE.STEM, 'T_KERNEL_SIZE') else 2
t_stride = cfg.VIDEO.BACKBONE.STEM.T_STRIDE if hasattr(
cfg.VIDEO.BACKBONE.STEM, 'T_STRIDE') else 2

self.downsample_layers = nn.ModuleList(
) # stem and 3 intermediate downsampling conv layers
stem = nn.Sequential(
nn.Conv3d(
in_chans,
dims[0],
kernel_size=(stem_t_kernel_size, 4, 4),
stride=(t_stride, 4, 4),
padding=((stem_t_kernel_size - 1) // 2, 0, 0)),
LayerNorm(dims[0], eps=1e-6, data_format='channels_first'))
self.downsample_layers.append(stem)
for i in range(3):
downsample_layer = nn.Sequential(
LayerNorm(dims[i], eps=1e-6, data_format='channels_first'),
nn.Conv3d(
dims[i],
dims[i + 1],
kernel_size=(1, 2, 2),
stride=(1, 2, 2)),
)
self.downsample_layers.append(downsample_layer)

self.stages = nn.ModuleList(
) # 4 feature resolution stages, each consisting of multiple residual blocks
dp_rates = [
x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
]
cur = 0
for i in range(4):
stage = nn.Sequential(*[
TAdaConvNeXtBlock(
cfg,
dim=dims[i],
drop_path=dp_rates[cur + j],
layer_scale_init_value=layer_scale_init_value)
for j in range(depths[i])
])
self.stages.append(stage)
cur += depths[i]

self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer

def forward_features(self, x):
for i in range(4):
x = self.downsample_layers[i](x)
x = self.stages[i](x)
return self.norm(x.mean(
    [-3, -2, -1]))  # global average pooling, (N, C, T, H, W) -> (N, C)

def forward(self, x):
if isinstance(x, dict):
x = x['video']
x = self.forward_features(x)
return x

def get_num_layers(self):
return 12, 0


class ConvNeXtBlock(nn.Module):
r""" ConvNeXt Block. There are two equivalent implementations:
(1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, T, H, W)
(2) DwConv -> Permute to (N, T, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
We use (2) as we find it slightly faster in PyTorch

Args:
dim (int): Number of input channels.
drop_path (float): Stochastic depth rate. Default: 0.0
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
"""

def __init__(self, cfg, dim, drop_path=0., layer_scale_init_value=1e-6):
super().__init__()
self.dwconv = nn.Conv3d(
dim, dim, kernel_size=(1, 7, 7), padding=(0, 3, 3),
groups=dim) # depthwise conv
self.norm = LayerNorm(dim, eps=1e-6)
self.pwconv1 = nn.Linear(
dim,
4 * dim) # pointwise/1x1 convs, implemented with linear layers
self.act = nn.GELU()
self.pwconv2 = nn.Linear(4 * dim, dim)
self.gamma = nn.Parameter(
layer_scale_init_value * torch.ones((dim)),
requires_grad=True) if layer_scale_init_value > 0 else None
self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()

def forward(self, x):
input = x
x = self.dwconv(x)
x = x.permute(0, 2, 3, 4, 1) # (N, C, T, H, W) -> (N, T, H, W, C)
x = self.norm(x)
x = self.pwconv1(x)
x = self.act(x)
x = self.pwconv2(x)
if self.gamma is not None:
x = self.gamma * x
x = x.permute(0, 4, 1, 2, 3) # (N, T, H, W, C) -> (N, C, T, H, W)

x = input + self.drop_path(x)
return x


class LayerNorm(nn.Module):
r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
The ordering of the dimensions in the inputs. channels_last corresponds to inputs
with channels as the last dimension, e.g. (batch, time, height, width, channels),
while channels_first puts channels at dimension 1, e.g. (batch, channels, time,
height, width), as used for the 5D video tensors in this file.
"""

def __init__(self,
normalized_shape,
eps=1e-6,
data_format='channels_last'):
super().__init__()
self.weight = nn.Parameter(torch.ones(normalized_shape))
self.bias = nn.Parameter(torch.zeros(normalized_shape))
self.eps = eps
self.data_format = data_format
if self.data_format not in ['channels_last', 'channels_first']:
raise NotImplementedError
self.normalized_shape = (normalized_shape, )

def forward(self, x):
if self.data_format == 'channels_last':
return F.layer_norm(x, self.normalized_shape, self.weight,
self.bias, self.eps)
elif self.data_format == 'channels_first':
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
x = self.weight[:, None, None, None] * x + self.bias[:, None, None,
None]
return x
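# Illustrative equivalence sketch: on 5D video tensors, the channels_first
# branch matches F.layer_norm applied with channels moved to the last dim.
_ln = LayerNorm(8, data_format='channels_first')
_x = torch.randn(2, 8, 4, 4, 4)
_ref = F.layer_norm(
    _x.permute(0, 2, 3, 4, 1), (8, ), _ln.weight, _ln.bias, _ln.eps)
assert torch.allclose(_ln(_x), _ref.permute(0, 4, 1, 2, 3), atol=1e-5)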


class TAdaConvNeXtBlock(nn.Module):
r""" ConvNeXt Block. There are two equivalent implementations:
(1) DwConv -> LayerNorm (channels_fi rst) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
(2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
We use (2) as we find it slightly faster in PyTorch

Args:
dim (int): Number of input channels.
drop_path (float): Stochastic depth rate. Default: 0.0
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
"""

def __init__(self, cfg, dim, drop_path=0., layer_scale_init_value=1e-6):
super().__init__()
layer_scale_init_value = float(layer_scale_init_value)
self.dwconv = TAdaConv2d(
dim,
dim,
kernel_size=(1, 7, 7),
padding=(0, 3, 3),
groups=dim,
cal_dim='cout')
route_func_type = cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_TYPE
if route_func_type == 'normal':
self.dwconv_rf = RouteFuncMLP(
c_in=dim,
ratio=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_R,
kernels=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_K,
with_bias_cal=self.dwconv.bias is not None)
elif route_func_type == 'normal_lngelu':
self.dwconv_rf = RouteFuncMLPLnGelu(
c_in=dim,
ratio=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_R,
kernels=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_K,
with_bias_cal=self.dwconv.bias is not None)
else:
raise ValueError(
'Unknown route_func_type: {}'.format(route_func_type))
self.norm = LayerNorm(dim, eps=1e-6)
self.pwconv1 = nn.Linear(
dim,
4 * dim) # pointwise/1x1 convs, implemented with linear layers
self.act = nn.GELU()
self.pwconv2 = nn.Linear(4 * dim, dim)
self.gamma = nn.Parameter(
layer_scale_init_value * torch.ones((dim)),
requires_grad=True) if layer_scale_init_value > 0 else None
self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()

def forward(self, x):
input = x
x = self.dwconv(x, self.dwconv_rf(x))
x = x.permute(0, 2, 3, 4, 1) # (N, C, T, H, W) -> (N, T, H, W, C)
x = self.norm(x)
x = self.pwconv1(x)
x = self.act(x)
x = self.pwconv2(x)
if self.gamma is not None:
x = self.gamma * x
x = x.permute(0, 4, 1, 2, 3) # (N, T, H, W, C) -> (N, C, T, H, W)

x = input + self.drop_path(x)
return x


class RouteFuncMLPLnGelu(nn.Module):
"""
The routing function for generating the calibration weights, using LayerNorm
and GELU in place of BatchNorm and ReLU.
"""

def __init__(self,
c_in,
ratio,
kernels,
with_bias_cal=False,
bn_eps=1e-5,
bn_mmt=0.1):
"""
Args:
c_in (int): number of input channels.
ratio (int): reduction ratio for the routing function.
kernels (list): temporal kernel sizes of the stacked 1D convolutions.
with_bias_cal (bool): whether to also generate calibration weights
    for the convolution bias.
"""
super(RouteFuncMLPLnGelu, self).__init__()
self.c_in = c_in
self.with_bias_cal = with_bias_cal
self.avgpool = nn.AdaptiveAvgPool3d((None, 1, 1))
self.globalpool = nn.AdaptiveAvgPool3d(1)
self.g = nn.Conv3d(
in_channels=c_in,
out_channels=c_in,
kernel_size=1,
padding=0,
)
self.a = nn.Conv3d(
in_channels=c_in,
out_channels=int(c_in // ratio),
kernel_size=[kernels[0], 1, 1],
padding=[kernels[0] // 2, 0, 0],
)
# self.bn = nn.BatchNorm3d(int(c_in//ratio), eps=bn_eps, momentum=bn_mmt)
self.ln = LayerNorm(
int(c_in // ratio), eps=1e-6, data_format='channels_first')
self.gelu = nn.GELU()
# self.relu = nn.ReLU(inplace=True)
self.b = nn.Conv3d(
in_channels=int(c_in // ratio),
out_channels=c_in,
kernel_size=[kernels[1], 1, 1],
padding=[kernels[1] // 2, 0, 0],
bias=False)
self.b.skip_init = True
self.b.weight.data.zero_()  # zero init so that the initial
# calibration output is exactly 1.
if with_bias_cal:
self.b_bias = nn.Conv3d(
in_channels=int(c_in // ratio),
out_channels=c_in,
kernel_size=[kernels[1], 1, 1],
padding=[kernels[1] // 2, 0, 0],
bias=False)
self.b_bias.skip_init = True
self.b_bias.weight.data.zero_()  # zero init so that the initial
# calibration output is exactly 1.

def forward(self, x):
g = self.globalpool(x)
x = self.avgpool(x)
x = self.a(x + self.g(g))
# x = self.bn(x)
# x = self.relu(x)
x = self.ln(x)
x = self.gelu(x)
if self.with_bias_cal:
return [self.b(x) + 1, self.b_bias(x) + 1]
else:
return self.b(x) + 1


class TAdaConv2d(nn.Module):
"""
Performs temporally adaptive 2D convolution.
Currently, only application on 5D tensors is supported, which makes TAdaConv2d
essentially a 3D convolution with temporal kernel size of 1.
"""

def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
cal_dim='cin'):
super(TAdaConv2d, self).__init__()
"""
Args:
in_channels (int): number of input channels.
out_channels (int): number of output channels.
kernel_size (list): kernel size of TAdaConv2d.
stride (list): stride for the convolution in TAdaConv2d.
padding (list): padding for the convolution in TAdaConv2d.
dilation (list): dilation of the convolution in TAdaConv2d.
groups (int): number of groups for TAdaConv2d.
bias (bool): whether to use bias in TAdaConv2d.
cal_dim (str): calibrated dimension in TAdaConv2d.
    Supported values: "cin", "cout".
"""

kernel_size = _triple(kernel_size)
stride = _triple(stride)
padding = _triple(padding)
dilation = _triple(dilation)

assert kernel_size[0] == 1
assert stride[0] == 1
assert padding[0] == 0
assert dilation[0] == 1
assert cal_dim in ['cin', 'cout']

self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
self.cal_dim = cal_dim

# base weights (W_b)
self.weight = nn.Parameter(
torch.Tensor(1, 1, out_channels, in_channels // groups,
kernel_size[1], kernel_size[2]))
if bias:
self.bias = nn.Parameter(torch.Tensor(1, 1, out_channels))
else:
self.register_parameter('bias', None)

nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in)
nn.init.uniform_(self.bias, -bound, bound)

def forward(self, x, alpha):
"""
Args:
x (tensor): feature to perform convolution on.
alpha (tensor): calibration weight for the base weights.
W_t = alpha_t * W_b
"""
if isinstance(alpha, list):
w_alpha, b_alpha = alpha[0], alpha[1]
else:
w_alpha = alpha
b_alpha = None
_, _, c_out, c_in, kh, kw = self.weight.size()
b, c_in, t, h, w = x.size()
x = x.permute(0, 2, 1, 3, 4).reshape(1, -1, h, w)

if self.cal_dim == 'cin':
# w_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, 1, C, H(1), W(1)
# corresponding to calibrating the input channel
weight = (w_alpha.permute(0, 2, 1, 3, 4).unsqueeze(2)
* self.weight).reshape(-1, c_in // self.groups, kh, kw)
elif self.cal_dim == 'cout':
# w_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, C, 1, H(1), W(1)
# corresponding to calibrating the output channel
weight = (w_alpha.permute(0, 2, 1, 3, 4).unsqueeze(3)
* self.weight).reshape(-1, c_in // self.groups, kh, kw)

bias = None
if self.bias is not None:
if b_alpha is not None:
# b_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, C
bias = (b_alpha.permute(0, 2, 1, 3, 4).squeeze()
* self.bias).reshape(-1)
else:
bias = self.bias.repeat(b, t, 1).reshape(-1)
output = F.conv2d(
x,
weight=weight,
bias=bias,
stride=self.stride[1:],
padding=self.padding[1:],
dilation=self.dilation[1:],
groups=self.groups * b * t)

output = output.view(b, t, c_out, output.size(-2),
output.size(-1)).permute(0, 2, 1, 3, 4)

return output

def __repr__(self):
return f'TAdaConv2d({self.in_channels}, {self.out_channels}, kernel_size={self.kernel_size}, ' +\
f"stride={self.stride}, padding={self.padding}, bias={self.bias is not None}, cal_dim=\"{self.cal_dim}\")"

+1 -0 modelscope/models/multi_model/__init__.py

@@ -0,0 +1 @@
from .image_captioning_model import OfaForImageCaptioning

+80 -0 modelscope/models/multi_model/image_captioning_model.py

@@ -0,0 +1,80 @@
import os.path as osp
from typing import Any, Dict

from PIL import Image

from modelscope.metainfo import Models
from modelscope.utils.constant import ModelFile, Tasks
from ..base import Model
from ..builder import MODELS

__all__ = ['OfaForImageCaptioning']


@MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa)
class OfaForImageCaptioning(Model):

def __init__(self, model_dir, *args, **kwargs):
super().__init__(model_dir=model_dir, *args, **kwargs)
ckpt_name = ModelFile.TORCH_MODEL_FILE
local_model = osp.join(model_dir, ckpt_name)
bpe_dir = model_dir
# turn on cuda if GPU is available
from fairseq import checkpoint_utils, tasks, utils
from ofa.tasks.mm_tasks import CaptionTask
from ofa.utils.eval_utils import eval_caption
self.eval_caption = eval_caption

tasks.register_task('caption', CaptionTask)
use_cuda = kwargs.get('use_cuda', False)
use_fp16 = use_cuda and kwargs.get('use_fp16', False)
overrides = {
'bpe_dir': bpe_dir,
'eval_cider': False,
'beam': 5,
'max_len_b': 16,
'no_repeat_ngram_size': 3,
'seed': 7
}
models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
utils.split_paths(local_model), arg_overrides=overrides)

# Move models to GPU
for model in models:
model.eval()
if use_cuda:
model.cuda()
if use_fp16:
model.half()
model.prepare_for_inference_(cfg)
self.models = models
# Initialize generator
self.generator = task.build_generator(models, cfg.generation)

# Initialize transform
from torchvision import transforms
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

self.patch_resize_transform = transforms.Compose([
lambda image: image.convert('RGB'),
transforms.Resize(
(cfg.task.patch_image_size, cfg.task.patch_image_size),
interpolation=Image.BICUBIC),
transforms.ToTensor(),
transforms.Normalize(mean=mean, std=std),
])
self.task = task

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
results, _ = self.eval_caption(self.task, self.generator, self.models,
input)
return {
'image_id': results[0]['image_id'],
'caption': results[0]['caption']
}

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
# forward already returns the final caption; nothing further to do here
return inputs
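
For orientation, a hedged end-to-end sketch of how this model is reached
through the pipeline API; the model id is the image-captioning default
registered in builder.py below, and the image path is a placeholder:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

captioner = pipeline(
    task=Tasks.image_captioning,
    model='damo/ofa_image-caption_coco_large_en')
print(captioner('path/to/image.jpg')['caption'])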

+3 -0 modelscope/models/nlp/__init__.py

@@ -1,6 +1,9 @@
from .bert_for_sequence_classification import * # noqa F403
from .masked_language_model import * # noqa F403
from .palm_for_text_generation import * # noqa F403
from .sbert_for_nli import * # noqa F403
from .sbert_for_sentence_similarity import * # noqa F403
from .sbert_for_sentiment_classification import * # noqa F403
from .sbert_for_token_classification import * # noqa F403
from .space.dialog_intent_prediction_model import * # noqa F403
from .space.dialog_modeling_model import * # noqa F403


+2 -2 modelscope/models/nlp/bert_for_sequence_classification.py

@@ -4,6 +4,7 @@ from typing import Any, Dict
import json
import numpy as np

from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from ..base import Model
from ..builder import MODELS
@@ -11,8 +12,7 @@ from ..builder import MODELS
__all__ = ['BertForSequenceClassification']


@MODELS.register_module(
Tasks.text_classification, module_name=r'bert-sentiment-analysis')
@MODELS.register_module(Tasks.text_classification, module_name=Models.bert)
class BertForSequenceClassification(Model):

def __init__(self, model_dir: str, *args, **kwargs):


+63 -0 modelscope/models/nlp/masked_language_model.py

@@ -0,0 +1,63 @@
from typing import Any, Dict, Optional, Union

import numpy as np

from ...metainfo import Models
from ...utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS

__all__ = ['StructBertForMaskedLM', 'VecoForMaskedLM']


class MaskedLanguageModelBase(Model):

def __init__(self, model_dir: str, *args, **kwargs):
super().__init__(model_dir, *args, **kwargs)
self.model = self.build_model()

def build_model(self):
raise NotImplementedError()

def train(self):
return self.model.train()

def eval(self):
return self.model.eval()

@property
def config(self):
if hasattr(self.model, 'config'):
return self.model.config
return None

def forward(self, input: Dict[str, Tensor]) -> Dict[str, np.ndarray]:
"""return the result by the model

Args:
input (Dict[str, Any]): the preprocessed data

Returns:
Dict[str, np.ndarray]: results
"""
rst = self.model(
input_ids=input['input_ids'],
attention_mask=input['attention_mask'],
token_type_ids=input['token_type_ids'])
return {'logits': rst['logits'], 'input_ids': input['input_ids']}


@MODELS.register_module(Tasks.fill_mask, module_name=Models.structbert)
class StructBertForMaskedLM(MaskedLanguageModelBase):

def build_model(self):
from sofa import SbertForMaskedLM
return SbertForMaskedLM.from_pretrained(self.model_dir)


@MODELS.register_module(Tasks.fill_mask, module_name=Models.veco)
class VecoForMaskedLM(MaskedLanguageModelBase):

def build_model(self):
from sofa import VecoForMaskedLM
return VecoForMaskedLM.from_pretrained(self.model_dir)
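
A smoke-test sketch for the classes above (the checkpoint directory and token
ids are placeholders; requires the `sofa` package):

import torch

model = StructBertForMaskedLM('/path/to/structbert')  # placeholder model dir
model.eval()
batch = {
    'input_ids': torch.tensor([[101, 103, 102]]),  # [CLS] [MASK] [SEP]
    'attention_mask': torch.tensor([[1, 1, 1]]),
    'token_type_ids': torch.tensor([[0, 0, 0]]),
}
out = model.forward(batch)  # {'logits': (1, 3, vocab_size), 'input_ids': ...}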

+9 -3 modelscope/models/nlp/palm_for_text_generation.py

@@ -1,13 +1,14 @@
from typing import Dict

from modelscope.utils.constant import Tasks
from ...metainfo import Models
from ...utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS

__all__ = ['PalmForTextGeneration']


@MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0')
@MODELS.register_module(Tasks.text_generation, module_name=Models.palm)
class PalmForTextGeneration(Model):

def __init__(self, model_dir: str, *args, **kwargs):
@@ -19,13 +20,18 @@ class PalmForTextGeneration(Model):
default loader to load model weights, by default None.
"""
super().__init__(model_dir, *args, **kwargs)
self.model_dir = model_dir

from sofa.models.palm_v2 import PalmForConditionalGeneration, Translator
model = PalmForConditionalGeneration.from_pretrained(model_dir)
self.tokenizer = model.tokenizer
self.generator = Translator(model)

def train(self):
return self.generator.train()

def eval(self):
return self.generator.eval()

def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
"""return the result by the model



+23 -0 modelscope/models/nlp/sbert_for_nli.py

@@ -0,0 +1,23 @@
from ...metainfo import Models
from ...utils.constant import Tasks
from ..builder import MODELS
from .sbert_for_sequence_classification import \
SbertForSequenceClassificationBase

__all__ = ['SbertForNLI']


@MODELS.register_module(Tasks.nli, module_name=Models.structbert)
class SbertForNLI(SbertForSequenceClassificationBase):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the text generation model from the `model_dir` path.

Args:
model_dir (str): the model path.
model_cls (Optional[Any], optional): model loader, if None, use the
default loader to load model weights, by default None.
"""
super().__init__(
model_dir, *args, model_args={'num_labels': 3}, **kwargs)
assert self.model.config.num_labels == 3

+8 -71 modelscope/models/nlp/sbert_for_sentence_similarity.py

@@ -1,46 +1,15 @@
import os
from typing import Any, Dict

import json
import numpy as np
import torch
from sofa import SbertModel
from sofa.models.sbert.modeling_sbert import SbertPreTrainedModel
from torch import nn

from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS
from .sbert_for_sequence_classification import \
SbertForSequenceClassificationBase

__all__ = ['SbertForSentenceSimilarity']


class SbertTextClassifier(SbertPreTrainedModel):

def __init__(self, config):
super().__init__(config)
self.num_labels = config.num_labels
self.config = config
self.encoder = SbertModel(config, add_pooling_layer=True)
self.dropout = nn.Dropout(config.hidden_dropout_prob)
self.classifier = nn.Linear(config.hidden_size, config.num_labels)

def forward(self, input_ids=None, token_type_ids=None):
outputs = self.encoder(
input_ids,
token_type_ids=token_type_ids,
return_dict=None,
)
pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output)
logits = self.classifier(pooled_output)
return logits


@MODELS.register_module(
Tasks.sentence_similarity,
module_name=r'sbert-base-chinese-sentence-similarity')
class SbertForSentenceSimilarity(Model):
Tasks.sentence_similarity, module_name=Models.structbert)
class SbertForSentenceSimilarity(SbertForSequenceClassificationBase):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the sentence similarity model from the `model_dir` path.
@@ -50,39 +19,7 @@ class SbertForSentenceSimilarity(Model):
model_cls (Optional[Any], optional): model loader, if None, use the
default loader to load model weights, by default None.
"""
super().__init__(model_dir, *args, **kwargs)
super().__init__(
model_dir, *args, model_args={'num_labels': 2}, **kwargs)
self.model_dir = model_dir

self.model = SbertTextClassifier.from_pretrained(
model_dir, num_labels=2)
self.model.eval()
self.label_path = os.path.join(self.model_dir, 'label_mapping.json')
with open(self.label_path) as f:
self.label_mapping = json.load(f)
self.id2label = {idx: name for name, idx in self.label_mapping.items()}

def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
"""return the result by the model

Args:
input (Dict[str, Any]): the preprocessed data

Returns:
Dict[str, np.ndarray]: results
Example:
{
'predictions': array([1]), # label 0-negative 1-positive
'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32),
'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value
}
"""
input_ids = torch.tensor(input['input_ids'], dtype=torch.long)
token_type_ids = torch.tensor(
input['token_type_ids'], dtype=torch.long)
with torch.no_grad():
logits = self.model(input_ids, token_type_ids)
probs = logits.softmax(-1).numpy()
pred = logits.argmax(-1).numpy()
logits = logits.numpy()
res = {'predictions': pred, 'probabilities': probs, 'logits': logits}
return res
assert self.model.config.num_labels == 2

+24 -0 modelscope/models/nlp/sbert_for_sentiment_classification.py

@@ -0,0 +1,24 @@
from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from ..builder import MODELS
from .sbert_for_sequence_classification import \
SbertForSequenceClassificationBase

__all__ = ['SbertForSentimentClassification']


@MODELS.register_module(
Tasks.sentiment_classification, module_name=Models.structbert)
class SbertForSentimentClassification(SbertForSequenceClassificationBase):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the text generation model from the `model_dir` path.

Args:
model_dir (str): the model path.
model_cls (Optional[Any], optional): model loader, if None, use the
default loader to load model weights, by default None.
"""
super().__init__(
model_dir, *args, model_args={'num_labels': 2}, **kwargs)
assert self.model.config.num_labels == 2

+71 -0 modelscope/models/nlp/sbert_for_sequence_classification.py

@@ -0,0 +1,71 @@
import os
from typing import Any, Dict

import json
import numpy as np
import torch
from sofa.models.sbert.modeling_sbert import SbertModel, SbertPreTrainedModel
from torch import nn

from ..base import Model


class SbertTextClassifier(SbertPreTrainedModel):

def __init__(self, config):
super().__init__(config)
self.num_labels = config.num_labels
self.config = config
self.encoder = SbertModel(config, add_pooling_layer=True)
self.dropout = nn.Dropout(config.hidden_dropout_prob)
self.classifier = nn.Linear(config.hidden_size, config.num_labels)

def forward(self, input_ids=None, token_type_ids=None):
outputs = self.encoder(
input_ids,
token_type_ids=token_type_ids,
return_dict=None,
)
pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output)
logits = self.classifier(pooled_output)
return {'logits': logits}


class SbertForSequenceClassificationBase(Model):

def __init__(self, model_dir: str, model_args=None, *args, **kwargs):
super().__init__(model_dir, *args, **kwargs)
if model_args is None:
model_args = {}
self.model = SbertTextClassifier.from_pretrained(
    model_dir, **model_args)
self.id2label = {}
self.label_path = os.path.join(self.model_dir, 'label_mapping.json')
if os.path.exists(self.label_path):
with open(self.label_path) as f:
self.label_mapping = json.load(f)
self.id2label = {
idx: name
for name, idx in self.label_mapping.items()
}

def train(self):
return self.model.train()

def eval(self):
return self.model.eval()

def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
input_ids = torch.tensor(input['input_ids'], dtype=torch.long)
token_type_ids = torch.tensor(
input['token_type_ids'], dtype=torch.long)
return self.model.forward(input_ids, token_type_ids)

def postprocess(self, input, **kwargs):
logits = input['logits']
probs = logits.softmax(-1).numpy()
pred = logits.argmax(-1).numpy()
logits = logits.numpy()
res = {'predictions': pred, 'probabilities': probs, 'logits': logits}
return res
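
To make the postprocess contract concrete, a small numeric sketch (the values
mirror the example that previously lived in the sentence-similarity docstring):

import torch

logits = torch.tensor([[-0.5386, 1.5029]])
probs = logits.softmax(-1)  # tensor([[0.1149, 0.8851]])
pred = logits.argmax(-1)    # tensor([1]) -> id2label maps 1 to its label name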

+18 -11 modelscope/models/nlp/sbert_for_token_classification.py

@@ -2,19 +2,17 @@ from typing import Any, Dict, Union

import numpy as np
import torch
from sofa import SbertConfig, SbertForTokenClassification

from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS

__all__ = ['StructBertForTokenClassification']
__all__ = ['SbertForTokenClassification']


@MODELS.register_module(
Tasks.word_segmentation,
module_name=r'structbert-chinese-word-segmentation')
class StructBertForTokenClassification(Model):
@MODELS.register_module(Tasks.word_segmentation, module_name=Models.structbert)
class SbertForTokenClassification(Model):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the word segmentation model from the `model_dir` path.
@@ -26,9 +24,16 @@ class StructBertForTokenClassification(Model):
"""
super().__init__(model_dir, *args, **kwargs)
self.model_dir = model_dir
self.model = SbertForTokenClassification.from_pretrained(
import sofa
self.model = sofa.SbertForTokenClassification.from_pretrained(
self.model_dir)
self.config = SbertConfig.from_pretrained(self.model_dir)
self.config = sofa.SbertConfig.from_pretrained(self.model_dir)

def train(self):
return self.model.train()

def eval(self):
return self.model.eval()

def forward(self, input: Dict[str,
Any]) -> Dict[str, Union[str, np.ndarray]]:
@@ -47,10 +52,12 @@ class StructBertForTokenClassification(Model):
}
"""
input_ids = torch.tensor(input['input_ids']).unsqueeze(0)
output = self.model(input_ids)
logits = output.logits
return {**self.model(input_ids), 'text': input['text']}

def postprocess(self, input: Dict[str, Tensor],
**kwargs) -> Dict[str, Tensor]:
logits = input['logits']
pred = torch.argmax(logits[0], dim=-1)
pred = pred.numpy()

rst = {'predictions': pred, 'logits': logits, 'text': input['text']}
return rst

+5 -7 modelscope/models/nlp/space/dialog_intent_prediction_model.py

@@ -1,11 +1,10 @@
import os
from typing import Any, Dict

from modelscope.preprocessors.space.fields.intent_field import \
IntentBPETextField
from modelscope.trainers.nlp.space.trainers.intent_trainer import IntentTrainer
from modelscope.utils.config import Config
from modelscope.utils.constant import Tasks
from ....preprocessors.space.fields.intent_field import IntentBPETextField
from ....trainers.nlp.space.trainers.intent_trainer import IntentTrainer
from ....utils.config import Config
from ....utils.constant import Tasks
from ...base import Model, Tensor
from ...builder import MODELS
from .model.generator import Generator
@@ -14,8 +13,7 @@ from .model.model_base import ModelBase
__all__ = ['DialogIntentModel']


@MODELS.register_module(
Tasks.dialog_intent_prediction, module_name=r'space-intent')
@MODELS.register_module(Tasks.dialog_intent_prediction, module_name=r'space')
class DialogIntentModel(Model):

def __init__(self, model_dir: str, *args, **kwargs):


+5 -6 modelscope/models/nlp/space/dialog_modeling_model.py

@@ -1,11 +1,10 @@
import os
from typing import Any, Dict, Optional

from modelscope.preprocessors.space.fields.gen_field import \
MultiWOZBPETextField
from modelscope.trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer
from modelscope.utils.config import Config
from modelscope.utils.constant import Tasks
from ....preprocessors.space.fields.gen_field import MultiWOZBPETextField
from ....trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer
from ....utils.config import Config
from ....utils.constant import Tasks
from ...base import Model, Tensor
from ...builder import MODELS
from .model.generator import Generator
@@ -14,7 +13,7 @@ from .model.model_base import ModelBase
__all__ = ['DialogModelingModel']


@MODELS.register_module(Tasks.dialog_modeling, module_name=r'space-modeling')
@MODELS.register_module(Tasks.dialog_modeling, module_name=r'space')
class DialogModelingModel(Model):

def __init__(self, model_dir: str, *args, **kwargs):


+1 -1 modelscope/models/nlp/space/dialog_state_tracking.py

@@ -11,7 +11,7 @@ from .model.model_base import ModelBase
__all__ = ['DialogStateTrackingModel']


@MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space-dst')
@MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space')
class DialogStateTrackingModel(Model):

def __init__(self, model_dir: str, *args, **kwargs):


+1 -2 modelscope/models/nlp/space/model/gen_unified_transformer.py

@@ -3,8 +3,7 @@ IntentUnifiedTransformer
"""
import torch

from modelscope.models.nlp.space.model.unified_transformer import \
UnifiedTransformer
from .unified_transformer import UnifiedTransformer


class GenUnifiedTransformer(UnifiedTransformer):


+1 -1 modelscope/models/nlp/space/model/intent_unified_transformer.py

@@ -5,7 +5,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F

from modelscope.utils.nlp.space.criterions import compute_kl_loss
from .....utils.nlp.space.criterions import compute_kl_loss
from .unified_transformer import UnifiedTransformer




+3 -4 modelscope/models/nlp/space/model/unified_transformer.py

@@ -7,10 +7,9 @@ import torch
import torch.nn as nn
import torch.nn.functional as F

from modelscope.models.nlp.space.model.model_base import ModelBase
from modelscope.models.nlp.space.modules.embedder import Embedder
from modelscope.models.nlp.space.modules.transformer_block import \
TransformerBlock
from ..modules.embedder import Embedder
from ..modules.transformer_block import TransformerBlock
from .model_base import ModelBase


class UnifiedTransformer(ModelBase):


+2 -3 modelscope/models/nlp/space/modules/transformer_block.py

@@ -5,9 +5,8 @@ TransformerBlock class.
import torch
import torch.nn as nn

from modelscope.models.nlp.space.modules.feedforward import FeedForward
from modelscope.models.nlp.space.modules.multihead_attention import \
MultiheadAttention
from .feedforward import FeedForward
from .multihead_attention import MultiheadAttention


class TransformerBlock(nn.Module):


+1 -4 modelscope/pipelines/__init__.py

@@ -1,7 +1,4 @@
from .audio import LinearAECPipeline
# from .audio import LinearAECPipeline
from .base import Pipeline
from .builder import pipeline
from .cv import * # noqa F403
from .multi_modal import * # noqa F403
from .nlp import * # noqa F403
from .nlp.space import * # noqa F403

+3 -1 modelscope/pipelines/audio/linear_aec_pipeline.py

@@ -7,6 +7,7 @@ import scipy.io.wavfile as wav
import torch
import yaml

from modelscope.metainfo import Pipelines
from modelscope.preprocessors.audio import LinearAECAndFbank
from modelscope.utils.constant import ModelFile, Tasks
from ..base import Pipeline
@@ -39,7 +40,8 @@ def initialize_config(module_cfg):


@PIPELINES.register_module(
Tasks.speech_signal_process, module_name=r'speech_dfsmn_aec_psm_16k')
Tasks.speech_signal_process,
module_name=Pipelines.speech_dfsmn_aec_psm_16k)
class LinearAECPipeline(Pipeline):
r"""AEC Inference Pipeline only support 16000 sample rate.



+2 -1 modelscope/pipelines/audio/text_to_speech_pipeline.py

@@ -3,6 +3,7 @@ from typing import Any, Dict, List

import numpy as np

from modelscope.metainfo import Pipelines
from modelscope.models import Model
from modelscope.models.audio.tts.am import SambertNetHifi16k
from modelscope.models.audio.tts.vocoder import Hifigan16k
@@ -15,7 +16,7 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline']


@PIPELINES.register_module(
Tasks.text_to_speech, module_name=r'tts-sambert-hifigan-16k')
Tasks.text_to_speech, module_name=Pipelines.sambert_hifigan_16k_tts)
class TextToSpeechSambertHifigan16kPipeline(Pipeline):

def __init__(self,


+8 -14 modelscope/pipelines/base.py

@@ -4,19 +4,17 @@ import os.path as osp
from abc import ABC, abstractmethod
from typing import Any, Dict, Generator, List, Union

from maas_hub.snapshot_download import snapshot_download

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.base import Model
from modelscope.preprocessors import Preprocessor
from modelscope.pydatasets import PyDataset
from modelscope.utils.config import Config
from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.logger import get_logger
from .outputs import TASK_OUTPUTS
from .util import is_model_name
from .util import is_model, is_official_hub_path

Tensor = Union['torch.Tensor', 'tf.Tensor']
Input = Union[str, tuple, dict, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
InputModel = Union[str, Model]

output_keys = [
@@ -29,14 +27,10 @@ class Pipeline(ABC):

def initiate_single_model(self, model):
logger.info(f'initiate model from {model}')
# TODO @wenmeng.zwm replace model.startswith('damo/') with get_model
if isinstance(model, str) and model.startswith('damo/'):
if not osp.exists(model):
cache_path = get_model_cache_dir(model)
model = cache_path if osp.exists(
cache_path) else snapshot_download(model)
return Model.from_pretrained(model) if is_model_name(
model) else model
if isinstance(model, str) and is_official_hub_path(model):
model = snapshot_download(
model) if not osp.exists(model) else model
return Model.from_pretrained(model) if is_model(model) else model
elif isinstance(model, Model):
return model
else:
@@ -104,7 +98,7 @@ class Pipeline(ABC):

def _process_single(self, input: Input, *args,
**post_kwargs) -> Dict[str, Any]:
out = self.preprocess(input, **post_kwargs)
out = self.preprocess(input)
out = self.forward(out)
out = self.postprocess(out, **post_kwargs)
self._check_output(out)


+49 -23 modelscope/pipelines/builder.py

@@ -1,33 +1,49 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os.path as osp
from typing import List, Union

from modelscope.metainfo import Pipelines
from modelscope.models.base import Model
from modelscope.utils.config import Config, ConfigDict
from modelscope.utils.constant import Tasks
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.hub import read_config
from modelscope.utils.registry import Registry, build_from_cfg
from .base import Pipeline
from .util import is_official_hub_path

PIPELINES = Registry('pipelines')

DEFAULT_MODEL_FOR_PIPELINE = {
# TaskName: (pipeline_module_name, model_repo)
Tasks.word_segmentation:
('structbert-chinese-word-segmentation',
(Pipelines.word_segmentation,
'damo/nlp_structbert_word-segmentation_chinese-base'),
Tasks.sentence_similarity:
('sbert-base-chinese-sentence-similarity',
(Pipelines.sentence_similarity,
'damo/nlp_structbert_sentence-similarity_chinese-base'),
Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'),
Tasks.text_classification:
('bert-sentiment-analysis', 'damo/bert-base-sst2'),
Tasks.text_generation: ('palm2.0',
Tasks.nli: (Pipelines.nli, 'damo/nlp_structbert_nli_chinese-base'),
Tasks.sentiment_classification:
(Pipelines.sentiment_classification,
'damo/nlp_structbert_sentiment-classification_chinese-base'),
Tasks.text_classification: ('bert-sentiment-analysis',
'damo/bert-base-sst2'),
Tasks.image_matting: (Pipelines.image_matting,
'damo/cv_unet_image-matting'),
Tasks.text_classification: (Pipelines.sentiment_analysis,
'damo/bert-base-sst2'),
Tasks.text_generation: (Pipelines.text_generation,
'damo/nlp_palm2.0_text-generation_chinese-base'),
Tasks.image_captioning: ('ofa', None),
Tasks.image_captioning: (Pipelines.image_caption,
'damo/ofa_image-caption_coco_large_en'),
Tasks.image_generation:
('person-image-cartoon',
(Pipelines.person_image_cartoon,
'damo/cv_unet_person-image-cartoon_compound-models'),
Tasks.ocr_detection: (Pipelines.ocr_detection,
'damo/cv_resnet18_ocr-detection-line-level_damo'),
Tasks.fill_mask: (Pipelines.fill_mask, 'damo/nlp_veco_fill-mask-large'),
Tasks.action_recognition: (Pipelines.action_recognition,
'damo/cv_TAdaConv_action-recognition'),
}


@@ -84,30 +100,40 @@ def pipeline(task: str = None,
if task is None and pipeline_name is None:
raise ValueError('task or pipeline_name is required')

assert isinstance(model, (type(None), str, Model, list)), \
f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}'

if pipeline_name is None:
# get default pipeline for this task
if isinstance(model, str) \
or (isinstance(model, list) and isinstance(model[0], str)):

# if is_model_name(model):
if (isinstance(model, str) and model.startswith('damo/')) \
or (isinstance(model, list) and model[0].startswith('damo/')) \
or (isinstance(model, str) and osp.exists(model)):
# TODO @wenmeng.zwm add support when model is a str of modelhub address
# read pipeline info from modelhub configuration file.
pipeline_name, default_model_repo = get_default_pipeline_info(
task)
if is_official_hub_path(model):
# read config file from hub and parse
cfg = read_config(model) if isinstance(
model, str) else read_config(model[0])
assert hasattr(
cfg,
'pipeline'), 'pipeline config is missing from config file.'
pipeline_name = cfg.pipeline.type
else:
# used for test case, when model is str and is not hub path
pipeline_name = get_pipeline_by_model_name(task, model)
elif isinstance(model, Model) or \
(isinstance(model, list) and isinstance(model[0], Model)):
# get pipeline info from Model object
first_model = model[0] if isinstance(model, list) else model
if not hasattr(first_model, 'pipeline'):
# model is instantiated by user, we should parse config again
cfg = read_config(first_model.model_dir)
assert hasattr(
cfg,
'pipeline'), 'pipeline config is missing from config file.'
first_model.pipeline = cfg.pipeline
pipeline_name = first_model.pipeline.type
else:
pipeline_name, default_model_repo = get_default_pipeline_info(task)

if model is None:
model = default_model_repo

assert isinstance(model, (type(None), str, Model, list)), \
f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}'

cfg = ConfigDict(type=pipeline_name, model=model)

if kwargs:
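
To illustrate the resolution order above, two hedged calls that should resolve
to the same pipeline class; the ids come from DEFAULT_MODEL_FOR_PIPELINE:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# explicit hub model: pipeline type is read from the model's configuration file
p1 = pipeline(
    task=Tasks.word_segmentation,
    model='damo/nlp_structbert_word-segmentation_chinese-base')
# no model given: falls back to the task default (pipeline_name, model_repo)
p2 = pipeline(task=Tasks.word_segmentation)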


+2 -0 modelscope/pipelines/cv/__init__.py

@@ -1,2 +1,4 @@
from .action_recognition_pipeline import ActionRecognitionPipeline
from .image_cartoon_pipeline import ImageCartoonPipeline
from .image_matting_pipeline import ImageMattingPipeline
from .ocr_detection_pipeline import OCRDetectionPipeline

+65 -0 modelscope/pipelines/cv/action_recognition_pipeline.py

@@ -0,0 +1,65 @@
import math
import os.path as osp
from typing import Any, Dict

import cv2
import numpy as np
import PIL
import torch

from modelscope.metainfo import Pipelines
from modelscope.models.cv.action_recognition.models import BaseVideoModel
from modelscope.pipelines.base import Input
from modelscope.preprocessors.video import ReadVideoData
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
from ..base import Pipeline
from ..builder import PIPELINES

logger = get_logger()


@PIPELINES.register_module(
Tasks.action_recognition, module_name=Pipelines.action_recognition)
class ActionRecognitionPipeline(Pipeline):

def __init__(self, model: str):
super().__init__(model=model)
model_path = osp.join(self.model, ModelFile.TORCH_MODEL_FILE)
logger.info(f'loading model from {model_path}')
config_path = osp.join(self.model, ModelFile.CONFIGURATION)
logger.info(f'loading config from {config_path}')
self.cfg = Config.from_file(config_path)
self.infer_model = BaseVideoModel(cfg=self.cfg).cuda()
self.infer_model.eval()
self.infer_model.load_state_dict(torch.load(model_path)['model_state'])
self.label_mapping = self.cfg.label_mapping
logger.info('load model done')

def preprocess(self, input: Input) -> Dict[str, Any]:
if isinstance(input, str):
video_input_data = ReadVideoData(self.cfg, input).cuda()
else:
raise TypeError(f'input should be a str,'
f' but got {type(input)}')
result = {'video_data': video_input_data}
return result

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
pred = self.perform_inference(input['video_data'])
output_label = self.label_mapping[str(pred)]
return {'output_label': output_label}

@torch.no_grad()
def perform_inference(self, data, max_bsz=4):
iter_num = math.ceil(data.size(0) / max_bsz)
preds_list = []
for i in range(iter_num):
preds_list.append(
self.infer_model(data[i * max_bsz:(i + 1) * max_bsz])[0])
pred = torch.cat(preds_list, dim=0)
return pred.mean(dim=0).argmax().item()

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
return inputs
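
A usage sketch for this pipeline (note the constructor requires CUDA; the
video path is a placeholder and the model id is the task default from
builder.py):

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

recognizer = pipeline(
    task=Tasks.action_recognition,
    model='damo/cv_TAdaConv_action-recognition')
print(recognizer('path/to/video.mp4')['output_label'])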

+2 -1 modelscope/pipelines/cv/image_cartoon_pipeline.py

@@ -6,6 +6,7 @@ import numpy as np
import PIL
import tensorflow as tf

from modelscope.metainfo import Pipelines
from modelscope.models.cv.cartoon.facelib.facer import FaceAna
from modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans import (
get_reference_facial_points, warp_and_crop_face)
@@ -25,7 +26,7 @@ logger = get_logger()


@PIPELINES.register_module(
Tasks.image_generation, module_name='person-image-cartoon')
Tasks.image_generation, module_name=Pipelines.person_image_cartoon)
class ImageCartoonPipeline(Pipeline):

def __init__(self, model: str):


+2 -1 modelscope/pipelines/cv/image_matting_pipeline.py

@@ -5,6 +5,7 @@ import cv2
import numpy as np
import PIL

from modelscope.metainfo import Pipelines
from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.constant import ModelFile, Tasks
@@ -16,7 +17,7 @@ logger = get_logger()


@PIPELINES.register_module(
Tasks.image_matting, module_name=Tasks.image_matting)
Tasks.image_matting, module_name=Pipelines.image_matting)
class ImageMattingPipeline(Pipeline):

def __init__(self, model: str):


+168 -0 modelscope/pipelines/cv/ocr_detection_pipeline.py

@@ -0,0 +1,168 @@
import math
import os
import os.path as osp
import sys
from typing import Any, Dict, List, Tuple, Union

import cv2
import numpy as np
import PIL
import tensorflow as tf
import tf_slim as slim

from modelscope.metainfo import Pipelines
from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
from ..base import Pipeline
from ..builder import PIPELINES
from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils

if tf.__version__ >= '2.0':
tf = tf.compat.v1
tf.compat.v1.disable_eager_execution()

logger = get_logger()

# constant
RBOX_DIM = 5
OFFSET_DIM = 6
WORD_POLYGON_DIM = 8
OFFSET_VARIANCE = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_float('node_threshold', 0.4,
'Confidence threshold for nodes')
tf.app.flags.DEFINE_float('link_threshold', 0.6,
'Confidence threshold for links')


@PIPELINES.register_module(
Tasks.ocr_detection, module_name=Pipelines.ocr_detection)
class OCRDetectionPipeline(Pipeline):

def __init__(self, model: str):
super().__init__(model=model)
model_path = osp.join(
osp.join(self.model, ModelFile.TF_CHECKPOINT_FOLDER),
'checkpoint-80000')

config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
self._session = tf.Session(config=config)
global_step = tf.get_variable(
'global_step', [],
initializer=tf.constant_initializer(0),
dtype=tf.int64,
trainable=False)
variable_averages = tf.train.ExponentialMovingAverage(
0.997, global_step)
self.input_images = tf.placeholder(
tf.float32, shape=[1, 1024, 1024, 3], name='input_images')
self.output = {}

# detector
detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector()
all_maps = detector.build_model(self.input_images, is_training=False)

# decode local predictions
all_nodes, all_links, all_reg = [], [], []
for i, maps in enumerate(all_maps):
cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2]
reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE)

cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2]))

lnk_prob_pos = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, :2])
lnk_prob_mut = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, 2:])
lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1)

all_nodes.append(cls_prob)
all_links.append(lnk_prob)
all_reg.append(reg_maps)

# decode segments and links
image_size = tf.shape(self.input_images)[1:3]
segments, group_indices, segment_counts, _ = ops.decode_segments_links_python(
image_size,
all_nodes,
all_links,
all_reg,
anchor_sizes=list(detector.anchor_sizes))

# combine segments
combined_rboxes, combined_counts = ops.combine_segments_python(
segments, group_indices, segment_counts)
self.output['combined_rboxes'] = combined_rboxes
self.output['combined_counts'] = combined_counts

with self._session.as_default() as sess:
logger.info(f'loading model from {model_path}')
# load model
model_loader = tf.train.Saver(
variable_averages.variables_to_restore())
model_loader.restore(sess, model_path)

def preprocess(self, input: Input) -> Dict[str, Any]:
if isinstance(input, str):
img = np.array(load_image(input))
elif isinstance(input, PIL.Image.Image):
img = np.array(input.convert('RGB'))
elif isinstance(input, np.ndarray):
if len(input.shape) == 2:
    input = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
img = input[:, :, ::-1]  # to rgb order
else:
    raise TypeError(f'input should be either str, PIL.Image,'
                    f' np.ndarray, but got {type(input)}')
h, w, c = img.shape
img_pad = np.zeros((max(h, w), max(h, w), 3), dtype=np.float32)
img_pad[:h, :w, :] = img

resize_size = 1024
img_pad_resize = cv2.resize(img_pad, (resize_size, resize_size))
img_pad_resize = cv2.cvtColor(img_pad_resize, cv2.COLOR_RGB2BGR)
img_pad_resize = img_pad_resize - np.array([123.68, 116.78, 103.94],
dtype=np.float32)

resize_size = tf.stack([resize_size, resize_size])
orig_size = tf.stack([max(h, w), max(h, w)])
self.output['orig_size'] = orig_size
self.output['resize_size'] = resize_size

result = {'img': np.expand_dims(img_pad_resize, axis=0)}
return result

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
with self._session.as_default():
feed_dict = {self.input_images: input['img']}
sess_outputs = self._session.run(self.output, feed_dict=feed_dict)
return sess_outputs

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
rboxes = inputs['combined_rboxes'][0]
count = inputs['combined_counts'][0]
rboxes = rboxes[:count, :]

# convert rboxes to polygons and find its coordinates on the original image
orig_h, orig_w = inputs['orig_size']
resize_h, resize_w = inputs['resize_size']
polygons = utils.rboxes_to_polygons(rboxes)
scale_y = float(orig_h) / float(resize_h)
scale_x = float(orig_w) / float(resize_w)

# confine polygons inside image
polygons[:, ::2] = np.maximum(
0, np.minimum(polygons[:, ::2] * scale_x, orig_w - 1))
polygons[:, 1::2] = np.maximum(
0, np.minimum(polygons[:, 1::2] * scale_y, orig_h - 1))
polygons = np.round(polygons).astype(np.int32)

# nms
dt_n9 = [o + [utils.cal_width(o)] for o in polygons.tolist()]
dt_nms = utils.nms_python(dt_n9)
dt_polygons = np.array([o[:8] for o in dt_nms])

result = {'det_polygons': dt_polygons}
return result
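
And a matching sketch for OCR detection (model id is the task default; the
image path is a placeholder):

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

detector = pipeline(
    task=Tasks.ocr_detection,
    model='damo/cv_resnet18_ocr-detection-line-level_damo')
polygons = detector('path/to/image.jpg')['det_polygons']  # (N, 8) corners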

+0 -0 modelscope/pipelines/cv/ocr_utils/__init__.py


+158 -0 modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py

@@ -0,0 +1,158 @@
import tensorflow as tf
import tf_slim as slim

from . import ops, resnet18_v1, resnet_utils

if tf.__version__ >= '2.0':
tf = tf.compat.v1

# constants
OFFSET_DIM = 6

N_LOCAL_LINKS = 8
N_CROSS_LINKS = 4
N_SEG_CLASSES = 2
N_LNK_CLASSES = 4

POS_LABEL = 1
NEG_LABEL = 0


class SegLinkDetector():

def __init__(self):
self.anchor_sizes = [6., 11.84210526, 23.68421053, 45., 90., 150.]

def _detection_classifier(self,
maps,
ksize,
weight_decay,
cross_links=False,
scope=None):

with tf.variable_scope(scope):
seg_depth = N_SEG_CLASSES
if cross_links:
lnk_depth = N_LNK_CLASSES * (N_LOCAL_LINKS + N_CROSS_LINKS)
else:
lnk_depth = N_LNK_CLASSES * N_LOCAL_LINKS
reg_depth = OFFSET_DIM
map_depth = maps.get_shape()[3]
inter_maps, inter_relu = ops.conv2d(
maps, map_depth, 256, 1, 1, 'SAME', scope='conv_inter')

dir_maps, dir_relu = ops.conv2d(
inter_relu, 256, 2, ksize, 1, 'SAME', scope='conv_dir')
cen_maps, cen_relu = ops.conv2d(
inter_relu, 256, 2, ksize, 1, 'SAME', scope='conv_cen')
pol_maps, pol_relu = ops.conv2d(
inter_relu, 256, 8, ksize, 1, 'SAME', scope='conv_pol')
concat_relu = tf.concat([dir_relu, cen_relu, pol_relu], axis=-1)
_, lnk_embedding = ops.conv_relu(
concat_relu, 12, 256, 1, 1, scope='lnk_embedding')
lnk_maps, lnk_relu = ops.conv2d(
inter_relu + lnk_embedding,
256,
lnk_depth,
ksize,
1,
'SAME',
scope='conv_lnk')

char_seg_maps, char_seg_relu = ops.conv2d(
inter_relu,
256,
seg_depth,
ksize,
1,
'SAME',
scope='conv_char_cls')
char_reg_maps, char_reg_relu = ops.conv2d(
inter_relu,
256,
reg_depth,
ksize,
1,
'SAME',
scope='conv_char_reg')
concat_char_relu = tf.concat([char_seg_relu, char_reg_relu],
axis=-1)
_, char_embedding = ops.conv_relu(
concat_char_relu, 8, 256, 1, 1, scope='conv_char_embedding')
seg_maps, seg_relu = ops.conv2d(
inter_relu + char_embedding,
256,
seg_depth,
ksize,
1,
'SAME',
scope='conv_cls')
reg_maps, reg_relu = ops.conv2d(
inter_relu + char_embedding,
256,
reg_depth,
ksize,
1,
'SAME',
scope='conv_reg')

return seg_relu, lnk_relu, reg_relu

def _build_cnn(self, images, weight_decay, is_training):
with slim.arg_scope(
resnet18_v1.resnet_arg_scope(weight_decay=weight_decay)):
logits, end_points = resnet18_v1.resnet_v1_18(
images, is_training=is_training, scope='resnet_v1_18')

outputs = {
'conv3_3': end_points['pool1'],
'conv4_3': end_points['pool2'],
'fc7': end_points['pool3'],
'conv8_2': end_points['pool4'],
'conv9_2': end_points['pool5'],
'conv10_2': end_points['pool6'],
}
return outputs

def build_model(self, images, is_training=True, scope=None):

weight_decay = 5e-4 # FLAGS.weight_decay
cnn_outputs = self._build_cnn(images, weight_decay, is_training)
det_0 = self._detection_classifier(
cnn_outputs['conv3_3'],
3,
weight_decay,
cross_links=False,
scope='dete_0')
det_1 = self._detection_classifier(
cnn_outputs['conv4_3'],
3,
weight_decay,
cross_links=True,
scope='dete_1')
det_2 = self._detection_classifier(
cnn_outputs['fc7'],
3,
weight_decay,
cross_links=True,
scope='dete_2')
det_3 = self._detection_classifier(
cnn_outputs['conv8_2'],
3,
weight_decay,
cross_links=True,
scope='dete_3')
det_4 = self._detection_classifier(
cnn_outputs['conv9_2'],
3,
weight_decay,
cross_links=True,
scope='dete_4')
det_5 = self._detection_classifier(
cnn_outputs['conv10_2'],
3,
weight_decay,
cross_links=True,
scope='dete_5')
outputs = [det_0, det_1, det_2, det_3, det_4, det_5]
return outputs

+1098 -0 modelscope/pipelines/cv/ocr_utils/ops.py (diff suppressed: file too large)


+432 -0 modelscope/pipelines/cv/ocr_utils/resnet18_v1.py

@@ -0,0 +1,432 @@
"""Contains definitions for the original form of Residual Networks.
The 'v1' residual networks (ResNets) implemented in this module were proposed
by:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
Other variants were introduced in:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
The networks defined in this module utilize the bottleneck building block of
[1] with projection shortcuts only for increasing depths. They employ batch
normalization *after* every weight layer. This is the architecture used by
MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
architecture and the alternative 'v2' architecture of [2] which uses batch
normalization *before* every weight layer in the so-called full pre-activation
units.
Typical use:
from tensorflow.contrib.slim.nets import resnet_v1
ResNet-101 for image classification into 1000 classes:
# inputs has shape [batch, 224, 224, 3]
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)
ResNet-101 for semantic segmentation into 21 classes:
# inputs has shape [batch, 513, 513, 3]
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
net, end_points = resnet_v1.resnet_v1_101(inputs,
21,
is_training=False,
global_pool=False,
output_stride=16)
"""
import tensorflow as tf
import tf_slim as slim

from . import resnet_utils

if tf.__version__ >= '2.0':
tf = tf.compat.v1

resnet_arg_scope = resnet_utils.resnet_arg_scope


@slim.add_arg_scope
def basicblock(inputs,
depth,
depth_bottleneck,
stride,
rate=1,
outputs_collections=None,
scope=None):
"""Bottleneck residual unit variant with BN after convolutions.
This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
its definition. Note that we use here the bottleneck variant which has an
extra bottleneck layer.
When putting together two consecutive ResNet blocks that use this unit, one
should use stride = 2 in the last unit of the first block.
Args:
inputs: A tensor of size [batch, height, width, channels].
depth: The depth of the ResNet unit output.
depth_bottleneck: The depth of the bottleneck layers.
stride: The ResNet unit's stride. Determines the amount of downsampling of
the units output compared to its input.
rate: An integer, rate for atrous convolution.
outputs_collections: Collection to add the ResNet unit output.
scope: Optional variable_scope.
Returns:
The ResNet unit's output.
"""
with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
if depth == depth_in:
shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
else:
shortcut = slim.conv2d(
inputs,
depth, [1, 1],
stride=stride,
activation_fn=None,
scope='shortcut')

residual = resnet_utils.conv2d_same(
inputs, depth, 3, stride, rate=rate, scope='conv1')
residual = resnet_utils.conv2d_same(
residual, depth, 3, 1, rate=rate, scope='conv2')

output = tf.nn.relu(residual + shortcut)

return slim.utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, output)


@slim.add_arg_scope
def bottleneck(inputs,
depth,
depth_bottleneck,
stride,
rate=1,
outputs_collections=None,
scope=None):
"""Bottleneck residual unit variant with BN after convolutions.
This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
its definition. Note that we use here the bottleneck variant which has an
extra bottleneck layer.
When putting together two consecutive ResNet blocks that use this unit, one
should use stride = 2 in the last unit of the first block.
Args:
inputs: A tensor of size [batch, height, width, channels].
depth: The depth of the ResNet unit output.
depth_bottleneck: The depth of the bottleneck layers.
stride: The ResNet unit's stride. Determines the amount of downsampling of
the units output compared to its input.
rate: An integer, rate for atrous convolution.
outputs_collections: Collection to add the ResNet unit output.
scope: Optional variable_scope.
Returns:
The ResNet unit's output.
"""
with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
if depth == depth_in:
shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
else:
shortcut = slim.conv2d(
inputs,
depth, [1, 1],
stride=stride,
activation_fn=None,
scope='shortcut')

residual = slim.conv2d(
inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
residual = resnet_utils.conv2d_same(
residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
residual = slim.conv2d(
residual,
depth, [1, 1],
stride=1,
activation_fn=None,
scope='conv3')

output = tf.nn.relu(shortcut + residual)

return slim.utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, output)


def resnet_v1(inputs,
blocks,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
include_root_block=True,
spatial_squeeze=True,
reuse=None,
scope=None):
"""Generator for v1 ResNet models.
This function generates a family of ResNet v1 models. See the resnet_v1_*()
methods for specific model instantiations, obtained by selecting different
block instantiations that produce ResNets of various depths.
Training for image classification on Imagenet is usually done with [224, 224]
inputs, resulting in [7, 7] feature maps at the output of the last ResNet
block for the ResNets defined in [1] that have nominal stride equal to 32.
However, for dense prediction tasks we advise that one uses inputs with
spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
this case the feature maps at the ResNet output will have spatial shape
[(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
and corners exactly aligned with the input image corners, which greatly
facilitates alignment of the features to the image. Using as input [225, 225]
images results in [8, 8] feature maps at the output of the last ResNet block.
For dense prediction tasks, the ResNet needs to run in fully-convolutional
(FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
have nominal stride equal to 32 and a good choice in FCN mode is to use
output_stride=16 in order to increase the density of the computed features at
small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
Args:
inputs: A tensor of size [batch, height_in, width_in, channels].
blocks: A list of length equal to the number of ResNet blocks. Each element
is a resnet_utils.Block object describing the units in the block.
num_classes: Number of predicted classes for classification tasks. If None
we return the features before the logit layer.
is_training: whether is training or not.
global_pool: If True, we perform global average pooling before computing the
logits. Set to True for image classification, False for dense prediction.
output_stride: If None, then the output will be computed at the nominal
network stride. If output_stride is not None, it specifies the requested
ratio of input to output spatial resolution.
include_root_block: If True, include the initial convolution followed by
max-pooling, if False excludes it.
spatial_squeeze: if True, logits is of shape [B, C], if False logits is
of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
Returns:
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
If global_pool is False, then height_out and width_out are reduced by a
factor of output_stride compared to the respective height_in and width_in,
else both height_out and width_out equal one. If num_classes is None, then
net is the output of the last ResNet block, potentially after global
average pooling. If num_classes is not None, net contains the pre-softmax
activations.
end_points: A dictionary from components of the network to the corresponding
activation.
Raises:
ValueError: If the target output_stride is not valid.
"""
with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
end_points_collection = sc.name + '_end_points'
with slim.arg_scope(
[slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
outputs_collections=end_points_collection):
with slim.arg_scope([slim.batch_norm], is_training=is_training):
net = inputs
if include_root_block:
if output_stride is not None:
if output_stride % 4 != 0:
raise ValueError(
'The output_stride needs to be a multiple of 4.'
)
output_stride //= 4  # integer division keeps the stride checks exact
net = resnet_utils.conv2d_same(
net, 64, 7, stride=2, scope='conv1')
net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')

net = slim.utils.collect_named_outputs(
end_points_collection, 'pool2', net)

net = resnet_utils.stack_blocks_dense(net, blocks,
output_stride)

end_points = slim.utils.convert_collection_to_dict(
end_points_collection)

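# NOTE: these pool aliases hard-code the unit names of the 7-block
# resnet_v1_18 defined below; the deeper variants in this file would
# raise a KeyError here unless given matching block/unit names.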
end_points['pool1'] = end_points['resnet_v1_18/block2/unit_2']
end_points['pool2'] = end_points['resnet_v1_18/block3/unit_2']
end_points['pool3'] = end_points['resnet_v1_18/block4/unit_2']
end_points['pool4'] = end_points['resnet_v1_18/block5/unit_2']
end_points['pool5'] = end_points['resnet_v1_18/block6/unit_2']
end_points['pool6'] = net

return net, end_points


resnet_v1.default_image_size = 224


def resnet_v1_18(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
spatial_squeeze=True,
reuse=None,
scope='resnet_v1_18'):
"""ResNet-18 model of [1]. See resnet_v1() for arg and return description."""
blocks = [
resnet_utils.Block('block1', basicblock,
[(64, 64, 1)] + [(64, 64, 1)]),
resnet_utils.Block('block2', basicblock,
[(128, 128, 1)] + [(128, 128, 1)]),
resnet_utils.Block('block3', basicblock,
[(256, 256, 2)] + [(256, 256, 1)]),
resnet_utils.Block('block4', basicblock,
[(512, 512, 2)] + [(512, 512, 1)]),
resnet_utils.Block('block5', basicblock,
[(256, 256, 2)] + [(256, 256, 1)]),
resnet_utils.Block('block6', basicblock,
[(256, 256, 2)] + [(256, 256, 1)]),
resnet_utils.Block('block7', basicblock,
[(256, 256, 2)] + [(256, 256, 1)]),
]
return resnet_v1(
inputs,
blocks,
num_classes,
is_training,
global_pool=global_pool,
output_stride=output_stride,
include_root_block=True,
spatial_squeeze=spatial_squeeze,
reuse=reuse,
scope=scope)


resnet_v1_18.default_image_size = resnet_v1.default_image_size
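# A minimal sketch of the dense-feature usage described in the
# resnet_v1() docstring: with output_stride=16, stack_blocks_dense
# replaces later stride-2 units by stride-1 atrous units so the returned
# feature maps keep the requested density. Note that in this trimmed OCR
# variant, classification arguments such as num_classes, global_pool and
# spatial_squeeze are accepted for API compatibility but not used by the
# network body.
def _resnet_v1_18_dense_features_example():
    images = tf.placeholder(tf.float32, shape=(None, 321, 321, 3))
    with slim.arg_scope(resnet_arg_scope()):
        net, end_points = resnet_v1_18(images, output_stride=16)
    return net, end_points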


def resnet_v1_50(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
spatial_squeeze=True,
reuse=None,
scope='resnet_v1_50'):
"""ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
blocks = [
resnet_utils.Block('block1', bottleneck,
[(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block('block2', bottleneck,
[(512, 128, 1)] * 3 + [(512, 128, 2)]),
resnet_utils.Block('block3', bottleneck,
[(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
resnet_utils.Block('block4', bottleneck,
[(2048, 512, 1)] * 3 + [(2048, 512, 2)]),
resnet_utils.Block('block5', bottleneck,
[(1024, 256, 1)] * 2 + [(1024, 256, 2)]),
resnet_utils.Block('block6', bottleneck, [(1024, 256, 1)] * 2),
]
return resnet_v1(
inputs,
blocks,
num_classes,
is_training,
global_pool=global_pool,
output_stride=output_stride,
include_root_block=True,
spatial_squeeze=spatial_squeeze,
reuse=reuse,
scope=scope)


resnet_v1_50.default_image_size = resnet_v1.default_image_size


def resnet_v1_101(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
spatial_squeeze=True,
reuse=None,
scope='resnet_v1_101'):
"""ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
blocks = [
resnet_utils.Block('block1', bottleneck,
[(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block('block2', bottleneck,
[(512, 128, 1)] * 3 + [(512, 128, 2)]),
resnet_utils.Block('block3', bottleneck,
[(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
]
return resnet_v1(
inputs,
blocks,
num_classes,
is_training,
global_pool=global_pool,
output_stride=output_stride,
include_root_block=True,
spatial_squeeze=spatial_squeeze,
reuse=reuse,
scope=scope)


resnet_v1_101.default_image_size = resnet_v1.default_image_size


def resnet_v1_152(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
spatial_squeeze=True,
reuse=None,
scope='resnet_v1_152'):
"""ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
blocks = [
resnet_utils.Block('block1', bottleneck,
[(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block('block2', bottleneck,
[(512, 128, 1)] * 7 + [(512, 128, 2)]),
resnet_utils.Block('block3', bottleneck,
[(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
]
return resnet_v1(
inputs,
blocks,
num_classes,
is_training,
global_pool=global_pool,
output_stride=output_stride,
include_root_block=True,
spatial_squeeze=spatial_squeeze,
reuse=reuse,
scope=scope)


resnet_v1_152.default_image_size = resnet_v1.default_image_size


def resnet_v1_200(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
spatial_squeeze=True,
reuse=None,
scope='resnet_v1_200'):
"""ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
blocks = [
resnet_utils.Block('block1', bottleneck,
[(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block('block2', bottleneck,
[(512, 128, 1)] * 23 + [(512, 128, 2)]),
resnet_utils.Block('block3', bottleneck,
[(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
]
return resnet_v1(
inputs,
blocks,
num_classes,
is_training,
global_pool=global_pool,
output_stride=output_stride,
include_root_block=True,
spatial_squeeze=spatial_squeeze,
reuse=reuse,
scope=scope)


resnet_v1_200.default_image_size = resnet_v1.default_image_size

if __name__ == '__main__':
    inputs = tf.placeholder(
        tf.float32, shape=(None, 224, 224, 3), name='input')
    with slim.arg_scope(resnet_arg_scope()):
        # resnet_v1() aliases its pool end_points to 'resnet_v1_18/...'
        # unit names and returns a (net, end_points) tuple, so the
        # 18-layer variant is the one this demo runs end to end.
        net, end_points = resnet_v1_18(inputs)

+ 231
- 0
modelscope/pipelines/cv/ocr_utils/resnet_utils.py

@@ -0,0 +1,231 @@
"""Contains building blocks for various versions of Residual Networks.
Residual networks (ResNets) were proposed in:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015
More variants were introduced in:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016
We can obtain different ResNet variants by changing the network depth, width,
and form of residual unit. This module implements the infrastructure for
building them. Concrete ResNet units and full ResNet networks are implemented in
the accompanying resnet_v1.py and resnet_v2.py modules.
Compared to https://github.com/KaimingHe/deep-residual-networks, in the current
implementation we subsample the output activations in the last residual unit of
each block, instead of subsampling the input activations in the first residual
unit of each block. The two implementations give identical results but our
implementation is more memory efficient.
"""

import collections

import tensorflow as tf
import tf_slim as slim

if tf.__version__ >= '2.0':
tf = tf.compat.v1


class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
"""A named tuple describing a ResNet block.
Its parts are:
scope: The scope of the `Block`.
unit_fn: The ResNet unit function which takes as input a `Tensor` and
returns another `Tensor` with the output of the ResNet unit.
args: A list of length equal to the number of units in the `Block`. The list
contains one (depth, depth_bottleneck, stride) tuple for each unit in the
block to serve as argument to unit_fn.
"""


def subsample(inputs, factor, scope=None):
"""Subsamples the input along the spatial dimensions.
Args:
inputs: A `Tensor` of size [batch, height_in, width_in, channels].
factor: The subsampling factor.
scope: Optional variable_scope.
Returns:
output: A `Tensor` of size [batch, height_out, width_out, channels] with the
input, either intact (if factor == 1) or subsampled (if factor > 1).
"""
if factor == 1:
return inputs
else:
return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)


def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
"""Strided 2-D convolution with 'SAME' padding.
When stride > 1, then we do explicit zero-padding, followed by conv2d with
'VALID' padding.
Note that
net = conv2d_same(inputs, num_outputs, 3, stride=stride)
is equivalent to
net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
net = subsample(net, factor=stride)
whereas
net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')
is different when the input's height or width is even, which is why we add the
current function. For more details, see ResnetUtilsTest.testConv2DSameEven().
Args:
inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
num_outputs: An integer, the number of output filters.
kernel_size: An int with the kernel_size of the filters.
stride: An integer, the output stride.
rate: An integer, rate for atrous convolution.
scope: Scope.
Returns:
output: A 4-D tensor of size [batch, height_out, width_out, channels] with
the convolution output.
"""
if stride == 1:
return slim.conv2d(
inputs,
num_outputs,
kernel_size,
stride=1,
rate=rate,
padding='SAME',
scope=scope)
else:
kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
pad_total = kernel_size_effective - 1
pad_beg = pad_total // 2
pad_end = pad_total - pad_beg
inputs = tf.pad(
inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
return slim.conv2d(
inputs,
num_outputs,
kernel_size,
stride=stride,
rate=rate,
padding='VALID',
scope=scope)
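# Worked example of the padding arithmetic above: for kernel_size=3 and
# rate=2 the effective kernel is 3 + (3 - 1) * (2 - 1) = 5, so pad_total
# is 4 and the input gains 2 rows/columns on each side before the VALID
# convolution, reproducing 'SAME' output sizes for any input parity.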


@slim.add_arg_scope
def stack_blocks_dense(net,
blocks,
output_stride=None,
outputs_collections=None):
"""Stacks ResNet `Blocks` and controls output feature density.
First, this function creates scopes for the ResNet in the form of
'block_name/unit_1', 'block_name/unit_2', etc.
Second, this function allows the user to explicitly control the ResNet
output_stride, which is the ratio of the input to output spatial resolution.
This is useful for dense prediction tasks such as semantic segmentation or
object detection.
Most ResNets consist of 4 ResNet blocks and subsample the activations by a
factor of 2 when transitioning between consecutive ResNet blocks. This results
in a nominal ResNet output_stride equal to 8. If we set the output_stride to
half the nominal network stride (e.g., output_stride=4), then we compute
responses twice.
Control of the output feature density is implemented by atrous convolution.
Args:
net: A `Tensor` of size [batch, height, width, channels].
blocks: A list of length equal to the number of ResNet `Blocks`. Each
element is a ResNet `Block` object describing the units in the `Block`.
output_stride: If `None`, then the output will be computed at the nominal
network stride. If output_stride is not `None`, it specifies the requested
ratio of input to output spatial resolution, which needs to be equal to
the product of unit strides from the start up to some level of the ResNet.
For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
then valid values for the output_stride are 1, 2, 6, 24 or None (which
is equivalent to output_stride=24).
outputs_collections: Collection to add the ResNet block outputs.
Returns:
net: Output tensor with stride equal to the specified output_stride.
Raises:
ValueError: If the target output_stride is not valid.
"""
# The current_stride variable keeps track of the effective stride of the
# activations. This allows us to invoke atrous convolution whenever applying
# the next residual unit would result in the activations having stride larger
# than the target output_stride.
current_stride = 1

# The atrous convolution rate parameter.
rate = 1

for block in blocks:
with tf.variable_scope(block.scope, 'block', [net]):
for i, unit in enumerate(block.args):
if output_stride is not None and current_stride > output_stride:
raise ValueError(
'The target output_stride cannot be reached.')

with tf.variable_scope(
'unit_%d' % (i + 1), values=[net]) as sc:
unit_depth, unit_depth_bottleneck, unit_stride = unit
# If we have reached the target output_stride, then we need to employ
# atrous convolution with stride=1 and multiply the atrous rate by the
# current unit's stride for use in subsequent layers.
if output_stride is not None and current_stride == output_stride:
net = block.unit_fn(
net,
depth=unit_depth,
depth_bottleneck=unit_depth_bottleneck,
stride=1,
rate=rate)
rate *= unit_stride

else:
net = block.unit_fn(
net,
depth=unit_depth,
depth_bottleneck=unit_depth_bottleneck,
stride=unit_stride,
rate=1)
current_stride *= unit_stride
net = slim.utils.collect_named_outputs(
outputs_collections, sc.name, net)

if output_stride is not None and current_stride != output_stride:
raise ValueError('The target output_stride cannot be reached.')

return net
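# Illustrative trace: with unit strides (1, 2, 2, 2, 2) and
# output_stride=4, the first two stride-2 units run normally and
# current_stride reaches 4; each later stride-2 unit is then executed
# with stride 1, and the atrous rate is multiplied by the skipped stride
# afterwards (rate 1, then 2, ...), so the activations never become
# coarser than the requested stride.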


def resnet_arg_scope(weight_decay=0.0001,
batch_norm_decay=0.997,
batch_norm_epsilon=1e-5,
batch_norm_scale=True):
"""Defines the default ResNet arg scope.
TODO(gpapan): The batch-normalization related default values above are
appropriate for use in conjunction with the reference ResNet models
released at https://github.com/KaimingHe/deep-residual-networks. When
training ResNets from scratch, they might need to be tuned.
Args:
weight_decay: The weight decay to use for regularizing the model.
batch_norm_decay: The moving average decay when estimating layer activation
statistics in batch normalization.
batch_norm_epsilon: Small constant to prevent division by zero when
normalizing activations by their variance in batch normalization.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer.
Returns:
An `arg_scope` to use for the resnet models.
"""
batch_norm_params = {
'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon,
'scale': batch_norm_scale,
'updates_collections': tf.GraphKeys.UPDATE_OPS,
}

with slim.arg_scope(
[slim.conv2d],
weights_regularizer=slim.l2_regularizer(weight_decay),
weights_initializer=slim.variance_scaling_initializer(),
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params):
with slim.arg_scope([slim.batch_norm], **batch_norm_params):
# Unlike the reference slim implementation, pool1 here uses
# padding='VALID', as in the accompanying code of 'Deep Residual
# Learning for Image Recognition'. Switching to padding='SAME'
# (as used in https://github.com/facebook/fb.resnet.torch) makes
# feature alignment easier for dense prediction tasks.
with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
return arg_sc

+ 108
- 0
modelscope/pipelines/cv/ocr_utils/utils.py

@@ -0,0 +1,108 @@
import cv2
import numpy as np


def rboxes_to_polygons(rboxes):
"""
Convert rboxes to polygons
ARGS
`rboxes`: [n, 5]
RETURN
`polygons`: [n, 8]
"""

theta = rboxes[:, 4:5]
cxcy = rboxes[:, :2]
half_w = rboxes[:, 2:3] / 2.
half_h = rboxes[:, 3:4] / 2.
v1 = np.hstack([np.cos(theta) * half_w, np.sin(theta) * half_w])
v2 = np.hstack([-np.sin(theta) * half_h, np.cos(theta) * half_h])
p1 = cxcy - v1 - v2
p2 = cxcy + v1 - v2
p3 = cxcy + v1 + v2
p4 = cxcy - v1 + v2
polygons = np.hstack([p1, p2, p3, p4])
return polygons
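# Sanity check of the layout above: an axis-aligned 2 x 2 rbox centred
# at the origin maps to its four corners:
#   rboxes_to_polygons(np.array([[0., 0., 2., 2., 0.]]))
#   -> array([[-1., -1., 1., -1., 1., 1., -1., 1.]])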


def cal_width(box):
pd1 = point_dist(box[0], box[1], box[2], box[3])
pd2 = point_dist(box[4], box[5], box[6], box[7])
return (pd1 + pd2) / 2


def point_dist(x1, y1, x2, y2):
return np.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))


def draw_polygons(img, polygons):
for p in polygons.tolist():
p = [int(o) for o in p]
cv2.line(img, (p[0], p[1]), (p[2], p[3]), (0, 255, 0), 1)
cv2.line(img, (p[2], p[3]), (p[4], p[5]), (0, 255, 0), 1)
cv2.line(img, (p[4], p[5]), (p[6], p[7]), (0, 255, 0), 1)
cv2.line(img, (p[6], p[7]), (p[0], p[1]), (0, 255, 0), 1)
return img


def nms_python(boxes):
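    """Greedy NMS over scored polygons.

    Each box is [x1, y1, x2, y2, x3, y3, x4, y4, score]; when the centre
    of one box falls inside the rotated rect of another (or vice versa),
    the lower-scoring box is suppressed.
    """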
boxes = sorted(boxes, key=lambda x: -x[8])
nms_flag = [True] * len(boxes)
for i, a in enumerate(boxes):
if not nms_flag[i]:
continue
else:
for j, b in enumerate(boxes):
if not j > i:
continue
if not nms_flag[j]:
continue
score_a = a[8]
score_b = b[8]
rbox_a = polygon2rbox(a[:8])
rbox_b = polygon2rbox(b[:8])
if point_in_rbox(rbox_a[:2], rbox_b) or point_in_rbox(
rbox_b[:2], rbox_a):
if score_a > score_b:
nms_flag[j] = False
boxes_nms = []
for i, box in enumerate(boxes):
if nms_flag[i]:
boxes_nms.append(box)
return boxes_nms


def point_in_rbox(c, rbox):
cx0, cy0 = c[0], c[1]
cx1, cy1 = rbox[0], rbox[1]
w, h = rbox[2], rbox[3]
theta = rbox[4]
dist_x = np.abs((cx1 - cx0) * np.cos(theta) + (cy1 - cy0) * np.sin(theta))
dist_y = np.abs(-(cx1 - cx0) * np.sin(theta) + (cy1 - cy0) * np.cos(theta))
return ((dist_x < w / 2.0) and (dist_y < h / 2.0))


def polygon2rbox(polygon):
x1, x2, x3, x4 = polygon[0], polygon[2], polygon[4], polygon[6]
y1, y2, y3, y4 = polygon[1], polygon[3], polygon[5], polygon[7]
c_x = (x1 + x2 + x3 + x4) / 4
c_y = (y1 + y2 + y3 + y4) / 4
w1 = point_dist(x1, y1, x2, y2)
w2 = point_dist(x3, y3, x4, y4)
h1 = point_line_dist(c_x, c_y, x1, y1, x2, y2)
h2 = point_line_dist(c_x, c_y, x3, y3, x4, y4)
h = h1 + h2
w = (w1 + w2) / 2
theta1 = np.arctan2(y2 - y1, x2 - x1)
theta2 = np.arctan2(y3 - y4, x3 - x4)
theta = (theta1 + theta2) / 2.0
return [c_x, c_y, w, h, theta]


def point_line_dist(px, py, x1, y1, x2, y2):
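    """Distance from the point (px, py) to the infinite line through
    (x1, y1) and (x2, y2), computed via the cross-product form; eps
    guards against zero-length segments."""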
eps = 1e-6
dx = x2 - x1
dy = y2 - y1
div = np.sqrt(dx * dx + dy * dy) + eps
dist = np.abs(px * dy - py * dx + x2 * y1 - y2 * x1) / div
return dist

+ 1
- 1
modelscope/pipelines/multi_modal/__init__.py

@@ -1 +1 @@
from .image_caption_pipeline import ImageCaptionPipeline
from .image_captioning_pipeline import ImageCaptionPipeline

+ 35
- 0
modelscope/pipelines/multi_modal/image_captioning_pipeline.py

@@ -0,0 +1,35 @@
from typing import Any, Dict, Optional, Union

from modelscope.metainfo import Pipelines
from modelscope.preprocessors import OfaImageCaptionPreprocessor, Preprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
from ..base import Model, Pipeline
from ..builder import PIPELINES

logger = get_logger()


@PIPELINES.register_module(
Tasks.image_captioning, module_name=Pipelines.image_caption)
class ImageCaptionPipeline(Pipeline):

    def __init__(self,
                 model: Union[Model, str],
                 preprocessor: Optional[Preprocessor] = None,
                 **kwargs):
        assert isinstance(model, (str, Model)), \
            'model must be a model id str or a Model instance'
        if isinstance(model, str):
            pipe_model = Model.from_pretrained(model)
        else:
            pipe_model = model
        if preprocessor is None:
            # the preprocessor needs the local model assets; models
            # loaded via from_pretrained expose their directory as
            # model_dir, as the other pipelines in this package rely on
            preprocessor = OfaImageCaptionPreprocessor(
                model_dir=pipe_model.model_dir)
        super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
return inputs

+ 6
- 3
modelscope/pipelines/nlp/__init__.py

@@ -1,7 +1,10 @@
from .dialog_intent_prediction_pipeline import * # noqa F403
from .dialog_modeling_pipeline import * # noqa F403
from .dialog_state_tracking import * # noqa F403
from .fill_mask_pipeline import * # noqa F403
from .nli_pipeline import * # noqa F403
from .sentence_similarity_pipeline import * # noqa F403
from .sentiment_classification_pipeline import * # noqa F403
from .sequence_classification_pipeline import * # noqa F403
from .space.dialog_intent_prediction_pipeline import * # noqa F403
from .space.dialog_modeling_pipeline import * # noqa F403
from .space.dialog_state_tracking import * # noqa F403
from .text_generation_pipeline import * # noqa F403
from .word_segmentation_pipeline import * # noqa F403

modelscope/pipelines/nlp/space/dialog_intent_prediction_pipeline.py → modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py

@@ -1,16 +1,18 @@
from typing import Any, Dict, Optional
from typing import Any, Dict

from modelscope.models.nlp import DialogIntentModel
from modelscope.preprocessors import DialogIntentPredictionPreprocessor
from modelscope.utils.constant import Tasks
from ...base import Input, Pipeline
from ...builder import PIPELINES
from ...metainfo import Pipelines
from ...models.nlp import DialogIntentModel
from ...preprocessors import DialogIntentPredictionPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline
from ..builder import PIPELINES

__all__ = ['DialogIntentPredictionPipeline']


@PIPELINES.register_module(
Tasks.dialog_intent_prediction, module_name=r'space-intent')
Tasks.dialog_intent_prediction,
module_name=Pipelines.dialog_intent_prediction)
class DialogIntentPredictionPipeline(Pipeline):

def __init__(self, model: DialogIntentModel,

modelscope/pipelines/nlp/space/dialog_modeling_pipeline.py → modelscope/pipelines/nlp/dialog_modeling_pipeline.py

@@ -3,14 +3,15 @@ from typing import Any, Dict, Optional
from modelscope.models.nlp import DialogModelingModel
from modelscope.preprocessors import DialogModelingPreprocessor
from modelscope.utils.constant import Tasks
from ...base import Pipeline, Tensor
from ...builder import PIPELINES
from ...metainfo import Pipelines
from ..base import Pipeline, Tensor
from ..builder import PIPELINES

__all__ = ['DialogModelingPipeline']


@PIPELINES.register_module(
Tasks.dialog_modeling, module_name=r'space-modeling')
Tasks.dialog_modeling, module_name=Pipelines.dialog_modeling)
class DialogModelingPipeline(Pipeline):

def __init__(self, model: DialogModelingModel,

+ 45
- 0
modelscope/pipelines/nlp/dialog_state_tracking.py

@@ -0,0 +1,45 @@
from typing import Any, Dict

from ...metainfo import Pipelines
from ...models.nlp import DialogStateTrackingModel
from ...preprocessors import DialogStateTrackingPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline
from ..builder import PIPELINES

__all__ = ['DialogStateTrackingPipeline']


@PIPELINES.register_module(
Tasks.dialog_state_tracking, module_name=Pipelines.dialog_state_tracking)
class DialogStateTrackingPipeline(Pipeline):

def __init__(self, model: DialogStateTrackingModel,
preprocessor: DialogStateTrackingPreprocessor, **kwargs):
"""use `model` and `preprocessor` to create a nlp text classification pipeline for prediction

Args:
model (SequenceClassificationModel): a model instance
preprocessor (SequenceClassificationPreprocessor): a preprocessor instance
"""

super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model = model
# self.tokenizer = preprocessor.tokenizer

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
"""process the prediction results

Args:
inputs (Dict[str, Any]): the model prediction outputs

Returns:
Dict[str, str]: the prediction results
"""
import numpy as np
pred = inputs['pred']
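# np.where over the equality mask returns the indices of every class
# attaining the maximum score; pos[0] is the predicted label id(s)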
pos = np.where(pred == np.max(pred))

result = {'pred': pred, 'label': pos[0]}

return result

+ 107
- 0
modelscope/pipelines/nlp/fill_mask_pipeline.py

@@ -0,0 +1,107 @@
from typing import Any, Dict, Optional, Union

import torch

from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp.masked_language_model import MaskedLanguageModelBase
from ...preprocessors import FillMaskPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline, Tensor
from ..builder import PIPELINES

__all__ = ['FillMaskPipeline']


@PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask)
class FillMaskPipeline(Pipeline):

def __init__(self,
model: Union[MaskedLanguageModelBase, str],
preprocessor: Optional[FillMaskPreprocessor] = None,
first_sequence='sentence',
**kwargs):
"""use `model` and `preprocessor` to create a nlp fill mask pipeline for prediction

Args:
model (MaskedLanguageModelBase): a model instance
preprocessor (FillMaskPreprocessor): a preprocessor instance
"""
fill_mask_model = model if isinstance(
model, MaskedLanguageModelBase) else Model.from_pretrained(model)
assert fill_mask_model.config is not None

if preprocessor is None:
preprocessor = FillMaskPreprocessor(
fill_mask_model.model_dir,
first_sequence=first_sequence,
second_sequence=None)
fill_mask_model.eval()
super().__init__(
model=fill_mask_model, preprocessor=preprocessor, **kwargs)

self.preprocessor = preprocessor
self.tokenizer = preprocessor.tokenizer
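# mask-token ids per supported backbone: 250001 is <mask> in the
# VECO (XLM-R style) vocabulary, 103 is [MASK] in the Chinese BERT
# vocabulary used by sbert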
self.mask_id = {'veco': 250001, 'sbert': 103}

self.rep_map = {
'sbert': {
'[unused0]': '',
'[PAD]': '',
'[unused1]': '',
r' +': ' ',
'[SEP]': '',
'[unused2]': '',
'[CLS]': '',
'[UNK]': ''
},
'veco': {
r' +': ' ',
'<mask>': '<q>',
'<pad>': '',
'<s>': '',
'</s>': '',
'<unk>': ' '
}
}

def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, Tensor]:
"""process the prediction results

Args:
inputs (Dict[str, Any]): the model prediction outputs

Returns:
Dict[str, str]: the prediction results
"""
import numpy as np
logits = inputs['logits'].detach().numpy()
input_ids = inputs['input_ids'].detach().numpy()
pred_ids = np.argmax(logits, axis=-1)
model_type = self.model.config.model_type
rst_ids = np.where(input_ids == self.mask_id[model_type], pred_ids,
input_ids)

def rep_tokens(string, rep_map):
for k, v in rep_map.items():
string = string.replace(k, v)
return string.strip()

pred_strings = []
for ids in rst_ids: # batch
# TODO vocab size is not stable

if self.model.config.vocab_size == 21128: # zh bert
pred_string = self.tokenizer.convert_ids_to_tokens(ids)
pred_string = ''.join(pred_string)
else:
pred_string = self.tokenizer.decode(ids)
pred_string = rep_tokens(pred_string, self.rep_map[model_type])
pred_strings.append(pred_string)

return {'text': pred_strings}

+ 72
- 0
modelscope/pipelines/nlp/nli_pipeline.py

@@ -0,0 +1,72 @@
from typing import Any, Dict, Union

import numpy as np
import torch

from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp import SbertForNLI
from ...preprocessors import NLIPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline
from ..builder import PIPELINES

__all__ = ['NLIPipeline']


@PIPELINES.register_module(Tasks.nli, module_name=Pipelines.nli)
class NLIPipeline(Pipeline):

def __init__(self,
model: Union[SbertForNLI, str],
preprocessor: NLIPreprocessor = None,
first_sequence='first_sequence',
second_sequence='second_sequence',
**kwargs):
"""use `model` and `preprocessor` to create a nlp text classification pipeline for prediction

Args:
model (SbertForNLI): a model instance
preprocessor (NLIPreprocessor): a preprocessor instance
"""
assert isinstance(model, str) or isinstance(model, SbertForNLI), \
'model must be a single str or SbertForNLI'
model = model if isinstance(
model, SbertForNLI) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = NLIPreprocessor(
model.model_dir,
first_sequence=first_sequence,
second_sequence=second_sequence)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
assert len(model.id2label) > 0

def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self,
inputs: Dict[str, Any],
topk: int = 5) -> Dict[str, str]:
"""process the prediction results

Args:
inputs (Dict[str, Any]): the model prediction outputs
topk (int): number of top-scoring labels to return

Returns:
Dict[str, str]: the prediction results
"""

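# top-k selection: np.argpartition surfaces the k largest scores in
# arbitrary order, and the argsort then orders those k ascending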
probs = inputs['probabilities'][0]
num_classes = probs.shape[0]
topk = min(topk, num_classes)
top_indices = np.argpartition(probs, -topk)[-topk:]
cls_ids = top_indices[np.argsort(probs[top_indices])]
probs = probs[cls_ids].tolist()

cls_names = [self.model.id2label[cid] for cid in cls_ids]

return {'scores': probs, 'labels': cls_names}

+ 19
- 9
modelscope/pipelines/nlp/sentence_similarity_pipeline.py

@@ -1,11 +1,13 @@
from typing import Any, Dict, Union

import numpy as np
import torch

from modelscope.models.nlp import SbertForSentenceSimilarity
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.utils.constant import Tasks
from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp import SbertForSentenceSimilarity
from ...preprocessors import SequenceClassificationPreprocessor
from ...utils.constant import Tasks
from ..base import Input, Pipeline
from ..builder import PIPELINES

@@ -13,13 +15,14 @@ __all__ = ['SentenceSimilarityPipeline']


@PIPELINES.register_module(
Tasks.sentence_similarity,
module_name=r'sbert-base-chinese-sentence-similarity')
Tasks.sentence_similarity, module_name=Pipelines.sentence_similarity)
class SentenceSimilarityPipeline(Pipeline):

def __init__(self,
model: Union[SbertForSentenceSimilarity, str],
model: Union[Model, str],
preprocessor: SequenceClassificationPreprocessor = None,
first_sequence='first_sequence',
second_sequence='second_sequence',
**kwargs):
"""use `model` and `preprocessor` to create a nlp sentence similarity pipeline for prediction

@@ -35,14 +38,21 @@ class SentenceSimilarityPipeline(Pipeline):
if preprocessor is None:
preprocessor = SequenceClassificationPreprocessor(
sc_model.model_dir,
first_sequence='first_sequence',
second_sequence='second_sequence')
first_sequence=first_sequence,
second_sequence=second_sequence)
sc_model.eval()
super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)

assert hasattr(self.model, 'id2label'), \
'id2label map should be initialized in the init function.'

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self, inputs: Dict[str, Any],
**postprocess_params) -> Dict[str, str]:
"""process the prediction results

Args:


+ 77
- 0
modelscope/pipelines/nlp/sentiment_classification_pipeline.py

@@ -0,0 +1,77 @@
from typing import Any, Dict, Union

import numpy as np
import torch

from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp import SbertForSentimentClassification
from ...preprocessors import SentimentClassificationPreprocessor
from ...utils.constant import Tasks
from ..base import Input, Pipeline
from ..builder import PIPELINES

__all__ = ['SentimentClassificationPipeline']


@PIPELINES.register_module(
Tasks.sentiment_classification,
module_name=Pipelines.sentiment_classification)
class SentimentClassificationPipeline(Pipeline):

def __init__(self,
model: Union[SbertForSentimentClassification, str],
preprocessor: SentimentClassificationPreprocessor = None,
first_sequence='first_sequence',
second_sequence='second_sequence',
**kwargs):
"""use `model` and `preprocessor` to create a nlp text classification pipeline for prediction

Args:
model (SbertForSentimentClassification): a model instance
preprocessor (SentimentClassificationPreprocessor): a preprocessor instance
"""
assert isinstance(model, str) or isinstance(model, SbertForSentimentClassification), \
'model must be a single str or SbertForSentimentClassification'
model = model if isinstance(
model,
SbertForSentimentClassification) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = SentimentClassificationPreprocessor(
model.model_dir,
first_sequence=first_sequence,
second_sequence=second_sequence)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
assert len(model.id2label) > 0

def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self,
inputs: Dict[str, Any],
topk: int = 5) -> Dict[str, str]:
"""process the prediction results

Args:
inputs (Dict[str, Any]): the model prediction outputs
topk (int): number of top-scoring labels to return

Returns:
Dict[str, str]: the prediction results
"""

probs = inputs['probabilities'][0]
num_classes = probs.shape[0]
topk = min(topk, num_classes)
top_indices = np.argpartition(probs, -topk)[-topk:]
cls_ids = top_indices[np.argsort(probs[top_indices])]
probs = probs[cls_ids].tolist()

cls_names = [self.model.id2label[cid] for cid in cls_ids]

return {'scores': probs, 'labels': cls_names}

+ 2
- 1
modelscope/pipelines/nlp/sequence_classification_pipeline.py

@@ -2,6 +2,7 @@ from typing import Any, Dict, Union

import numpy as np

from modelscope.metainfo import Pipelines
from modelscope.models.nlp import BertForSequenceClassification
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.utils.constant import Tasks
@@ -13,7 +14,7 @@ __all__ = ['SequenceClassificationPipeline']


@PIPELINES.register_module(
Tasks.text_classification, module_name=r'bert-sentiment-analysis')
Tasks.text_classification, module_name=Pipelines.sentiment_analysis)
class SequenceClassificationPipeline(Pipeline):

def __init__(self,


+ 0
- 46
modelscope/pipelines/nlp/space/dialog_state_tracking.py

@@ -1,46 +0,0 @@
from typing import Any, Dict, Optional

from modelscope.models.nlp import DialogModelingModel
from modelscope.preprocessors import DialogModelingPreprocessor
from modelscope.utils.constant import Tasks
from ...base import Pipeline, Tensor
from ...builder import PIPELINES

__all__ = ['DialogStateTrackingPipeline']


@PIPELINES.register_module(
Tasks.dialog_state_tracking, module_name=r'space-dst')
class DialogStateTrackingPipeline(Pipeline):

def __init__(self, model: DialogModelingModel,
preprocessor: DialogModelingPreprocessor, **kwargs):
"""use `model` and `preprocessor` to create a nlp text classification pipeline for prediction

Args:
model (SequenceClassificationModel): a model instance
preprocessor (SequenceClassificationPreprocessor): a preprocessor instance
"""

super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.model = model
self.preprocessor = preprocessor

def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
"""process the prediction results

Args:
inputs (Dict[str, Any]): _description_

Returns:
Dict[str, str]: the prediction results
"""
sys_rsp = self.preprocessor.text_field.tokenizer.convert_ids_to_tokens(
inputs['resp'])
assert len(sys_rsp) > 2
sys_rsp = sys_rsp[1:len(sys_rsp) - 1]
# sys_rsp = self.preprocessor.text_field.tokenizer.

inputs['sys'] = sys_rsp

return inputs

+ 18
- 7
modelscope/pipelines/nlp/text_generation_pipeline.py

@@ -1,16 +1,20 @@
from typing import Dict, Optional, Union
from typing import Any, Dict, Optional, Union

from modelscope.models import Model
from modelscope.models.nlp import PalmForTextGeneration
from modelscope.preprocessors import TextGenerationPreprocessor
from modelscope.utils.constant import Tasks
import torch

from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp import PalmForTextGeneration
from ...preprocessors import TextGenerationPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline, Tensor
from ..builder import PIPELINES

__all__ = ['TextGenerationPipeline']


@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0')
@PIPELINES.register_module(
Tasks.text_generation, module_name=Pipelines.text_generation)
class TextGenerationPipeline(Pipeline):

def __init__(self,
@@ -31,10 +35,17 @@ class TextGenerationPipeline(Pipeline):
model.tokenizer,
first_sequence='sentence',
second_sequence=None)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.tokenizer = model.tokenizer

def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self, inputs: Dict[str, Tensor],
**postprocess_params) -> Dict[str, str]:
"""process the prediction results

Args:


+ 19
- 9
modelscope/pipelines/nlp/word_segmentation_pipeline.py

@@ -1,9 +1,12 @@
from typing import Any, Dict, Optional, Union

from modelscope.models import Model
from modelscope.models.nlp import StructBertForTokenClassification
from modelscope.preprocessors import TokenClassifcationPreprocessor
from modelscope.utils.constant import Tasks
import torch

from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp import SbertForTokenClassification
from ...preprocessors import TokenClassifcationPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline, Tensor
from ..builder import PIPELINES

@@ -11,12 +14,11 @@ __all__ = ['WordSegmentationPipeline']


@PIPELINES.register_module(
Tasks.word_segmentation,
module_name=r'structbert-chinese-word-segmentation')
Tasks.word_segmentation, module_name=Pipelines.word_segmentation)
class WordSegmentationPipeline(Pipeline):

def __init__(self,
model: Union[StructBertForTokenClassification, str],
model: Union[SbertForTokenClassification, str],
preprocessor: Optional[TokenClassifcationPreprocessor] = None,
**kwargs):
"""use `model` and `preprocessor` to create a nlp word segmentation pipeline for prediction
@@ -27,15 +29,23 @@ class WordSegmentationPipeline(Pipeline):
"""
model = model if isinstance(
model,
StructBertForTokenClassification) else Model.from_pretrained(model)
SbertForTokenClassification) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = TokenClassifcationPreprocessor(model.model_dir)
model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.tokenizer = preprocessor.tokenizer
self.config = model.config
assert len(self.config.id2label) > 0
self.id2label = self.config.id2label

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self, inputs: Dict[str, Any],
**postprocess_params) -> Dict[str, str]:
"""process the prediction results

Args:


+ 33
- 0
modelscope/pipelines/outputs.py

@@ -45,6 +45,12 @@ TASK_OUTPUTS = {
Tasks.image_matting: ['output_png'],
Tasks.image_generation: ['output_png'],

# action recognition result for single video
# {
# "output_label": "abseiling"
# }
Tasks.action_recognition: ['output_label'],

# pose estimation result for single sample
# {
# "poses": np.array with shape [num_pose, num_keypoint, 3],
@@ -54,6 +60,13 @@ TASK_OUTPUTS = {
# }
Tasks.pose_estimation: ['poses', 'boxes'],

# ocr detection result for single sample
# {
# "det_polygons": np.array with shape [num_text, 8], each box is
# [x1, y1, x2, y2, x3, y3, x4, y4]
# }
Tasks.ocr_detection: ['det_polygons'],

# ============ nlp tasks ===================

# text classification result for single sample
@@ -69,6 +82,12 @@ TASK_OUTPUTS = {
# }
Tasks.text_generation: ['text'],

# fill mask result for single sample
# {
# "text": "this is the text which masks filled by model."
# }
Tasks.fill_mask: ['text'],

# word segmentation result for single sample
# {
# "output": "今天 天气 不错 , 适合 出去 游玩"
@@ -82,6 +101,20 @@ TASK_OUTPUTS = {
# }
Tasks.sentence_similarity: ['scores', 'labels'],

# sentiment classification result for single sample
# {
# "labels": ["happy", "sad", "calm", "angry"],
# "scores": [0.9, 0.1, 0.05, 0.05]
# }
Tasks.sentiment_classification: ['scores', 'labels'],

# nli result for single sample
# {
# "labels": ["happy", "sad", "calm", "angry"],
# "scores": [0.9, 0.1, 0.05, 0.05]
# }
Tasks.nli: ['scores', 'labels'],

# ============ audio tasks ===================

# audio processed for single file in PCM format


+ 44
- 12
modelscope/pipelines/util.py

@@ -2,8 +2,8 @@
import os.path as osp
from typing import List, Union

from maas_hub.file_download import model_file_download
from modelscope.hub.api import HubApi
from modelscope.hub.file_download import model_file_download
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile
from modelscope.utils.logger import get_logger
@@ -20,31 +20,63 @@ def is_config_has_model(cfg_file):
return False


def is_model_name(model: Union[str, List]):
""" whether model is a valid modelhub path
def is_official_hub_path(path: Union[str, List]):
""" Whether path is a official hub name or a valid local
path to official hub directory.
"""

def is_official_hub_impl(path):
if osp.exists(path):
cfg_file = osp.join(path, ModelFile.CONFIGURATION)
return osp.exists(cfg_file)
else:
try:
_ = HubApi().get_model(path)
return True
except Exception:
return False

if isinstance(path, str):
return is_official_hub_impl(path)
else:
results = [is_official_hub_impl(m) for m in path]
all_true = all(results)
any_true = any(results)
if any_true and not all_true:
raise ValueError(
f'some models are hub addresses, some are not, model list: {path}'
)

return all_true


def is_model(path: Union[str, List]):
""" whether path is a valid modelhub path and containing model config
"""

def is_model_name_impl(model):
if osp.exists(model):
cfg_file = osp.join(model, ModelFile.CONFIGURATION)
def is_modelhub_path_impl(path):
if osp.exists(path):
cfg_file = osp.join(path, ModelFile.CONFIGURATION)
if osp.exists(cfg_file):
return is_config_has_model(cfg_file)
else:
return False
else:
try:
cfg_file = model_file_download(model, ModelFile.CONFIGURATION)
cfg_file = model_file_download(path, ModelFile.CONFIGURATION)
return is_config_has_model(cfg_file)
except Exception:
return False

if isinstance(model, str):
return is_model_name_impl(model)
if isinstance(path, str):
return is_modelhub_path_impl(path)
else:
results = [is_model_name_impl(m) for m in model]
results = [is_modelhub_path_impl(m) for m in path]
all_true = all(results)
any_true = any(results)
if any_true and not all_true:
raise ValueError('some model are hub address, some are not')
raise ValueError(
f'some models are hub addresses, some are not, model list: {path}'
)

return all_true

+ 4
- 4
modelscope/preprocessors/__init__.py

@@ -1,12 +1,12 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from .audio import LinearAECAndFbank
# from .audio import LinearAECAndFbank
from .base import Preprocessor
from .builder import PREPROCESSORS, build_preprocessor
# from .builder import PREPROCESSORS, build_preprocessor
from .common import Compose
from .image import LoadImage, load_image
from .nlp import * # noqa F403
from .space.dialog_intent_prediction_preprocessor import * # noqa F403
from .space.dialog_modeling_preprocessor import * # noqa F403
from .space.dialog_state_tracking_preprocessor import * # noqa F403
from .text_to_speech import * # noqa F403

# from .text_to_speech import * # noqa F403

+ 2
- 1
modelscope/preprocessors/image.py

@@ -5,11 +5,12 @@ from typing import Dict, Union
from PIL import Image, ImageOps

from modelscope.fileio import File
from modelscope.metainfo import Preprocessors
from modelscope.utils.constant import Fields
from .builder import PREPROCESSORS


@PREPROCESSORS.register_module(Fields.cv)
@PREPROCESSORS.register_module(Fields.cv, Preprocessors.load_image)
class LoadImage:
"""Load an image from file or url.
Added or updated keys are "filename", "img", "img_shape",


modelscope/pipelines/multi_modal/image_caption_pipeline.py → modelscope/preprocessors/multi_modal.py

@@ -1,32 +1,48 @@
from typing import Any, Dict
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp
from typing import Any, Dict, Union

import numpy as np
import torch
from PIL import Image

from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
from ..base import Pipeline
from ..builder import PIPELINES
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Preprocessors
from modelscope.utils.constant import Fields, ModelFile
from modelscope.utils.type_assert import type_assert
from .base import Preprocessor
from .builder import PREPROCESSORS
from .image import load_image

logger = get_logger()
__all__ = [
'OfaImageCaptionPreprocessor',
]


@PIPELINES.register_module(Tasks.image_captioning, module_name='ofa')
class ImageCaptionPipeline(Pipeline):
# TODO: refine using modelhub
def __init__(self, model: str, bpe_dir: str):
super().__init__()
# turn on cuda if GPU is available
@PREPROCESSORS.register_module(
Fields.multi_modal, module_name=Preprocessors.ofa_image_caption)
class OfaImageCaptionPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path

Args:
model_dir (str): model path
"""
super().__init__(*args, **kwargs)

if osp.exists(model_dir):
local_model_dir = model_dir
else:
local_model_dir = snapshot_download(model_dir)
local_model = osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE)
bpe_dir = local_model_dir

from fairseq import checkpoint_utils, tasks, utils
from ofa.tasks.mm_tasks import CaptionTask

tasks.register_task('caption', CaptionTask)
use_cuda = False
# use fp16 only when GPU is available
use_fp16 = False

overrides = {
'bpe_dir': bpe_dir,
'eval_cider': False,
@@ -35,21 +51,9 @@ class ImageCaptionPipeline(Pipeline):
'no_repeat_ngram_size': 3,
'seed': 7
}
models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
utils.split_paths(model), arg_overrides=overrides)

# Move models to GPU
for model in models:
model.eval()
if use_cuda:
model.cuda()
if use_fp16:
model.half()
model.prepare_for_inference_(cfg)
self.models = models
# Initialize generator
self.generator = task.build_generator(models, cfg.generation)

model, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
utils.split_paths(local_model), arg_overrides=overrides)
del model
# Initialize transform
from torchvision import transforms
mean = [0.5, 0.5, 0.5]
@@ -69,7 +73,8 @@ class ImageCaptionPipeline(Pipeline):
self.eos_item = torch.LongTensor([task.src_dict.eos()])
self.pad_idx = task.src_dict.pad()

def preprocess(self, input: Input) -> Dict[str, Any]:
@type_assert(object, (str, tuple, Image.Image))
def __call__(self, data: Union[str, tuple, Image.Image]) -> Dict[str, Any]:

def encode_text(text, length=None, append_bos=False, append_eos=False):
s = self.task.tgt_dict.encode_line(
@@ -84,11 +89,11 @@ class ImageCaptionPipeline(Pipeline):
s = torch.cat([s, self.eos_item])
return s

if isinstance(input, Image.Image):
patch_image = self.patch_resize_transform(input).unsqueeze(0)
if isinstance(data, Image.Image):
patch_image = self.patch_resize_transform(data).unsqueeze(0)
else:
patch_image = self.patch_resize_transform(
load_image(input)).unsqueeze(0)
load_image(data)).unsqueeze(0)
patch_mask = torch.tensor([True])
text = 'what does the image describe?'
src_text = encode_text(
@@ -105,17 +110,3 @@ class ImageCaptionPipeline(Pipeline):
}
}
return sample

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
from ofa.utils.eval_utils import eval_caption

results, _ = eval_caption(self.task, self.generator, self.models,
input)
return {
'image_id': results[0]['image_id'],
'caption': results[0]['caption']
}

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
# What should we do here ?
return inputs

+ 223
- 21
modelscope/preprocessors/nlp.py

@@ -5,14 +5,17 @@ from typing import Any, Dict, Union

from transformers import AutoTokenizer

from modelscope.utils.constant import Fields, InputFields
from modelscope.utils.type_assert import type_assert
from ..metainfo import Models, Preprocessors
from ..utils.constant import Fields, InputFields
from ..utils.type_assert import type_assert
from .base import Preprocessor
from .builder import PREPROCESSORS

__all__ = [
'Tokenize', 'SequenceClassificationPreprocessor',
'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor'
'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor',
'NLIPreprocessor', 'SentimentClassificationPreprocessor',
'FillMaskPreprocessor'
]


@@ -31,7 +34,141 @@ class Tokenize(Preprocessor):


@PREPROCESSORS.register_module(
Fields.nlp, module_name=r'bert-sequence-classification')
Fields.nlp, module_name=Preprocessors.nli_tokenizer)
class NLIPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path

Args:
model_dir (str): model path
"""

super().__init__(*args, **kwargs)

from sofa import SbertTokenizer
self.model_dir: str = model_dir
self.first_sequence: str = kwargs.pop('first_sequence',
'first_sequence')
self.second_sequence = kwargs.pop('second_sequence', 'second_sequence')
self.sequence_length = kwargs.pop('sequence_length', 128)

self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)

@type_assert(object, tuple)
def __call__(self, data: tuple) -> Dict[str, Any]:
"""process the raw input data

Args:
data (tuple): [sentence1, sentence2]
sentence1 (str): a sentence
Example:
'you are so handsome.'
sentence2 (str): a sentence
Example:
'you are so beautiful.'
Returns:
Dict[str, Any]: the preprocessed data
"""
sentence1, sentence2 = data
new_data = {
self.first_sequence: sentence1,
self.second_sequence: sentence2
}
# preprocess the data for the model input

rst = {
'id': [],
'input_ids': [],
'attention_mask': [],
'token_type_ids': []
}

max_seq_length = self.sequence_length

text_a = new_data[self.first_sequence]
text_b = new_data[self.second_sequence]
feature = self.tokenizer(
text_a,
text_b,
padding=False,
truncation=True,
max_length=max_seq_length)

rst['id'].append(new_data.get('id', str(uuid.uuid4())))
rst['input_ids'].append(feature['input_ids'])
rst['attention_mask'].append(feature['attention_mask'])
rst['token_type_ids'].append(feature['token_type_ids'])

return rst


@PREPROCESSORS.register_module(
Fields.nlp, module_name=Preprocessors.sen_cls_tokenizer)
class SentimentClassificationPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path

Args:
model_dir (str): model path
"""

super().__init__(*args, **kwargs)

from sofa import SbertTokenizer
self.model_dir: str = model_dir
self.first_sequence: str = kwargs.pop('first_sequence',
'first_sequence')
self.second_sequence = kwargs.pop('second_sequence', 'second_sequence')
self.sequence_length = kwargs.pop('sequence_length', 128)

self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)

@type_assert(object, str)
def __call__(self, data: str) -> Dict[str, Any]:
"""process the raw input data

Args:
data (str): a sentence
Example:
'you are so handsome.'
Returns:
Dict[str, Any]: the preprocessed data
"""

new_data = {self.first_sequence: data}
# preprocess the data for the model input

rst = {
'id': [],
'input_ids': [],
'attention_mask': [],
'token_type_ids': []
}

max_seq_length = self.sequence_length

text_a = new_data[self.first_sequence]

text_b = new_data.get(self.second_sequence, None)
feature = self.tokenizer(
text_a,
text_b,
padding='max_length',
truncation=True,
max_length=max_seq_length)

rst['id'].append(new_data.get('id', str(uuid.uuid4())))
rst['input_ids'].append(feature['input_ids'])
rst['attention_mask'].append(feature['attention_mask'])
rst['token_type_ids'].append(feature['token_type_ids'])

return rst


@PREPROCESSORS.register_module(
Fields.nlp, module_name=Preprocessors.bert_seq_cls_tokenizer)
class SequenceClassificationPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
@@ -53,12 +190,12 @@ class SequenceClassificationPreprocessor(Preprocessor):
self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)
print(f'this is the tokenzier {self.tokenizer}')

@type_assert(object, (str, tuple))
def __call__(self, data: Union[str, tuple]) -> Dict[str, Any]:
@type_assert(object, (str, tuple, Dict))
def __call__(self, data: Union[str, tuple, Dict]) -> Dict[str, Any]:
"""process the raw input data

Args:
data (str or tuple):
data (str, tuple or Dict):
sentence1 (str): a sentence
Example:
'you are so handsome.'
@@ -70,22 +207,31 @@ class SequenceClassificationPreprocessor(Preprocessor):
sentence2 (str): a sentence
Example:
'you are so beautiful.'
or
{field1: field_value1, field2: field_value2}
field1 (str): field name, default 'first_sequence'
field_value1 (str): a sentence
Example:
'you are so handsome.'

field2 (str): field name, default 'second_sequence'
field_value2 (str): a sentence
Example:
'you are so beautiful.'

Returns:
Dict[str, Any]: the preprocessed data
"""

if not isinstance(data, tuple):
data = (
data,
None,
)

sentence1, sentence2 = data
new_data = {
self.first_sequence: sentence1,
self.second_sequence: sentence2
}
if isinstance(data, str):
new_data = {self.first_sequence: data}
elif isinstance(data, tuple):
sentence1, sentence2 = data
new_data = {
self.first_sequence: sentence1,
self.second_sequence: sentence2
}
else:
new_data = data

# preprocess the data for the model input

@@ -115,7 +261,8 @@ class SequenceClassificationPreprocessor(Preprocessor):
return rst


@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0')
@PREPROCESSORS.register_module(
Fields.nlp, module_name=Preprocessors.palm_text_gen_tokenizer)
class TextGenerationPreprocessor(Preprocessor):

def __init__(self, model_dir: str, tokenizer, *args, **kwargs):
@@ -166,12 +313,66 @@ class TextGenerationPreprocessor(Preprocessor):

rst['input_ids'].append(feature['input_ids'])
rst['attention_mask'].append(feature['attention_mask'])
return {k: torch.tensor(v) for k, v in rst.items()}


@PREPROCESSORS.register_module(Fields.nlp)
class FillMaskPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path

Args:
model_dir (str): model path
"""
super().__init__(*args, **kwargs)
from sofa.utils.backend import AutoTokenizer
self.model_dir = model_dir
self.first_sequence: str = kwargs.pop('first_sequence',
'first_sequence')
self.sequence_length = kwargs.pop('sequence_length', 128)

self.tokenizer = AutoTokenizer.from_pretrained(
model_dir, use_fast=False)

@type_assert(object, str)
def __call__(self, data: str) -> Dict[str, Any]:
"""process the raw input data

Args:
data (str): a sentence
Example:
'you are so handsome.'

Returns:
Dict[str, Any]: the preprocessed data
"""
import torch

new_data = {self.first_sequence: data}
# preprocess the data for the model input

rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []}

max_seq_length = self.sequence_length

text_a = new_data[self.first_sequence]
feature = self.tokenizer(
text_a,
padding='max_length',
truncation=True,
max_length=max_seq_length,
return_token_type_ids=True)

rst['input_ids'].append(feature['input_ids'])
rst['attention_mask'].append(feature['attention_mask'])
rst['token_type_ids'].append(feature['token_type_ids'])

return {k: torch.tensor(v) for k, v in rst.items()}
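
A minimal sketch of the new fill-mask preprocessor in use (the model directory path is a placeholder):

preprocessor = FillMaskPreprocessor('/path/to/model_dir')
features = preprocessor('you are so handsome.')
# features is a dict of torch tensors of shape [1, sequence_length]:
# 'input_ids', 'attention_mask' and 'token_type_ids'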


@PREPROCESSORS.register_module(
Fields.nlp, module_name=r'bert-token-classification')
Fields.nlp, module_name=Preprocessors.token_cls_tokenizer)
class TokenClassifcationPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
@@ -199,6 +400,7 @@ class TokenClassifcationPreprocessor(Preprocessor):
Returns:
Dict[str, Any]: the preprocessed data
"""

# preprocess the data for the model input

text = data.replace(' ', '').strip()


+ 4
- 5
modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py View File

@@ -3,13 +3,12 @@
import os
from typing import Any, Dict

from modelscope.preprocessors.space.fields.intent_field import \
IntentBPETextField
from modelscope.utils.config import Config
from modelscope.utils.constant import Fields
from modelscope.utils.type_assert import type_assert
from ...utils.config import Config
from ...utils.constant import Fields
from ...utils.type_assert import type_assert
from ..base import Preprocessor
from ..builder import PREPROCESSORS
from .fields.intent_field import IntentBPETextField

__all__ = ['DialogIntentPredictionPreprocessor']



+ 6
- 8
modelscope/preprocessors/space/dialog_modeling_preprocessor.py View File

@@ -1,16 +1,14 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import uuid
from typing import Any, Dict, Union

from modelscope.preprocessors.space.fields.gen_field import \
MultiWOZBPETextField
from modelscope.utils.config import Config
from modelscope.utils.constant import Fields, InputFields
from modelscope.utils.type_assert import type_assert
from typing import Any, Dict

from ...utils.config import Config
from ...utils.constant import Fields
from ...utils.type_assert import type_assert
from ..base import Preprocessor
from ..builder import PREPROCESSORS
from .fields.gen_field import MultiWOZBPETextField

__all__ = ['DialogModelingPreprocessor']



+ 31
- 29
modelscope/preprocessors/space/fields/dst_processors.py View File

@@ -154,14 +154,16 @@ utter3 = {
'User-2':
'I am looking for an expensive indian restaurant in the area of centre.',
'System-2':
'Might I recommend Saffron Brasserie? That is an expensive Indian restaurant in the center of town. I can book a table for you, if you like.',
'Might I recommend Saffron Brasserie? That is an expensive Indian restaurant '
'in the center of town. I can book a table for you, if you like.',
'Dialog_Act-2': {
'Restaurant-Recommend': [['area', 'center of town'],
['food', 'Indian'],
['name', 'Saffron Brasserie'],
['pricerange', 'expensive']]
},
'User-3': 'Sure thing, please book for 6 people at 19:30 on Saturday.'
'User-3':
'Sure thing, please book for 6 people at 19:30 on Saturday.'
}

history_states3 = [{}, {
@@ -346,7 +348,6 @@ history_states3 = [{}, {


class DSTProcessor(object):

ACTS_DICT = {
'taxi-depart': 'taxi-departure',
'taxi-dest': 'taxi-destination',
@@ -380,7 +381,8 @@ class DSTProcessor(object):

def _convert_inputs_to_utterances(self, inputs: dict,
history_states: list):
"""This method is to generate the utterances with user, sys, dialog_acts and metadata, while metadata is from the history_states or the output from the inference pipline"""
"""This method is to generate the utterances with user, sys, dialog_acts and metadata,
while metadata is from the history_states or the output from the inference pipline"""

utterances = []
user_inputs = []
@@ -427,8 +429,8 @@ class DSTProcessor(object):
if isinstance(item, dict):
for a in item:
aa = a.lower().split('-')
if aa[1] == 'inform' or aa[1] == 'recommend' or aa[
1] == 'select' or aa[1] == 'book':
if aa[1] == 'inform' or aa[1] == 'recommend' or \
aa[1] == 'select' or aa[1] == 'book':
for i in item[a]:
s = i[0].lower()
v = i[1].lower().strip()
@@ -443,7 +445,7 @@ class DSTProcessor(object):
if key not in s_dict:
s_dict[key] = list([v])
# ... Option 2: Keep last informed value
#s_dict[key] = list([v])
# s_dict[key] = list([v])

return s_dict

@@ -454,26 +456,26 @@ class multiwoz22Processor(DSTProcessor):
super().__init__()

def normalize_time(self, text):
text = re.sub('(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2',
text = re.sub(r'(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2',
text) # am/pm without space
text = re.sub('(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3',
text = re.sub(r'(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3',
text) # am/pm short to long form
text = re.sub(
'(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)',
r'(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)',
r'\1\2 \3:\4\5', text) # Missing separator
text = re.sub('(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3',
text = re.sub(r'(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3',
text) # Wrong separator
text = re.sub('(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)',
text = re.sub(r'(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)',
r'\1\2 \3:00\4', text) # normalize simple full hour time
text = re.sub('(^| )(\d{1}:\d{2})', r'\g<1>0\2',
text = re.sub(r'(^| )(\d{1}:\d{2})', r'\g<1>0\2',
text) # Add missing leading 0
# Map 12 hour times to 24 hour times
text = re.sub(
'(\d{2})(:\d{2}) ?p\.?m\.?', lambda x: str(
int(x.groups()[0]) + 12
if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups(
)[1], text)
text = re.sub('(^| )24:(\d{2})', r'\g<1>00:\2',
text = \
re.sub(
r'(\d{2})(:\d{2}) ?p\.?m\.?',
lambda x: str(int(x.groups()[0]) + 12
if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups()[1], text)
text = re.sub(r'(^| )24:(\d{2})', r'\g<1>00:\2',
text) # Correct times that use 24 as hour
return text
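
A hand-traced example of the normalization chain above (the input sentence is illustrative and worked out from the regexes, not taken from this diff):

proc = multiwoz22Processor()
proc.normalize_time('book a table at 530pm')
# 'at 530pm' -> 'at 530 pm'   (am/pm without space)
#            -> 'at 5:30 pm'  (missing separator)
#            -> 'at 05:30 pm' (add missing leading 0)
#            -> 'at 17:30'    (map 12 hour time to 24 hour time)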

@@ -508,8 +510,8 @@ class multiwoz22Processor(DSTProcessor):
if isinstance(acts[d][t]['dialog_act'], dict):
for a in acts[d][t]['dialog_act']:
aa = a.lower().split('-')
if aa[1] == 'inform' or aa[1] == 'recommend' or aa[
1] == 'select' or aa[1] == 'book':
if aa[1] == 'inform' or aa[1] == 'recommend' \
or aa[1] == 'select' or aa[1] == 'book':
for i in acts[d][t]['dialog_act'][a]:
s = i[0].lower()
v = i[1].lower().strip()
@@ -524,7 +526,7 @@ class multiwoz22Processor(DSTProcessor):
if key not in s_dict:
s_dict[key] = list([v])
# ... Option 2: Keep last informed value
#s_dict[key] = list([v])
# s_dict[key] = list([v])
return s_dict

# This should only contain label normalizations. All other mappings should
@@ -560,7 +562,7 @@ class multiwoz22Processor(DSTProcessor):
utt_lower = convert_to_unicode(utt).lower()
utt_lower = self.normalize_text(utt_lower)
utt_tok = [
tok for tok in map(str.strip, re.split('(\W+)', utt_lower))
tok for tok in map(str.strip, re.split(r'(\W+)', utt_lower))
if len(tok) > 0
]
return utt_tok
@@ -582,7 +584,7 @@ class multiwoz22Processor(DSTProcessor):
find_pos = []
found = False
label_list = [
item for item in map(str.strip, re.split('(\W+)', value_label))
item for item in map(str.strip, re.split(r'(\W+)', value_label))
if len(item) > 0
]
len_label = len(label_list)
@@ -633,11 +635,11 @@ class multiwoz22Processor(DSTProcessor):
def is_in_list(self, tok, value):
found = False
tok_list = [
item for item in map(str.strip, re.split('(\W+)', tok))
item for item in map(str.strip, re.split(r'(\W+)', tok))
if len(item) > 0
]
value_list = [
item for item in map(str.strip, re.split('(\W+)', value))
item for item in map(str.strip, re.split(r'(\W+)', value))
if len(item) > 0
]
tok_len = len(tok_list)
@@ -938,8 +940,8 @@ class multiwoz22Processor(DSTProcessor):
if slot not in diag_seen_slots_dict or value_label != diag_seen_slots_value_dict[
slot]:
print('(%s): %s, ' % (slot, value_label), end='')
elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[
slot] and class_type != 'copy_value' and class_type != 'inform':
elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[slot] \
and class_type != 'copy_value' and class_type != 'inform':
# If slot has seen before and its class type did not change, label this slot a not present,
# assuming that the slot has not actually been mentioned in this turn.
# Exceptions are copy_value and inform. If a seen slot has been tagged as copy_value or inform,
@@ -1262,7 +1264,7 @@ def convert_examples_to_features(examples,

def _get_start_end_pos(class_type, token_label_ids, max_seq_length):
if class_type == 'copy_value' and 1 not in token_label_ids:
#logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.")
# logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.")
class_type = 'none'
start_pos = 0
end_pos = 0


+ 4
- 4
modelscope/preprocessors/space/fields/gen_field.py View File

@@ -8,10 +8,10 @@ from itertools import chain

import numpy as np

from modelscope.preprocessors.space.tokenizer import Tokenizer
from modelscope.utils.nlp.space import ontology, utils
from modelscope.utils.nlp.space.db_ops import MultiWozDB
from modelscope.utils.nlp.space.utils import list2np
from ....utils.nlp.space import ontology, utils
from ....utils.nlp.space.db_ops import MultiWozDB
from ....utils.nlp.space.utils import list2np
from ..tokenizer import Tokenizer


class BPETextField(object):


+ 4
- 4
modelscope/preprocessors/space/fields/intent_field.py View File

@@ -14,10 +14,10 @@ import json
import numpy as np
from tqdm import tqdm

from modelscope.preprocessors.space.tokenizer import Tokenizer
from modelscope.utils.nlp.space import ontology, utils
from modelscope.utils.nlp.space.scores import hierarchical_set_score
from modelscope.utils.nlp.space.utils import list2np
from ....utils.nlp.space import ontology, utils
from ....utils.nlp.space.scores import hierarchical_set_score
from ....utils.nlp.space.utils import list2np
from ..tokenizer import Tokenizer


class BPETextField(object):


+ 3
- 4
modelscope/preprocessors/text_to_speech.py View File

@@ -2,9 +2,8 @@
import io
from typing import Any, Dict, Union

import ttsfrd

from modelscope.fileio import File
from modelscope.metainfo import Preprocessors
from modelscope.models.audio.tts.frontend import GenericTtsFrontend
from modelscope.models.base import Model
from modelscope.utils.audio.tts_exceptions import * # noqa F403
@@ -12,11 +11,11 @@ from modelscope.utils.constant import Fields
from .base import Preprocessor
from .builder import PREPROCESSORS

__all__ = ['TextToTacotronSymbols', 'text_to_tacotron_symbols']
__all__ = ['TextToTacotronSymbols']


@PREPROCESSORS.register_module(
Fields.audio, module_name=r'text_to_tacotron_symbols')
Fields.audio, module_name=Preprocessors.text_to_tacotron_symbols)
class TextToTacotronSymbols(Preprocessor):
"""extract tacotron symbols from text.



+ 232
- 0
modelscope/preprocessors/video.py View File

@@ -0,0 +1,232 @@
import math
import os
import random

import decord
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
import torch.utils.dlpack as dlpack
import torchvision.transforms._transforms_video as transforms
from decord import VideoReader
from torchvision.transforms import Compose


def ReadVideoData(cfg, video_path):
""" simple interface to load video frames from file

Args:
cfg (Config): The global config object.
video_path (str): video file path
"""
data = _decode_video(cfg, video_path)
transform = kinetics400_transform(cfg)
data_list = []
for i in range(data.size(0)):
for j in range(cfg.TEST.NUM_SPATIAL_CROPS):
transform.transforms[1].set_spatial_index(j)
data_list.append(transform(data[i]))
return torch.stack(data_list, dim=0)
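
A usage sketch, assuming cfg carries the DATA/TEST fields referenced below; the video path is this repo's test asset:

data = ReadVideoData(cfg, 'data/test/videos/action_recognition_test_video.mp4')
# data stacks one transformed view per (temporal clip, spatial crop) pair along dim 0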


def kinetics400_transform(cfg):
"""
Configures the transform for the Kinetics-400 dataset.
We apply controlled spatial cropping and normalization.
Args:
cfg (Config): The global config object.
"""
resize_video = KineticsResizedCrop(
short_side_range=[cfg.DATA.TEST_SCALE, cfg.DATA.TEST_SCALE],
crop_size=cfg.DATA.TEST_CROP_SIZE,
num_spatial_crops=cfg.TEST.NUM_SPATIAL_CROPS)
std_transform_list = [
transforms.ToTensorVideo(), resize_video,
transforms.NormalizeVideo(
mean=cfg.DATA.MEAN, std=cfg.DATA.STD, inplace=True)
]
return Compose(std_transform_list)


def _interval_based_sampling(vid_length, vid_fps, target_fps, clip_idx,
num_clips, num_frames, interval, minus_interval):
"""
Generates the frame index list using interval-based sampling.
Args:
vid_length (int): the length of the whole video (valid selection range).
vid_fps (int): the original video fps.
target_fps (int): the normalized video fps.
clip_idx (int): -1 for random temporal sampling; non-negative values select
a specific clip from the video.
num_clips (int): the total number of clips to be sampled from each video.
Combined with clip_idx, the sampled clip is the "clip_idx-th"
clip out of "num_clips" clips.
num_frames (int): number of frames in each sampled clip.
interval (int): the interval between sampled frames.
minus_interval (bool): controls the end index of the clip.
Returns:
index (tensor): the sampled frame indices
"""
if num_frames == 1:
index = [random.randint(0, vid_length - 1)]
else:
# transform FPS
clip_length = num_frames * interval * vid_fps / target_fps

max_idx = max(vid_length - clip_length, 0)
start_idx = clip_idx * math.floor(max_idx / (num_clips - 1))
if minus_interval:
end_idx = start_idx + clip_length - interval
else:
end_idx = start_idx + clip_length - 1

index = torch.linspace(start_idx, end_idx, num_frames)
index = torch.clamp(index, 0, vid_length - 1).long()

return index
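
A hand-worked sketch of the arithmetic above (all numbers are illustrative):

index = _interval_based_sampling(vid_length=300, vid_fps=30, target_fps=30,
                                 clip_idx=0, num_clips=3, num_frames=16,
                                 interval=4, minus_interval=False)
# clip_length = 16 * 4 * 30 / 30 = 64 frames; max_idx = 300 - 64 = 236;
# start_idx = 0 * floor(236 / 2) = 0, so index holds 16 evenly spaced
# frame indices spanning [0, 63]; clip_idx=1 would span [118, 181].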


def _decode_video_frames_list(cfg, frames_list, vid_fps):
"""
Decodes the video given the numpy frames.
Args:
cfg (Config): The global config object.
frames_list (list): all frames for a video, the frames should be numpy array.
vid_fps (int): the fps of this video.
Returns:
frames (Tensor): video tensor data
"""
assert isinstance(frames_list, list)
num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS

frame_list = []
for clip_idx in range(num_clips_per_video):
# for each clip in the video,
# a list is generated before decoding the specified frames from the video
list_ = _interval_based_sampling(
len(frames_list), vid_fps, cfg.DATA.TARGET_FPS, clip_idx,
num_clips_per_video, cfg.DATA.NUM_INPUT_FRAMES,
cfg.DATA.SAMPLING_RATE, cfg.DATA.MINUS_INTERVAL)
frames = None
frames = torch.from_numpy(
np.stack([frames_list[l_index] for l_index in list_.tolist()],
axis=0))
frame_list.append(frames)
frames = torch.stack(frame_list)
if num_clips_per_video == 1:
frames = frames.squeeze(0)

return frames


def _decode_video(cfg, path):
"""
Decodes the video from the given file path.
Args:
cfg (Config): The global config object.
path (str): video file path.
Returns:
frames (Tensor): video tensor data
"""
vr = VideoReader(path)

num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS

frame_list = []
for clip_idx in range(num_clips_per_video):
# for each clip in the video,
# a list is generated before decoding the specified frames from the video
list_ = _interval_based_sampling(
len(vr), vr.get_avg_fps(), cfg.DATA.TARGET_FPS, clip_idx,
num_clips_per_video, cfg.DATA.NUM_INPUT_FRAMES,
cfg.DATA.SAMPLING_RATE, cfg.DATA.MINUS_INTERVAL)
frames = None
if path.endswith('.avi'):
append_list = torch.arange(0, list_[0], 4)
frames = dlpack.from_dlpack(
vr.get_batch(torch.cat([append_list,
list_])).to_dlpack()).clone()
frames = frames[append_list.shape[0]:]
else:
frames = dlpack.from_dlpack(
vr.get_batch(list_).to_dlpack()).clone()
frame_list.append(frames)
frames = torch.stack(frame_list)
if num_clips_per_video == 1:
frames = frames.squeeze(0)
del vr
return frames


class KineticsResizedCrop(object):
"""Perform resize and crop for kinetics-400 dataset
Args:
short_side_range (list): The length of the short side range. In inference, this should be [256, 256]
crop_size (int): The cropped size for frames.
num_spatial_crops (int): The number of the cropped spatial regions in each video.
"""

def __init__(
self,
short_side_range,
crop_size,
num_spatial_crops=1,
):
self.idx = -1
self.short_side_range = short_side_range
self.crop_size = int(crop_size)
self.num_spatial_crops = num_spatial_crops

def _get_controlled_crop(self, clip):
"""Perform controlled crop for video tensor.
Args:
clip (Tensor): the video data, the shape is [T, C, H, W]
"""
_, _, clip_height, clip_width = clip.shape

length = self.short_side_range[0]

if clip_height < clip_width:
new_clip_height = int(length)
new_clip_width = int(clip_width / clip_height * new_clip_height)
new_clip = torch.nn.functional.interpolate(
clip, size=(new_clip_height, new_clip_width), mode='bilinear')
else:
new_clip_width = int(length)
new_clip_height = int(clip_height / clip_width * new_clip_width)
new_clip = torch.nn.functional.interpolate(
clip, size=(new_clip_height, new_clip_width), mode='bilinear')
x_max = int(new_clip_width - self.crop_size)
y_max = int(new_clip_height - self.crop_size)
if self.num_spatial_crops == 1:
x = x_max // 2
y = y_max // 2
elif self.num_spatial_crops == 3:
if self.idx == 0:
if new_clip_width == length:
x = x_max // 2
y = 0
elif new_clip_height == length:
x = 0
y = y_max // 2
elif self.idx == 1:
x = x_max // 2
y = y_max // 2
elif self.idx == 2:
if new_clip_width == length:
x = x_max // 2
y = y_max
elif new_clip_height == length:
x = x_max
y = y_max // 2
return new_clip[:, :, y:y + self.crop_size, x:x + self.crop_size]

def set_spatial_index(self, idx):
"""Set the spatial cropping index for controlled cropping..
Args:
idx (int): the spatial index. The value should be in [0, 1, 2], means [left, center, right], respectively.
"""
self.idx = idx

def __call__(self, clip):
return self._get_controlled_crop(clip)
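
A sketch of three-crop evaluation with this class (tensor shapes are illustrative):

import torch

crop = KineticsResizedCrop(short_side_range=[256, 256], crop_size=224,
                           num_spatial_crops=3)
clip = torch.rand(8, 3, 240, 320)  # [T, C, H, W]
views = []
for i in range(3):  # 0/1/2 -> left, center, right
    crop.set_spatial_index(i)
    views.append(crop(clip))  # each view: [8, 3, 224, 224]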

+ 22
- 0
modelscope/pydatasets/config.py View File

@@ -0,0 +1,22 @@
import os
from pathlib import Path

# Cache location
DEFAULT_CACHE_HOME = '~/.cache'
CACHE_HOME = os.getenv('CACHE_HOME', DEFAULT_CACHE_HOME)
DEFAULT_MS_CACHE_HOME = os.path.join(CACHE_HOME, 'modelscope/hub')
MS_CACHE_HOME = os.path.expanduser(
os.getenv('MS_CACHE_HOME', DEFAULT_MS_CACHE_HOME))

DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'datasets')
MS_DATASETS_CACHE = Path(
os.getenv('MS_DATASETS_CACHE', DEFAULT_MS_DATASETS_CACHE))

DOWNLOADED_DATASETS_DIR = 'downloads'
DEFAULT_DOWNLOADED_DATASETS_PATH = os.path.join(MS_DATASETS_CACHE,
DOWNLOADED_DATASETS_DIR)
DOWNLOADED_DATASETS_PATH = Path(
os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH))

MS_HUB_ENDPOINT = os.environ.get('MS_HUB_ENDPOINT',
'http://101.201.119.157:31752')
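
Every value above is resolved at import time, so overrides must be placed in the environment before the module is first imported. A minimal sketch (paths are illustrative):

import os
os.environ['MS_CACHE_HOME'] = '/data/ms_cache'
os.environ['MS_DATASETS_CACHE'] = '/data/ms_cache/datasets'

from modelscope.pydatasets import config  # import only after setting the env
print(config.MS_DATASETS_CACHE)           # -> /data/ms_cache/datasets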

+ 323
- 58
modelscope/pydatasets/py_dataset.py View File

@@ -1,64 +1,81 @@
from typing import (Any, Callable, Dict, List, Mapping, Optional, Sequence,
Union)
import os
from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional,
Sequence, Union)

from datasets import Dataset, load_dataset
import numpy as np
from datasets import Dataset
from datasets import load_dataset as hf_load_dataset
from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE
from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES
from datasets.utils.file_utils import (is_relative_path,
relative_to_absolute_path)

from modelscope.pydatasets.config import MS_DATASETS_CACHE
from modelscope.pydatasets.utils.ms_api import MsApi
from modelscope.utils.constant import Hubs
from modelscope.utils.logger import get_logger

logger = get_logger()


def format_list(para) -> List:
if para is None:
para = []
elif isinstance(para, str):
para = [para]
elif len(set(para)) < len(para):
raise ValueError(f'List columns contains duplicates: {para}')
return para


class PyDataset:
_hf_ds = None # holds the underlying HuggingFace Dataset
"""A PyDataset backed by hugging face Dataset."""

def __init__(self, hf_ds: Dataset):
def __init__(self, hf_ds: Dataset, target: Optional[str] = None):
self._hf_ds = hf_ds
self.target = None
self.target = target

def __iter__(self):
if isinstance(self._hf_ds, Dataset):
for item in self._hf_ds:
if self.target is not None:
yield item[self.target]
else:
yield item
else:
for ds in self._hf_ds.values():
for item in ds:
if self.target is not None:
yield item[self.target]
else:
yield item
for item in self._hf_ds:
if self.target is not None:
yield item[self.target]
else:
yield item

def __getitem__(self, key):
return self._hf_ds[key]

@classmethod
def from_hf_dataset(cls,
hf_ds: Dataset,
target: str = None) -> 'PyDataset':
dataset = cls(hf_ds)
dataset.target = target
return dataset
target: str = None) -> Union[dict, 'PyDataset']:
if isinstance(hf_ds, Dataset):
return cls(hf_ds, target)
if len(hf_ds.keys()) == 1:
return cls(next(iter(hf_ds.values())), target)
return {k: cls(v, target) for k, v in hf_ds.items()}

@staticmethod
def load(path: Union[str, list],
target: Optional[str] = None,
version: Optional[str] = None,
name: Optional[str] = None,
split: Optional[str] = None,
data_dir: Optional[str] = None,
data_files: Optional[Union[str, Sequence[str],
Mapping[str,
Union[str,
Sequence[str]]]]] = None,
hub: Optional[Hubs] = None) -> 'PyDataset':
def load(
dataset_name: Union[str, list],
target: Optional[str] = None,
version: Optional[str] = None,
hub: Optional[Hubs] = Hubs.modelscope,
subset_name: Optional[str] = None,
split: Optional[str] = None,
data_dir: Optional[str] = None,
data_files: Optional[Union[str, Sequence[str],
Mapping[str, Union[str,
Sequence[str]]]]] = None
) -> Union[dict, 'PyDataset']:
"""Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
Args:

path (str): Path or name of the dataset.
dataset_name (str): Path or name of the dataset.
target (str, optional): Name of the column to output.
version (str, optional): Version of the dataset script to load.
name (str, optional): Defining the subset_name of the dataset.
subset_name (str, optional): Defining the subset_name of the dataset.
data_dir (str, optional): Defining the data_dir of the dataset configuration.
data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s).
split (str, optional): Which split of the data to load.
@@ -67,53 +84,302 @@ class PyDataset:
Returns:
PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset.
"""
if Hubs.modelscope == hub:
# TODO: parse data meta information from modelscope hub
# and possibly download data files to local (and update path)
print('getting data from modelscope hub')
if isinstance(path, str):
dataset = load_dataset(
path,
name=name,
if hub == Hubs.huggingface:
dataset = hf_load_dataset(
dataset_name,
name=subset_name,
revision=version,
split=split,
data_dir=data_dir,
data_files=data_files)
elif isinstance(path, list):
return PyDataset.from_hf_dataset(dataset, target=target)
else:
return PyDataset._load_ms_dataset(
dataset_name,
target=target,
subset_name=subset_name,
version=version,
split=split,
data_dir=data_dir,
data_files=data_files)

@staticmethod
def _load_ms_dataset(
dataset_name: Union[str, list],
target: Optional[str] = None,
version: Optional[str] = None,
subset_name: Optional[str] = None,
split: Optional[str] = None,
data_dir: Optional[str] = None,
data_files: Optional[Union[str, Sequence[str],
Mapping[str, Union[str,
Sequence[str]]]]] = None
) -> Union[dict, 'PyDataset']:
if isinstance(dataset_name, str):
use_hf = False
if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \
(os.path.isfile(dataset_name) and dataset_name.endswith('.py')):
use_hf = True
elif is_relative_path(dataset_name):
ms_api = MsApi()
dataset_scripts = ms_api.fetch_dataset_scripts(
dataset_name, version)
if 'py' in dataset_scripts: # dataset copied from hf datasets
dataset_name = dataset_scripts['py'][0]
use_hf = True
else:
raise FileNotFoundError(
f"Couldn't find a dataset script at {relative_to_absolute_path(dataset_name)} "
f'or any data file in the same directory.')

if use_hf:
dataset = hf_load_dataset(
dataset_name,
name=subset_name,
revision=version,
split=split,
data_dir=data_dir,
data_files=data_files,
cache_dir=MS_DATASETS_CACHE)
else:
# TODO load from ms datahub
raise NotImplementedError(
f'Loading dataset {dataset_name} from the modelscope datahub will be '
f'implemented in the future')
elif isinstance(dataset_name, list):
if target is None:
target = 'target'
dataset = Dataset.from_dict({target: [p] for p in path})
dataset = Dataset.from_dict({target: dataset_name})
else:
raise TypeError('dataset_name must be a str or a list, but got'
f' {type(path)}')
f' {type(dataset_name)}')
return PyDataset.from_hf_dataset(dataset, target=target)
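
A sketch of the load paths above ('squad' and the file names are placeholders):

from modelscope.utils.constant import Hubs

ds = PyDataset.load('squad', split='train')             # default: ModelScope hub
ds_hf = PyDataset.load('squad', hub=Hubs.huggingface)   # explicit Hugging Face hub
ds_imgs = PyDataset.load(['a.jpg', 'b.jpg'], target='image')  # wrap a plain list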

def to_torch_dataset_with_processors(
self,
preprocessors: Union[Callable, List[Callable]],
columns: Union[str, List[str]] = None,
):
preprocessor_list = preprocessors if isinstance(
preprocessors, list) else [preprocessors]

columns = format_list(columns)

columns = [
key for key in self._hf_ds.features.keys() if key in columns
]
sample = next(iter(self._hf_ds))

sample_res = {k: np.array(sample[k]) for k in columns}
for processor in preprocessor_list:
sample_res.update(
{k: np.array(v)
for k, v in processor(sample).items()})

def is_numpy_number(value):
return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
value.dtype, np.floating)

retained_columns = []
for k in sample_res.keys():
if not is_numpy_number(sample_res[k]):
logger.warning(
f'Data of column {k} is non-numeric, will be removed')
continue
retained_columns.append(k)

import torch

class MsIterableDataset(torch.utils.data.IterableDataset):

def __init__(self, dataset: Iterable):
super(MsIterableDataset).__init__()
self.dataset = dataset

def __iter__(self):
for item_dict in self.dataset:
res = {
k: np.array(item_dict[k])
for k in columns if k in retained_columns
}
for preprocessor in preprocessor_list:
res.update({
k: np.array(v)
for k, v in preprocessor(item_dict).items()
if k in retained_columns
})
yield res

return MsIterableDataset(self._hf_ds)
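
The returned iterable dataset plugs straight into a DataLoader; a sketch (the preprocessor and column name are placeholders):

from torch.utils.data import DataLoader

torch_ds = ds.to_torch_dataset_with_processors(my_preprocessor, columns=['label'])
loader = DataLoader(torch_ds, batch_size=16)
batch = next(iter(loader))  # dict of batched numeric fields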

def to_torch_dataset(
self,
columns: Union[str, List[str]] = None,
output_all_columns: bool = False,
preprocessors: Union[Callable, List[Callable]] = None,
**format_kwargs,
):
self._hf_ds.reset_format()
self._hf_ds.set_format(
type='torch',
columns=columns,
output_all_columns=output_all_columns,
format_kwargs=format_kwargs)
return self._hf_ds
"""Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to
torch.utils.data.DataLoader.

Args:
preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process
every sample of the dataset. The output type of processors is dict, and each numeric field of the dict
will be used as a field of torch.utils.data.Dataset.
columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only). If the
preprocessor is None, the arg columns must have at least one column. If the `preprocessors` is not None,
the output fields of processors will also be added.
format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`.

Returns:
:class:`torch.utils.data.Dataset`

"""
if not TORCH_AVAILABLE:
raise ImportError(
'The function to_torch_dataset requires pytorch to be installed'
)
if preprocessors is not None:
return self.to_torch_dataset_with_processors(preprocessors, columns=columns)
else:
self._hf_ds.reset_format()
self._hf_ds.set_format(
type='torch', columns=columns, format_kwargs=format_kwargs)
return self._hf_ds

def to_tf_dataset_with_processors(
self,
batch_size: int,
shuffle: bool,
preprocessors: Union[Callable, List[Callable]],
drop_remainder: bool = None,
prefetch: bool = True,
label_cols: Union[str, List[str]] = None,
columns: Union[str, List[str]] = None,
):
preprocessor_list = preprocessors if isinstance(
preprocessors, list) else [preprocessors]

label_cols = format_list(label_cols)
columns = format_list(columns)
cols_to_retain = list(set(label_cols + columns))
retained_columns = [
key for key in self._hf_ds.features.keys() if key in cols_to_retain
]
import tensorflow as tf
tf_dataset = tf.data.Dataset.from_tensor_slices(
np.arange(len(self._hf_ds), dtype=np.int64))
if shuffle:
tf_dataset = tf_dataset.shuffle(buffer_size=len(self._hf_ds))

def func(i, return_dict=False):
i = int(i)
res = {k: np.array(self._hf_ds[i][k]) for k in retained_columns}
for preprocessor in preprocessor_list:
# TODO preprocessor output may have the same key
res.update({
k: np.array(v)
for k, v in preprocessor(self._hf_ds[i]).items()
})
if return_dict:
return res
return tuple(list(res.values()))

sample_res = func(0, True)

@tf.function(input_signature=[tf.TensorSpec(None, tf.int64)])
def fetch_function(i):
output = tf.numpy_function(
func,
inp=[i],
Tout=[
tf.dtypes.as_dtype(val.dtype)
for val in sample_res.values()
],
)
return {key: output[i] for i, key in enumerate(sample_res)}

tf_dataset = tf_dataset.map(
fetch_function, num_parallel_calls=tf.data.AUTOTUNE)
if label_cols:

def split_features_and_labels(input_batch):
labels = {
key: tensor
for key, tensor in input_batch.items() if key in label_cols
}
if len(input_batch) == 1:
input_batch = next(iter(input_batch.values()))
if len(labels) == 1:
labels = next(iter(labels.values()))
return input_batch, labels

tf_dataset = tf_dataset.map(split_features_and_labels)

elif len(columns) == 1:
tf_dataset = tf_dataset.map(lambda x: next(iter(x.values())))
if batch_size > 1:
tf_dataset = tf_dataset.batch(
batch_size, drop_remainder=drop_remainder)

if prefetch:
tf_dataset = tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
return tf_dataset

def to_tf_dataset(
self,
columns: Union[str, List[str]],
batch_size: int,
shuffle: bool,
collate_fn: Callable,
preprocessors: Union[Callable, List[Callable]] = None,
columns: Union[str, List[str]] = None,
collate_fn: Callable = None,
drop_remainder: bool = None,
collate_fn_args: Dict[str, Any] = None,
label_cols: Union[str, List[str]] = None,
dummy_labels: bool = False,
prefetch: bool = True,
):
"""Create a tf.data.Dataset from the MS Dataset. This tf.data.Dataset can be passed to tf methods like
model.fit() or model.predict().

Args:
batch_size (int): Number of samples in a single batch.
shuffle(bool): Shuffle the dataset order.
preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process
every sample of the dataset. The output type of processors is dict, and each field of the dict will be
used as a field of the tf.data.Dataset. If the `preprocessors` is None, the `collate_fn`
shouldn't be None.
columns (str or List[str], default None): Dataset column(s) to be loaded. If the preprocessor is None,
the arg columns must have at least one column. If the `preprocessors` is not None, the output fields of
processors will also be added.
collate_fn(Callable, default None): A callable object used to collect lists of samples into a batch. If
the `preprocessors` is None, the `collate_fn` shouldn't be None.
drop_remainder(bool, default None): Drop the last incomplete batch when loading.
collate_fn_args (Dict, optional): A `dict` of arguments to be passed to the `collate_fn`.
label_cols (str or List[str], default None): Dataset column(s) to load as labels.
prefetch (bool, default True): Prefetch data.

Returns:
:class:`tf.data.Dataset`

"""
if not TF_AVAILABLE:
raise ImportError(
'The function to_tf_dataset requires Tensorflow to be installed.'
)
if preprocessors is not None:
return self.to_tf_dataset_with_processors(
batch_size,
shuffle,
preprocessors,
drop_remainder=drop_remainder,
prefetch=prefetch,
label_cols=label_cols,
columns=columns)

if collate_fn is None:
logger.error(
"The `preprocessors` and the `collate_fn` shouldn't both be None."
)
return None
self._hf_ds.reset_format()
return self._hf_ds.to_tf_dataset(
columns,
@@ -123,7 +389,6 @@ class PyDataset:
drop_remainder=drop_remainder,
collate_fn_args=collate_fn_args,
label_cols=label_cols,
dummy_labels=dummy_labels,
prefetch=prefetch)
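
A sketch of building a batched tf.data pipeline through this method (the dataset, preprocessor and column names are placeholders):

tf_ds = ds.to_tf_dataset(
    batch_size=16,
    shuffle=True,
    preprocessors=my_preprocessor,
    columns=['input_ids'],
    label_cols=['label'])
for features, labels in tf_ds.take(1):
    print(features, labels)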

def to_hf_dataset(self) -> Dataset:


+ 0
- 0
modelscope/pydatasets/utils/__init__.py View File


+ 66
- 0
modelscope/pydatasets/utils/ms_api.py View File

@@ -0,0 +1,66 @@
import os
from collections import defaultdict
from typing import Optional

import requests

from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH,
MS_HUB_ENDPOINT)
from modelscope.utils.logger import get_logger

logger = get_logger()


class MsApi:

def __init__(self, endpoint=MS_HUB_ENDPOINT):
self.endpoint = endpoint

def list_datasets(self):
path = f'{self.endpoint}/api/v1/datasets'
headers = None
params = {}
r = requests.get(path, params=params, headers=headers)
r.raise_for_status()
dataset_list = r.json()['Data']
return [x['Name'] for x in dataset_list]

def fetch_dataset_scripts(self,
dataset_name: str,
version: Optional[str] = 'master',
force_download=False):
datahub_url = f'{self.endpoint}/api/v1/datasets?Query={dataset_name}'
r = requests.get(datahub_url)
r.raise_for_status()
dataset_list = r.json()['Data']
if len(dataset_list) == 0:
return None
dataset_id = dataset_list[0]['Id']
version = version or 'master'
datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}'
r = requests.get(datahub_url)
r.raise_for_status()
file_list = r.json()['Data']['Files']
cache_dir = os.path.join(DOWNLOADED_DATASETS_PATH, dataset_name,
version)
os.makedirs(cache_dir, exist_ok=True)
local_paths = defaultdict(list)
for file_info in file_list:
file_path = file_info['Path']
if file_path.endswith('.py'):
datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/files?' \
f'Revision={version}&Path={file_path}'
r = requests.get(datahub_url)
r.raise_for_status()
content = r.json()['Data']['Content']
local_path = os.path.join(cache_dir, file_path)
if os.path.exists(local_path) and not force_download:
logger.warning(
f"Reusing dataset {dataset_name}'s python file ({local_path})"
)
local_paths['py'].append(local_path)
continue
with open(local_path, 'w') as f:
f.writelines(content)
local_paths['py'].append(local_path)
return local_paths
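
A usage sketch ('my_dataset' is a placeholder name):

api = MsApi()  # defaults to MS_HUB_ENDPOINT
names = api.list_datasets()
scripts = api.fetch_dataset_scripts('my_dataset', version='master')
# scripts is None when the hub does not know the dataset; otherwise
# scripts['py'] holds local paths of the cached .py loading scripts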

+ 1
- 1
modelscope/trainers/nlp/space/trainers/gen_trainer.py View File

@@ -13,7 +13,7 @@ import torch
from tqdm import tqdm
from transformers.optimization import AdamW, get_linear_schedule_with_warmup

import modelscope.utils.nlp.space.ontology as ontology
from .....utils.nlp.space import ontology
from ..metrics.metrics_tracker import MetricsTracker




+ 1
- 3
modelscope/trainers/nlp/space/trainers/intent_trainer.py View File

@@ -14,9 +14,7 @@ import torch
from tqdm import tqdm
from transformers.optimization import AdamW, get_linear_schedule_with_warmup

from modelscope.trainers.nlp.space.metrics.metrics_tracker import \
MetricsTracker
from modelscope.utils.nlp.space.args import str2bool
from ..metrics.metrics_tracker import MetricsTracker


def get_logger(log_path, name='default'):


+ 5
- 2
modelscope/utils/constant.py View File

@@ -28,9 +28,13 @@ class Tasks(object):
image_editing = 'image-editing'
image_generation = 'image-generation'
image_matting = 'image-matting'
ocr_detection = 'ocr-detection'
action_recognition = 'action-recognition'

# nlp tasks
word_segmentation = 'word-segmentation'
nli = 'nli'
sentiment_classification = 'sentiment-classification'
sentiment_analysis = 'sentiment-analysis'
sentence_similarity = 'sentence-similarity'
text_classification = 'text-classification'
@@ -45,8 +49,7 @@ class Tasks(object):
dialog_state_tracking = 'dialog-state-tracking'
table_question_answering = 'table-question-answering'
feature_extraction = 'feature-extraction'
sentence_similarity = 'sentence-similarity'
fill_mask = 'fill-mask '
fill_mask = 'fill-mask'
summarization = 'summarization'
question_answering = 'question-answering'



+ 61
- 8
modelscope/utils/hub.py View File

@@ -1,14 +1,67 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import os.path as osp
from typing import List, Optional, Union

from maas_hub.constants import MODEL_ID_SEPARATOR
from requests import HTTPError

from modelscope.hub.file_download import model_file_download
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile

# temp solution before the hub-cache is in place
def get_model_cache_dir(model_id: str, branch: str = 'master'):
model_id_expanded = model_id.replace('/',
MODEL_ID_SEPARATOR) + '.' + branch
default_cache_dir = os.path.expanduser(os.path.join('~/.cache', 'maas'))
return os.getenv('MAAS_CACHE',
os.path.join(default_cache_dir, 'hub', model_id_expanded))

def create_model_if_not_exist(
api,
model_id: str,
chinese_name: str,
visibility: Optional[int] = 5, # 1-private, 5-public
license: Optional[str] = 'apache-2.0',
revision: Optional[str] = 'master'):
exists = True
try:
api.get_model(model_id=model_id, revision=revision)
except HTTPError:
exists = False
if exists:
print(f'model {model_id} already exists, skipping creation.')
return False
else:
api.create_model(
model_id=model_id,
chinese_name=chinese_name,
visibility=visibility,
license=license)
print(f'model {model_id} successfully created.')
return True
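
A sketch, assuming api is a hub client exposing the get_model/create_model calls used above (see modelscope/hub/api.py); the model id and name are placeholders:

created = create_model_if_not_exist(
    api, 'damo/my-model', chinese_name='my model')
# prints a notice and returns False when 'damo/my-model' already exists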


def read_config(model_id_or_path: str):
""" Read config from hub or local path

Args:
model_id_or_path (str): Model repo name or local directory path.

Return:
config (:obj:`Config`): config object
"""
if not os.path.exists(model_id_or_path):
local_path = model_file_download(model_id_or_path,
ModelFile.CONFIGURATION)
else:
local_path = os.path.join(model_id_or_path, ModelFile.CONFIGURATION)

return Config.from_file(local_path)


def auto_load(model: Union[str, List[str]]):
if isinstance(model, str):
if not osp.exists(model):
model = snapshot_download(model)
else:
model = [
snapshot_download(m) if not osp.exists(m) else m for m in model
]

return model
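
Both helpers accept either a hub model id or a local path; a sketch ('damo/some-model' is a placeholder id):

cfg = read_config('damo/some-model')       # downloads the configuration file
                                           # when no local path matches
model_dir = auto_load('damo/some-model')   # snapshot download unless the
                                           # path already exists locally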

+ 1
- 1
modelscope/utils/registry.py View File

@@ -78,7 +78,7 @@ class Registry(object):
f'{self._name}[{default_group}] and will '
'be overwritten')
logger.warning(f'{self._modules[default_group][module_name]}'
'to {module_cls}')
f'to {module_cls}')
# also register module in the default group for faster access
# only by module name
self._modules[default_group][module_name] = module_cls


+ 15
- 0
modelscope/utils/test_utils.py View File

@@ -2,6 +2,9 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import unittest

from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE

TEST_LEVEL = 2
TEST_LEVEL_STR = 'TEST_LEVEL'
@@ -15,6 +18,18 @@ def test_level():
return TEST_LEVEL


def require_tf(test_case):
if not TF_AVAILABLE:
test_case = unittest.skip('test requires TensorFlow')(test_case)
return test_case


def require_torch(test_case):
if not TORCH_AVAILABLE:
test_case = unittest.skip('test requires PyTorch')(test_case)
return test_case


def set_test_level(level: int):
global TEST_LEVEL
TEST_LEVEL = level
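
A sketch of the new skip helpers applied to framework-specific tests:

import unittest

from modelscope.utils.test_utils import require_tf, require_torch


class FrameworkSpecificTest(unittest.TestCase):

    @require_torch
    def test_torch_branch(self):
        self.assertTrue(True)  # placeholder body

    @require_tf
    def test_tf_branch(self):
        self.assertTrue(True)  # placeholder body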

+ 11
- 11
requirements/audio.txt View File

@@ -1,25 +1,25 @@
#tts
h5py==2.10.0
#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp36-cp36m-linux_x86_64.whl
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp37-cp37m-linux_x86_64.whl
https://swap.oss-cn-hangzhou.aliyuncs.com/Jiaqi%2Fmaas%2Ftts%2Frequirements%2Fpytorch_wavelets-1.3.0-py3-none-any.whl?Expires=1685688388&OSSAccessKeyId=LTAI4Ffebq4d9jTVDwiSbY4L&Signature=jcQbg5EZ%2Bdys3%2F4BRn3srrKLdIg%3D
#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp38-cp38-linux_x86_64.whl
#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp39-cp39-linux_x86_64.whl
h5py
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/pytorch_wavelets-1.3.0-py3-none-any.whl
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp36-cp36m-linux_x86_64.whl; python_version=='3.6'
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp37-cp37m-linux_x86_64.whl; python_version=='3.7'
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp38-cp38-linux_x86_64.whl; python_version=='3.8'
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp39-cp39-linux_x86_64.whl; python_version=='3.9'
inflect
keras==2.2.4
keras
librosa
lxml
matplotlib
nara_wpe
numpy==1.18.*
protobuf==3.20.*
numpy
protobuf>3,<=3.20
ptflops
PyWavelets>=1.0.0
scikit-learn==0.23.2
scikit-learn
sox
tensorboard
tensorflow==1.15.*
torch==1.10.*
torch
torchaudio
torchvision
tqdm


+ 2
- 0
requirements/cv.txt View File

@@ -1 +1,3 @@
decord>=0.6.0
easydict
tf_slim

+ 3
- 2
requirements/nlp.txt View File

@@ -1,4 +1,5 @@
https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
# https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
http://ait-public.oss-cn-hangzhou-zmf.aliyuncs.com/jizhu/en_core_web_sm-2.3.1.tar.gz
https://alinlp.alibaba-inc.com/pypi/sofa-1.0.3-py3-none-any.whl
spacy>=2.3.5
# python -m spacy download en_core_web_sm

+ 4
- 1
requirements/runtime.txt View File

@@ -1,13 +1,16 @@
addict
datasets
easydict
https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl
filelock>=3.3.0
numpy
opencv-python-headless
Pillow>=6.2.0
pyyaml
requests
requests==2.27.1
scipy
setuptools==58.0.4
tokenizers<=0.10.3
tqdm>=4.64.0
transformers<=4.16.2
yapf
