diff --git a/data/test/images/image_captioning.png b/data/test/images/image_captioning.png
new file mode 100644
index 00000000..de3f1918
--- /dev/null
+++ b/data/test/images/image_captioning.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af83a94899a6d23339c3ecc5c4c58c57c835af57b531a2f4c50461184f820141
+size 603621
diff --git a/data/test/images/ocr_detection.jpg b/data/test/images/ocr_detection.jpg
new file mode 100644
index 00000000..c347810e
--- /dev/null
+++ b/data/test/images/ocr_detection.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c8435db5583400be5d11a2c17910c96133b462c8a99ccaf0e19f4aac34e0a94
+size 141149
diff --git a/data/test/videos/action_recognition_test_video.mp4 b/data/test/videos/action_recognition_test_video.mp4
new file mode 100644
index 00000000..9197b770
--- /dev/null
+++ b/data/test/videos/action_recognition_test_video.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24dc4237b1197321ee8486bb983fa01fd47e2b4afdb3c2df24229e5f2bd20119
+size 1475924
diff --git a/modelscope/pipelines/nlp/space/__init__.py b/modelscope/hub/__init__.py
similarity index 100%
rename from modelscope/pipelines/nlp/space/__init__.py
rename to modelscope/hub/__init__.py
diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
new file mode 100644
index 00000000..104eafbd
--- /dev/null
+++ b/modelscope/hub/api.py
@@ -0,0 +1,265 @@
+import os
+import pickle
+import subprocess
+from http.cookiejar import CookieJar
+from os.path import expanduser
+from typing import List, Optional, Tuple, Union
+
+import requests
+
+from modelscope.utils.logger import get_logger
+from .constants import LOGGER_NAME
+from .errors import NotExistError, is_ok, raise_on_error
+from .utils.utils import get_endpoint, model_id_to_group_owner_name
+
+logger = get_logger()
+
+
+class HubApi:
+
+ def __init__(self, endpoint=None):
+ self.endpoint = endpoint if endpoint is not None else get_endpoint()
+
+ def login(
+ self,
+ user_name: str,
+ password: str,
+    ) -> Tuple[str, CookieJar]:
+        """
+        Login with username and password.
+
+        Args:
+            user_name(`str`): user name on ModelScope
+            password(`str`): password
+
+        Returns:
+            access token(`str`): gitlab token, used to access private repos
+            cookies(`CookieJar`): used to authenticate yourself to the ModelScope open-api
+
+        Note: you only have to login once within 30 days.
+
+        TODO: handle cookie expiration
+        """
+ path = f'{self.endpoint}/api/v1/login'
+ r = requests.post(
+ path, json={
+ 'username': user_name,
+ 'password': password
+ })
+ r.raise_for_status()
+ d = r.json()
+ raise_on_error(d)
+
+ token = d['Data']['AccessToken']
+ cookies = r.cookies
+
+ # save token and cookie
+ ModelScopeConfig.save_token(token)
+ ModelScopeConfig.save_cookies(cookies)
+ ModelScopeConfig.write_to_git_credential(user_name, password)
+
+        return token, cookies
+
+ def create_model(self, model_id: str, chinese_name: str, visibility: int,
+ license: str) -> str:
+ """
+ Create model repo at ModelScopeHub
+
+ Args:
+            model_id(`str`): The model id
+            chinese_name(`str`): Chinese name of the model
+            visibility(`int`): visibility of the model (1-private, 3-internal, 5-public)
+ license(`str`): license of the model, candidates can be found at: TBA
+
+ Returns:
+ name of the model created
+
+
+ model_id = {owner}/{name}
+
+ """
+ cookies = ModelScopeConfig.get_cookies()
+ if cookies is None:
+ raise ValueError('Token does not exist, please login first.')
+
+ path = f'{self.endpoint}/api/v1/models'
+ owner_or_group, name = model_id_to_group_owner_name(model_id)
+ r = requests.post(
+ path,
+ json={
+ 'Path': owner_or_group,
+ 'Name': name,
+ 'ChineseName': chinese_name,
+ 'Visibility': visibility,
+ 'License': license
+ },
+ cookies=cookies)
+ r.raise_for_status()
+ raise_on_error(r.json())
+ d = r.json()
+ return d['Data']['Name']
+
+ def delete_model(self, model_id):
+ """_summary_
+
+ Args:
+ model_id (str): The model id.
+
+ model_id = {owner}/{name}
+
+ """
+ cookies = ModelScopeConfig.get_cookies()
+ path = f'{self.endpoint}/api/v1/models/{model_id}'
+
+ r = requests.delete(path, cookies=cookies)
+ r.raise_for_status()
+ raise_on_error(r.json())
+
+ def get_model_url(self, model_id):
+ return f'{self.endpoint}/api/v1/models/{model_id}.git'
+
+ def get_model(
+ self,
+ model_id: str,
+ revision: str = 'master',
+ ) -> str:
+ """
+ Get model information at modelscope_hub
+
+ Args:
+ model_id(`str`): The model id.
+ revision(`str`): revision of model
+ Returns:
+ The model details information.
+ Raises:
+            NotExistError: If the model does not exist, a NotExistError is raised.
+
+ model_id = {owner}/{name}
+
+ """
+ cookies = ModelScopeConfig.get_cookies()
+ owner_or_group, name = model_id_to_group_owner_name(model_id)
+ path = f'{self.endpoint}/api/v1/models/{owner_or_group}/{name}?{revision}'
+
+ r = requests.get(path, cookies=cookies)
+ if r.status_code == 200:
+ if is_ok(r.json()):
+ return r.json()['Data']
+ else:
+ raise NotExistError(r.json()['Message'])
+ else:
+ r.raise_for_status()
+
+ def get_model_branches_and_tags(
+ self,
+ model_id: str,
+ ) -> Tuple[List[str], List[str]]:
+ cookies = ModelScopeConfig.get_cookies()
+
+ path = f'{self.endpoint}/api/v1/models/{model_id}/revisions'
+ r = requests.get(path, cookies=cookies)
+ r.raise_for_status()
+ d = r.json()
+ raise_on_error(d)
+ info = d['Data']
+ branches = [x['Revision'] for x in info['RevisionMap']['Branches']
+ ] if info['RevisionMap']['Branches'] else []
+ tags = [x['Revision'] for x in info['RevisionMap']['Tags']
+ ] if info['RevisionMap']['Tags'] else []
+ return branches, tags
+
+ def get_model_files(
+ self,
+ model_id: str,
+ revision: Optional[str] = 'master',
+ root: Optional[str] = None,
+ recursive: Optional[str] = False,
+ use_cookies: Union[bool, CookieJar] = False) -> List[dict]:
+
+ cookies = None
+ if isinstance(use_cookies, CookieJar):
+ cookies = use_cookies
+ elif use_cookies:
+ cookies = ModelScopeConfig.get_cookies()
+ if cookies is None:
+ raise ValueError('Token does not exist, please login first.')
+
+ path = f'{self.endpoint}/api/v1/models/{model_id}/repo/files?Revision={revision}&Recursive={recursive}'
+ if root is not None:
+ path = path + f'&Root={root}'
+
+ r = requests.get(path, cookies=cookies)
+
+ r.raise_for_status()
+ d = r.json()
+ raise_on_error(d)
+
+ files = []
+ for file in d['Data']['Files']:
+ if file['Name'] == '.gitignore' or file['Name'] == '.gitattributes':
+ continue
+
+ files.append(file)
+ return files
+
+
+class ModelScopeConfig:
+ path_credential = expanduser('~/.modelscope/credentials')
+ os.makedirs(path_credential, exist_ok=True)
+
+ @classmethod
+ def save_cookies(cls, cookies: CookieJar):
+ with open(os.path.join(cls.path_credential, 'cookies'), 'wb+') as f:
+ pickle.dump(cookies, f)
+
+ @classmethod
+ def get_cookies(cls):
+ try:
+ with open(os.path.join(cls.path_credential, 'cookies'), 'rb') as f:
+ return pickle.load(f)
+ except FileNotFoundError:
+ logger.warn("Auth token does not exist, you'll get authentication \
+ error when downloading private model files. Please login first"
+ )
+
+ @classmethod
+ def save_token(cls, token: str):
+ with open(os.path.join(cls.path_credential, 'token'), 'w+') as f:
+ f.write(token)
+
+ @classmethod
+ def get_token(cls) -> Optional[str]:
+ """
+ Get token or None if not existent.
+
+ Returns:
+ `str` or `None`: The token, `None` if it doesn't exist.
+
+ """
+ token = None
+ try:
+ with open(os.path.join(cls.path_credential, 'token'), 'r') as f:
+ token = f.read()
+ except FileNotFoundError:
+ pass
+ return token
+
+ @staticmethod
+ def write_to_git_credential(username: str, password: str):
+ with subprocess.Popen(
+ 'git credential-store store'.split(),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ ) as process:
+ input_username = f'username={username.lower()}'
+ input_password = f'password={password}'
+
+ process.stdin.write(
+ f'url={get_endpoint()}\n{input_username}\n{input_password}\n\n'
+ .encode('utf-8'))
+ process.stdin.flush()
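A minimal usage sketch of the HubApi client above. The endpoint comes from get_endpoint(); the user name, password, model id and license string are placeholders, not real credentials:

    from modelscope.hub.api import HubApi

    api = HubApi()  # uses get_endpoint() unless an endpoint is passed explicitly
    token, cookies = api.login('my_user', 'my_password')  # placeholder credentials

    # create a repo (visibility: 1-private, 3-internal, 5-public), then inspect it
    api.create_model('my_user/my_model', chinese_name='我的模型', visibility=1, license='Apache-2.0')
    info = api.get_model('my_user/my_model')
    branches, tags = api.get_model_branches_and_tags('my_user/my_model')
    files = api.get_model_files('my_user/my_model', revision='master', recursive=True, use_cookies=True)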
diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py
new file mode 100644
index 00000000..a38f9afb
--- /dev/null
+++ b/modelscope/hub/constants.py
@@ -0,0 +1,8 @@
+MODELSCOPE_URL_SCHEME = 'http://'
+DEFAULT_MODELSCOPE_DOMAIN = '101.201.119.157:32330'
+DEFAULT_MODELSCOPE_GITLAB_DOMAIN = '101.201.119.157:31102'
+
+DEFAULT_MODELSCOPE_GROUP = 'damo'
+MODEL_ID_SEPARATOR = '/'
+
+LOGGER_NAME = 'ModelScopeHub'
diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py
new file mode 100644
index 00000000..13ea709f
--- /dev/null
+++ b/modelscope/hub/errors.py
@@ -0,0 +1,30 @@
+class NotExistError(Exception):
+ pass
+
+
+class RequestError(Exception):
+ pass
+
+
+def is_ok(rsp):
+ """ Check the request is ok
+
+ Args:
+ rsp (_type_): The request response body
+ Failed: {'Code': 10010101004, 'Message': 'get model info failed, err: unauthorized permission',
+ 'RequestId': '', 'Success': False}
+ Success: {'Code': 200, 'Data': {}, 'Message': 'success', 'RequestId': '', 'Success': True}
+ """
+ return rsp['Code'] == 200 and rsp['Success']
+
+
+def raise_on_error(rsp):
+ """If response error, raise exception
+
+ Args:
+ rsp (_type_): The server response
+ """
+ if rsp['Code'] == 200 and rsp['Success']:
+ return True
+ else:
+ raise RequestError(rsp['Message'])
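For illustration, these helpers are meant to wrap the parsed hub response; the dicts below are made-up examples mirroring the docstring above:

    from modelscope.hub.errors import RequestError, is_ok, raise_on_error

    ok_rsp = {'Code': 200, 'Data': {}, 'Message': 'success', 'RequestId': '', 'Success': True}
    assert is_ok(ok_rsp)

    bad_rsp = {'Code': 10010101004, 'Message': 'unauthorized permission', 'RequestId': '', 'Success': False}
    try:
        raise_on_error(bad_rsp)
    except RequestError as err:
        print('request failed:', err)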
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
new file mode 100644
index 00000000..e5c64f1c
--- /dev/null
+++ b/modelscope/hub/file_download.py
@@ -0,0 +1,254 @@
+import copy
+import fnmatch
+import logging
+import os
+import sys
+import tempfile
+import time
+from functools import partial
+from hashlib import sha256
+from pathlib import Path
+from typing import BinaryIO, Dict, Optional, Union
+from uuid import uuid4
+
+import json
+import requests
+from filelock import FileLock
+from requests.exceptions import HTTPError
+from tqdm import tqdm
+
+from modelscope import __version__
+from modelscope.utils.logger import get_logger
+from .api import HubApi, ModelScopeConfig
+from .constants import (DEFAULT_MODELSCOPE_GROUP, LOGGER_NAME,
+ MODEL_ID_SEPARATOR)
+from .errors import NotExistError, RequestError, raise_on_error
+from .utils.caching import ModelFileSystemCache
+from .utils.utils import (get_cache_dir, get_endpoint,
+ model_id_to_group_owner_name)
+
+SESSION_ID = uuid4().hex
+logger = get_logger()
+
+
+def model_file_download(
+ model_id: str,
+ file_path: str,
+ revision: Optional[str] = 'master',
+ cache_dir: Optional[str] = None,
+ user_agent: Union[Dict, str, None] = None,
+ local_files_only: Optional[bool] = False,
+) -> Optional[str]: # pragma: no cover
+ """
+ Download from a given URL and cache it if it's not already present in the
+ local cache.
+
+ Given a URL, this function looks for the corresponding file in the local
+ cache. If it's not there, download it. Then return the path to the cached
+ file.
+
+ Args:
+ model_id (`str`):
+            The model to which the file to be downloaded belongs.
+ file_path(`str`):
+ Path of the file to be downloaded, relative to the root of model repo
+ revision(`str`, *optional*):
+ revision of the model file to be downloaded.
+ Can be any of a branch, tag or commit hash, default to `master`
+ cache_dir (`str`, `Path`, *optional*):
+ Path to the folder where cached files are stored.
+ user_agent (`dict`, `str`, *optional*):
+ The user-agent info in the form of a dictionary or a string.
+ local_files_only (`bool`, *optional*, defaults to `False`):
+ If `True`, avoid downloading the file and return the path to the
+ local cached file if it exists.
+            If `False`, download the file anyway, even if it already exists locally.
+
+ Returns:
+ Local path (string) of file or if networking is off, last version of
+ file cached on disk.
+
+
+
+ Raises the following errors:
+
+ - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+ if `use_auth_token=True` and the token cannot be found.
+ - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
+ if ETag cannot be determined.
+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+ if some parameter value is invalid
+
+
+ """
+ if cache_dir is None:
+ cache_dir = get_cache_dir()
+ if isinstance(cache_dir, Path):
+ cache_dir = str(cache_dir)
+
+ group_or_owner, name = model_id_to_group_owner_name(model_id)
+
+ cache = ModelFileSystemCache(cache_dir, group_or_owner, name)
+
+ # if local_files_only is `True` and the file already exists in cached_path
+ # return the cached path
+ if local_files_only:
+ cached_file_path = cache.get_file_by_path(file_path)
+ if cached_file_path is not None:
+ logger.warning(
+ "File exists in local cache, but we're not sure it's up to date"
+ )
+ return cached_file_path
+ else:
+ raise ValueError(
+ 'Cannot find the requested files in the cached path and outgoing'
+ ' traffic has been disabled. To enable model look-ups and downloads'
+ " online, set 'local_files_only' to False.")
+
+ _api = HubApi()
+ headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
+ branches, tags = _api.get_model_branches_and_tags(model_id)
+ file_to_download_info = None
+ is_commit_id = False
+    if revision in branches or revision in tags:
+        # The revision is a branch name or tag, so we need to confirm the version is up to date:
+        # get the file list to check whether the latest version is cached; if so return it, otherwise download.
+ model_files = _api.get_model_files(
+ model_id=model_id,
+ revision=revision,
+ recursive=True,
+ )
+
+ for model_file in model_files:
+ if model_file['Type'] == 'tree':
+ continue
+
+ if model_file['Path'] == file_path:
+ model_file['Branch'] = revision
+ if cache.exists(model_file):
+ return cache.get_file_by_info(model_file)
+ else:
+ file_to_download_info = model_file
+
+ if file_to_download_info is None:
+            raise NotExistError('The file path: %s does not exist in: %s' %
+                                (file_path, model_id))
+ else: # the revision is commit id.
+ cached_file_path = cache.get_file_by_path_and_commit_id(
+ file_path, revision)
+ if cached_file_path is not None:
+ logger.info('The specified file is in cache, skip downloading!')
+ return cached_file_path # the file is in cache.
+ is_commit_id = True
+ # we need to download again
+ # TODO: skip using JWT for authorization, use cookie instead
+ cookies = ModelScopeConfig.get_cookies()
+ url_to_download = get_file_download_url(model_id, file_path, revision)
+ file_to_download_info = {
+ 'Path': file_path,
+ 'Revision':
+ revision if is_commit_id else file_to_download_info['Revision']
+ }
+ # Prevent parallel downloads of the same file with a lock.
+ lock_path = cache.get_root_location() + '.lock'
+
+ with FileLock(lock_path):
+ temp_file_name = next(tempfile._get_candidate_names())
+ http_get_file(
+ url_to_download,
+ cache_dir,
+ temp_file_name,
+ headers=headers,
+ cookies=None if cookies is None else cookies.get_dict())
+ return cache.put_file(file_to_download_info,
+ os.path.join(cache_dir, temp_file_name))
+
+
+def http_user_agent(user_agent: Union[Dict, str, None] = None, ) -> str:
+ """Formats a user-agent string with basic info about a request.
+
+ Args:
+ user_agent (`str`, `dict`, *optional*):
+ The user agent info in the form of a dictionary or a single string.
+
+ Returns:
+ The formatted user-agent string.
+ """
+ ua = f'modelscope/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}'
+
+ if isinstance(user_agent, dict):
+ ua = '; '.join(f'{k}/{v}' for k, v in user_agent.items())
+ elif isinstance(user_agent, str):
+ ua = user_agent
+ return ua
+
+
+def get_file_download_url(model_id: str, file_path: str, revision: str):
+ """
+ Format file download url according to `model_id`, `revision` and `file_path`.
+ e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`,
+ the resulted download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md
+ """
+ download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}'
+ return download_url_template.format(
+ endpoint=get_endpoint(),
+ model_id=model_id,
+ revision=revision,
+ file_path=file_path,
+ )
+
+
+def http_get_file(
+ url: str,
+ local_dir: str,
+ file_name: str,
+ cookies: Dict[str, str],
+ headers: Optional[Dict[str, str]] = None,
+):
+ """
+    Download a remote file. Do not swallow errors.
+    This helper is used by both model_file_download and snapshot_download.
+    TODO: consolidate the duplicated download logic to avoid repeated code
+
+ Args:
+ url(`str`):
+ actual download url of the file
+ local_dir(`str`):
+            local directory where the downloaded file is stored
+ file_name(`str`):
+ name of the file stored in `local_dir`
+ cookies(`Dict[str, str]`):
+            cookies used to authenticate the user, needed when downloading files from private repos
+ headers(`Optional[Dict[str, str]] = None`):
+ http headers to carry necessary info when requesting the remote file
+
+ """
+ temp_file_manager = partial(
+ tempfile.NamedTemporaryFile, mode='wb', dir=local_dir, delete=False)
+
+ with temp_file_manager() as temp_file:
+ logger.info('downloading %s to %s', url, temp_file.name)
+ headers = copy.deepcopy(headers)
+
+ r = requests.get(url, stream=True, headers=headers, cookies=cookies)
+ r.raise_for_status()
+
+ content_length = r.headers.get('Content-Length')
+ total = int(content_length) if content_length is not None else None
+
+ progress = tqdm(
+ unit='B',
+ unit_scale=True,
+ unit_divisor=1024,
+ total=total,
+ initial=0,
+ desc='Downloading',
+ )
+ for chunk in r.iter_content(chunk_size=1024):
+ if chunk: # filter out keep-alive new chunks
+ progress.update(len(chunk))
+ temp_file.write(chunk)
+ progress.close()
+
+ logger.info('storing %s in cache at %s', url, local_dir)
+ os.replace(temp_file.name, os.path.join(local_dir, file_name))
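A sketch of how model_file_download is expected to be called; the model id and file path below are placeholders, and the returned path points into the local cache:

    from modelscope.hub.file_download import model_file_download

    # download a single repo file (or reuse the cached copy if it is up to date)
    local_path = model_file_download('damo/some-model', 'configuration.json', revision='master')
    print(local_path)

    # offline mode: only succeeds if the file is already cached
    cached_path = model_file_download('damo/some-model', 'configuration.json', local_files_only=True)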
diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py
new file mode 100644
index 00000000..5f079105
--- /dev/null
+++ b/modelscope/hub/git.py
@@ -0,0 +1,82 @@
+from typing import Union
+
+from modelscope.utils.logger import get_logger
+from .constants import LOGGER_NAME
+from .utils._subprocess import run_subprocess
+
+logger = get_logger()
+
+
+def git_clone(
+ local_dir: str,
+ repo_url: str,
+):
+ # TODO: use "git clone" or "git lfs clone" according to git version
+ # TODO: print stderr when subprocess fails
+ run_subprocess(
+ f'git clone {repo_url}'.split(),
+ local_dir,
+ True,
+ )
+
+
+def git_checkout(
+ local_dir: str,
+    revision: str,
+):
+    run_subprocess(f'git checkout {revision}'.split(), local_dir)
+
+
+def git_add(local_dir: str, ):
+ run_subprocess(
+ 'git add .'.split(),
+ local_dir,
+ True,
+ )
+
+
+def git_commit(local_dir: str, commit_message: str):
+ run_subprocess(
+ 'git commit -v -m'.split() + [commit_message],
+ local_dir,
+ True,
+ )
+
+
+def git_push(local_dir: str, branch: str):
+ # check current branch
+ cur_branch = git_current_branch(local_dir)
+ if cur_branch != branch:
+ logger.error(
+ "You're trying to push to a different branch, please double check")
+ return
+
+ run_subprocess(
+ f'git push origin {branch}'.split(),
+ local_dir,
+ True,
+ )
+
+
+def git_current_branch(local_dir: str) -> Union[str, None]:
+ """
+ Get current branch name
+
+ Args:
+ local_dir(`str`): local model repo directory
+
+    Returns:
+ branch name you're currently on
+ """
+    process = run_subprocess(
+        'git rev-parse --abbrev-ref HEAD'.split(),
+        local_dir,
+        True,
+    )
+    return str(process.stdout).strip()
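The helpers above are thin wrappers over plain git commands via run_subprocess; a hypothetical end-to-end sequence (the directory and repo url are placeholders) looks like this:

    import os

    from modelscope.hub.git import (git_add, git_clone, git_commit,
                                    git_current_branch, git_push)

    work_dir = '/tmp/my_models'                 # hypothetical working directory
    os.makedirs(work_dir, exist_ok=True)
    git_clone(work_dir, 'http://example.com/damo/my_model.git')  # hypothetical repo url
    repo_dir = os.path.join(work_dir, 'my_model')  # git clone creates this sub-directory

    git_add(repo_dir)
    git_commit(repo_dir, 'add new weights')
    print(git_current_branch(repo_dir))
    git_push(repo_dir, 'master')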
diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py
new file mode 100644
index 00000000..6367f903
--- /dev/null
+++ b/modelscope/hub/repository.py
@@ -0,0 +1,173 @@
+import os
+import subprocess
+from pathlib import Path
+from typing import Optional, Union
+
+from modelscope.utils.logger import get_logger
+from .api import ModelScopeConfig
+from .constants import MODELSCOPE_URL_SCHEME
+from .git import git_add, git_checkout, git_clone, git_commit, git_push
+from .utils._subprocess import run_subprocess
+from .utils.utils import get_gitlab_domain
+
+logger = get_logger()
+
+
+class Repository:
+
+ def __init__(
+ self,
+ local_dir: str,
+ clone_from: Optional[str] = None,
+ auth_token: Optional[str] = None,
+ private: Optional[bool] = False,
+ revision: Optional[str] = 'master',
+ ):
+ """
+ Instantiate a Repository object by cloning the remote ModelScopeHub repo
+ Args:
+ local_dir(`str`):
+ local directory to store the model files
+ clone_from(`Optional[str] = None`):
+ model id in ModelScope-hub from which git clone
+ You should ignore this parameter when `local_dir` is already a git repo
+ auth_token(`Optional[str]`):
+ token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter
+ as the token is already saved when you login the first time
+ private(`Optional[bool]`):
+ whether the model is private, default to False
+ revision(`Optional[str]`):
+ revision of the model you want to clone from. Can be any of a branch, tag or commit hash
+ """
+ logger.info('Instantiating Repository object...')
+
+ # Create local directory if not exist
+ os.makedirs(local_dir, exist_ok=True)
+ self.local_dir = os.path.join(os.getcwd(), local_dir)
+
+ self.private = private
+
+ # Check git and git-lfs installation
+ self.check_git_versions()
+
+ # Retrieve auth token
+ if not private and isinstance(auth_token, str):
+ logger.warning(
+ 'cloning a public repo with a token, which will be ignored')
+ self.token = None
+ else:
+ if isinstance(auth_token, str):
+ self.token = auth_token
+ else:
+ self.token = ModelScopeConfig.get_token()
+
+ if self.token is None:
+ raise EnvironmentError(
+ 'Token does not exist, the clone will fail for private repo.'
+                    ' Please login first.')
+
+ # git clone
+ if clone_from is not None:
+ self.model_id = clone_from
+ logger.info('cloning model repo to %s ...', self.local_dir)
+ git_clone(self.local_dir, self.get_repo_url())
+ else:
+ if is_git_repo(self.local_dir):
+ logger.debug('[Repository] is a valid git repo')
+ else:
+ raise ValueError(
+ 'If not specifying `clone_from`, you need to pass Repository a'
+ ' valid git clone.')
+
+ # git checkout
+ if isinstance(revision, str) and revision != 'master':
+            git_checkout(self.local_dir, revision)
+
+ def push_to_hub(self,
+ commit_message: str,
+ revision: Optional[str] = 'master'):
+ """
+        Push local changes to the remote hub repo
+
+ Args:
+ commit_message(`str`):
+ commit message describing the changes, it's mandatory
+ revision(`Optional[str]`):
+ remote branch you want to push to, default to `master`
+
+
+        Note: the push is skipped with an error message if the current local branch differs from `revision`, please double check.
+
+
+ """
+ git_add(self.local_dir)
+ git_commit(self.local_dir, commit_message)
+
+ logger.info('Pushing changes to repo...')
+ git_push(self.local_dir, revision)
+
+ # TODO: if git push fails, how to retry?
+
+ def check_git_versions(self):
+ """
+ Checks that `git` and `git-lfs` can be run.
+
+ Raises:
+ `EnvironmentError`: if `git` or `git-lfs` are not installed.
+ """
+ try:
+ git_version = run_subprocess('git --version'.split(),
+ self.local_dir).stdout.strip()
+ except FileNotFoundError:
+ raise EnvironmentError(
+ 'Looks like you do not have git installed, please install.')
+
+ try:
+ lfs_version = run_subprocess('git-lfs --version'.split(),
+ self.local_dir).stdout.strip()
+ except FileNotFoundError:
+ raise EnvironmentError(
+ 'Looks like you do not have git-lfs installed, please install.'
+ ' You can install from https://git-lfs.github.com/.'
+ ' Then run `git lfs install` (you only have to do this once).')
+ logger.info(git_version + '\n' + lfs_version)
+
+ def get_repo_url(self) -> str:
+ """
+        Get the repo url to clone, according to whether the repo is private or not.
+ """
+ url = None
+
+ if self.private:
+ url = f'{MODELSCOPE_URL_SCHEME}oauth2:{self.token}@{get_gitlab_domain()}/{self.model_id}'
+ else:
+ url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{self.model_id}'
+
+ if not url:
+ raise ValueError(
+ 'Empty repo url, please check clone_from parameter')
+
+ logger.debug('url to clone: %s', str(url))
+
+ return url
+
+
+def is_git_repo(folder: Union[str, Path]) -> bool:
+ """
+ Check if the folder is the root or part of a git repository
+
+ Args:
+ folder (`str`):
+ The folder in which to run the command.
+
+ Returns:
+        `bool`: `True` if the folder is the root of, or part of, a git repository,
+        `False` otherwise.
+ """
+ folder_exists = os.path.exists(os.path.join(folder, '.git'))
+ git_branch = subprocess.run(
+ 'git branch'.split(),
+ cwd=folder,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ return folder_exists and git_branch.returncode == 0
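A minimal sketch of the Repository workflow, assuming HubApi.login() has already stored a token and that the model id and directory below are placeholders:

    from modelscope.hub.repository import Repository

    repo = Repository(local_dir='my_model', clone_from='my_user/my_model', private=True)

    # ... modify or add files in the cloned repo ...
    repo.push_to_hub(commit_message='update model weights', revision='master')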
diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py
new file mode 100644
index 00000000..90d850f4
--- /dev/null
+++ b/modelscope/hub/snapshot_download.py
@@ -0,0 +1,125 @@
+import os
+import tempfile
+from glob import glob
+from pathlib import Path
+from typing import Dict, Optional, Union
+
+from modelscope.utils.logger import get_logger
+from .api import HubApi, ModelScopeConfig
+from .constants import DEFAULT_MODELSCOPE_GROUP, MODEL_ID_SEPARATOR
+from .errors import NotExistError, RequestError, raise_on_error
+from .file_download import (get_file_download_url, http_get_file,
+ http_user_agent)
+from .utils.caching import ModelFileSystemCache
+from .utils.utils import get_cache_dir, model_id_to_group_owner_name
+
+logger = get_logger()
+
+
+def snapshot_download(model_id: str,
+ revision: Optional[str] = 'master',
+ cache_dir: Union[str, Path, None] = None,
+ user_agent: Optional[Union[Dict, str]] = None,
+ local_files_only: Optional[bool] = False,
+ private: Optional[bool] = False) -> str:
+ """Download all files of a repo.
+ Downloads a whole snapshot of a repo's files at the specified revision. This
+ is useful when you want all files from a repo, because you don't know which
+ ones you will need a priori. All files are nested inside a folder in order
+ to keep their actual filename relative to that folder.
+
+ An alternative would be to just clone a repo but this would require that the
+ user always has git and git-lfs installed, and properly configured.
+ Args:
+ model_id (`str`):
+ A user or an organization name and a repo name separated by a `/`.
+ revision (`str`, *optional*):
+ An optional Git revision id which can be a branch name, a tag, or a
+            commit hash. NOTE: currently only branch and tag names are supported
+ cache_dir (`str`, `Path`, *optional*):
+ Path to the folder where cached files are stored.
+ user_agent (`str`, `dict`, *optional*):
+ The user-agent info in the form of a dictionary or a string.
+ local_files_only (`bool`, *optional*, defaults to `False`):
+ If `True`, avoid downloading the file and return the path to the
+ local cached file if it exists.
+ Returns:
+ Local folder path (string) of repo snapshot
+
+
+ Raises the following errors:
+ - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+ if `use_auth_token=True` and the token cannot be found.
+ - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
+ ETag cannot be determined.
+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+ if some parameter value is invalid
+
+ """
+
+ if cache_dir is None:
+ cache_dir = get_cache_dir()
+ if isinstance(cache_dir, Path):
+ cache_dir = str(cache_dir)
+
+ group_or_owner, name = model_id_to_group_owner_name(model_id)
+
+ cache = ModelFileSystemCache(cache_dir, group_or_owner, name)
+ if local_files_only:
+ if len(cache.cached_files) == 0:
+ raise ValueError(
+ 'Cannot find the requested files in the cached path and outgoing'
+ ' traffic has been disabled. To enable model look-ups and downloads'
+ " online, set 'local_files_only' to False.")
+        logger.warning(
+            'We can not confirm the cached file is for revision: %s' % revision)
+ return cache.get_root_location(
+ ) # we can not confirm the cached file is for snapshot 'revision'
+ else:
+ # make headers
+ headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
+ _api = HubApi()
+ # get file list from model repo
+ branches, tags = _api.get_model_branches_and_tags(model_id)
+ if revision not in branches and revision not in tags:
+            raise NotExistError('The specified branch or tag: %s does not exist!'
+                                % revision)
+
+ model_files = _api.get_model_files(
+ model_id=model_id,
+ revision=revision,
+ recursive=True,
+ use_cookies=private)
+
+ cookies = None
+ if private:
+ cookies = ModelScopeConfig.get_cookies()
+
+ for model_file in model_files:
+ if model_file['Type'] == 'tree':
+ continue
+            # check whether model_file exists in the cache; if so, skip the download, otherwise download it
+ if cache.exists(model_file):
+ logger.info(
+ 'The specified file is in cache, skip downloading!')
+ continue
+
+ # get download url
+ url = get_file_download_url(
+ model_id=model_id,
+ file_path=model_file['Path'],
+ revision=revision)
+
+ # First download to /tmp
+ http_get_file(
+ url=url,
+ local_dir=tempfile.gettempdir(),
+ file_name=model_file['Name'],
+ headers=headers,
+ cookies=None if cookies is None else cookies.get_dict())
+ # put file to cache
+ cache.put_file(
+ model_file,
+ os.path.join(tempfile.gettempdir(), model_file['Name']))
+
+    return cache.get_root_location()
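A usage sketch for snapshot_download (the model id is a placeholder); this is essentially how Model in modelscope/models/base.py resolves a remote model id in the base.py change further below:

    from modelscope.hub.snapshot_download import snapshot_download

    # download every file of the repo at the given branch/tag into the local cache
    model_dir = snapshot_download('damo/some-model', revision='master')
    print(model_dir)  # e.g. ~/.cache/modelscope/hub/damo/some-model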
diff --git a/tests/pipelines/nlp/__init__.py b/modelscope/hub/utils/__init__.py
similarity index 100%
rename from tests/pipelines/nlp/__init__.py
rename to modelscope/hub/utils/__init__.py
diff --git a/modelscope/hub/utils/_subprocess.py b/modelscope/hub/utils/_subprocess.py
new file mode 100644
index 00000000..77e9fc48
--- /dev/null
+++ b/modelscope/hub/utils/_subprocess.py
@@ -0,0 +1,40 @@
+import subprocess
+from typing import List
+
+
+def run_subprocess(command: List[str],
+ folder: str,
+ check=True,
+ **kwargs) -> subprocess.CompletedProcess:
+ """
+ Method to run subprocesses. Calling this will capture the `stderr` and `stdout`,
+ please call `subprocess.run` manually in case you would like for them not to
+ be captured.
+
+ Args:
+ command (`List[str]`):
+ The command to execute as a list of strings.
+ folder (`str`):
+ The folder in which to run the command.
+ check (`bool`, *optional*, defaults to `True`):
+ Setting `check` to `True` will raise a `subprocess.CalledProcessError`
+ when the subprocess has a non-zero exit code.
+ kwargs (`Dict[str]`):
+ Keyword arguments to be passed to the `subprocess.run` underlying command.
+
+ Returns:
+ `subprocess.CompletedProcess`: The completed process.
+ """
+ if isinstance(command, str):
+ raise ValueError(
+ '`run_subprocess` should be called with a list of strings.')
+
+ return subprocess.run(
+ command,
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ check=check,
+ encoding='utf-8',
+ cwd=folder,
+ **kwargs,
+ )
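For illustration, run_subprocess is a thin wrapper around subprocess.run with captured output:

    from modelscope.hub.utils._subprocess import run_subprocess

    result = run_subprocess('git --version'.split(), folder='.')
    print(result.stdout.strip())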
diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py
new file mode 100644
index 00000000..ac258385
--- /dev/null
+++ b/modelscope/hub/utils/caching.py
@@ -0,0 +1,294 @@
+import hashlib
+import logging
+import os
+import pickle
+import tempfile
+import time
+from shutil import move, rmtree
+
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class FileSystemCache(object):
+    """Local file cache."""
+
+    KEY_FILE_NAME = '.msc'
+
+ def __init__(
+ self,
+ cache_root_location: str,
+ **kwargs,
+ ):
+ """
+ Parameters
+ ----------
+ cache_location: str
+ The root location to store files.
+ """
+ os.makedirs(cache_root_location, exist_ok=True)
+ self.cache_root_location = cache_root_location
+ self.load_cache()
+
+ def get_root_location(self):
+ return self.cache_root_location
+
+ def load_cache(self):
+ """Read set of stored blocks from file
+ Args:
+ owner(`str`): individual or group username at modelscope, can be empty for official models
+ name(`str`): name of the model
+ Returns:
+ The model details information.
+ Raises:
+ NotExistError: If the model is not exist, will throw NotExistError
+ TODO: Error based error code.
+
+ model_id = {owner}/{name}
+
+ """
+ self.cached_files = []
+ cache_keys_file_path = os.path.join(self.cache_root_location,
+ FileSystemCache.KEY_FILE_NAME)
+ if os.path.exists(cache_keys_file_path):
+ with open(cache_keys_file_path, 'rb') as f:
+ self.cached_files = pickle.load(f)
+
+ def save_cached_files(self):
+ """Save cache metadata."""
+ # save new meta to tmp and move to KEY_FILE_NAME
+ cache_keys_file_path = os.path.join(self.cache_root_location,
+ FileSystemCache.KEY_FILE_NAME)
+ # TODO: Sync file write
+ fd, fn = tempfile.mkstemp()
+ with open(fd, 'wb') as f:
+ pickle.dump(self.cached_files, f)
+ move(fn, cache_keys_file_path)
+
+ def get_file(self, key):
+ """Check the key is in the cache, if exist, return the file, otherwise return None.
+ Args:
+ key(`str`): The cache key.
+ Returns:
+ If file exist, return the cached file location, otherwise None.
+ Raises:
+ None
+
+ model_id = {owner}/{name}
+
+ """
+ pass
+
+ def put_file(self, key, location):
+ """Put file to the cache,
+ Args:
+ key(`str`): The cache key
+ location(`str`): Location of the file, we will move the file to cache.
+ Returns:
+ The cached file path of the file.
+ Raises:
+ None
+
+ model_id = {owner}/{name}
+
+ """
+ pass
+
+ def remove_key(self, key):
+ """Remove cache key in index, The file is removed manually
+
+ Args:
+ key (dict): The cache key.
+ """
+ self.cached_files.remove(key)
+ self.save_cached_files()
+
+ def exists(self, key):
+ for cache_file in self.cached_files:
+ if cache_file == key:
+ return True
+
+ return False
+
+ def clear_cache(self):
+ """Remove all files and metadat from the cache
+
+ In the case of multiple cache locations, this clears only the last one,
+ which is assumed to be the read/write one.
+ """
+ rmtree(self.cache_root_location)
+ self.load_cache()
+
+ def hash_name(self, key):
+ return hashlib.sha256(key.encode()).hexdigest()
+
+
+class ModelFileSystemCache(FileSystemCache):
+ """Local cache file layout
+ cache_root/owner/model_name/|individual cached files
+ |.mk: file, The cache index file
+ Save only one version for each file.
+ """
+
+ def __init__(self, cache_root, owner, name):
+ """Put file to the cache
+ Args:
+ cache_root(`str`): The modelscope local cache root(default: ~/.modelscope/cache/models/)
+ owner(`str`): The model owner.
+ name('str'): The name of the model
+ branch('str'): The branch of model
+ tag('str'): The tag of model
+ Returns:
+ Raises:
+ None
+
+ model_id = {owner}/{name}
+
+ """
+ super().__init__(os.path.join(cache_root, owner, name))
+
+ def get_file_by_path(self, file_path):
+ """Retrieve the cache if there is file match the path.
+ Args:
+ file_path (str): The file path in the model.
+ Returns:
+ path: the full path of the file.
+ """
+ for cached_file in self.cached_files:
+ if file_path == cached_file['Path']:
+ cached_file_path = os.path.join(self.cache_root_location,
+ cached_file['Path'])
+ if os.path.exists(cached_file_path):
+ return cached_file_path
+ else:
+ self.remove_key(cached_file)
+
+ return None
+
+ def get_file_by_path_and_commit_id(self, file_path, commit_id):
+ """Retrieve the cache if there is file match the path.
+ Args:
+ file_path (str): The file path in the model.
+ commit_id (str): The commit id of the file
+ Returns:
+ path: the full path of the file.
+ """
+ for cached_file in self.cached_files:
+ if file_path == cached_file['Path'] and \
+ (cached_file['Revision'].startswith(commit_id) or commit_id.startswith(cached_file['Revision'])):
+ cached_file_path = os.path.join(self.cache_root_location,
+ cached_file['Path'])
+ if os.path.exists(cached_file_path):
+ return cached_file_path
+ else:
+ self.remove_key(cached_file)
+
+ return None
+
+ def get_file_by_info(self, model_file_info):
+ """Check if exist cache file.
+
+ Args:
+ model_file_info (ModelFileInfo): The file information of the file.
+
+ Returns:
+ _type_: _description_
+ """
+ cache_key = self.__get_cache_key(model_file_info)
+ for cached_file in self.cached_files:
+ if cached_file == cache_key:
+ orig_path = os.path.join(self.cache_root_location,
+ cached_file['Path'])
+ if os.path.exists(orig_path):
+ return orig_path
+ else:
+ self.remove_key(cached_file)
+
+ return None
+
+ def __get_cache_key(self, model_file_info):
+ cache_key = {
+ 'Path': model_file_info['Path'],
+ 'Revision': model_file_info['Revision'], # commit id
+ }
+ return cache_key
+
+ def exists(self, model_file_info):
+ """Check the file is cached or not.
+
+ Args:
+ model_file_info (CachedFileInfo): The cached file info
+
+ Returns:
+ bool: If exists return True otherwise False
+ """
+ key = self.__get_cache_key(model_file_info)
+ is_exists = False
+ for cached_key in self.cached_files:
+ if cached_key['Path'] == key['Path'] and (
+ cached_key['Revision'].startswith(key['Revision'])
+ or key['Revision'].startswith(cached_key['Revision'])):
+ is_exists = True
+ file_path = os.path.join(self.cache_root_location,
+ model_file_info['Path'])
+ if is_exists:
+ if os.path.exists(file_path):
+ return True
+ else:
+                self.remove_key(
+                    model_file_info)  # someone may have deleted the file manually
+ return False
+
+ def remove_if_exists(self, model_file_info):
+ """We in cache, remove it.
+
+ Args:
+ model_file_info (ModelFileInfo): The model file information from server.
+ """
+ for cached_file in self.cached_files:
+ if cached_file['Path'] == model_file_info['Path']:
+ self.remove_key(cached_file)
+ file_path = os.path.join(self.cache_root_location,
+ cached_file['Path'])
+ if os.path.exists(file_path):
+ os.remove(file_path)
+
+ def put_file(self, model_file_info, model_file_location):
+ """Put model on model_file_location to cache, the model first download to /tmp, and move to cache.
+
+ Args:
+            model_file_info (dict): The file description returned by get_model_files
+ sample:
+ {
+ "CommitMessage": "add model\n",
+ "CommittedDate": 1654857567,
+ "CommitterName": "mulin.lyh",
+ "IsLFS": false,
+ "Mode": "100644",
+ "Name": "resnet18.pth",
+ "Path": "resnet18.pth",
+ "Revision": "09b68012b27de0048ba74003690a890af7aff192",
+ "Size": 46827520,
+ "Type": "blob"
+ }
+ model_file_location (str): The location of the temporary file.
+
+ Returns:
+ str: The location of the cached file.
+ """
+        self.remove_if_exists(model_file_info)  # remove any previously cached revision
+ cache_key = self.__get_cache_key(model_file_info)
+ cache_full_path = os.path.join(
+ self.cache_root_location,
+ cache_key['Path']) # Branch and Tag do not have same name.
+ cache_file_dir = os.path.dirname(cache_full_path)
+ if not os.path.exists(cache_file_dir):
+ os.makedirs(cache_file_dir, exist_ok=True)
+ # We can't make operation transaction
+ move(model_file_location, cache_full_path)
+ self.cached_files.append(cache_key)
+ self.save_cached_files()
+ return cache_full_path
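A sketch of how ModelFileSystemCache is driven by the download code above; the cache root, owner, name and file info below are placeholders, and the temporary file stands in for a downloaded file:

    import tempfile

    from modelscope.hub.utils.caching import ModelFileSystemCache

    cache = ModelFileSystemCache('/tmp/msc-cache', 'damo', 'some-model')

    file_info = {'Path': 'resnet18.pth', 'Revision': '09b68012b27de0048ba74003690a890af7aff192'}
    if cache.exists(file_info):
        cached_path = cache.get_file_by_info(file_info)
    else:
        tmp = tempfile.NamedTemporaryFile(delete=False)    # stands in for a downloaded file
        tmp.write(b'fake weights')
        tmp.close()
        cached_path = cache.put_file(file_info, tmp.name)  # moves the temp file into the cache
    print(cached_path)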
diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py
new file mode 100644
index 00000000..d0704de8
--- /dev/null
+++ b/modelscope/hub/utils/utils.py
@@ -0,0 +1,39 @@
+import os
+
+from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN,
+ DEFAULT_MODELSCOPE_GITLAB_DOMAIN,
+ DEFAULT_MODELSCOPE_GROUP,
+ MODEL_ID_SEPARATOR,
+ MODELSCOPE_URL_SCHEME)
+
+
+def model_id_to_group_owner_name(model_id):
+ if MODEL_ID_SEPARATOR in model_id:
+ group_or_owner = model_id.split(MODEL_ID_SEPARATOR)[0]
+ name = model_id.split(MODEL_ID_SEPARATOR)[1]
+ else:
+ group_or_owner = DEFAULT_MODELSCOPE_GROUP
+ name = model_id
+ return group_or_owner, name
+
+
+def get_cache_dir():
+ """
+ cache dir precedence:
+        function parameter > environment variable > ~/.cache/modelscope/hub
+ """
+ default_cache_dir = os.path.expanduser(
+ os.path.join('~/.cache', 'modelscope'))
+ return os.getenv('MODELSCOPE_CACHE', os.path.join(default_cache_dir,
+ 'hub'))
+
+
+def get_endpoint():
+ modelscope_domain = os.getenv('MODELSCOPE_DOMAIN',
+ DEFAULT_MODELSCOPE_DOMAIN)
+ return MODELSCOPE_URL_SCHEME + modelscope_domain
+
+
+def get_gitlab_domain():
+ return os.getenv('MODELSCOPE_GITLAB_DOMAIN',
+ DEFAULT_MODELSCOPE_GITLAB_DOMAIN)
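These helpers resolve their defaults from environment variables; a quick illustration (printed values depend on MODELSCOPE_DOMAIN / MODELSCOPE_CACHE in your environment):

    from modelscope.hub.utils.utils import (get_cache_dir, get_endpoint,
                                            model_id_to_group_owner_name)

    print(get_endpoint())                             # MODELSCOPE_DOMAIN or the built-in default
    print(get_cache_dir())                            # MODELSCOPE_CACHE or ~/.cache/modelscope/hub
    print(model_id_to_group_owner_name('damo/bert'))  # ('damo', 'bert')
    print(model_id_to_group_owner_name('bert'))       # no owner given: falls back to ('damo', 'bert')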
diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
new file mode 100644
index 00000000..f89b7b27
--- /dev/null
+++ b/modelscope/metainfo.py
@@ -0,0 +1,104 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+
+class Models(object):
+ """ Names for different models.
+
+    Holds the standard model names used to identify different models.
+    This should be used to register models.
+
+    Model names should only contain model info, not task info.
+ """
+ # vision models
+
+ # nlp models
+ bert = 'bert'
+ palm = 'palm-v2'
+ structbert = 'structbert'
+ veco = 'veco'
+
+ # audio models
+ sambert_hifi_16k = 'sambert-hifi-16k'
+ generic_tts_frontend = 'generic-tts-frontend'
+ hifigan16k = 'hifigan16k'
+
+ # multi-modal models
+ ofa = 'ofa'
+
+
+class Pipelines(object):
+ """ Names for different pipelines.
+
+    Holds the standard pipeline names used to identify different pipelines.
+    This should be used to register pipelines.
+
+    For a pipeline that supports different models and implements common functionality, we
+    should use the task name as the pipeline name.
+    For a pipeline that supports only one model, we should use ${Model}-${Task} as its name.
+ """
+ # vision tasks
+ image_matting = 'unet-image-matting'
+ person_image_cartoon = 'unet-person-image-cartoon'
+ ocr_detection = 'resnet18-ocr-detection'
+ action_recognition = 'TAdaConv_action-recognition'
+
+ # nlp tasks
+ sentence_similarity = 'sentence-similarity'
+ word_segmentation = 'word-segmentation'
+ text_generation = 'text-generation'
+ sentiment_analysis = 'sentiment-analysis'
+ sentiment_classification = 'sentiment-classification'
+ fill_mask = 'fill-mask'
+ nli = 'nli'
+ dialog_intent_prediction = 'dialog-intent-prediction'
+ dialog_modeling = 'dialog-modeling'
+ dialog_state_tracking = 'dialog_state_tracking'
+
+ # audio tasks
+ sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts'
+ speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k'
+
+ # multi-modal tasks
+ image_caption = 'image-caption'
+
+
+class Trainers(object):
+ """ Names for different trainer.
+
+    Holds the standard trainer names used to identify different trainers.
+ This should be used to register trainers.
+
+ For a general Trainer, you can use easynlp-trainer/ofa-trainer/sofa-trainer.
+ For a model specific Trainer, you can use ${ModelName}-${Task}-trainer.
+ """
+
+ default = 'Trainer'
+
+
+class Preprocessors(object):
+ """ Names for different preprocessor.
+
+    Holds the standard preprocessor names used to identify different preprocessors.
+    This should be used to register preprocessors.
+
+    For a general preprocessor, just use the function name as the preprocessor name, such as
+    resize-image, random-crop.
+    For a model-specific preprocessor, use ${modelname}-${function}.
+ """
+
+ # cv preprocessor
+ load_image = 'load-image'
+
+ # nlp preprocessor
+ bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer'
+ palm_text_gen_tokenizer = 'palm-text-gen-tokenizer'
+ token_cls_tokenizer = 'token-cls-tokenizer'
+ nli_tokenizer = 'nli-tokenizer'
+ sen_cls_tokenizer = 'sen-cls-tokenizer'
+
+ # audio preprocessor
+ linear_aec_fbank = 'linear-aec-fbank'
+ text_to_tacotron_symbols = 'text-to-tacotron-symbols'
+
+ # multi-modal
+ ofa_image_caption = 'ofa-image-caption'
diff --git a/modelscope/models/__init__.py b/modelscope/models/__init__.py
index 7d70e6ca..d3423a3f 100644
--- a/modelscope/models/__init__.py
+++ b/modelscope/models/__init__.py
@@ -1,7 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-from .audio.tts.am import SambertNetHifi16k
-from .audio.tts.vocoder import Hifigan16k
+# from .audio.tts.am import SambertNetHifi16k
+# from .audio.tts.vocoder import Hifigan16k
from .base import Model
from .builder import MODELS, build_model
-from .nlp import BertForSequenceClassification, SbertForSentenceSimilarity
+# from .multi_model import OfaForImageCaptioning
+from .nlp import (BertForSequenceClassification, SbertForNLI,
+ SbertForSentenceSimilarity, SbertForSentimentClassification,
+ SbertForTokenClassification, StructBertForMaskedLM,
+ VecoForMaskedLM)
diff --git a/modelscope/models/audio/tts/am/sambert_hifi_16k.py b/modelscope/models/audio/tts/am/sambert_hifi_16k.py
index 2db9abc6..fc6d519a 100644
--- a/modelscope/models/audio/tts/am/sambert_hifi_16k.py
+++ b/modelscope/models/audio/tts/am/sambert_hifi_16k.py
@@ -6,6 +6,7 @@ import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MultiLabelBinarizer
+from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
@@ -17,7 +18,7 @@ __all__ = ['SambertNetHifi16k']
def multi_label_symbol_to_sequence(my_classes, my_symbol):
- one_hot = MultiLabelBinarizer(my_classes)
+ one_hot = MultiLabelBinarizer(classes=my_classes)
tokens = my_symbol.strip().split(' ')
sequences = []
for token in tokens:
@@ -26,7 +27,8 @@ def multi_label_symbol_to_sequence(my_classes, my_symbol):
return one_hot.fit_transform(sequences)
-@MODELS.register_module(Tasks.text_to_speech, module_name=r'sambert_hifi_16k')
+@MODELS.register_module(
+ Tasks.text_to_speech, module_name=Models.sambert_hifi_16k)
class SambertNetHifi16k(Model):
def __init__(self,
diff --git a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py
index ed34143f..757e4db9 100644
--- a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py
+++ b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py
@@ -2,8 +2,7 @@ import os
import zipfile
from typing import Any, Dict, List
-import ttsfrd
-
+from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.audio.tts_exceptions import (
@@ -15,11 +14,12 @@ __all__ = ['GenericTtsFrontend']
@MODELS.register_module(
- Tasks.text_to_speech, module_name=r'generic_tts_frontend')
+ Tasks.text_to_speech, module_name=Models.generic_tts_frontend)
class GenericTtsFrontend(Model):
def __init__(self, model_dir='.', lang_type='pinyin', *args, **kwargs):
super().__init__(model_dir, *args, **kwargs)
+ import ttsfrd
frontend = ttsfrd.TtsFrontendEngine()
zip_file = os.path.join(model_dir, 'resource.zip')
self._res_path = os.path.join(model_dir, 'resource')
diff --git a/modelscope/models/audio/tts/vocoder/hifigan16k.py b/modelscope/models/audio/tts/vocoder/hifigan16k.py
index 0d917dbe..b3fd9cf6 100644
--- a/modelscope/models/audio/tts/vocoder/hifigan16k.py
+++ b/modelscope/models/audio/tts/vocoder/hifigan16k.py
@@ -10,6 +10,7 @@ import numpy as np
import torch
from scipy.io.wavfile import write
+from modelscope.metainfo import Models
from modelscope.models.base import Model
from modelscope.models.builder import MODELS
from modelscope.utils.audio.tts_exceptions import \
@@ -36,7 +37,7 @@ class AttrDict(dict):
self.__dict__ = self
-@MODELS.register_module(Tasks.text_to_speech, module_name=r'hifigan16k')
+@MODELS.register_module(Tasks.text_to_speech, module_name=Models.hifigan16k)
class Hifigan16k(Model):
def __init__(self, model_dir, *args, **kwargs):
diff --git a/modelscope/models/audio/tts/vocoder/models/models.py b/modelscope/models/audio/tts/vocoder/models/models.py
index 83fc7dc2..c46a9204 100755
--- a/modelscope/models/audio/tts/vocoder/models/models.py
+++ b/modelscope/models/audio/tts/vocoder/models/models.py
@@ -3,7 +3,6 @@ from distutils.version import LooseVersion
import torch
import torch.nn as nn
import torch.nn.functional as F
-from pytorch_wavelets import DWT1DForward
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
@@ -357,6 +356,7 @@ class MultiScaleDiscriminator(torch.nn.Module):
DiscriminatorS(),
DiscriminatorS(),
])
+ from pytorch_wavelets import DWT1DForward
self.meanpools = nn.ModuleList(
[DWT1DForward(wave='db3', J=1),
DWT1DForward(wave='db3', J=1)])
diff --git a/modelscope/models/base.py b/modelscope/models/base.py
index ab0d22cc..cb6d2b0e 100644
--- a/modelscope/models/base.py
+++ b/modelscope/models/base.py
@@ -4,12 +4,13 @@ import os.path as osp
from abc import ABC, abstractmethod
from typing import Dict, Union
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.builder import build_model
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile
-from modelscope.utils.hub import get_model_cache_dir
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
Tensor = Union['torch.Tensor', 'tf.Tensor']
@@ -47,21 +48,25 @@ class Model(ABC):
if osp.exists(model_name_or_path):
local_model_dir = model_name_or_path
else:
- cache_path = get_model_cache_dir(model_name_or_path)
- local_model_dir = cache_path if osp.exists(
- cache_path) else snapshot_download(model_name_or_path)
- # else:
- # raise ValueError(
- # 'Remote model repo {model_name_or_path} does not exists')
-
+ local_model_dir = snapshot_download(model_name_or_path)
+ logger.info(f'initialize model from {local_model_dir}')
cfg = Config.from_file(
osp.join(local_model_dir, ModelFile.CONFIGURATION))
task_name = cfg.task
model_cfg = cfg.model
+ assert hasattr(
+ cfg, 'pipeline'), 'pipeline config is missing from config file.'
+ pipeline_cfg = cfg.pipeline
# TODO @wenmeng.zwm may should manually initialize model after model building
if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'):
model_cfg.type = model_cfg.model_type
+
model_cfg.model_dir = local_model_dir
+
for k, v in kwargs.items():
model_cfg.k = v
- return build_model(model_cfg, task_name)
+ model = build_model(model_cfg, task_name)
+
+ # dynamically add pipeline info to model for pipeline inference
+ model.pipeline = pipeline_cfg
+ return model
diff --git a/modelscope/models/cv/action_recognition/__init__.py b/modelscope/models/cv/action_recognition/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/models/cv/action_recognition/models.py b/modelscope/models/cv/action_recognition/models.py
new file mode 100644
index 00000000..e85b6d81
--- /dev/null
+++ b/modelscope/models/cv/action_recognition/models.py
@@ -0,0 +1,91 @@
+import torch
+import torch.nn as nn
+
+from .tada_convnext import TadaConvNeXt
+
+
+class BaseVideoModel(nn.Module):
+ """
+ Standard video model.
+ The model is divided into the backbone and the head, where the backbone
+ extracts features and the head performs classification.
+
+    In this implementation the backbone is a TadaConvNeXt (see tada_convnext.py)
+    and the head is the BaseHead defined below; both are constructed from the
+    global config object.
+ """
+
+ def __init__(self, cfg):
+ """
+ Args:
+ cfg (Config): global config object.
+ """
+ super(BaseVideoModel, self).__init__()
+        # the backbone extracts spatio-temporal features
+        # (a TadaConvNeXt, see tada_convnext.py)
+ self.backbone = TadaConvNeXt(cfg)
+
+        # the head performs classification on the backbone features
+        # (the BaseHead defined below)
+ self.head = BaseHead(cfg)
+
+ def forward(self, x):
+ x = self.backbone(x)
+ x = self.head(x)
+ return x
+
+
+class BaseHead(nn.Module):
+ """
+ Constructs base head.
+ """
+
+ def __init__(
+ self,
+ cfg,
+ ):
+ """
+ Args:
+ cfg (Config): global config object.
+ """
+ super(BaseHead, self).__init__()
+ self.cfg = cfg
+ dim = cfg.VIDEO.BACKBONE.NUM_OUT_FEATURES
+ num_classes = cfg.VIDEO.HEAD.NUM_CLASSES
+ dropout_rate = cfg.VIDEO.HEAD.DROPOUT_RATE
+ activation_func = cfg.VIDEO.HEAD.ACTIVATION
+ self._construct_head(dim, num_classes, dropout_rate, activation_func)
+
+ def _construct_head(self, dim, num_classes, dropout_rate, activation_func):
+ self.global_avg_pool = nn.AdaptiveAvgPool3d(1)
+
+ if dropout_rate > 0.0:
+ self.dropout = nn.Dropout(dropout_rate)
+
+ self.out = nn.Linear(dim, num_classes, bias=True)
+
+ if activation_func == 'softmax':
+ self.activation = nn.Softmax(dim=-1)
+ elif activation_func == 'sigmoid':
+ self.activation = nn.Sigmoid()
+ else:
+            raise NotImplementedError('{} is not supported as an activation '
+                                      'function.'.format(activation_func))
+
+ def forward(self, x):
+ if len(x.shape) == 5:
+ x = self.global_avg_pool(x)
+ # (N, C, T, H, W) -> (N, T, H, W, C).
+ x = x.permute((0, 2, 3, 4, 1))
+ if hasattr(self, 'dropout'):
+ out = self.dropout(x)
+ else:
+ out = x
+ out = self.out(out)
+ out = self.activation(out)
+ out = out.view(out.shape[0], -1)
+ return out, x.view(x.shape[0], -1)
diff --git a/modelscope/models/cv/action_recognition/tada_convnext.py b/modelscope/models/cv/action_recognition/tada_convnext.py
new file mode 100644
index 00000000..379b5271
--- /dev/null
+++ b/modelscope/models/cv/action_recognition/tada_convnext.py
@@ -0,0 +1,472 @@
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.utils import _pair, _triple
+
+
+def drop_path(x, drop_prob: float = 0., training: bool = False):
+ """
+ From https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py.
+ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+ the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+ See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+ changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+ 'survival rate' as the argument.
+ """
+ if drop_prob == 0. or not training:
+ return x
+ keep_prob = 1 - drop_prob
+ shape = (x.shape[0], ) + (1, ) * (
+ x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
+ random_tensor = keep_prob + torch.rand(
+ shape, dtype=x.dtype, device=x.device)
+ random_tensor.floor_() # binarize
+ output = x.div(keep_prob) * random_tensor
+ return output
+
+
+class DropPath(nn.Module):
+ """
+ From https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py.
+ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+
+class TadaConvNeXt(nn.Module):
+ r""" ConvNeXt
+ A PyTorch impl of : `A ConvNet for the 2020s` -
+ https://arxiv.org/pdf/2201.03545.pdf
+
+ Args:
+ in_chans (int): Number of input image channels. Default: 3
+ num_classes (int): Number of classes for classification head. Default: 1000
+ depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
+ dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
+ drop_path_rate (float): Stochastic depth rate. Default: 0.
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+ head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
+ """
+
+ def __init__(
+ self, cfg
+ # in_chans=3, num_classes=1000,
+ # depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
+ # layer_scale_init_value=1e-6, head_init_scale=1.,
+ ):
+ super().__init__()
+ in_chans = cfg.VIDEO.BACKBONE.NUM_INPUT_CHANNELS
+ dims = cfg.VIDEO.BACKBONE.NUM_FILTERS
+ drop_path_rate = cfg.VIDEO.BACKBONE.DROP_PATH
+ depths = cfg.VIDEO.BACKBONE.DEPTH
+ layer_scale_init_value = cfg.VIDEO.BACKBONE.LARGE_SCALE_INIT_VALUE
+ stem_t_kernel_size = cfg.VIDEO.BACKBONE.STEM.T_KERNEL_SIZE if hasattr(
+ cfg.VIDEO.BACKBONE.STEM, 'T_KERNEL_SIZE') else 2
+ t_stride = cfg.VIDEO.BACKBONE.STEM.T_STRIDE if hasattr(
+ cfg.VIDEO.BACKBONE.STEM, 'T_STRIDE') else 2
+
+ self.downsample_layers = nn.ModuleList(
+ ) # stem and 3 intermediate downsampling conv layers
+ stem = nn.Sequential(
+ nn.Conv3d(
+ in_chans,
+ dims[0],
+ kernel_size=(stem_t_kernel_size, 4, 4),
+ stride=(t_stride, 4, 4),
+ padding=((stem_t_kernel_size - 1) // 2, 0, 0)),
+ LayerNorm(dims[0], eps=1e-6, data_format='channels_first'))
+ self.downsample_layers.append(stem)
+ for i in range(3):
+ downsample_layer = nn.Sequential(
+ LayerNorm(dims[i], eps=1e-6, data_format='channels_first'),
+ nn.Conv3d(
+ dims[i],
+ dims[i + 1],
+ kernel_size=(1, 2, 2),
+ stride=(1, 2, 2)),
+ )
+ self.downsample_layers.append(downsample_layer)
+
+ self.stages = nn.ModuleList(
+ ) # 4 feature resolution stages, each consisting of multiple residual blocks
+ dp_rates = [
+ x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
+ ]
+ cur = 0
+ for i in range(4):
+ stage = nn.Sequential(*[
+ TAdaConvNeXtBlock(
+ cfg,
+ dim=dims[i],
+ drop_path=dp_rates[cur + j],
+ layer_scale_init_value=layer_scale_init_value)
+ for j in range(depths[i])
+ ])
+ self.stages.append(stage)
+ cur += depths[i]
+
+ self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
+
+ def forward_features(self, x):
+ for i in range(4):
+ x = self.downsample_layers[i](x)
+ x = self.stages[i](x)
+        return self.norm(x.mean(
+            [-3, -2, -1]))  # global average pooling, (N, C, T, H, W) -> (N, C)
+
+ def forward(self, x):
+ if isinstance(x, dict):
+ x = x['video']
+ x = self.forward_features(x)
+ return x
+
+ def get_num_layers(self):
+ return 12, 0
+
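+# Illustrative sketch (editorial addition): running a clip through TadaConvNeXt. The keys
+# below mirror the cfg.VIDEO.BACKBONE fields read in __init__ (the BRANCH.ROUTE_FUNC_* and
+# LARGE_SCALE_INIT_VALUE fields must also be set); all values are hypothetical.
+#
+#   cfg.VIDEO.BACKBONE.NUM_INPUT_CHANNELS = 3
+#   cfg.VIDEO.BACKBONE.NUM_FILTERS = [96, 192, 384, 768]
+#   cfg.VIDEO.BACKBONE.DEPTH = [3, 3, 9, 3]
+#   cfg.VIDEO.BACKBONE.DROP_PATH = 0.1
+#   model = TadaConvNeXt(cfg)
+#   clip = torch.randn(2, 3, 8, 224, 224)    # (N, C, T, H, W)
+#   feat = model(clip)                       # (2, 768): pooled, LayerNorm-ed features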
+
+class ConvNeXtBlock(nn.Module):
+ r""" ConvNeXt Block. There are two equivalent implementations:
+ (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
+ (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
+ We use (2) as we find it slightly faster in PyTorch
+
+ Args:
+ dim (int): Number of input channels.
+ drop_path (float): Stochastic depth rate. Default: 0.0
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+ """
+
+ def __init__(self, cfg, dim, drop_path=0., layer_scale_init_value=1e-6):
+ super().__init__()
+ self.dwconv = nn.Conv3d(
+ dim, dim, kernel_size=(1, 7, 7), padding=(0, 3, 3),
+ groups=dim) # depthwise conv
+ self.norm = LayerNorm(dim, eps=1e-6)
+ self.pwconv1 = nn.Linear(
+ dim,
+ 4 * dim) # pointwise/1x1 convs, implemented with linear layers
+ self.act = nn.GELU()
+ self.pwconv2 = nn.Linear(4 * dim, dim)
+ self.gamma = nn.Parameter(
+ layer_scale_init_value * torch.ones((dim)),
+ requires_grad=True) if layer_scale_init_value > 0 else None
+ self.drop_path = DropPath(
+ drop_path) if drop_path > 0. else nn.Identity()
+
+ def forward(self, x):
+ input = x
+ x = self.dwconv(x)
+ x = x.permute(0, 2, 3, 4, 1) # (N, C, T, H, W) -> (N, T, H, W, C)
+ x = self.norm(x)
+ x = self.pwconv1(x)
+ x = self.act(x)
+ x = self.pwconv2(x)
+ if self.gamma is not None:
+ x = self.gamma * x
+ x = x.permute(0, 4, 1, 2, 3) # (N, T, H, W, C) -> (N, C, T, H, W)
+
+ x = input + self.drop_path(x)
+ return x
+
+
+class LayerNorm(nn.Module):
+ r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
+ The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
+ shape (batch_size, height, width, channels) while channels_first corresponds to inputs
+ with shape (batch_size, channels, height, width).
+ """
+
+ def __init__(self,
+ normalized_shape,
+ eps=1e-6,
+ data_format='channels_last'):
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(normalized_shape))
+ self.bias = nn.Parameter(torch.zeros(normalized_shape))
+ self.eps = eps
+ self.data_format = data_format
+ if self.data_format not in ['channels_last', 'channels_first']:
+ raise NotImplementedError
+ self.normalized_shape = (normalized_shape, )
+
+ def forward(self, x):
+ if self.data_format == 'channels_last':
+ return F.layer_norm(x, self.normalized_shape, self.weight,
+ self.bias, self.eps)
+ elif self.data_format == 'channels_first':
+ u = x.mean(1, keepdim=True)
+ s = (x - u).pow(2).mean(1, keepdim=True)
+ x = (x - u) / torch.sqrt(s + self.eps)
+ x = self.weight[:, None, None, None] * x + self.bias[:, None, None,
+ None]
+ return x
+
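+# Illustrative sketch (editorial addition): the channels_first branch above is numerically
+# equivalent to permuting to channels_last and normalizing over the channel dimension.
+#
+#   ln = LayerNorm(96, data_format='channels_first')
+#   x = torch.randn(2, 96, 4, 7, 7)          # (N, C, T, H, W)
+#   ref = F.layer_norm(x.permute(0, 2, 3, 4, 1), (96,), ln.weight, ln.bias, ln.eps)
+#   assert torch.allclose(ln(x), ref.permute(0, 4, 1, 2, 3), atol=1e-5)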
+
+class TAdaConvNeXtBlock(nn.Module):
+ r""" ConvNeXt Block. There are two equivalent implementations:
+ (1) DwConv -> LayerNorm (channels_fi rst) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
+ (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
+ We use (2) as we find it slightly faster in PyTorch
+
+ Args:
+ dim (int): Number of input channels.
+ drop_path (float): Stochastic depth rate. Default: 0.0
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+ """
+
+ def __init__(self, cfg, dim, drop_path=0., layer_scale_init_value=1e-6):
+ super().__init__()
+ layer_scale_init_value = float(layer_scale_init_value)
+ self.dwconv = TAdaConv2d(
+ dim,
+ dim,
+ kernel_size=(1, 7, 7),
+ padding=(0, 3, 3),
+ groups=dim,
+ cal_dim='cout')
+ route_func_type = cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_TYPE
+ if route_func_type == 'normal':
+ self.dwconv_rf = RouteFuncMLP(
+ c_in=dim,
+ ratio=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_R,
+ kernels=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_K,
+ with_bias_cal=self.dwconv.bias is not None)
+ elif route_func_type == 'normal_lngelu':
+ self.dwconv_rf = RouteFuncMLPLnGelu(
+ c_in=dim,
+ ratio=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_R,
+ kernels=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_K,
+ with_bias_cal=self.dwconv.bias is not None)
+ else:
+ raise ValueError(
+ 'Unknown route_func_type: {}'.format(route_func_type))
+ self.norm = LayerNorm(dim, eps=1e-6)
+ self.pwconv1 = nn.Linear(
+ dim,
+ 4 * dim) # pointwise/1x1 convs, implemented with linear layers
+ self.act = nn.GELU()
+ self.pwconv2 = nn.Linear(4 * dim, dim)
+ self.gamma = nn.Parameter(
+ layer_scale_init_value * torch.ones((dim)),
+ requires_grad=True) if layer_scale_init_value > 0 else None
+ self.drop_path = DropPath(
+ drop_path) if drop_path > 0. else nn.Identity()
+
+ def forward(self, x):
+ input = x
+ x = self.dwconv(x, self.dwconv_rf(x))
+ x = x.permute(0, 2, 3, 4, 1) # (N, C, T, H, W) -> (N, T, H, W, C)
+ x = self.norm(x)
+ x = self.pwconv1(x)
+ x = self.act(x)
+ x = self.pwconv2(x)
+ if self.gamma is not None:
+ x = self.gamma * x
+ x = x.permute(0, 4, 1, 2, 3) # (N, T, H, W, C) -> (N, C, T, H, W)
+
+ x = input + self.drop_path(x)
+ return x
+
+
+class RouteFuncMLPLnGelu(nn.Module):
+ """
+    The routing function for generating the calibration weights;
+    this variant uses LayerNorm and GELU as its normalization and activation.
+ """
+
+ def __init__(self,
+ c_in,
+ ratio,
+ kernels,
+ with_bias_cal=False,
+ bn_eps=1e-5,
+ bn_mmt=0.1):
+ """
+ Args:
+ c_in (int): number of input channels.
+ ratio (int): reduction ratio for the routing function.
+            kernels (list): temporal kernel sizes of the two stacked 1D convolutions.
+            with_bias_cal (bool): whether to also generate calibration weights for the bias.
+        """
+ super(RouteFuncMLPLnGelu, self).__init__()
+ self.c_in = c_in
+ self.with_bias_cal = with_bias_cal
+ self.avgpool = nn.AdaptiveAvgPool3d((None, 1, 1))
+ self.globalpool = nn.AdaptiveAvgPool3d(1)
+ self.g = nn.Conv3d(
+ in_channels=c_in,
+ out_channels=c_in,
+ kernel_size=1,
+ padding=0,
+ )
+ self.a = nn.Conv3d(
+ in_channels=c_in,
+ out_channels=int(c_in // ratio),
+ kernel_size=[kernels[0], 1, 1],
+ padding=[kernels[0] // 2, 0, 0],
+ )
+ # self.bn = nn.BatchNorm3d(int(c_in//ratio), eps=bn_eps, momentum=bn_mmt)
+ self.ln = LayerNorm(
+ int(c_in // ratio), eps=1e-6, data_format='channels_first')
+ self.gelu = nn.GELU()
+ # self.relu = nn.ReLU(inplace=True)
+ self.b = nn.Conv3d(
+ in_channels=int(c_in // ratio),
+ out_channels=c_in,
+ kernel_size=[kernels[1], 1, 1],
+ padding=[kernels[1] // 2, 0, 0],
+ bias=False)
+ self.b.skip_init = True
+        self.b.weight.data.zero_()  # zero-init so that the initial
+        # calibration output is exactly 1.
+ if with_bias_cal:
+ self.b_bias = nn.Conv3d(
+ in_channels=int(c_in // ratio),
+ out_channels=c_in,
+ kernel_size=[kernels[1], 1, 1],
+ padding=[kernels[1] // 2, 0, 0],
+ bias=False)
+ self.b_bias.skip_init = True
+            self.b_bias.weight.data.zero_()  # zero-init so that the initial
+            # calibration output is exactly 1.
+
+ def forward(self, x):
+ g = self.globalpool(x)
+ x = self.avgpool(x)
+ x = self.a(x + self.g(g))
+ # x = self.bn(x)
+ # x = self.relu(x)
+ x = self.ln(x)
+ x = self.gelu(x)
+ if self.with_bias_cal:
+ return [self.b(x) + 1, self.b_bias(x) + 1]
+ else:
+ return self.b(x) + 1
+
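+# Illustrative sketch (editorial addition): the routing function maps a (N, C, T, H, W)
+# feature to per-frame calibration weights of shape (N, C, T, 1, 1). Because self.b
+# (and self.b_bias) are zero-initialized, the output starts at exactly 1, so TAdaConv2d
+# initially behaves like a plain, shared-weight convolution.
+#
+#   rf = RouteFuncMLPLnGelu(c_in=96, ratio=4, kernels=[3, 3])
+#   alpha = rf(torch.randn(2, 96, 8, 14, 14))
+#   # alpha.shape == (2, 96, 8, 1, 1); alpha == 1 everywhere right after initialization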
+
+class TAdaConv2d(nn.Module):
+ """
+ Performs temporally adaptive 2D convolution.
+ Currently, only application on 5D tensors is supported, which makes TAdaConv2d
+ essentially a 3D convolution with temporal kernel size of 1.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias=True,
+ cal_dim='cin'):
+ super(TAdaConv2d, self).__init__()
+ """
+ Args:
+ in_channels (int): number of input channels.
+ out_channels (int): number of output channels.
+ kernel_size (list): kernel size of TAdaConv2d.
+ stride (list): stride for the convolution in TAdaConv2d.
+ padding (list): padding for the convolution in TAdaConv2d.
+ dilation (list): dilation of the convolution in TAdaConv2d.
+ groups (int): number of groups for TAdaConv2d.
+ bias (bool): whether to use bias in TAdaConv2d.
+            cal_dim (str): which dimension of the kernel is calibrated in TAdaConv2d.
+                Supported inputs: "cin", "cout".
+ """
+
+ kernel_size = _triple(kernel_size)
+ stride = _triple(stride)
+ padding = _triple(padding)
+ dilation = _triple(dilation)
+
+ assert kernel_size[0] == 1
+ assert stride[0] == 1
+ assert padding[0] == 0
+ assert dilation[0] == 1
+ assert cal_dim in ['cin', 'cout']
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.kernel_size = kernel_size
+ self.stride = stride
+ self.padding = padding
+ self.dilation = dilation
+ self.groups = groups
+ self.cal_dim = cal_dim
+
+ # base weights (W_b)
+ self.weight = nn.Parameter(
+ torch.Tensor(1, 1, out_channels, in_channels // groups,
+ kernel_size[1], kernel_size[2]))
+ if bias:
+ self.bias = nn.Parameter(torch.Tensor(1, 1, out_channels))
+ else:
+ self.register_parameter('bias', None)
+
+ nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+ if self.bias is not None:
+ fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
+ bound = 1 / math.sqrt(fan_in)
+ nn.init.uniform_(self.bias, -bound, bound)
+
+ def forward(self, x, alpha):
+ """
+ Args:
+ x (tensor): feature to perform convolution on.
+ alpha (tensor): calibration weight for the base weights.
+ W_t = alpha_t * W_b
+ """
+ if isinstance(alpha, list):
+ w_alpha, b_alpha = alpha[0], alpha[1]
+ else:
+ w_alpha = alpha
+ b_alpha = None
+ _, _, c_out, c_in, kh, kw = self.weight.size()
+ b, c_in, t, h, w = x.size()
+ x = x.permute(0, 2, 1, 3, 4).reshape(1, -1, h, w)
+
+ if self.cal_dim == 'cin':
+ # w_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, 1, C, H(1), W(1)
+ # corresponding to calibrating the input channel
+ weight = (w_alpha.permute(0, 2, 1, 3, 4).unsqueeze(2)
+ * self.weight).reshape(-1, c_in // self.groups, kh, kw)
+ elif self.cal_dim == 'cout':
+ # w_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, C, 1, H(1), W(1)
+            # corresponding to calibrating the output channel
+ weight = (w_alpha.permute(0, 2, 1, 3, 4).unsqueeze(3)
+ * self.weight).reshape(-1, c_in // self.groups, kh, kw)
+
+ bias = None
+ if self.bias is not None:
+ if b_alpha is not None:
+ # b_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, C
+ bias = (b_alpha.permute(0, 2, 1, 3, 4).squeeze()
+ * self.bias).reshape(-1)
+ else:
+ bias = self.bias.repeat(b, t, 1).reshape(-1)
+ output = F.conv2d(
+ x,
+ weight=weight,
+ bias=bias,
+ stride=self.stride[1:],
+ padding=self.padding[1:],
+ dilation=self.dilation[1:],
+ groups=self.groups * b * t)
+
+ output = output.view(b, t, c_out, output.size(-2),
+ output.size(-1)).permute(0, 2, 1, 3, 4)
+
+ return output
+
+ def __repr__(self):
+ return f'TAdaConv2d({self.in_channels}, {self.out_channels}, kernel_size={self.kernel_size}, ' +\
+ f"stride={self.stride}, padding={self.padding}, bias={self.bias is not None}, cal_dim=\"{self.cal_dim}\")"
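+
+# Illustrative sketch (editorial addition): TAdaConv2d consumes both the feature map and
+# the calibration weights produced by a routing function. Internally the (N, T) dimensions
+# are folded into the group dimension of a single F.conv2d call, so every frame of every
+# sample is convolved with its own calibrated kernel.
+#
+#   conv = TAdaConv2d(96, 96, kernel_size=(1, 7, 7), padding=(0, 3, 3),
+#                     groups=96, cal_dim='cout')
+#   rf = RouteFuncMLPLnGelu(c_in=96, ratio=4, kernels=[3, 3], with_bias_cal=True)
+#   x = torch.randn(2, 96, 8, 14, 14)        # (N, C, T, H, W)
+#   y = conv(x, rf(x))                       # y.shape == (2, 96, 8, 14, 14)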
diff --git a/modelscope/models/multi_model/__init__.py b/modelscope/models/multi_model/__init__.py
new file mode 100644
index 00000000..02e8d6ab
--- /dev/null
+++ b/modelscope/models/multi_model/__init__.py
@@ -0,0 +1 @@
+from .image_captioning_model import OfaForImageCaptioning
diff --git a/modelscope/models/multi_model/image_captioning_model.py b/modelscope/models/multi_model/image_captioning_model.py
new file mode 100644
index 00000000..79ab2b5f
--- /dev/null
+++ b/modelscope/models/multi_model/image_captioning_model.py
@@ -0,0 +1,80 @@
+import os.path as osp
+from typing import Any, Dict
+
+from PIL import Image
+
+from modelscope.metainfo import Models
+from modelscope.utils.constant import ModelFile, Tasks
+from ..base import Model
+from ..builder import MODELS
+
+__all__ = ['OfaForImageCaptioning']
+
+
+@MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa)
+class OfaForImageCaptioning(Model):
+
+ def __init__(self, model_dir, *args, **kwargs):
+ super().__init__(model_dir=model_dir, *args, **kwargs)
+ ckpt_name = ModelFile.TORCH_MODEL_FILE
+ local_model = osp.join(model_dir, ckpt_name)
+ bpe_dir = model_dir
+ # turn on cuda if GPU is available
+ from fairseq import checkpoint_utils, tasks, utils
+ from ofa.tasks.mm_tasks import CaptionTask
+ from ofa.utils.eval_utils import eval_caption
+ self.eval_caption = eval_caption
+
+ tasks.register_task('caption', CaptionTask)
+        use_cuda = kwargs.get('use_cuda', False)
+        use_fp16 = kwargs.get('use_fp16', False) and use_cuda
+ overrides = {
+ 'bpe_dir': bpe_dir,
+ 'eval_cider': False,
+ 'beam': 5,
+ 'max_len_b': 16,
+ 'no_repeat_ngram_size': 3,
+ 'seed': 7
+ }
+ models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
+ utils.split_paths(local_model), arg_overrides=overrides)
+
+        # Prepare models for inference (eval mode, optional GPU / fp16)
+ for model in models:
+ model.eval()
+ if use_cuda:
+ model.cuda()
+ if use_fp16:
+ model.half()
+ model.prepare_for_inference_(cfg)
+ self.models = models
+ # Initialize generator
+ self.generator = task.build_generator(models, cfg.generation)
+
+ # Initialize transform
+ from torchvision import transforms
+ mean = [0.5, 0.5, 0.5]
+ std = [0.5, 0.5, 0.5]
+
+ self.patch_resize_transform = transforms.Compose([
+ lambda image: image.convert('RGB'),
+ transforms.Resize(
+ (cfg.task.patch_image_size, cfg.task.patch_image_size),
+ interpolation=Image.BICUBIC),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=mean, std=std),
+ ])
+ self.task = task
+
+ def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+ results, _ = self.eval_caption(self.task, self.generator, self.models,
+ input)
+ return {
+ 'image_id': results[0]['image_id'],
+ 'caption': results[0]['caption']
+ }
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        # TODO: decide whether any post-processing is needed here.
+ return inputs
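+
+# Illustrative sketch (editorial addition): patch_resize_transform converts a PIL image
+# into the normalized tensor OFA expects. The full forward() call additionally needs the
+# fairseq-formatted sample produced by the matching preprocessor, so only the image side
+# is shown; the path and instance name below are hypothetical.
+#
+#   img = Image.open('some_image.png')
+#   patch = ofa_model.patch_resize_transform(img)
+#   # patch.shape == (3, cfg.task.patch_image_size, cfg.task.patch_image_size), values in [-1, 1]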
diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py
index e62ab404..49cbd053 100644
--- a/modelscope/models/nlp/__init__.py
+++ b/modelscope/models/nlp/__init__.py
@@ -1,6 +1,9 @@
from .bert_for_sequence_classification import * # noqa F403
+from .masked_language_model import * # noqa F403
from .palm_for_text_generation import * # noqa F403
+from .sbert_for_nli import * # noqa F403
from .sbert_for_sentence_similarity import * # noqa F403
+from .sbert_for_sentiment_classification import * # noqa F403
from .sbert_for_token_classification import * # noqa F403
from .space.dialog_intent_prediction_model import * # noqa F403
from .space.dialog_modeling_model import * # noqa F403
diff --git a/modelscope/models/nlp/bert_for_sequence_classification.py b/modelscope/models/nlp/bert_for_sequence_classification.py
index a3cc4b68..7d85fa28 100644
--- a/modelscope/models/nlp/bert_for_sequence_classification.py
+++ b/modelscope/models/nlp/bert_for_sequence_classification.py
@@ -4,6 +4,7 @@ from typing import Any, Dict
import json
import numpy as np
+from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from ..base import Model
from ..builder import MODELS
@@ -11,8 +12,7 @@ from ..builder import MODELS
__all__ = ['BertForSequenceClassification']
-@MODELS.register_module(
- Tasks.text_classification, module_name=r'bert-sentiment-analysis')
+@MODELS.register_module(Tasks.text_classification, module_name=Models.bert)
class BertForSequenceClassification(Model):
def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/nlp/masked_language_model.py b/modelscope/models/nlp/masked_language_model.py
new file mode 100644
index 00000000..bb255c9c
--- /dev/null
+++ b/modelscope/models/nlp/masked_language_model.py
@@ -0,0 +1,63 @@
+from typing import Any, Dict, Optional, Union
+
+import numpy as np
+
+from ...metainfo import Models
+from ...utils.constant import Tasks
+from ..base import Model, Tensor
+from ..builder import MODELS
+
+__all__ = ['StructBertForMaskedLM', 'VecoForMaskedLM']
+
+
+class MaskedLanguageModelBase(Model):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ super().__init__(model_dir, *args, **kwargs)
+ self.model = self.build_model()
+
+ def build_model(self):
+ raise NotImplementedError()
+
+ def train(self):
+ return self.model.train()
+
+ def eval(self):
+ return self.model.eval()
+
+ @property
+ def config(self):
+ if hasattr(self.model, 'config'):
+ return self.model.config
+ return None
+
+ def forward(self, input: Dict[str, Tensor]) -> Dict[str, np.ndarray]:
+ """return the result by the model
+
+ Args:
+ input (Dict[str, Any]): the preprocessed data
+
+ Returns:
+ Dict[str, np.ndarray]: results
+ """
+ rst = self.model(
+ input_ids=input['input_ids'],
+ attention_mask=input['attention_mask'],
+ token_type_ids=input['token_type_ids'])
+ return {'logits': rst['logits'], 'input_ids': input['input_ids']}
+
+
+@MODELS.register_module(Tasks.fill_mask, module_name=Models.structbert)
+class StructBertForMaskedLM(MaskedLanguageModelBase):
+
+ def build_model(self):
+ from sofa import SbertForMaskedLM
+ return SbertForMaskedLM.from_pretrained(self.model_dir)
+
+
+@MODELS.register_module(Tasks.fill_mask, module_name=Models.veco)
+class VecoForMaskedLM(MaskedLanguageModelBase):
+
+ def build_model(self):
+ from sofa import VecoForMaskedLM
+ return VecoForMaskedLM.from_pretrained(self.model_dir)
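+
+# Illustrative sketch (editorial addition): reading predictions out of forward(). The
+# `inputs` dict is assumed to come from the fill-mask preprocessor and to contain
+# input_ids / attention_mask / token_type_ids tensors; `mlm_model` is a hypothetical
+# instance of one of the classes above.
+#
+#   out = mlm_model(inputs)
+#   pred_ids = out['logits'].argmax(-1)      # (batch, seq_len) predicted token ids
+#   # reading pred_ids at the [MASK] positions of out['input_ids'] yields the filled tokens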
diff --git a/modelscope/models/nlp/palm_for_text_generation.py b/modelscope/models/nlp/palm_for_text_generation.py
index e5799feb..f6c15387 100644
--- a/modelscope/models/nlp/palm_for_text_generation.py
+++ b/modelscope/models/nlp/palm_for_text_generation.py
@@ -1,13 +1,14 @@
from typing import Dict
-from modelscope.utils.constant import Tasks
+from ...metainfo import Models
+from ...utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS
__all__ = ['PalmForTextGeneration']
-@MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0')
+@MODELS.register_module(Tasks.text_generation, module_name=Models.palm)
class PalmForTextGeneration(Model):
def __init__(self, model_dir: str, *args, **kwargs):
@@ -19,13 +20,18 @@ class PalmForTextGeneration(Model):
default loader to load model weights, by default None.
"""
super().__init__(model_dir, *args, **kwargs)
- self.model_dir = model_dir
from sofa.models.palm_v2 import PalmForConditionalGeneration, Translator
model = PalmForConditionalGeneration.from_pretrained(model_dir)
self.tokenizer = model.tokenizer
self.generator = Translator(model)
+ def train(self):
+ return self.generator.train()
+
+ def eval(self):
+ return self.generator.eval()
+
def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
"""return the result by the model
diff --git a/modelscope/models/nlp/sbert_for_nli.py b/modelscope/models/nlp/sbert_for_nli.py
new file mode 100644
index 00000000..a5a76b34
--- /dev/null
+++ b/modelscope/models/nlp/sbert_for_nli.py
@@ -0,0 +1,23 @@
+from ...metainfo import Models
+from ...utils.constant import Tasks
+from ..builder import MODELS
+from .sbert_for_sequence_classification import \
+ SbertForSequenceClassificationBase
+
+__all__ = ['SbertForNLI']
+
+
+@MODELS.register_module(Tasks.nli, module_name=Models.structbert)
+class SbertForNLI(SbertForSequenceClassificationBase):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ """initialize the text generation model from the `model_dir` path.
+
+ Args:
+ model_dir (str): the model path.
+ model_cls (Optional[Any], optional): model loader, if None, use the
+ default loader to load model weights, by default None.
+ """
+ super().__init__(
+ model_dir, *args, model_args={'num_labels': 3}, **kwargs)
+ assert self.model.config.num_labels == 3
diff --git a/modelscope/models/nlp/sbert_for_sentence_similarity.py b/modelscope/models/nlp/sbert_for_sentence_similarity.py
index 98daac92..25c38a2e 100644
--- a/modelscope/models/nlp/sbert_for_sentence_similarity.py
+++ b/modelscope/models/nlp/sbert_for_sentence_similarity.py
@@ -1,46 +1,15 @@
-import os
-from typing import Any, Dict
-
-import json
-import numpy as np
-import torch
-from sofa import SbertModel
-from sofa.models.sbert.modeling_sbert import SbertPreTrainedModel
-from torch import nn
-
+from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
-from ..base import Model, Tensor
from ..builder import MODELS
+from .sbert_for_sequence_classification import \
+ SbertForSequenceClassificationBase
__all__ = ['SbertForSentenceSimilarity']
-class SbertTextClassifier(SbertPreTrainedModel):
-
- def __init__(self, config):
- super().__init__(config)
- self.num_labels = config.num_labels
- self.config = config
- self.encoder = SbertModel(config, add_pooling_layer=True)
- self.dropout = nn.Dropout(config.hidden_dropout_prob)
- self.classifier = nn.Linear(config.hidden_size, config.num_labels)
-
- def forward(self, input_ids=None, token_type_ids=None):
- outputs = self.encoder(
- input_ids,
- token_type_ids=token_type_ids,
- return_dict=None,
- )
- pooled_output = outputs[1]
- pooled_output = self.dropout(pooled_output)
- logits = self.classifier(pooled_output)
- return logits
-
-
@MODELS.register_module(
- Tasks.sentence_similarity,
- module_name=r'sbert-base-chinese-sentence-similarity')
-class SbertForSentenceSimilarity(Model):
+ Tasks.sentence_similarity, module_name=Models.structbert)
+class SbertForSentenceSimilarity(SbertForSequenceClassificationBase):
def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the sentence similarity model from the `model_dir` path.
@@ -50,39 +19,7 @@ class SbertForSentenceSimilarity(Model):
model_cls (Optional[Any], optional): model loader, if None, use the
default loader to load model weights, by default None.
"""
- super().__init__(model_dir, *args, **kwargs)
+ super().__init__(
+ model_dir, *args, model_args={'num_labels': 2}, **kwargs)
self.model_dir = model_dir
-
- self.model = SbertTextClassifier.from_pretrained(
- model_dir, num_labels=2)
- self.model.eval()
- self.label_path = os.path.join(self.model_dir, 'label_mapping.json')
- with open(self.label_path) as f:
- self.label_mapping = json.load(f)
- self.id2label = {idx: name for name, idx in self.label_mapping.items()}
-
- def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
- """return the result by the model
-
- Args:
- input (Dict[str, Any]): the preprocessed data
-
- Returns:
- Dict[str, np.ndarray]: results
- Example:
- {
- 'predictions': array([1]), # lable 0-negative 1-positive
- 'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32),
- 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value
- }
- """
- input_ids = torch.tensor(input['input_ids'], dtype=torch.long)
- token_type_ids = torch.tensor(
- input['token_type_ids'], dtype=torch.long)
- with torch.no_grad():
- logits = self.model(input_ids, token_type_ids)
- probs = logits.softmax(-1).numpy()
- pred = logits.argmax(-1).numpy()
- logits = logits.numpy()
- res = {'predictions': pred, 'probabilities': probs, 'logits': logits}
- return res
+ assert self.model.config.num_labels == 2
diff --git a/modelscope/models/nlp/sbert_for_sentiment_classification.py b/modelscope/models/nlp/sbert_for_sentiment_classification.py
new file mode 100644
index 00000000..72fb92f0
--- /dev/null
+++ b/modelscope/models/nlp/sbert_for_sentiment_classification.py
@@ -0,0 +1,24 @@
+from modelscope.metainfo import Models
+from modelscope.utils.constant import Tasks
+from ..builder import MODELS
+from .sbert_for_sequence_classification import \
+ SbertForSequenceClassificationBase
+
+__all__ = ['SbertForSentimentClassification']
+
+
+@MODELS.register_module(
+ Tasks.sentiment_classification, module_name=Models.structbert)
+class SbertForSentimentClassification(SbertForSequenceClassificationBase):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ """initialize the text generation model from the `model_dir` path.
+
+ Args:
+ model_dir (str): the model path.
+ model_cls (Optional[Any], optional): model loader, if None, use the
+ default loader to load model weights, by default None.
+ """
+ super().__init__(
+ model_dir, *args, model_args={'num_labels': 2}, **kwargs)
+ assert self.model.config.num_labels == 2
diff --git a/modelscope/models/nlp/sbert_for_sequence_classification.py b/modelscope/models/nlp/sbert_for_sequence_classification.py
new file mode 100644
index 00000000..861b6fe2
--- /dev/null
+++ b/modelscope/models/nlp/sbert_for_sequence_classification.py
@@ -0,0 +1,71 @@
+import os
+from typing import Any, Dict
+
+import json
+import numpy as np
+import torch
+from sofa.models.sbert.modeling_sbert import SbertModel, SbertPreTrainedModel
+from torch import nn
+
+from ..base import Model
+
+
+class SbertTextClassfier(SbertPreTrainedModel):
+
+ def __init__(self, config):
+ super().__init__(config)
+ self.num_labels = config.num_labels
+ self.config = config
+ self.encoder = SbertModel(config, add_pooling_layer=True)
+ self.dropout = nn.Dropout(config.hidden_dropout_prob)
+ self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+
+ def forward(self, input_ids=None, token_type_ids=None):
+ outputs = self.encoder(
+ input_ids,
+ token_type_ids=token_type_ids,
+ return_dict=None,
+ )
+ pooled_output = outputs[1]
+ pooled_output = self.dropout(pooled_output)
+ logits = self.classifier(pooled_output)
+ return {'logits': logits}
+
+
+class SbertForSequenceClassificationBase(Model):
+
+ def __init__(self, model_dir: str, model_args=None, *args, **kwargs):
+ super().__init__(model_dir, *args, **kwargs)
+ if model_args is None:
+ model_args = {}
+ self.model = SbertTextClassfier.from_pretrained(
+ model_dir, **model_args)
+ self.id2label = {}
+ self.label_path = os.path.join(self.model_dir, 'label_mapping.json')
+ if os.path.exists(self.label_path):
+ with open(self.label_path) as f:
+ self.label_mapping = json.load(f)
+ self.id2label = {
+ idx: name
+ for name, idx in self.label_mapping.items()
+ }
+
+ def train(self):
+ return self.model.train()
+
+ def eval(self):
+ return self.model.eval()
+
+ def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
+ input_ids = torch.tensor(input['input_ids'], dtype=torch.long)
+ token_type_ids = torch.tensor(
+ input['token_type_ids'], dtype=torch.long)
+ return self.model.forward(input_ids, token_type_ids)
+
+ def postprocess(self, input, **kwargs):
+ logits = input['logits']
+ probs = logits.softmax(-1).numpy()
+ pred = logits.argmax(-1).numpy()
+ logits = logits.numpy()
+ res = {'predictions': pred, 'probabilities': probs, 'logits': logits}
+ return res
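+
+# Illustrative sketch (editorial addition): postprocess() turns raw logits into
+# probabilities and a hard label; `clf` stands for any instance of the subclasses
+# built on this base.
+#
+#   out = clf.postprocess({'logits': torch.tensor([[-0.54, 1.50]])})
+#   # out['probabilities'] ~ [[0.115, 0.885]], out['predictions'] == [1]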
diff --git a/modelscope/models/nlp/sbert_for_token_classification.py b/modelscope/models/nlp/sbert_for_token_classification.py
index b918dc37..fd175033 100644
--- a/modelscope/models/nlp/sbert_for_token_classification.py
+++ b/modelscope/models/nlp/sbert_for_token_classification.py
@@ -2,19 +2,17 @@ from typing import Any, Dict, Union
import numpy as np
import torch
-from sofa import SbertConfig, SbertForTokenClassification
+from modelscope.metainfo import Models
from modelscope.utils.constant import Tasks
from ..base import Model, Tensor
from ..builder import MODELS
-__all__ = ['StructBertForTokenClassification']
+__all__ = ['SbertForTokenClassification']
-@MODELS.register_module(
- Tasks.word_segmentation,
- module_name=r'structbert-chinese-word-segmentation')
-class StructBertForTokenClassification(Model):
+@MODELS.register_module(Tasks.word_segmentation, module_name=Models.structbert)
+class SbertForTokenClassification(Model):
def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the word segmentation model from the `model_dir` path.
@@ -26,9 +24,16 @@ class StructBertForTokenClassification(Model):
"""
super().__init__(model_dir, *args, **kwargs)
self.model_dir = model_dir
- self.model = SbertForTokenClassification.from_pretrained(
+ import sofa
+ self.model = sofa.SbertForTokenClassification.from_pretrained(
self.model_dir)
- self.config = SbertConfig.from_pretrained(self.model_dir)
+ self.config = sofa.SbertConfig.from_pretrained(self.model_dir)
+
+ def train(self):
+ return self.model.train()
+
+ def eval(self):
+ return self.model.eval()
def forward(self, input: Dict[str,
Any]) -> Dict[str, Union[str, np.ndarray]]:
@@ -47,10 +52,12 @@ class StructBertForTokenClassification(Model):
}
"""
input_ids = torch.tensor(input['input_ids']).unsqueeze(0)
- output = self.model(input_ids)
- logits = output.logits
+ return {**self.model(input_ids), 'text': input['text']}
+
+ def postprocess(self, input: Dict[str, Tensor],
+ **kwargs) -> Dict[str, Tensor]:
+ logits = input['logits']
pred = torch.argmax(logits[0], dim=-1)
pred = pred.numpy()
-
rst = {'predictions': pred, 'logits': logits, 'text': input['text']}
return rst
diff --git a/modelscope/models/nlp/space/dialog_intent_prediction_model.py b/modelscope/models/nlp/space/dialog_intent_prediction_model.py
index 3ea500e5..a5d94376 100644
--- a/modelscope/models/nlp/space/dialog_intent_prediction_model.py
+++ b/modelscope/models/nlp/space/dialog_intent_prediction_model.py
@@ -1,11 +1,10 @@
import os
from typing import Any, Dict
-from modelscope.preprocessors.space.fields.intent_field import \
- IntentBPETextField
-from modelscope.trainers.nlp.space.trainers.intent_trainer import IntentTrainer
-from modelscope.utils.config import Config
-from modelscope.utils.constant import Tasks
+from ....preprocessors.space.fields.intent_field import IntentBPETextField
+from ....trainers.nlp.space.trainers.intent_trainer import IntentTrainer
+from ....utils.config import Config
+from ....utils.constant import Tasks
from ...base import Model, Tensor
from ...builder import MODELS
from .model.generator import Generator
@@ -14,8 +13,7 @@ from .model.model_base import ModelBase
__all__ = ['DialogIntentModel']
-@MODELS.register_module(
- Tasks.dialog_intent_prediction, module_name=r'space-intent')
+@MODELS.register_module(Tasks.dialog_intent_prediction, module_name=r'space')
class DialogIntentModel(Model):
def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/nlp/space/dialog_modeling_model.py b/modelscope/models/nlp/space/dialog_modeling_model.py
index bae8a822..4a34f132 100644
--- a/modelscope/models/nlp/space/dialog_modeling_model.py
+++ b/modelscope/models/nlp/space/dialog_modeling_model.py
@@ -1,11 +1,10 @@
import os
from typing import Any, Dict, Optional
-from modelscope.preprocessors.space.fields.gen_field import \
- MultiWOZBPETextField
-from modelscope.trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer
-from modelscope.utils.config import Config
-from modelscope.utils.constant import Tasks
+from ....preprocessors.space.fields.gen_field import MultiWOZBPETextField
+from ....trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer
+from ....utils.config import Config
+from ....utils.constant import Tasks
from ...base import Model, Tensor
from ...builder import MODELS
from .model.generator import Generator
@@ -14,7 +13,7 @@ from .model.model_base import ModelBase
__all__ = ['DialogModelingModel']
-@MODELS.register_module(Tasks.dialog_modeling, module_name=r'space-modeling')
+@MODELS.register_module(Tasks.dialog_modeling, module_name=r'space')
class DialogModelingModel(Model):
def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/nlp/space/dialog_state_tracking.py b/modelscope/models/nlp/space/dialog_state_tracking.py
index 4b1c44d3..e94c59b0 100644
--- a/modelscope/models/nlp/space/dialog_state_tracking.py
+++ b/modelscope/models/nlp/space/dialog_state_tracking.py
@@ -11,7 +11,7 @@ from .model.model_base import ModelBase
__all__ = ['DialogStateTrackingModel']
-@MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space-dst')
+@MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space')
class DialogStateTrackingModel(Model):
def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/nlp/space/model/gen_unified_transformer.py b/modelscope/models/nlp/space/model/gen_unified_transformer.py
index c076cce4..0f1b1a83 100644
--- a/modelscope/models/nlp/space/model/gen_unified_transformer.py
+++ b/modelscope/models/nlp/space/model/gen_unified_transformer.py
@@ -3,8 +3,7 @@ IntentUnifiedTransformer
"""
import torch
-from modelscope.models.nlp.space.model.unified_transformer import \
- UnifiedTransformer
+from .unified_transformer import UnifiedTransformer
class GenUnifiedTransformer(UnifiedTransformer):
diff --git a/modelscope/models/nlp/space/model/intent_unified_transformer.py b/modelscope/models/nlp/space/model/intent_unified_transformer.py
index 646a8044..b9c699d7 100644
--- a/modelscope/models/nlp/space/model/intent_unified_transformer.py
+++ b/modelscope/models/nlp/space/model/intent_unified_transformer.py
@@ -5,7 +5,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
-from modelscope.utils.nlp.space.criterions import compute_kl_loss
+from .....utils.nlp.space.criterions import compute_kl_loss
from .unified_transformer import UnifiedTransformer
diff --git a/modelscope/models/nlp/space/model/unified_transformer.py b/modelscope/models/nlp/space/model/unified_transformer.py
index a25bc7f4..2636553d 100644
--- a/modelscope/models/nlp/space/model/unified_transformer.py
+++ b/modelscope/models/nlp/space/model/unified_transformer.py
@@ -7,10 +7,9 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
-from modelscope.models.nlp.space.model.model_base import ModelBase
-from modelscope.models.nlp.space.modules.embedder import Embedder
-from modelscope.models.nlp.space.modules.transformer_block import \
- TransformerBlock
+from ..modules.embedder import Embedder
+from ..modules.transformer_block import TransformerBlock
+from .model_base import ModelBase
class UnifiedTransformer(ModelBase):
diff --git a/modelscope/models/nlp/space/modules/transformer_block.py b/modelscope/models/nlp/space/modules/transformer_block.py
index 1a0565d6..5b6c79a5 100644
--- a/modelscope/models/nlp/space/modules/transformer_block.py
+++ b/modelscope/models/nlp/space/modules/transformer_block.py
@@ -5,9 +5,8 @@ TransformerBlock class.
import torch
import torch.nn as nn
-from modelscope.models.nlp.space.modules.feedforward import FeedForward
-from modelscope.models.nlp.space.modules.multihead_attention import \
- MultiheadAttention
+from .feedforward import FeedForward
+from .multihead_attention import MultiheadAttention
class TransformerBlock(nn.Module):
diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py
index 6e2645de..962b2245 100644
--- a/modelscope/pipelines/__init__.py
+++ b/modelscope/pipelines/__init__.py
@@ -1,7 +1,4 @@
-from .audio import LinearAECPipeline
+# from .audio import LinearAECPipeline
from .base import Pipeline
from .builder import pipeline
-from .cv import * # noqa F403
-from .multi_modal import * # noqa F403
from .nlp import * # noqa F403
-from .nlp.space import * # noqa F403
diff --git a/modelscope/pipelines/audio/linear_aec_pipeline.py b/modelscope/pipelines/audio/linear_aec_pipeline.py
index 528d8d47..70562b19 100644
--- a/modelscope/pipelines/audio/linear_aec_pipeline.py
+++ b/modelscope/pipelines/audio/linear_aec_pipeline.py
@@ -7,6 +7,7 @@ import scipy.io.wavfile as wav
import torch
import yaml
+from modelscope.metainfo import Pipelines
from modelscope.preprocessors.audio import LinearAECAndFbank
from modelscope.utils.constant import ModelFile, Tasks
from ..base import Pipeline
@@ -39,7 +40,8 @@ def initialize_config(module_cfg):
@PIPELINES.register_module(
- Tasks.speech_signal_process, module_name=r'speech_dfsmn_aec_psm_16k')
+ Tasks.speech_signal_process,
+ module_name=Pipelines.speech_dfsmn_aec_psm_16k)
class LinearAECPipeline(Pipeline):
r"""AEC Inference Pipeline only support 16000 sample rate.
diff --git a/modelscope/pipelines/audio/text_to_speech_pipeline.py b/modelscope/pipelines/audio/text_to_speech_pipeline.py
index ecd9daac..22586d3e 100644
--- a/modelscope/pipelines/audio/text_to_speech_pipeline.py
+++ b/modelscope/pipelines/audio/text_to_speech_pipeline.py
@@ -3,6 +3,7 @@ from typing import Any, Dict, List
import numpy as np
+from modelscope.metainfo import Pipelines
from modelscope.models import Model
from modelscope.models.audio.tts.am import SambertNetHifi16k
from modelscope.models.audio.tts.vocoder import Hifigan16k
@@ -15,7 +16,7 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline']
@PIPELINES.register_module(
- Tasks.text_to_speech, module_name=r'tts-sambert-hifigan-16k')
+ Tasks.text_to_speech, module_name=Pipelines.sambert_hifigan_16k_tts)
class TextToSpeechSambertHifigan16kPipeline(Pipeline):
def __init__(self,
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index e266d21c..7e32f543 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -4,19 +4,17 @@ import os.path as osp
from abc import ABC, abstractmethod
from typing import Any, Dict, Generator, List, Union
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.base import Model
from modelscope.preprocessors import Preprocessor
from modelscope.pydatasets import PyDataset
from modelscope.utils.config import Config
-from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.logger import get_logger
from .outputs import TASK_OUTPUTS
-from .util import is_model_name
+from .util import is_model, is_official_hub_path
Tensor = Union['torch.Tensor', 'tf.Tensor']
-Input = Union[str, tuple, dict, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
+Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
InputModel = Union[str, Model]
output_keys = [
@@ -29,14 +27,10 @@ class Pipeline(ABC):
def initiate_single_model(self, model):
logger.info(f'initiate model from {model}')
- # TODO @wenmeng.zwm replace model.startswith('damo/') with get_model
- if isinstance(model, str) and model.startswith('damo/'):
- if not osp.exists(model):
- cache_path = get_model_cache_dir(model)
- model = cache_path if osp.exists(
- cache_path) else snapshot_download(model)
- return Model.from_pretrained(model) if is_model_name(
- model) else model
+ if isinstance(model, str) and is_official_hub_path(model):
+ model = snapshot_download(
+ model) if not osp.exists(model) else model
+ return Model.from_pretrained(model) if is_model(model) else model
elif isinstance(model, Model):
return model
else:
@@ -104,7 +98,7 @@ class Pipeline(ABC):
def _process_single(self, input: Input, *args,
**post_kwargs) -> Dict[str, Any]:
- out = self.preprocess(input, **post_kwargs)
+ out = self.preprocess(input)
out = self.forward(out)
out = self.postprocess(out, **post_kwargs)
self._check_output(out)
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index 6e2c791d..cff1801d 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -1,33 +1,49 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-import os.path as osp
from typing import List, Union
+from modelscope.metainfo import Pipelines
from modelscope.models.base import Model
from modelscope.utils.config import Config, ConfigDict
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.hub import read_config
from modelscope.utils.registry import Registry, build_from_cfg
from .base import Pipeline
+from .util import is_official_hub_path
PIPELINES = Registry('pipelines')
DEFAULT_MODEL_FOR_PIPELINE = {
# TaskName: (pipeline_module_name, model_repo)
Tasks.word_segmentation:
- ('structbert-chinese-word-segmentation',
+ (Pipelines.word_segmentation,
'damo/nlp_structbert_word-segmentation_chinese-base'),
Tasks.sentence_similarity:
- ('sbert-base-chinese-sentence-similarity',
+ (Pipelines.sentence_similarity,
'damo/nlp_structbert_sentence-similarity_chinese-base'),
-    Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'),
- Tasks.text_classification:
- ('bert-sentiment-analysis', 'damo/bert-base-sst2'),
- Tasks.text_generation: ('palm2.0',
+ Tasks.nli: (Pipelines.nli, 'damo/nlp_structbert_nli_chinese-base'),
+ Tasks.sentiment_classification:
+ (Pipelines.sentiment_classification,
+ 'damo/nlp_structbert_sentiment-classification_chinese-base'),
+ Tasks.image_matting: (Pipelines.image_matting,
+ 'damo/cv_unet_image-matting'),
+ Tasks.text_classification: (Pipelines.sentiment_analysis,
+ 'damo/bert-base-sst2'),
+ Tasks.text_generation: (Pipelines.text_generation,
'damo/nlp_palm2.0_text-generation_chinese-base'),
- Tasks.image_captioning: ('ofa', None),
+ Tasks.image_captioning: (Pipelines.image_caption,
+ 'damo/ofa_image-caption_coco_large_en'),
Tasks.image_generation:
- ('person-image-cartoon',
+ (Pipelines.person_image_cartoon,
'damo/cv_unet_person-image-cartoon_compound-models'),
+ Tasks.ocr_detection: (Pipelines.ocr_detection,
+ 'damo/cv_resnet18_ocr-detection-line-level_damo'),
+ Tasks.fill_mask: (Pipelines.fill_mask, 'damo/nlp_veco_fill-mask-large'),
+ Tasks.action_recognition: (Pipelines.action_recognition,
+ 'damo/cv_TAdaConv_action-recognition'),
}
@@ -84,30 +100,40 @@ def pipeline(task: str = None,
if task is None and pipeline_name is None:
raise ValueError('task or pipeline_name is required')
+ assert isinstance(model, (type(None), str, Model, list)), \
+ f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}'
+
if pipeline_name is None:
# get default pipeline for this task
if isinstance(model, str) \
or (isinstance(model, list) and isinstance(model[0], str)):
-
- # if is_model_name(model):
- if (isinstance(model, str) and model.startswith('damo/')) \
- or (isinstance(model, list) and model[0].startswith('damo/')) \
- or (isinstance(model, str) and osp.exists(model)):
- # TODO @wenmeng.zwm add support when model is a str of modelhub address
- # read pipeline info from modelhub configuration file.
- pipeline_name, default_model_repo = get_default_pipeline_info(
- task)
+ if is_official_hub_path(model):
+ # read config file from hub and parse
+ cfg = read_config(model) if isinstance(
+ model, str) else read_config(model[0])
+ assert hasattr(
+ cfg,
+ 'pipeline'), 'pipeline config is missing from config file.'
+ pipeline_name = cfg.pipeline.type
else:
+ # used for test case, when model is str and is not hub path
pipeline_name = get_pipeline_by_model_name(task, model)
+ elif isinstance(model, Model) or \
+ (isinstance(model, list) and isinstance(model[0], Model)):
+ # get pipeline info from Model object
+ first_model = model[0] if isinstance(model, list) else model
+ if not hasattr(first_model, 'pipeline'):
+ # model is instantiated by user, we should parse config again
+ cfg = read_config(first_model.model_dir)
+ assert hasattr(
+ cfg,
+ 'pipeline'), 'pipeline config is missing from config file.'
+ first_model.pipeline = cfg.pipeline
+ pipeline_name = first_model.pipeline.type
else:
pipeline_name, default_model_repo = get_default_pipeline_info(task)
-
- if model is None:
model = default_model_repo
- assert isinstance(model, (type(None), str, Model, list)), \
- f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}'
-
cfg = ConfigDict(type=pipeline_name, model=model)
if kwargs:
diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py
index 79c85c19..68d875ec 100644
--- a/modelscope/pipelines/cv/__init__.py
+++ b/modelscope/pipelines/cv/__init__.py
@@ -1,2 +1,4 @@
+from .action_recognition_pipeline import ActionRecognitionPipeline
from .image_cartoon_pipeline import ImageCartoonPipeline
from .image_matting_pipeline import ImageMattingPipeline
+from .ocr_detection_pipeline import OCRDetectionPipeline
diff --git a/modelscope/pipelines/cv/action_recognition_pipeline.py b/modelscope/pipelines/cv/action_recognition_pipeline.py
new file mode 100644
index 00000000..845f8f9a
--- /dev/null
+++ b/modelscope/pipelines/cv/action_recognition_pipeline.py
@@ -0,0 +1,65 @@
+import math
+import os.path as osp
+from typing import Any, Dict
+
+import cv2
+import numpy as np
+import PIL
+import torch
+
+from modelscope.metainfo import Pipelines
+from modelscope.models.cv.action_recognition.models import BaseVideoModel
+from modelscope.pipelines.base import Input
+from modelscope.preprocessors.video import ReadVideoData
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+from ..base import Pipeline
+from ..builder import PIPELINES
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+ Tasks.action_recognition, module_name=Pipelines.action_recognition)
+class ActionRecognitionPipeline(Pipeline):
+
+ def __init__(self, model: str):
+ super().__init__(model=model)
+ model_path = osp.join(self.model, ModelFile.TORCH_MODEL_FILE)
+ logger.info(f'loading model from {model_path}')
+ config_path = osp.join(self.model, ModelFile.CONFIGURATION)
+ logger.info(f'loading config from {config_path}')
+ self.cfg = Config.from_file(config_path)
+ self.infer_model = BaseVideoModel(cfg=self.cfg).cuda()
+ self.infer_model.eval()
+ self.infer_model.load_state_dict(torch.load(model_path)['model_state'])
+ self.label_mapping = self.cfg.label_mapping
+ logger.info('load model done')
+
+ def preprocess(self, input: Input) -> Dict[str, Any]:
+ if isinstance(input, str):
+ video_input_data = ReadVideoData(self.cfg, input).cuda()
+ else:
+ raise TypeError(f'input should be a str,'
+ f' but got {type(input)}')
+ result = {'video_data': video_input_data}
+ return result
+
+ def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+ pred = self.perform_inference(input['video_data'])
+ output_label = self.label_mapping[str(pred)]
+ return {'output_label': output_label}
+
+ @torch.no_grad()
+ def perform_inference(self, data, max_bsz=4):
+ iter_num = math.ceil(data.size(0) / max_bsz)
+ preds_list = []
+ for i in range(iter_num):
+ preds_list.append(
+ self.infer_model(data[i * max_bsz:(i + 1) * max_bsz])[0])
+ pred = torch.cat(preds_list, dim=0)
+ return pred.mean(dim=0).argmax().item()
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ return inputs
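+
+# Illustrative sketch (editorial addition): perform_inference() splits the sampled clips
+# into chunks of max_bsz, concatenates the per-clip predictions and returns the argmax of
+# their mean. A hypothetical end-to-end call (a CUDA device is required, since the model
+# and video tensors are moved to GPU above):
+#
+#   p = pipeline(Tasks.action_recognition, model='damo/cv_TAdaConv_action-recognition')
+#   result = p('some_local_video.mp4')       # hypothetical local path
+#   # result == {'output_label': '<predicted action name>'}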
diff --git a/modelscope/pipelines/cv/image_cartoon_pipeline.py b/modelscope/pipelines/cv/image_cartoon_pipeline.py
index d253eaf5..717336e9 100644
--- a/modelscope/pipelines/cv/image_cartoon_pipeline.py
+++ b/modelscope/pipelines/cv/image_cartoon_pipeline.py
@@ -6,6 +6,7 @@ import numpy as np
import PIL
import tensorflow as tf
+from modelscope.metainfo import Pipelines
from modelscope.models.cv.cartoon.facelib.facer import FaceAna
from modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans import (
get_reference_facial_points, warp_and_crop_face)
@@ -25,7 +26,7 @@ logger = get_logger()
@PIPELINES.register_module(
- Tasks.image_generation, module_name='person-image-cartoon')
+ Tasks.image_generation, module_name=Pipelines.person_image_cartoon)
class ImageCartoonPipeline(Pipeline):
def __init__(self, model: str):
diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py
index 0c60dfa7..b3e27e4b 100644
--- a/modelscope/pipelines/cv/image_matting_pipeline.py
+++ b/modelscope/pipelines/cv/image_matting_pipeline.py
@@ -5,6 +5,7 @@ import cv2
import numpy as np
import PIL
+from modelscope.metainfo import Pipelines
from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.constant import ModelFile, Tasks
@@ -16,7 +17,7 @@ logger = get_logger()
@PIPELINES.register_module(
- Tasks.image_matting, module_name=Tasks.image_matting)
+ Tasks.image_matting, module_name=Pipelines.image_matting)
class ImageMattingPipeline(Pipeline):
def __init__(self, model: str):
diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py
new file mode 100644
index 00000000..0502fe36
--- /dev/null
+++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py
@@ -0,0 +1,168 @@
+import math
+import os
+import os.path as osp
+import sys
+from typing import Any, Dict, List, Tuple, Union
+
+import cv2
+import numpy as np
+import PIL
+import tensorflow as tf
+import tf_slim as slim
+
+from modelscope.metainfo import Pipelines
+from modelscope.pipelines.base import Input
+from modelscope.preprocessors import load_image
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+from ..base import Pipeline
+from ..builder import PIPELINES
+from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils
+
+if tf.__version__ >= '2.0':
+ tf = tf.compat.v1
+tf.compat.v1.disable_eager_execution()
+
+logger = get_logger()
+
+# constant
+RBOX_DIM = 5
+OFFSET_DIM = 6
+WORD_POLYGON_DIM = 8
+OFFSET_VARIANCE = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
+
+FLAGS = tf.app.flags.FLAGS
+tf.app.flags.DEFINE_float('node_threshold', 0.4,
+ 'Confidence threshold for nodes')
+tf.app.flags.DEFINE_float('link_threshold', 0.6,
+ 'Confidence threshold for links')
+
+
+@PIPELINES.register_module(
+ Tasks.ocr_detection, module_name=Pipelines.ocr_detection)
+class OCRDetectionPipeline(Pipeline):
+
+ def __init__(self, model: str):
+ super().__init__(model=model)
+        model_path = osp.join(self.model, ModelFile.TF_CHECKPOINT_FOLDER,
+                              'checkpoint-80000')
+
+ config = tf.ConfigProto(allow_soft_placement=True)
+ config.gpu_options.allow_growth = True
+ self._session = tf.Session(config=config)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ dtype=tf.int64,
+ trainable=False)
+ variable_averages = tf.train.ExponentialMovingAverage(
+ 0.997, global_step)
+ self.input_images = tf.placeholder(
+ tf.float32, shape=[1, 1024, 1024, 3], name='input_images')
+ self.output = {}
+
+ # detector
+ detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector()
+ all_maps = detector.build_model(self.input_images, is_training=False)
+
+ # decode local predictions
+ all_nodes, all_links, all_reg = [], [], []
+ for i, maps in enumerate(all_maps):
+ cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2]
+ reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE)
+
+ cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2]))
+
+ lnk_prob_pos = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, :2])
+ lnk_prob_mut = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, 2:])
+ lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1)
+
+ all_nodes.append(cls_prob)
+ all_links.append(lnk_prob)
+ all_reg.append(reg_maps)
+
+ # decode segments and links
+ image_size = tf.shape(self.input_images)[1:3]
+ segments, group_indices, segment_counts, _ = ops.decode_segments_links_python(
+ image_size,
+ all_nodes,
+ all_links,
+ all_reg,
+ anchor_sizes=list(detector.anchor_sizes))
+
+ # combine segments
+ combined_rboxes, combined_counts = ops.combine_segments_python(
+ segments, group_indices, segment_counts)
+ self.output['combined_rboxes'] = combined_rboxes
+ self.output['combined_counts'] = combined_counts
+
+ with self._session.as_default() as sess:
+ logger.info(f'loading model from {model_path}')
+ # load model
+ model_loader = tf.train.Saver(
+ variable_averages.variables_to_restore())
+ model_loader.restore(sess, model_path)
+
+ def preprocess(self, input: Input) -> Dict[str, Any]:
+ if isinstance(input, str):
+ img = np.array(load_image(input))
+ elif isinstance(input, PIL.Image.Image):
+ img = np.array(input.convert('RGB'))
+ elif isinstance(input, np.ndarray):
+            if len(input.shape) == 2:
+                input = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
+            img = input[:, :, ::-1]  # bgr -> rgb order
+ else:
+ raise TypeError(f'input should be either str, PIL.Image,'
+ f' np.array, but got {type(input)}')
+ h, w, c = img.shape
+ img_pad = np.zeros((max(h, w), max(h, w), 3), dtype=np.float32)
+ img_pad[:h, :w, :] = img
+
+ resize_size = 1024
+ img_pad_resize = cv2.resize(img_pad, (resize_size, resize_size))
+ img_pad_resize = cv2.cvtColor(img_pad_resize, cv2.COLOR_RGB2BGR)
+ img_pad_resize = img_pad_resize - np.array([123.68, 116.78, 103.94],
+ dtype=np.float32)
+
+ resize_size = tf.stack([resize_size, resize_size])
+ orig_size = tf.stack([max(h, w), max(h, w)])
+ self.output['orig_size'] = orig_size
+ self.output['resize_size'] = resize_size
+
+ result = {'img': np.expand_dims(img_pad_resize, axis=0)}
+ return result
+
+ def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+ with self._session.as_default():
+ feed_dict = {self.input_images: input['img']}
+ sess_outputs = self._session.run(self.output, feed_dict=feed_dict)
+ return sess_outputs
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ rboxes = inputs['combined_rboxes'][0]
+ count = inputs['combined_counts'][0]
+ rboxes = rboxes[:count, :]
+
+ # convert rboxes to polygons and find its coordinates on the original image
+ orig_h, orig_w = inputs['orig_size']
+ resize_h, resize_w = inputs['resize_size']
+ polygons = utils.rboxes_to_polygons(rboxes)
+ scale_y = float(orig_h) / float(resize_h)
+ scale_x = float(orig_w) / float(resize_w)
+
+ # confine polygons inside image
+ polygons[:, ::2] = np.maximum(
+ 0, np.minimum(polygons[:, ::2] * scale_x, orig_w - 1))
+ polygons[:, 1::2] = np.maximum(
+ 0, np.minimum(polygons[:, 1::2] * scale_y, orig_h - 1))
+ polygons = np.round(polygons).astype(np.int32)
+
+ # nms
+ dt_n9 = [o + [utils.cal_width(o)] for o in polygons.tolist()]
+ dt_nms = utils.nms_python(dt_n9)
+ dt_polygons = np.array([o[:8] for o in dt_nms])
+
+ result = {'det_polygons': dt_polygons}
+ return result
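+
+# Illustrative sketch (editorial addition): each row of 'det_polygons' holds the eight
+# corner coordinates (x1, y1, ..., x4, y4) of one detected text region, mapped back to
+# the scale of the original input image.
+#
+#   p = pipeline(Tasks.ocr_detection, model='damo/cv_resnet18_ocr-detection-line-level_damo')
+#   result = p('some_document_image.jpg')    # hypothetical local path
+#   # result['det_polygons'].shape == (num_detections, 8)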
diff --git a/modelscope/pipelines/cv/ocr_utils/__init__.py b/modelscope/pipelines/cv/ocr_utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py b/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py
new file mode 100644
index 00000000..50b8ba02
--- /dev/null
+++ b/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py
@@ -0,0 +1,158 @@
+import tensorflow as tf
+import tf_slim as slim
+
+from . import ops, resnet18_v1, resnet_utils
+
+if tf.__version__ >= '2.0':
+ tf = tf.compat.v1
+
+# constants
+OFFSET_DIM = 6
+
+N_LOCAL_LINKS = 8
+N_CROSS_LINKS = 4
+N_SEG_CLASSES = 2
+N_LNK_CLASSES = 4
+
+POS_LABEL = 1
+NEG_LABEL = 0
+
+
+class SegLinkDetector():
+
+ def __init__(self):
+ self.anchor_sizes = [6., 11.84210526, 23.68421053, 45., 90., 150.]
+
+ def _detection_classifier(self,
+ maps,
+ ksize,
+ weight_decay,
+ cross_links=False,
+ scope=None):
+
+ with tf.variable_scope(scope):
+ seg_depth = N_SEG_CLASSES
+ if cross_links:
+ lnk_depth = N_LNK_CLASSES * (N_LOCAL_LINKS + N_CROSS_LINKS)
+ else:
+ lnk_depth = N_LNK_CLASSES * N_LOCAL_LINKS
+ reg_depth = OFFSET_DIM
+ map_depth = maps.get_shape()[3]
+ inter_maps, inter_relu = ops.conv2d(
+ maps, map_depth, 256, 1, 1, 'SAME', scope='conv_inter')
+
+ dir_maps, dir_relu = ops.conv2d(
+ inter_relu, 256, 2, ksize, 1, 'SAME', scope='conv_dir')
+ cen_maps, cen_relu = ops.conv2d(
+ inter_relu, 256, 2, ksize, 1, 'SAME', scope='conv_cen')
+ pol_maps, pol_relu = ops.conv2d(
+ inter_relu, 256, 8, ksize, 1, 'SAME', scope='conv_pol')
+ concat_relu = tf.concat([dir_relu, cen_relu, pol_relu], axis=-1)
+ _, lnk_embedding = ops.conv_relu(
+ concat_relu, 12, 256, 1, 1, scope='lnk_embedding')
+ lnk_maps, lnk_relu = ops.conv2d(
+ inter_relu + lnk_embedding,
+ 256,
+ lnk_depth,
+ ksize,
+ 1,
+ 'SAME',
+ scope='conv_lnk')
+
+ char_seg_maps, char_seg_relu = ops.conv2d(
+ inter_relu,
+ 256,
+ seg_depth,
+ ksize,
+ 1,
+ 'SAME',
+ scope='conv_char_cls')
+ char_reg_maps, char_reg_relu = ops.conv2d(
+ inter_relu,
+ 256,
+ reg_depth,
+ ksize,
+ 1,
+ 'SAME',
+ scope='conv_char_reg')
+ concat_char_relu = tf.concat([char_seg_relu, char_reg_relu],
+ axis=-1)
+ _, char_embedding = ops.conv_relu(
+ concat_char_relu, 8, 256, 1, 1, scope='conv_char_embedding')
+ seg_maps, seg_relu = ops.conv2d(
+ inter_relu + char_embedding,
+ 256,
+ seg_depth,
+ ksize,
+ 1,
+ 'SAME',
+ scope='conv_cls')
+ reg_maps, reg_relu = ops.conv2d(
+ inter_relu + char_embedding,
+ 256,
+ reg_depth,
+ ksize,
+ 1,
+ 'SAME',
+ scope='conv_reg')
+
+ return seg_relu, lnk_relu, reg_relu
+
+ def _build_cnn(self, images, weight_decay, is_training):
+ with slim.arg_scope(
+ resnet18_v1.resnet_arg_scope(weight_decay=weight_decay)):
+ logits, end_points = resnet18_v1.resnet_v1_18(
+ images, is_training=is_training, scope='resnet_v1_18')
+
+ outputs = {
+ 'conv3_3': end_points['pool1'],
+ 'conv4_3': end_points['pool2'],
+ 'fc7': end_points['pool3'],
+ 'conv8_2': end_points['pool4'],
+ 'conv9_2': end_points['pool5'],
+ 'conv10_2': end_points['pool6'],
+ }
+ return outputs
+
+ def build_model(self, images, is_training=True, scope=None):
+
+ weight_decay = 5e-4 # FLAGS.weight_decay
+ cnn_outputs = self._build_cnn(images, weight_decay, is_training)
+ det_0 = self._detection_classifier(
+ cnn_outputs['conv3_3'],
+ 3,
+ weight_decay,
+ cross_links=False,
+ scope='dete_0')
+ det_1 = self._detection_classifier(
+ cnn_outputs['conv4_3'],
+ 3,
+ weight_decay,
+ cross_links=True,
+ scope='dete_1')
+ det_2 = self._detection_classifier(
+ cnn_outputs['fc7'],
+ 3,
+ weight_decay,
+ cross_links=True,
+ scope='dete_2')
+ det_3 = self._detection_classifier(
+ cnn_outputs['conv8_2'],
+ 3,
+ weight_decay,
+ cross_links=True,
+ scope='dete_3')
+ det_4 = self._detection_classifier(
+ cnn_outputs['conv9_2'],
+ 3,
+ weight_decay,
+ cross_links=True,
+ scope='dete_4')
+ det_5 = self._detection_classifier(
+ cnn_outputs['conv10_2'],
+ 3,
+ weight_decay,
+ cross_links=True,
+ scope='dete_5')
+ outputs = [det_0, det_1, det_2, det_3, det_4, det_5]
+ return outputs
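+
+
+# Hypothetical usage sketch (an assumption for illustration, not part of the
+# original inference code):
+#   detector = SegLinkDetector()
+#   images = tf.placeholder(tf.float32, [1, 1024, 1024, 3])
+#   dets = detector.build_model(images, is_training=False)
+#   # `dets` is a list of six (seg, lnk, reg) tuples, one per detection layer.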
diff --git a/modelscope/pipelines/cv/ocr_utils/ops.py b/modelscope/pipelines/cv/ocr_utils/ops.py
new file mode 100644
index 00000000..2bc8a8bf
--- /dev/null
+++ b/modelscope/pipelines/cv/ocr_utils/ops.py
@@ -0,0 +1,1098 @@
+import math
+import os
+import shutil
+import uuid
+
+import cv2
+import numpy as np
+import tensorflow as tf
+
+from . import utils
+
+if tf.__version__ >= '2.0':
+ tf = tf.compat.v1
+
+FLAGS = tf.app.flags.FLAGS
+tf.app.flags.DEFINE_string('weight_init_method', 'xavier',
+ 'Weight initialization method')
+
+# constants
+OFFSET_DIM = 6
+RBOX_DIM = 5
+
+N_LOCAL_LINKS = 8
+N_CROSS_LINKS = 4
+N_SEG_CLASSES = 2
+N_LNK_CLASSES = 4
+
+MATCH_STATUS_POS = 1
+MATCH_STATUS_NEG = -1
+MATCH_STATUS_IGNORE = 0
+MUT_LABEL = 3
+POS_LABEL = 1
+NEG_LABEL = 0
+
+N_DET_LAYERS = 6
+
+
+def load_oplib(lib_name):
+ """
+ Load TensorFlow operator library.
+ """
+ # use absolute path so that ops.py can be called from other directory
+ lib_path = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),
+ 'lib{0}.so'.format(lib_name))
+ # duplicate library with a random new name so that
+ # a running program will not be interrupted when the original library is updated
+    lib_copy_path = '/tmp/lib{0}_{1}.so'.format(
+        str(uuid.uuid4())[:8], lib_name)
+ shutil.copyfile(lib_path, lib_copy_path)
+ oplib = tf.load_op_library(lib_copy_path)
+ return oplib
+
+
+def _nn_variable(name, shape, init_method, collection=None, **kwargs):
+ """
+ Create or reuse a variable
+ ARGS
+ name: variable name
+ shape: variable shape
+ init_method: 'zero', 'kaiming', 'xavier', or (mean, std)
+ collection: if not none, add variable to this collection
+ kwargs: extra paramters passed to tf.get_variable
+ RETURN
+ var: a new or existing variable
+ """
+ if init_method == 'zero':
+ initializer = tf.constant_initializer(0.0)
+ elif init_method == 'kaiming':
+ if len(shape) == 4: # convolutional filters
+ kh, kw, n_in = shape[:3]
+ init_std = math.sqrt(2.0 / (kh * kw * n_in))
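+            # e.g. a 3x3 kernel with 256 input channels (shape [3, 3, 256, n_out])
+            # gives init_std = sqrt(2 / (3 * 3 * 256)) ~= 0.0295 (illustrative).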
+ elif len(shape) == 2: # linear weights
+ n_in, n_out = shape
+ init_std = math.sqrt(1.0 / n_out)
+ else:
+            raise ValueError('Unsupported shape')
+ initializer = tf.truncated_normal_initializer(0.0, init_std)
+ elif init_method == 'xavier':
+        # glorot (xavier) normal initialization for both conv and linear weights
+        initializer = tf.keras.initializers.glorot_normal()
+ elif isinstance(init_method, tuple):
+ assert (len(init_method) == 2)
+ initializer = tf.truncated_normal_initializer(init_method[0],
+ init_method[1])
+ else:
+        raise ValueError(
+            'Unsupported weight initialization method: {}'.format(init_method))
+
+ var = tf.get_variable(name, shape=shape, initializer=initializer, **kwargs)
+ if collection is not None:
+ tf.add_to_collection(collection, var)
+
+ return var
+
+
+def conv2d(x,
+ n_in,
+ n_out,
+ ksize,
+ stride=1,
+ padding='SAME',
+ weight_init=None,
+ bias=True,
+ relu=False,
+ scope=None,
+ **kwargs):
+ weight_init = weight_init or FLAGS.weight_init_method
+ trainable = kwargs.get('trainable', True)
+ # input_dim = n_in
+ if (padding == 'SAME'):
+ in_height = x.get_shape()[1]
+ in_width = x.get_shape()[2]
+ if (in_height % stride == 0):
+ pad_along_height = max(ksize - stride, 0)
+ else:
+ pad_along_height = max(ksize - (in_height % stride), 0)
+ if (in_width % stride == 0):
+ pad_along_width = max(ksize - stride, 0)
+ else:
+ pad_along_width = max(ksize - (in_width % stride), 0)
+ pad_bottom = pad_along_height // 2
+ pad_top = pad_along_height - pad_bottom
+ pad_right = pad_along_width // 2
+ pad_left = pad_along_width - pad_right
+ paddings = tf.constant([[0, 0], [pad_top, pad_bottom],
+ [pad_left, pad_right], [0, 0]])
+ input_padded = tf.pad(x, paddings, 'CONSTANT')
+ else:
+ input_padded = x
+
+ with tf.variable_scope(scope or 'conv2d'):
+ # convolution
+ kernel = _nn_variable(
+ 'weight', [ksize, ksize, n_in, n_out],
+ weight_init,
+ collection='weights' if trainable else None,
+ **kwargs)
+ yc = tf.nn.conv2d(
+ input_padded, kernel, [1, stride, stride, 1], padding='VALID')
+ # add bias
+ if bias is True:
+ bias = _nn_variable(
+ 'bias', [n_out],
+ 'zero',
+ collection='biases' if trainable else None,
+ **kwargs)
+ yb = tf.nn.bias_add(yc, bias)
+ # apply ReLU
+ y = yb
+ if relu is True:
+ y = tf.nn.relu(yb)
+ return yb, y
+
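+# Usage sketch (assumption, for illustration): conv2d returns both the
+# pre-activation (bias-added) map and its activation, so callers unpack two
+# tensors, e.g.
+#   maps, act = conv2d(x, n_in=256, n_out=64, ksize=3, relu=True, scope='conv_a')
+# When relu=False the two returned tensors are identical.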
+
+def group_conv2d_relu(x,
+ n_in,
+ n_out,
+ ksize,
+ stride=1,
+ group=4,
+ padding='SAME',
+ weight_init=None,
+ bias=True,
+ relu=False,
+ name='group_conv2d',
+ **kwargs):
+ group_axis = len(x.get_shape()) - 1
+ splits = tf.split(x, [int(n_in / group)] * group, group_axis)
+
+ conv_list = []
+ for i in range(group):
+ conv_split, relu_split = conv2d(
+ splits[i],
+ n_in / group,
+ n_out / group,
+ ksize=ksize,
+ stride=stride,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ scope='%s_%d' % (name, i))
+ conv_list.append(conv_split)
+ conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat')
+ relu = tf.nn.relu(conv)
+ return conv, relu
+
+
+def group_conv2d_bn_relu(x,
+ n_in,
+ n_out,
+ ksize,
+ stride=1,
+ group=4,
+ padding='SAME',
+ weight_init=None,
+ bias=True,
+ relu=False,
+ name='group_conv2d',
+ **kwargs):
+ group_axis = len(x.get_shape()) - 1
+ splits = tf.split(x, [int(n_in / group)] * group, group_axis)
+
+ conv_list = []
+ for i in range(group):
+ conv_split, relu_split = conv2d(
+ splits[i],
+ n_in / group,
+ n_out / group,
+ ksize=ksize,
+ stride=stride,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ scope='%s_%d' % (name, i))
+ conv_list.append(conv_split)
+ conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat')
+ with tf.variable_scope(name + '_bn'):
+ bn = tf.layers.batch_normalization(
+ conv, momentum=0.9, epsilon=1e-5, scale=True, training=True)
+ relu = tf.nn.relu(bn)
+ return conv, relu
+
+
+def next_conv(x,
+ n_in,
+ n_out,
+ ksize,
+ stride=1,
+ group=4,
+ padding='SAME',
+ weight_init=None,
+ bias=True,
+ relu=False,
+ name='next_conv2d',
+ **kwargs):
+ conv_a, relu_a = conv_relu(
+ x,
+ n_in,
+ n_in / 2,
+ ksize=1,
+ stride=1,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ scope=name + '_a',
+ **kwargs)
+
+ conv_b, relu_b = group_conv2d_relu(
+ relu_a,
+ n_in / 2,
+ n_out / 2,
+ ksize=ksize,
+ stride=stride,
+ group=group,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ name=name + '_b',
+ **kwargs)
+
+ conv_c, relu_c = conv_relu(
+ relu_b,
+ n_out / 2,
+ n_out,
+ ksize=1,
+ stride=1,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ scope=name + '_c',
+ **kwargs)
+
+ return conv_c, relu_c
+
+
+def next_conv_bn(x,
+ n_in,
+ n_out,
+ ksize,
+ stride=1,
+ group=4,
+ padding='SAME',
+ weight_init=None,
+ bias=True,
+ relu=False,
+ name='next_conv2d',
+ **kwargs):
+ conv_a, relu_a = conv_bn_relu(
+ x,
+ n_in,
+ n_in / 2,
+ ksize=1,
+ stride=1,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ scope=name + '_a',
+ **kwargs)
+
+ conv_b, relu_b = group_conv2d_bn_relu(
+ relu_a,
+ n_in / 2,
+ n_out / 2,
+ ksize=ksize,
+ stride=stride,
+ group=group,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ name=name + '_b',
+ **kwargs)
+
+ conv_c, relu_c = conv_bn_relu(
+ relu_b,
+ n_out / 2,
+ n_out,
+ ksize=1,
+ stride=1,
+ padding=padding,
+ weight_init=weight_init,
+ bias=bias,
+ relu=relu,
+ scope=name + '_c',
+ **kwargs)
+
+ return conv_c, relu_c
+
+
+def conv2d_ori(x,
+ n_in,
+ n_out,
+ ksize,
+ stride=1,
+ padding='SAME',
+ weight_init=None,
+ bias=True,
+ relu=False,
+ scope=None,
+ **kwargs):
+ weight_init = weight_init or FLAGS.weight_init_method
+ trainable = kwargs.get('trainable', True)
+
+ with tf.variable_scope(scope or 'conv2d'):
+ # convolution
+ kernel = _nn_variable(
+ 'weight', [ksize, ksize, n_in, n_out],
+ weight_init,
+ collection='weights' if trainable else None,
+ **kwargs)
+ y = tf.nn.conv2d(x, kernel, [1, stride, stride, 1], padding=padding)
+ # add bias
+ if bias is True:
+ bias = _nn_variable(
+ 'bias', [n_out],
+ 'zero',
+ collection='biases' if trainable else None,
+ **kwargs)
+ y = tf.nn.bias_add(y, bias)
+ # apply ReLU
+ if relu is True:
+ y = tf.nn.relu(y)
+ return y
+
+
+def conv_relu(*args, **kwargs):
+ kwargs['relu'] = True
+ if 'scope' not in kwargs:
+ kwargs['scope'] = 'conv_relu'
+ return conv2d(*args, **kwargs)
+
+
+def conv_bn_relu(*args, **kwargs):
+ kwargs['relu'] = True
+ if 'scope' not in kwargs:
+ kwargs['scope'] = 'conv_relu'
+ conv, relu = conv2d(*args, **kwargs)
+ with tf.variable_scope(kwargs['scope'] + '_bn'):
+ bn = tf.layers.batch_normalization(
+ conv, momentum=0.9, epsilon=1e-5, scale=True, training=True)
+ bn_relu = tf.nn.relu(bn)
+ return bn, bn_relu
+
+
+def conv_relu_ori(*args, **kwargs):
+ kwargs['relu'] = True
+ if 'scope' not in kwargs:
+ kwargs['scope'] = 'conv_relu'
+ return conv2d_ori(*args, **kwargs)
+
+
+def atrous_conv2d(x,
+ n_in,
+ n_out,
+ ksize,
+ dilation,
+ padding='SAME',
+ weight_init=None,
+ bias=True,
+ relu=False,
+ scope=None,
+ **kwargs):
+ weight_init = weight_init or FLAGS.weight_init_method
+ trainable = kwargs.get('trainable', True)
+ with tf.variable_scope(scope or 'atrous_conv2d'):
+ # atrous convolution
+ kernel = _nn_variable(
+ 'weight', [ksize, ksize, n_in, n_out],
+ weight_init,
+ collection='weights' if trainable else None,
+ **kwargs)
+ y = tf.nn.atrous_conv2d(x, kernel, dilation, padding=padding)
+ # add bias
+ if bias is True:
+ bias = _nn_variable(
+ 'bias', [n_out],
+ 'zero',
+ collection='biases' if trainable else None,
+ **kwargs)
+ y = tf.nn.bias_add(y, bias)
+ # apply ReLU
+ if relu is True:
+ y = tf.nn.relu(y)
+ return y
+
+
+def avg_pool(x, ksize, stride, padding='SAME', scope=None):
+ with tf.variable_scope(scope or 'avg_pool'):
+ y = tf.nn.avg_pool(x, [1, ksize, ksize, 1], [1, stride, stride, 1],
+ padding)
+ return y
+
+
+def max_pool(x, ksize, stride, padding='SAME', scope=None):
+ with tf.variable_scope(scope or 'max_pool'):
+ y = tf.nn.max_pool(x, [1, ksize, ksize, 1], [1, stride, stride, 1],
+ padding)
+ return y
+
+
+def score_loss(gt_labels, match_scores, n_classes):
+ """
+ Classification loss
+ ARGS
+ gt_labels: int32 [n]
+ match_scores: [n, n_classes]
+ RETURN
+ loss
+ """
+ embeddings = tf.one_hot(tf.cast(gt_labels, tf.int64), n_classes, 1.0, 0.0)
+    losses = tf.nn.softmax_cross_entropy_with_logits(
+        logits=match_scores, labels=embeddings)
+ return tf.reduce_sum(losses)
+
+
+def smooth_l1_loss(offsets, gt_offsets, scope=None):
+ """
+ Smooth L1 loss between offsets and encoded_gt
+ ARGS
+ offsets: [m?, 5], predicted offsets for one example
+      gt_offsets: [m?, 5], corresponding ground-truth offsets
+ RETURN
+ loss: scalar
+ """
+ with tf.variable_scope(scope or 'smooth_l1_loss'):
+ gt_offsets = tf.stop_gradient(gt_offsets)
+ diff = tf.abs(offsets - gt_offsets)
+ lesser_mask = tf.cast(tf.less(diff, 1.0), tf.float32)
+ larger_mask = 1.0 - lesser_mask
+ losses1 = (0.5 * tf.square(diff)) * lesser_mask
+ losses2 = (diff - 0.5) * larger_mask
+ return tf.reduce_sum(losses1 + losses2, 1)
+
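+# Worked example (illustrative): a per-coordinate |diff| of 0.3 contributes
+# 0.5 * 0.3**2 = 0.045 to the loss, while |diff| = 2.0 contributes 2.0 - 0.5 = 1.5;
+# the per-coordinate terms are then summed over the offset dimension.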
+
+def polygon_to_rboxe(polygon):
+ x1 = polygon[0]
+ y1 = polygon[1]
+ x2 = polygon[2]
+ y2 = polygon[3]
+ x3 = polygon[4]
+ y3 = polygon[5]
+ x4 = polygon[6]
+ y4 = polygon[7]
+ c_x = (x1 + x2 + x3 + x4) / 4
+ c_y = (y1 + y2 + y3 + y4) / 4
+ w1 = point_dist(x1, y1, x2, y2)
+ w2 = point_dist(x3, y3, x4, y4)
+ h1 = point_line_dist(c_x, c_y, x1, y1, x2, y2)
+ h2 = point_line_dist(c_x, c_y, x3, y3, x4, y4)
+ h = h1 + h2
+ w = (w1 + w2) / 2
+ theta1 = np.arctan2(y2 - y1, x2 - x1)
+ theta2 = np.arctan2(y3 - y4, x3 - x4)
+ theta = (theta1 + theta2) / 2
+ return np.array([c_x, c_y, w, h, theta])
+
+
+def point_dist(x1, y1, x2, y2):
+ return np.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))
+
+
+def point_line_dist(px, py, x1, y1, x2, y2):
+ eps = 1e-6
+ dx = x2 - x1
+ dy = y2 - y1
+ div = np.sqrt(dx * dx + dy * dy) + eps
+ dist = np.abs(px * dy - py * dx + x2 * y1 - y2 * x1) / div
+ return dist
+
+
+def get_combined_polygon(rboxes, resize_size):
+ image_w = resize_size[1]
+ image_h = resize_size[0]
+ img = np.zeros((image_h, image_w, 3), np.uint8)
+ for i in range(rboxes.shape[0]):
+ segment = np.reshape(
+ np.array(utils.rboxes_to_polygons(rboxes)[i, :], np.int32),
+ (-1, 1, 2))
+ cv2.drawContours(img, [segment], 0, (255, 255, 255), -1)
+ img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ ret, thresh = cv2.threshold(img2gray, 127, 255, cv2.THRESH_BINARY)
+    # findContours returns (contours, hierarchy) in OpenCV >= 4 and
+    # (image, contours, hierarchy) in OpenCV 3; keep only the last two values
+    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE,
+                                           cv2.CHAIN_APPROX_SIMPLE)[-2:]
+ if len(contours) > 0:
+ cnt = contours[0]
+ max_area = cv2.contourArea(cnt)
+ # get max_area
+ for cont in contours:
+ if cv2.contourArea(cont) > max_area:
+ cnt = cont
+ max_area = cv2.contourArea(cont)
+ rect = cv2.minAreaRect(cnt)
+ combined_polygon = np.array(cv2.boxPoints(rect)).reshape(-1)
+ else:
+ combined_polygon = np.array([0, 0, 0, 0, 0, 0, 0, 0])
+
+ return combined_polygon
+
+
+def combine_segs(segs):
+ segs = np.asarray(segs)
+ assert segs.ndim == 2, 'invalid segs ndim'
+ assert segs.shape[-1] == 6, 'invalid segs shape'
+
+ if len(segs) == 1:
+ cx = segs[0, 0]
+ cy = segs[0, 1]
+ w = segs[0, 2]
+ h = segs[0, 3]
+ theta_sin = segs[0, 4]
+ theta_cos = segs[0, 5]
+ theta = np.arctan2(theta_sin, theta_cos)
+ return np.array([cx, cy, w, h, theta])
+
+ # find the best straight line fitting all center points: y = kx + b
+ cxs = segs[:, 0]
+ cys = segs[:, 1]
+
+ theta_coss = segs[:, 4]
+ theta_sins = segs[:, 5]
+
+ bar_theta = np.arctan2(theta_sins.sum(), theta_coss.sum())
+ k = np.tan(bar_theta)
+ b = np.mean(cys - k * cxs)
+
+ proj_xs = (k * cys + cxs - k * b) / (k**2 + 1)
+ proj_ys = (k * k * cys + k * cxs + b) / (k**2 + 1)
+ proj_points = np.stack((proj_xs, proj_ys), -1)
+
+ # find the max distance
+ max_dist = -1
+ idx1 = -1
+ idx2 = -1
+
+ for i in range(len(proj_points)):
+ point1 = proj_points[i, :]
+ for j in range(i + 1, len(proj_points)):
+ point2 = proj_points[j, :]
+ dist = np.sqrt(np.sum((point1 - point2)**2))
+ if dist > max_dist:
+ idx1 = i
+ idx2 = j
+ max_dist = dist
+ assert idx1 >= 0 and idx2 >= 0
+ # the bbox: bcx, bcy, bw, bh, average_theta
+ seg1 = segs[idx1, :]
+ seg2 = segs[idx2, :]
+ bcx, bcy = (seg1[:2] + seg2[:2]) / 2.0
+ bh = np.mean(segs[:, 3])
+ bw = max_dist + (seg1[2] + seg2[2]) / 2.0
+ return bcx, bcy, bw, bh, bar_theta
+
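+# Summary note (added for clarity): combine_segs fits a straight line through the
+# segment centres using the average orientation, projects every centre onto that
+# line, and treats the two farthest projections as the end points of the combined
+# box; the combined width is that distance plus half the widths of the two end
+# segments, and the height is the mean segment height.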
+
+def combine_segments_batch(segments_batch, group_indices_batch,
+ segment_counts_batch):
+ batch_size = 1
+ combined_rboxes_batch = []
+ combined_counts_batch = []
+ for image_id in range(batch_size):
+ group_count = segment_counts_batch[image_id]
+ segments = segments_batch[image_id, :, :]
+ group_indices = group_indices_batch[image_id, :]
+ combined_rboxes = []
+ for i in range(group_count):
+ segments_group = segments[np.where(group_indices == i)[0], :]
+ if segments_group.shape[0] > 0:
+ combined_rbox = combine_segs(segments_group)
+ combined_rboxes.append(combined_rbox)
+ combined_rboxes_batch.append(combined_rboxes)
+ combined_counts_batch.append(len(combined_rboxes))
+
+ max_count = np.max(combined_counts_batch)
+ for image_id in range(batch_size):
+ if not combined_counts_batch[image_id] == max_count:
+ combined_rboxes_pad = (max_count - combined_counts_batch[image_id]
+ ) * [RBOX_DIM * [0.0]]
+ combined_rboxes_batch[image_id] = np.vstack(
+ (combined_rboxes_batch[image_id],
+ np.array(combined_rboxes_pad)))
+
+ return np.asarray(combined_rboxes_batch,
+ np.float32), np.asarray(combined_counts_batch, np.int32)
+
+
+# combine_segments rewrite in python version
+def combine_segments_python(segments, group_indices, segment_counts):
+ combined_rboxes, combined_counts = tf.py_func(
+ combine_segments_batch, [segments, group_indices, segment_counts],
+ [tf.float32, tf.int32])
+ return combined_rboxes, combined_counts
+
+
+# decode_segments_links rewrite in python version
+def get_coord(offsets, map_size, offsets_defaults):
+ if offsets < offsets_defaults[1][0]:
+ l_idx = 0
+ x = offsets % map_size[0][1]
+ y = offsets // map_size[0][1]
+ elif offsets < offsets_defaults[2][0]:
+ l_idx = 1
+ x = (offsets - offsets_defaults[1][0]) % map_size[1][1]
+ y = (offsets - offsets_defaults[1][0]) // map_size[1][1]
+ elif offsets < offsets_defaults[3][0]:
+ l_idx = 2
+ x = (offsets - offsets_defaults[2][0]) % map_size[2][1]
+ y = (offsets - offsets_defaults[2][0]) // map_size[2][1]
+ elif offsets < offsets_defaults[4][0]:
+ l_idx = 3
+ x = (offsets - offsets_defaults[3][0]) % map_size[3][1]
+ y = (offsets - offsets_defaults[3][0]) // map_size[3][1]
+ elif offsets < offsets_defaults[5][0]:
+ l_idx = 4
+ x = (offsets - offsets_defaults[4][0]) % map_size[4][1]
+ y = (offsets - offsets_defaults[4][0]) // map_size[4][1]
+ else:
+ l_idx = 5
+ x = (offsets - offsets_defaults[5][0]) % map_size[5][1]
+ y = (offsets - offsets_defaults[5][0]) // map_size[5][1]
+
+ return l_idx, x, y
+
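+# Worked example (illustrative): with a 512x512 input, map_size[0] = [128, 128],
+# so a flattened offset of 130 on the first detection layer decodes to l_idx=0,
+# x=2, y=1 (130 % 128 = 2, 130 // 128 = 1).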
+
+def get_coord_link(offsets, map_size, offsets_defaults):
+ if offsets < offsets_defaults[1][1]:
+ offsets_node = offsets // N_LOCAL_LINKS
+ link_idx = offsets % N_LOCAL_LINKS
+ else:
+ offsets_node = (offsets - offsets_defaults[1][1]) // (
+ N_LOCAL_LINKS + N_CROSS_LINKS) + offsets_defaults[1][0]
+ link_idx = (offsets - offsets_defaults[1][1]) % (
+ N_LOCAL_LINKS + N_CROSS_LINKS)
+ l_idx, x, y = get_coord(offsets_node, map_size, offsets_defaults)
+ return l_idx, x, y, link_idx
+
+
+def is_valid_coord(l_idx, x, y, map_size):
+ w = map_size[l_idx][1]
+ h = map_size[l_idx][0]
+ return x >= 0 and x < w and y >= 0 and y < h
+
+
+def get_neighbours(l_idx, x, y, map_size, offsets_defaults):
+ if l_idx == 0:
+ coord = [(0, x - 1, y - 1), (0, x, y - 1), (0, x + 1, y - 1),
+ (0, x - 1, y), (0, x + 1, y), (0, x - 1, y + 1),
+ (0, x, y + 1), (0, x + 1, y + 1)]
+ else:
+ coord = [(l_idx, x - 1, y - 1),
+ (l_idx, x, y - 1), (l_idx, x + 1, y - 1), (l_idx, x - 1, y),
+ (l_idx, x + 1, y), (l_idx, x - 1, y + 1), (l_idx, x, y + 1),
+ (l_idx, x + 1, y + 1), (l_idx - 1, 2 * x, 2 * y),
+ (l_idx - 1, 2 * x + 1, 2 * y), (l_idx - 1, 2 * x, 2 * y + 1),
+ (l_idx - 1, 2 * x + 1, 2 * y + 1)]
+ neighbours_offsets = []
+ link_idx = 0
+ for nl_idx, nx, ny in coord:
+ if is_valid_coord(nl_idx, nx, ny, map_size):
+ neighbours_offset_node = offsets_defaults[nl_idx][
+ 0] + map_size[nl_idx][1] * ny + nx
+ if l_idx == 0:
+ neighbours_offset_link = offsets_defaults[l_idx][1] + (
+ map_size[l_idx][1] * y + x) * N_LOCAL_LINKS + link_idx
+ else:
+ off_tmp = (map_size[l_idx][1] * y + x) * (
+ N_LOCAL_LINKS + N_CROSS_LINKS)
+ neighbours_offset_link = offsets_defaults[l_idx][
+ 1] + off_tmp + link_idx
+ neighbours_offsets.append(
+ [neighbours_offset_node, neighbours_offset_link, link_idx])
+ link_idx += 1
+ # [node_offsets, link_offsets, link_idx(0-7/11)]
+ return neighbours_offsets
+
+
+def decode_segments_links_python(image_size, all_nodes, all_links, all_reg,
+ anchor_sizes):
+ batch_size = 1 # FLAGS.test_batch_size
+ # offsets = 12285 #768
+ all_nodes_flat = tf.concat(
+ [tf.reshape(o, [batch_size, -1, N_SEG_CLASSES]) for o in all_nodes],
+ axis=1)
+ all_links_flat = tf.concat(
+ [tf.reshape(o, [batch_size, -1, N_LNK_CLASSES]) for o in all_links],
+ axis=1)
+ all_reg_flat = tf.concat(
+ [tf.reshape(o, [batch_size, -1, OFFSET_DIM]) for o in all_reg], axis=1)
+ segments, group_indices, segment_counts, group_indices_all = tf.py_func(
+ decode_batch, [
+ all_nodes_flat, all_links_flat, all_reg_flat, image_size,
+ tf.constant(anchor_sizes)
+ ], [tf.float32, tf.int32, tf.int32, tf.int32])
+ return segments, group_indices, segment_counts, group_indices_all
+
+
+def decode_segments_links_train(image_size, all_nodes, all_links, all_reg,
+ anchor_sizes):
+ batch_size = FLAGS.train_batch_size
+ # offsets = 12285 #768
+ all_nodes_flat = tf.concat(
+ [tf.reshape(o, [batch_size, -1, N_SEG_CLASSES]) for o in all_nodes],
+ axis=1)
+ all_links_flat = tf.concat(
+ [tf.reshape(o, [batch_size, -1, N_LNK_CLASSES]) for o in all_links],
+ axis=1)
+ all_reg_flat = tf.concat(
+ [tf.reshape(o, [batch_size, -1, OFFSET_DIM]) for o in all_reg], axis=1)
+ segments, group_indices, segment_counts, group_indices_all = tf.py_func(
+ decode_batch, [
+ all_nodes_flat, all_links_flat, all_reg_flat, image_size,
+ tf.constant(anchor_sizes)
+ ], [tf.float32, tf.int32, tf.int32, tf.int32])
+ return segments, group_indices, segment_counts, group_indices_all
+
+
+def decode_batch(all_nodes, all_links, all_reg, image_size, anchor_sizes):
+ batch_size = all_nodes.shape[0]
+ batch_segments = []
+ batch_group_indices = []
+ batch_segments_counts = []
+ batch_group_indices_all = []
+ for image_id in range(batch_size):
+ image_node_scores = all_nodes[image_id, :, :]
+ image_link_scores = all_links[image_id, :, :]
+ image_reg = all_reg[image_id, :, :]
+ image_segments, image_group_indices, image_segments_counts, image_group_indices_all = decode_image(
+ image_node_scores, image_link_scores, image_reg, image_size,
+ anchor_sizes)
+ batch_segments.append(image_segments)
+ batch_group_indices.append(image_group_indices)
+ batch_segments_counts.append(image_segments_counts)
+ batch_group_indices_all.append(image_group_indices_all)
+ max_count = np.max(batch_segments_counts)
+ for image_id in range(batch_size):
+ if not batch_segments_counts[image_id] == max_count:
+ batch_segments_pad = (max_count - batch_segments_counts[image_id]
+ ) * [OFFSET_DIM * [0.0]]
+ batch_segments[image_id] = np.vstack(
+ (batch_segments[image_id], np.array(batch_segments_pad)))
+ batch_group_indices[image_id] = np.hstack(
+ (batch_group_indices[image_id],
+ np.array(
+ (max_count - batch_segments_counts[image_id]) * [-1])))
+ return np.asarray(batch_segments, np.float32), np.asarray(
+ batch_group_indices,
+ np.int32), np.asarray(batch_segments_counts,
+ np.int32), np.asarray(batch_group_indices_all,
+ np.int32)
+
+
+def decode_image(image_node_scores, image_link_scores, image_reg, image_size,
+ anchor_sizes):
+ map_size = []
+ offsets_defaults = []
+ offsets_default_node = 0
+ offsets_default_link = 0
+ for i in range(N_DET_LAYERS):
+ offsets_defaults.append([offsets_default_node, offsets_default_link])
+ map_size.append(image_size // (2**(2 + i)))
+ offsets_default_node += map_size[i][0] * map_size[i][1]
+ if i == 0:
+ offsets_default_link += map_size[i][0] * map_size[i][
+ 1] * N_LOCAL_LINKS
+ else:
+ offsets_default_link += map_size[i][0] * map_size[i][1] * (
+ N_LOCAL_LINKS + N_CROSS_LINKS)
+
+ image_group_indices_all = decode_image_by_join(image_node_scores,
+ image_link_scores,
+ FLAGS.node_threshold,
+ FLAGS.link_threshold,
+ map_size, offsets_defaults)
+ image_group_indices_all -= 1
+ image_group_indices = image_group_indices_all[np.where(
+ image_group_indices_all >= 0)[0]]
+ image_segments_counts = len(image_group_indices)
+ # convert image_reg to segments with scores(OFFSET_DIM+1)
+ image_segments = np.zeros((image_segments_counts, OFFSET_DIM),
+ dtype=np.float32)
+ for i, offsets in enumerate(np.where(image_group_indices_all >= 0)[0]):
+ encoded_cx = image_reg[offsets, 0]
+ encoded_cy = image_reg[offsets, 1]
+ encoded_width = image_reg[offsets, 2]
+ encoded_height = image_reg[offsets, 3]
+ encoded_theta_cos = image_reg[offsets, 4]
+ encoded_theta_sin = image_reg[offsets, 5]
+
+ l_idx, x, y = get_coord(offsets, map_size, offsets_defaults)
+ rs = anchor_sizes[l_idx]
+ eps = 1e-6
+ image_segments[i, 0] = encoded_cx * rs + (2**(2 + l_idx)) * (x + 0.5)
+ image_segments[i, 1] = encoded_cy * rs + (2**(2 + l_idx)) * (y + 0.5)
+ image_segments[i, 2] = np.exp(encoded_width) * rs - eps
+ image_segments[i, 3] = np.exp(encoded_height) * rs - eps
+ image_segments[i, 4] = encoded_theta_cos
+ image_segments[i, 5] = encoded_theta_sin
+
+ return image_segments, image_group_indices, image_segments_counts, image_group_indices_all
+
+
+def decode_image_by_join(node_scores, link_scores, node_threshold,
+ link_threshold, map_size, offsets_defaults):
+ node_mask = node_scores[:, POS_LABEL] >= node_threshold
+ link_mask = link_scores[:, POS_LABEL] >= link_threshold
+ group_mask = np.zeros_like(node_mask, np.int32) - 1
+ offsets_pos = np.where(node_mask == 1)[0]
+
+ def find_parent(point):
+ return group_mask[point]
+
+ def set_parent(point, parent):
+ group_mask[point] = parent
+
+ def is_root(point):
+ return find_parent(point) == -1
+
+ def find_root(point):
+ root = point
+ update_parent = False
+ while not is_root(root):
+ root = find_parent(root)
+ update_parent = True
+
+ # for acceleration of find_root
+ if update_parent:
+ set_parent(point, root)
+
+ return root
+
+ def join(p1, p2):
+ root1 = find_root(p1)
+ root2 = find_root(p2)
+
+ if root1 != root2:
+ set_parent(root1, root2)
+
+ def get_all():
+ root_map = {}
+
+ def get_index(root):
+ if root not in root_map:
+ root_map[root] = len(root_map) + 1
+ return root_map[root]
+
+ mask = np.zeros_like(node_mask, dtype=np.int32)
+ for i, point in enumerate(offsets_pos):
+ point_root = find_root(point)
+ bbox_idx = get_index(point_root)
+ mask[point] = bbox_idx
+ return mask
+
+ # join by link
+ pos_link = 0
+ for i, offsets in enumerate(offsets_pos):
+ l_idx, x, y = get_coord(offsets, map_size, offsets_defaults)
+ neighbours = get_neighbours(l_idx, x, y, map_size, offsets_defaults)
+ for n_idx, noffsets in enumerate(neighbours):
+ link_value = link_mask[noffsets[1]]
+ node_cls = node_mask[noffsets[0]]
+ if link_value and node_cls:
+ pos_link += 1
+ join(offsets, noffsets[0])
+ # print(pos_link)
+ mask = get_all()
+ return mask
+
+
+def get_link_mask(node_mask, offsets_defaults, link_max):
+ link_mask = np.zeros_like(link_max)
+ link_mask[0:offsets_defaults[1][1]] = np.tile(
+ node_mask[0:offsets_defaults[1][0]],
+ (N_LOCAL_LINKS, 1)).transpose().reshape(offsets_defaults[1][1])
+ link_mask[offsets_defaults[1][1]:offsets_defaults[2][1]] = np.tile(
+ node_mask[offsets_defaults[1][0]:offsets_defaults[2][0]],
+ (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape(
+ (offsets_defaults[2][1] - offsets_defaults[1][1]))
+ link_mask[offsets_defaults[2][1]:offsets_defaults[3][1]] = np.tile(
+ node_mask[offsets_defaults[2][0]:offsets_defaults[3][0]],
+ (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape(
+ (offsets_defaults[3][1] - offsets_defaults[2][1]))
+ link_mask[offsets_defaults[3][1]:offsets_defaults[4][1]] = np.tile(
+ node_mask[offsets_defaults[3][0]:offsets_defaults[4][0]],
+ (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape(
+ (offsets_defaults[4][1] - offsets_defaults[3][1]))
+ link_mask[offsets_defaults[4][1]:offsets_defaults[5][1]] = np.tile(
+ node_mask[offsets_defaults[4][0]:offsets_defaults[5][0]],
+ (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape(
+ (offsets_defaults[5][1] - offsets_defaults[4][1]))
+ link_mask[offsets_defaults[5][1]:] = np.tile(
+ node_mask[offsets_defaults[5][0]:],
+ (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape(
+ (len(link_mask) - offsets_defaults[5][1]))
+
+ return link_mask
+
+
+def get_link8(link_scores_raw, map_size):
+ # link[i-1] -local- start -16- end -cross- link[i]
+ link8_mask = np.zeros((link_scores_raw.shape[0]))
+ for i in range(N_DET_LAYERS):
+ if i == 0:
+ offsets_start = map_size[i][0] * map_size[i][1] * N_LOCAL_LINKS
+ offsets_end = map_size[i][0] * map_size[i][1] * (
+ N_LOCAL_LINKS + 16)
+ offsets_link = map_size[i][0] * map_size[i][1] * (
+ N_LOCAL_LINKS + 16)
+ link8_mask[:offsets_start] = 1
+ else:
+ offsets_start = offsets_link + map_size[i][0] * map_size[i][
+ 1] * N_LOCAL_LINKS
+ offsets_end = offsets_link + map_size[i][0] * map_size[i][1] * (
+ N_LOCAL_LINKS + 16)
+ offsets_link_pre = offsets_link
+ offsets_link += map_size[i][0] * map_size[i][1] * (
+ N_LOCAL_LINKS + 16 + N_CROSS_LINKS)
+ link8_mask[offsets_link_pre:offsets_start] = 1
+ link8_mask[offsets_end:offsets_link] = 1
+ return link_scores_raw[np.where(link8_mask > 0)[0], :]
+
+
+def decode_image_by_mutex(node_scores, link_scores, node_threshold,
+ link_threshold, map_size, offsets_defaults):
+ node_mask = node_scores[:, POS_LABEL] >= node_threshold
+ link_pos = link_scores[:, POS_LABEL]
+ link_mut = link_scores[:, MUT_LABEL]
+ link_max = np.max(np.vstack((link_pos, link_mut)), axis=0)
+
+ offsets_pos_list = np.where(node_mask == 1)[0].tolist()
+
+ link_mask_th = link_max >= link_threshold
+ link_mask = get_link_mask(node_mask, offsets_defaults, link_max)
+ offsets_link_max = np.argsort(-(link_max * link_mask * link_mask_th))
+ offsets_link_max = offsets_link_max[:len(offsets_pos_list) * 8]
+
+ group_mask = np.zeros_like(node_mask, dtype=np.int32) - 1
+    # build with a comprehension so every entry is an independent list
+    mutex_mask = [[] for _ in range(len(node_mask))]
+
+ def find_parent(point):
+ return group_mask[point]
+
+ def set_parent(point, parent):
+ group_mask[point] = parent
+
+ def set_mutex_constraint(point, mutex_point_list):
+ mutex_mask[point] = mutex_point_list
+
+ def find_mutex_constraint(point):
+ mutex_point_list = mutex_mask[point]
+ # update mutex_point_list
+ mutex_point_list_new = []
+ if not mutex_point_list == []:
+ for mutex_point in mutex_point_list:
+ if not is_root(mutex_point):
+ mutex_point = find_root(mutex_point)
+ if mutex_point not in mutex_point_list_new:
+ mutex_point_list_new.append(mutex_point)
+ set_mutex_constraint(point, mutex_point_list_new)
+ return mutex_point_list_new
+
+ def combine_mutex_constraint(point, parent):
+ mutex_point_list = find_mutex_constraint(point)
+ mutex_parent_list = find_mutex_constraint(parent)
+ for mutex_point in mutex_point_list:
+ if not is_root(mutex_point):
+ mutex_point = find_root(mutex_point)
+ if mutex_point not in mutex_parent_list:
+ mutex_parent_list.append(mutex_point)
+ set_mutex_constraint(parent, mutex_parent_list)
+
+ def add_mutex_constraint(p1, p2):
+ mutex_point_list1 = find_mutex_constraint(p1)
+ mutex_point_list2 = find_mutex_constraint(p2)
+
+ if p1 not in mutex_point_list2:
+ mutex_point_list2.append(p1)
+ if p2 not in mutex_point_list1:
+ mutex_point_list1.append(p2)
+ set_mutex_constraint(p1, mutex_point_list1)
+ set_mutex_constraint(p2, mutex_point_list2)
+
+ def is_root(point):
+ return find_parent(point) == -1
+
+ def find_root(point):
+ root = point
+ update_parent = False
+ while not is_root(root):
+ root = find_parent(root)
+ update_parent = True
+
+ # for acceleration of find_root
+ if update_parent:
+ set_parent(point, root)
+
+ return root
+
+ def join(p1, p2):
+ root1 = find_root(p1)
+ root2 = find_root(p2)
+
+ if root1 != root2 and (root1 not in find_mutex_constraint(root2)):
+ set_parent(root1, root2)
+ combine_mutex_constraint(root1, root2)
+
+ def disjoin(p1, p2):
+ root1 = find_root(p1)
+ root2 = find_root(p2)
+
+ if root1 != root2:
+ add_mutex_constraint(root1, root2)
+
+ def get_all():
+ root_map = {}
+
+ def get_index(root):
+ if root not in root_map:
+ root_map[root] = len(root_map) + 1
+ return root_map[root]
+
+ mask = np.zeros_like(node_mask, dtype=np.int32)
+ for _, point in enumerate(offsets_pos_list):
+ point_root = find_root(point)
+ bbox_idx = get_index(point_root)
+ mask[point] = bbox_idx
+ return mask
+
+ # join by link
+ pos_link = 0
+ mut_link = 0
+ for _, offsets_link in enumerate(offsets_link_max):
+ l_idx, x, y, link_idx = get_coord_link(offsets_link, map_size,
+ offsets_defaults)
+ offsets = offsets_defaults[l_idx][0] + map_size[l_idx][1] * y + x
+ if offsets in offsets_pos_list:
+ neighbours = get_neighbours(l_idx, x, y, map_size,
+ offsets_defaults)
+ if not len(np.where(np.array(neighbours)[:,
+ 2] == link_idx)[0]) == 0:
+ noffsets = neighbours[np.where(
+ np.array(neighbours)[:, 2] == link_idx)[0][0]]
+ link_pos_value = link_pos[noffsets[1]]
+ link_mut_value = link_mut[noffsets[1]]
+ node_cls = node_mask[noffsets[0]]
+ if node_cls and (link_pos_value > link_mut_value):
+ pos_link += 1
+ join(offsets, noffsets[0])
+ elif node_cls and (link_pos_value < link_mut_value):
+ mut_link += 1
+ disjoin(offsets, noffsets[0])
+
+ mask = get_all()
+ return mask
diff --git a/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py b/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py
new file mode 100644
index 00000000..6371d4e5
--- /dev/null
+++ b/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py
@@ -0,0 +1,432 @@
+"""Contains definitions for the original form of Residual Networks.
+The 'v1' residual networks (ResNets) implemented in this module were proposed
+by:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+ Deep Residual Learning for Image Recognition. arXiv:1512.03385
+Other variants were introduced in:
+[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+ Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
+The networks defined in this module utilize the bottleneck building block of
+[1] with projection shortcuts only for increasing depths. They employ batch
+normalization *after* every weight layer. This is the architecture used by
+MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
+ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
+architecture and the alternative 'v2' architecture of [2] which uses batch
+normalization *before* every weight layer in the so-called full pre-activation
+units.
+Typical use:
+ from tensorflow.contrib.slim.nets import resnet_v1
+ResNet-101 for image classification into 1000 classes:
+ # inputs has shape [batch, 224, 224, 3]
+ with slim.arg_scope(resnet_v1.resnet_arg_scope()):
+ net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)
+ResNet-101 for semantic segmentation into 21 classes:
+ # inputs has shape [batch, 513, 513, 3]
+ with slim.arg_scope(resnet_v1.resnet_arg_scope()):
+ net, end_points = resnet_v1.resnet_v1_101(inputs,
+ 21,
+ is_training=False,
+ global_pool=False,
+ output_stride=16)
+"""
+import tensorflow as tf
+import tf_slim as slim
+
+from . import resnet_utils
+
+if tf.__version__ >= '2.0':
+ tf = tf.compat.v1
+
+resnet_arg_scope = resnet_utils.resnet_arg_scope
+
+
+@slim.add_arg_scope
+def basicblock(inputs,
+ depth,
+ depth_bottleneck,
+ stride,
+ rate=1,
+ outputs_collections=None,
+ scope=None):
+ """Bottleneck residual unit variant with BN after convolutions.
+ This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
+ its definition. Note that we use here the bottleneck variant which has an
+ extra bottleneck layer.
+ When putting together two consecutive ResNet blocks that use this unit, one
+ should use stride = 2 in the last unit of the first block.
+ Args:
+ inputs: A tensor of size [batch, height, width, channels].
+ depth: The depth of the ResNet unit output.
+ depth_bottleneck: The depth of the bottleneck layers.
+ stride: The ResNet unit's stride. Determines the amount of downsampling of
+ the units output compared to its input.
+ rate: An integer, rate for atrous convolution.
+ outputs_collections: Collection to add the ResNet unit output.
+ scope: Optional variable_scope.
+ Returns:
+ The ResNet unit's output.
+ """
+ with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
+ depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
+ if depth == depth_in:
+ shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
+ else:
+ shortcut = slim.conv2d(
+ inputs,
+ depth, [1, 1],
+ stride=stride,
+ activation_fn=None,
+ scope='shortcut')
+
+ residual = resnet_utils.conv2d_same(
+ inputs, depth, 3, stride, rate=rate, scope='conv1')
+ residual = resnet_utils.conv2d_same(
+ residual, depth, 3, 1, rate=rate, scope='conv2')
+
+ output = tf.nn.relu(residual + shortcut)
+
+ return slim.utils.collect_named_outputs(outputs_collections,
+ sc.original_name_scope, output)
+
+
+@slim.add_arg_scope
+def bottleneck(inputs,
+ depth,
+ depth_bottleneck,
+ stride,
+ rate=1,
+ outputs_collections=None,
+ scope=None):
+ """Bottleneck residual unit variant with BN after convolutions.
+ This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
+ its definition. Note that we use here the bottleneck variant which has an
+ extra bottleneck layer.
+ When putting together two consecutive ResNet blocks that use this unit, one
+ should use stride = 2 in the last unit of the first block.
+ Args:
+ inputs: A tensor of size [batch, height, width, channels].
+ depth: The depth of the ResNet unit output.
+ depth_bottleneck: The depth of the bottleneck layers.
+ stride: The ResNet unit's stride. Determines the amount of downsampling of
+ the units output compared to its input.
+ rate: An integer, rate for atrous convolution.
+ outputs_collections: Collection to add the ResNet unit output.
+ scope: Optional variable_scope.
+ Returns:
+ The ResNet unit's output.
+ """
+ with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
+ depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
+ if depth == depth_in:
+ shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
+ else:
+ shortcut = slim.conv2d(
+ inputs,
+ depth, [1, 1],
+ stride=stride,
+ activation_fn=None,
+ scope='shortcut')
+
+ residual = slim.conv2d(
+ inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
+ residual = resnet_utils.conv2d_same(
+ residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
+ residual = slim.conv2d(
+ residual,
+ depth, [1, 1],
+ stride=1,
+ activation_fn=None,
+ scope='conv3')
+
+ output = tf.nn.relu(shortcut + residual)
+
+ return slim.utils.collect_named_outputs(outputs_collections,
+ sc.original_name_scope, output)
+
+
+def resnet_v1(inputs,
+ blocks,
+ num_classes=None,
+ is_training=True,
+ global_pool=True,
+ output_stride=None,
+ include_root_block=True,
+ spatial_squeeze=True,
+ reuse=None,
+ scope=None):
+ """Generator for v1 ResNet models.
+ This function generates a family of ResNet v1 models. See the resnet_v1_*()
+ methods for specific model instantiations, obtained by selecting different
+ block instantiations that produce ResNets of various depths.
+ Training for image classification on Imagenet is usually done with [224, 224]
+ inputs, resulting in [7, 7] feature maps at the output of the last ResNet
+ block for the ResNets defined in [1] that have nominal stride equal to 32.
+ However, for dense prediction tasks we advise that one uses inputs with
+ spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
+ this case the feature maps at the ResNet output will have spatial shape
+ [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
+ and corners exactly aligned with the input image corners, which greatly
+ facilitates alignment of the features to the image. Using as input [225, 225]
+ images results in [8, 8] feature maps at the output of the last ResNet block.
+ For dense prediction tasks, the ResNet needs to run in fully-convolutional
+ (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
+ have nominal stride equal to 32 and a good choice in FCN mode is to use
+ output_stride=16 in order to increase the density of the computed features at
+ small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
+ Args:
+ inputs: A tensor of size [batch, height_in, width_in, channels].
+ blocks: A list of length equal to the number of ResNet blocks. Each element
+ is a resnet_utils.Block object describing the units in the block.
+ num_classes: Number of predicted classes for classification tasks. If None
+ we return the features before the logit layer.
+ is_training: whether is training or not.
+ global_pool: If True, we perform global average pooling before computing the
+ logits. Set to True for image classification, False for dense prediction.
+ output_stride: If None, then the output will be computed at the nominal
+ network stride. If output_stride is not None, it specifies the requested
+ ratio of input to output spatial resolution.
+ include_root_block: If True, include the initial convolution followed by
+ max-pooling, if False excludes it.
+ spatial_squeeze: if True, logits is of shape [B, C], if false logits is
+ of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+ reuse: whether or not the network and its variables should be reused. To be
+ able to reuse 'scope' must be given.
+ scope: Optional variable_scope.
+ Returns:
+ net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
+ If global_pool is False, then height_out and width_out are reduced by a
+ factor of output_stride compared to the respective height_in and width_in,
+ else both height_out and width_out equal one. If num_classes is None, then
+ net is the output of the last ResNet block, potentially after global
+ average pooling. If num_classes is not None, net contains the pre-softmax
+ activations.
+ end_points: A dictionary from components of the network to the corresponding
+ activation.
+ Raises:
+ ValueError: If the target output_stride is not valid.
+ """
+ with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
+ end_points_collection = sc.name + '_end_points'
+ with slim.arg_scope(
+ [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
+ outputs_collections=end_points_collection):
+ with slim.arg_scope([slim.batch_norm], is_training=is_training):
+ net = inputs
+ if include_root_block:
+ if output_stride is not None:
+ if output_stride % 4 != 0:
+ raise ValueError(
+ 'The output_stride needs to be a multiple of 4.'
+ )
+ output_stride /= 4
+ net = resnet_utils.conv2d_same(
+ net, 64, 7, stride=2, scope='conv1')
+ net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
+ net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
+
+ net = slim.utils.collect_named_outputs(
+ end_points_collection, 'pool2', net)
+
+ net = resnet_utils.stack_blocks_dense(net, blocks,
+ output_stride)
+
+ end_points = slim.utils.convert_collection_to_dict(
+ end_points_collection)
+
+ end_points['pool1'] = end_points['resnet_v1_18/block2/unit_2']
+ end_points['pool2'] = end_points['resnet_v1_18/block3/unit_2']
+ end_points['pool3'] = end_points['resnet_v1_18/block4/unit_2']
+ end_points['pool4'] = end_points['resnet_v1_18/block5/unit_2']
+ end_points['pool5'] = end_points['resnet_v1_18/block6/unit_2']
+ end_points['pool6'] = net
+
+ return net, end_points
+
+
+resnet_v1.default_image_size = 224
+
+
+def resnet_v1_18(inputs,
+ num_classes=None,
+ is_training=True,
+ global_pool=True,
+ output_stride=None,
+ spatial_squeeze=True,
+ reuse=None,
+ scope='resnet_v1_18'):
+ """ResNet-18 model of [1]. See resnet_v1() for arg and return description."""
+ blocks = [
+ resnet_utils.Block('block1', basicblock,
+ [(64, 64, 1)] + [(64, 64, 1)]),
+ resnet_utils.Block('block2', basicblock,
+ [(128, 128, 1)] + [(128, 128, 1)]),
+ resnet_utils.Block('block3', basicblock,
+ [(256, 256, 2)] + [(256, 256, 1)]),
+ resnet_utils.Block('block4', basicblock,
+ [(512, 512, 2)] + [(512, 512, 1)]),
+ resnet_utils.Block('block5', basicblock,
+ [(256, 256, 2)] + [(256, 256, 1)]),
+ resnet_utils.Block('block6', basicblock,
+ [(256, 256, 2)] + [(256, 256, 1)]),
+ resnet_utils.Block('block7', basicblock,
+ [(256, 256, 2)] + [(256, 256, 1)]),
+ ]
+ return resnet_v1(
+ inputs,
+ blocks,
+ num_classes,
+ is_training,
+ global_pool=global_pool,
+ output_stride=output_stride,
+ include_root_block=True,
+ spatial_squeeze=spatial_squeeze,
+ reuse=reuse,
+ scope=scope)
+
+
+resnet_v1_18.default_image_size = resnet_v1.default_image_size
+
+
+def resnet_v1_50(inputs,
+ num_classes=None,
+ is_training=True,
+ global_pool=True,
+ output_stride=None,
+ spatial_squeeze=True,
+ reuse=None,
+ scope='resnet_v1_50'):
+ """ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
+ blocks = [
+ resnet_utils.Block('block1', bottleneck,
+ [(256, 64, 1)] * 2 + [(256, 64, 2)]),
+ resnet_utils.Block('block2', bottleneck,
+ [(512, 128, 1)] * 3 + [(512, 128, 2)]),
+ resnet_utils.Block('block3', bottleneck,
+ [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
+ resnet_utils.Block('block4', bottleneck,
+ [(2048, 512, 1)] * 3 + [(2048, 512, 2)]),
+ resnet_utils.Block('block5', bottleneck,
+ [(1024, 256, 1)] * 2 + [(1024, 256, 2)]),
+ resnet_utils.Block('block6', bottleneck, [(1024, 256, 1)] * 2),
+ ]
+ return resnet_v1(
+ inputs,
+ blocks,
+ num_classes,
+ is_training,
+ global_pool=global_pool,
+ output_stride=output_stride,
+ include_root_block=True,
+ spatial_squeeze=spatial_squeeze,
+ reuse=reuse,
+ scope=scope)
+
+
+resnet_v1_50.default_image_size = resnet_v1.default_image_size
+
+
+def resnet_v1_101(inputs,
+ num_classes=None,
+ is_training=True,
+ global_pool=True,
+ output_stride=None,
+ spatial_squeeze=True,
+ reuse=None,
+ scope='resnet_v1_101'):
+ """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
+ blocks = [
+ resnet_utils.Block('block1', bottleneck,
+ [(256, 64, 1)] * 2 + [(256, 64, 2)]),
+ resnet_utils.Block('block2', bottleneck,
+ [(512, 128, 1)] * 3 + [(512, 128, 2)]),
+ resnet_utils.Block('block3', bottleneck,
+ [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
+ resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
+ ]
+ return resnet_v1(
+ inputs,
+ blocks,
+ num_classes,
+ is_training,
+ global_pool=global_pool,
+ output_stride=output_stride,
+ include_root_block=True,
+ spatial_squeeze=spatial_squeeze,
+ reuse=reuse,
+ scope=scope)
+
+
+resnet_v1_101.default_image_size = resnet_v1.default_image_size
+
+
+def resnet_v1_152(inputs,
+ num_classes=None,
+ is_training=True,
+ global_pool=True,
+ output_stride=None,
+ spatial_squeeze=True,
+ reuse=None,
+ scope='resnet_v1_152'):
+ """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
+ blocks = [
+ resnet_utils.Block('block1', bottleneck,
+ [(256, 64, 1)] * 2 + [(256, 64, 2)]),
+ resnet_utils.Block('block2', bottleneck,
+ [(512, 128, 1)] * 7 + [(512, 128, 2)]),
+ resnet_utils.Block('block3', bottleneck,
+ [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
+ resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
+ ]
+ return resnet_v1(
+ inputs,
+ blocks,
+ num_classes,
+ is_training,
+ global_pool=global_pool,
+ output_stride=output_stride,
+ include_root_block=True,
+ spatial_squeeze=spatial_squeeze,
+ reuse=reuse,
+ scope=scope)
+
+
+resnet_v1_152.default_image_size = resnet_v1.default_image_size
+
+
+def resnet_v1_200(inputs,
+ num_classes=None,
+ is_training=True,
+ global_pool=True,
+ output_stride=None,
+ spatial_squeeze=True,
+ reuse=None,
+ scope='resnet_v1_200'):
+ """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
+ blocks = [
+ resnet_utils.Block('block1', bottleneck,
+ [(256, 64, 1)] * 2 + [(256, 64, 2)]),
+ resnet_utils.Block('block2', bottleneck,
+ [(512, 128, 1)] * 23 + [(512, 128, 2)]),
+ resnet_utils.Block('block3', bottleneck,
+ [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
+ resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
+ ]
+ return resnet_v1(
+ inputs,
+ blocks,
+ num_classes,
+ is_training,
+ global_pool=global_pool,
+ output_stride=output_stride,
+ include_root_block=True,
+ spatial_squeeze=spatial_squeeze,
+ reuse=reuse,
+ scope=scope)
+
+
+resnet_v1_200.default_image_size = resnet_v1.default_image_size
+
+if __name__ == '__main__':
+    # quick graph-construction smoke test
+    inputs = tf.placeholder(
+        tf.float32, shape=(None, 224, 224, 3), name='input')
+    with slim.arg_scope(resnet_arg_scope()):
+        logits, end_points = resnet_v1_50(inputs)
diff --git a/modelscope/pipelines/cv/ocr_utils/resnet_utils.py b/modelscope/pipelines/cv/ocr_utils/resnet_utils.py
new file mode 100644
index 00000000..e0e240c8
--- /dev/null
+++ b/modelscope/pipelines/cv/ocr_utils/resnet_utils.py
@@ -0,0 +1,231 @@
+"""Contains building blocks for various versions of Residual Networks.
+Residual networks (ResNets) were proposed in:
+ Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+ Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015
+More variants were introduced in:
+ Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+ Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016
+We can obtain different ResNet variants by changing the network depth, width,
+and form of residual unit. This module implements the infrastructure for
+building them. Concrete ResNet units and full ResNet networks are implemented in
+the accompanying resnet_v1.py and resnet_v2.py modules.
+Compared to https://github.com/KaimingHe/deep-residual-networks, in the current
+implementation we subsample the output activations in the last residual unit of
+each block, instead of subsampling the input activations in the first residual
+unit of each block. The two implementations give identical results but our
+implementation is more memory efficient.
+"""
+
+import collections
+
+import tensorflow as tf
+import tf_slim as slim
+
+if tf.__version__ >= '2.0':
+ tf = tf.compat.v1
+
+
+class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
+ """A named tuple describing a ResNet block.
+ Its parts are:
+ scope: The scope of the `Block`.
+ unit_fn: The ResNet unit function which takes as input a `Tensor` and
+ returns another `Tensor` with the output of the ResNet unit.
+ args: A list of length equal to the number of units in the `Block`. The list
+ contains one (depth, depth_bottleneck, stride) tuple for each unit in the
+ block to serve as argument to unit_fn.
+ """
+
+
+def subsample(inputs, factor, scope=None):
+ """Subsamples the input along the spatial dimensions.
+ Args:
+ inputs: A `Tensor` of size [batch, height_in, width_in, channels].
+ factor: The subsampling factor.
+ scope: Optional variable_scope.
+ Returns:
+ output: A `Tensor` of size [batch, height_out, width_out, channels] with the
+ input, either intact (if factor == 1) or subsampled (if factor > 1).
+ """
+ if factor == 1:
+ return inputs
+ else:
+ return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
+
+
+def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
+ """Strided 2-D convolution with 'SAME' padding.
+ When stride > 1, then we do explicit zero-padding, followed by conv2d with
+ 'VALID' padding.
+ Note that
+ net = conv2d_same(inputs, num_outputs, 3, stride=stride)
+ is equivalent to
+ net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
+ net = subsample(net, factor=stride)
+ whereas
+ net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')
+ is different when the input's height or width is even, which is why we add the
+ current function. For more details, see ResnetUtilsTest.testConv2DSameEven().
+ Args:
+ inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
+ num_outputs: An integer, the number of output filters.
+ kernel_size: An int with the kernel_size of the filters.
+ stride: An integer, the output stride.
+ rate: An integer, rate for atrous convolution.
+ scope: Scope.
+ Returns:
+ output: A 4-D tensor of size [batch, height_out, width_out, channels] with
+ the convolution output.
+ """
+ if stride == 1:
+ return slim.conv2d(
+ inputs,
+ num_outputs,
+ kernel_size,
+ stride=1,
+ rate=rate,
+ padding='SAME',
+ scope=scope)
+ else:
+ kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
+ pad_total = kernel_size_effective - 1
+ pad_beg = pad_total // 2
+ pad_end = pad_total - pad_beg
+ inputs = tf.pad(
+ inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
+ return slim.conv2d(
+ inputs,
+ num_outputs,
+ kernel_size,
+ stride=stride,
+ rate=rate,
+ padding='VALID',
+ scope=scope)
+
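+# Padding example (illustrative): for kernel_size=3, rate=1 and stride=2,
+# kernel_size_effective = 3 and pad_total = 2, so one pixel of zero padding is
+# added on each side before the VALID-padded strided convolution.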
+
+@slim.add_arg_scope
+def stack_blocks_dense(net,
+ blocks,
+ output_stride=None,
+ outputs_collections=None):
+ """Stacks ResNet `Blocks` and controls output feature density.
+ First, this function creates scopes for the ResNet in the form of
+ 'block_name/unit_1', 'block_name/unit_2', etc.
+ Second, this function allows the user to explicitly control the ResNet
+ output_stride, which is the ratio of the input to output spatial resolution.
+ This is useful for dense prediction tasks such as semantic segmentation or
+ object detection.
+ Most ResNets consist of 4 ResNet blocks and subsample the activations by a
+    factor of 2 when transitioning between consecutive ResNet blocks. This results
+    in a nominal ResNet output_stride equal to 8. If we set the output_stride to
+ half the nominal network stride (e.g., output_stride=4), then we compute
+ responses twice.
+ Control of the output feature density is implemented by atrous convolution.
+ Args:
+ net: A `Tensor` of size [batch, height, width, channels].
+ blocks: A list of length equal to the number of ResNet `Blocks`. Each
+ element is a ResNet `Block` object describing the units in the `Block`.
+ output_stride: If `None`, then the output will be computed at the nominal
+ network stride. If output_stride is not `None`, it specifies the requested
+ ratio of input to output spatial resolution, which needs to be equal to
+ the product of unit strides from the start up to some level of the ResNet.
+ For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
+ then valid values for the output_stride are 1, 2, 6, 24 or None (which
+ is equivalent to output_stride=24).
+ outputs_collections: Collection to add the ResNet block outputs.
+ Returns:
+ net: Output tensor with stride equal to the specified output_stride.
+ Raises:
+ ValueError: If the target output_stride is not valid.
+ """
+ # The current_stride variable keeps track of the effective stride of the
+ # activations. This allows us to invoke atrous convolution whenever applying
+ # the next residual unit would result in the activations having stride larger
+ # than the target output_stride.
+ current_stride = 1
+
+ # The atrous convolution rate parameter.
+ rate = 1
+
+ for block in blocks:
+ with tf.variable_scope(block.scope, 'block', [net]):
+ for i, unit in enumerate(block.args):
+ if output_stride is not None and current_stride > output_stride:
+ raise ValueError(
+ 'The target output_stride cannot be reached.')
+
+ with tf.variable_scope(
+ 'unit_%d' % (i + 1), values=[net]) as sc:
+ unit_depth, unit_depth_bottleneck, unit_stride = unit
+ # If we have reached the target output_stride, then we need to employ
+ # atrous convolution with stride=1 and multiply the atrous rate by the
+ # current unit's stride for use in subsequent layers.
+ if output_stride is not None and current_stride == output_stride:
+ net = block.unit_fn(
+ net,
+ depth=unit_depth,
+ depth_bottleneck=unit_depth_bottleneck,
+ stride=1,
+ rate=rate)
+ rate *= unit_stride
+
+ else:
+ net = block.unit_fn(
+ net,
+ depth=unit_depth,
+ depth_bottleneck=unit_depth_bottleneck,
+ stride=unit_stride,
+ rate=1)
+ current_stride *= unit_stride
+ net = slim.utils.collect_named_outputs(
+ outputs_collections, sc.name, net)
+
+ if output_stride is not None and current_stride != output_stride:
+ raise ValueError('The target output_stride cannot be reached.')
+
+ return net
+
+
+def resnet_arg_scope(weight_decay=0.0001,
+ batch_norm_decay=0.997,
+ batch_norm_epsilon=1e-5,
+ batch_norm_scale=True):
+ """Defines the default ResNet arg scope.
+ TODO(gpapan): The batch-normalization related default values above are
+ appropriate for use in conjunction with the reference ResNet models
+ released at https://github.com/KaimingHe/deep-residual-networks. When
+ training ResNets from scratch, they might need to be tuned.
+ Args:
+ weight_decay: The weight decay to use for regularizing the model.
+ batch_norm_decay: The moving average decay when estimating layer activation
+ statistics in batch normalization.
+ batch_norm_epsilon: Small constant to prevent division by zero when
+ normalizing activations by their variance in batch normalization.
+ batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
+ activations in the batch normalization layer.
+ Returns:
+ An `arg_scope` to use for the resnet models.
+ """
+ batch_norm_params = {
+ 'decay': batch_norm_decay,
+ 'epsilon': batch_norm_epsilon,
+ 'scale': batch_norm_scale,
+ 'updates_collections': tf.GraphKeys.UPDATE_OPS,
+ }
+
+ with slim.arg_scope(
+ [slim.conv2d],
+ weights_regularizer=slim.l2_regularizer(weight_decay),
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=tf.nn.relu,
+ normalizer_fn=slim.batch_norm,
+ normalizer_params=batch_norm_params):
+ with slim.arg_scope([slim.batch_norm], **batch_norm_params):
+            # The following uses padding='VALID' for max pooling, matching the
+            # accompanying code of 'Deep Residual Learning for Image Recognition'.
+            # The upstream slim reference uses padding='SAME' instead, which makes
+            # feature alignment easier for dense prediction tasks; switch back by
+            # setting slim.arg_scope([slim.max_pool2d], padding='SAME').
+ with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
+ return arg_sc
diff --git a/modelscope/pipelines/cv/ocr_utils/utils.py b/modelscope/pipelines/cv/ocr_utils/utils.py
new file mode 100644
index 00000000..be8e3371
--- /dev/null
+++ b/modelscope/pipelines/cv/ocr_utils/utils.py
@@ -0,0 +1,108 @@
+import cv2
+import numpy as np
+
+
+def rboxes_to_polygons(rboxes):
+ """
+ Convert rboxes to polygons
+ ARGS
+ `rboxes`: [n, 5]
+ RETURN
+ `polygons`: [n, 8]
+ """
+
+ theta = rboxes[:, 4:5]
+ cxcy = rboxes[:, :2]
+ half_w = rboxes[:, 2:3] / 2.
+ half_h = rboxes[:, 3:4] / 2.
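+    # v1 and v2 are the half-width and half-height edge vectors rotated by
+    # theta; the four corners are cxcy -v1-v2, +v1-v2, +v1+v2 and -v1+v2.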
+ v1 = np.hstack([np.cos(theta) * half_w, np.sin(theta) * half_w])
+ v2 = np.hstack([-np.sin(theta) * half_h, np.cos(theta) * half_h])
+ p1 = cxcy - v1 - v2
+ p2 = cxcy + v1 - v2
+ p3 = cxcy + v1 + v2
+ p4 = cxcy - v1 + v2
+ polygons = np.hstack([p1, p2, p3, p4])
+ return polygons
+
+
+def cal_width(box):
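+    # Text-line width: mean length of the p1->p2 and p3->p4 edges of a
+    # polygon given as [x1, y1, ..., x4, y4].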
+ pd1 = point_dist(box[0], box[1], box[2], box[3])
+ pd2 = point_dist(box[4], box[5], box[6], box[7])
+ return (pd1 + pd2) / 2
+
+
+def point_dist(x1, y1, x2, y2):
+ return np.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))
+
+
+def draw_polygons(img, polygons):
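+    # Draw each quadrilateral [x1, y1, ..., x4, y4] on the image as four
+    # green line segments (for visualization/debugging).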
+ for p in polygons.tolist():
+ p = [int(o) for o in p]
+ cv2.line(img, (p[0], p[1]), (p[2], p[3]), (0, 255, 0), 1)
+ cv2.line(img, (p[2], p[3]), (p[4], p[5]), (0, 255, 0), 1)
+ cv2.line(img, (p[4], p[5]), (p[6], p[7]), (0, 255, 0), 1)
+ cv2.line(img, (p[6], p[7]), (p[0], p[1]), (0, 255, 0), 1)
+ return img
+
+
+def nms_python(boxes):
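+    # Greedy NMS over detections of the form [x1, y1, ..., x4, y4, score]:
+    # boxes are sorted by descending score, and a lower-scoring box is
+    # suppressed when the center of either rotated box lies inside the other.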
+ boxes = sorted(boxes, key=lambda x: -x[8])
+ nms_flag = [True] * len(boxes)
+ for i, a in enumerate(boxes):
+ if not nms_flag[i]:
+ continue
+ else:
+ for j, b in enumerate(boxes):
+ if not j > i:
+ continue
+ if not nms_flag[j]:
+ continue
+ score_a = a[8]
+ score_b = b[8]
+ rbox_a = polygon2rbox(a[:8])
+ rbox_b = polygon2rbox(b[:8])
+ if point_in_rbox(rbox_a[:2], rbox_b) or point_in_rbox(
+ rbox_b[:2], rbox_a):
+ if score_a > score_b:
+ nms_flag[j] = False
+ boxes_nms = []
+ for i, box in enumerate(boxes):
+ if nms_flag[i]:
+ boxes_nms.append(box)
+ return boxes_nms
+
+
+def point_in_rbox(c, rbox):
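+    # Test whether point c lies inside the rotated box [cx, cy, w, h, theta]
+    # by projecting the center offset onto the box's local axes.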
+ cx0, cy0 = c[0], c[1]
+ cx1, cy1 = rbox[0], rbox[1]
+ w, h = rbox[2], rbox[3]
+ theta = rbox[4]
+ dist_x = np.abs((cx1 - cx0) * np.cos(theta) + (cy1 - cy0) * np.sin(theta))
+ dist_y = np.abs(-(cx1 - cx0) * np.sin(theta) + (cy1 - cy0) * np.cos(theta))
+ return ((dist_x < w / 2.0) and (dist_y < h / 2.0))
+
+
+def polygon2rbox(polygon):
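+    # Fit a rotated box [cx, cy, w, h, theta] to a 4-point polygon: the center
+    # is the corner mean, the width is the mean of the p1-p2 and p3-p4 edge
+    # lengths, the height is the sum of the center's distances to those two
+    # edges, and theta is the mean of their directions.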
+ x1, x2, x3, x4 = polygon[0], polygon[2], polygon[4], polygon[6]
+ y1, y2, y3, y4 = polygon[1], polygon[3], polygon[5], polygon[7]
+ c_x = (x1 + x2 + x3 + x4) / 4
+ c_y = (y1 + y2 + y3 + y4) / 4
+ w1 = point_dist(x1, y1, x2, y2)
+ w2 = point_dist(x3, y3, x4, y4)
+ h1 = point_line_dist(c_x, c_y, x1, y1, x2, y2)
+ h2 = point_line_dist(c_x, c_y, x3, y3, x4, y4)
+ h = h1 + h2
+ w = (w1 + w2) / 2
+ theta1 = np.arctan2(y2 - y1, x2 - x1)
+ theta2 = np.arctan2(y3 - y4, x3 - x4)
+ theta = (theta1 + theta2) / 2.0
+ return [c_x, c_y, w, h, theta]
+
+
+def point_line_dist(px, py, x1, y1, x2, y2):
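+    # Perpendicular distance from point (px, py) to the infinite line through
+    # (x1, y1) and (x2, y2); eps guards against zero-length segments.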
+ eps = 1e-6
+ dx = x2 - x1
+ dy = y2 - y1
+ div = np.sqrt(dx * dx + dy * dy) + eps
+ dist = np.abs(px * dy - py * dx + x2 * y1 - y2 * x1) / div
+ return dist
diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py
index b1ee121c..b7402b93 100644
--- a/modelscope/pipelines/multi_modal/__init__.py
+++ b/modelscope/pipelines/multi_modal/__init__.py
@@ -1 +1 @@
-from .image_caption_pipeline import ImageCaptionPipeline
+from .image_captioning_pipeline import ImageCaptionPipeline
diff --git a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
new file mode 100644
index 00000000..9f32caf4
--- /dev/null
+++ b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py
@@ -0,0 +1,35 @@
+from typing import Any, Dict, Optional, Union
+
+from modelscope.metainfo import Pipelines
+from modelscope.preprocessors import OfaImageCaptionPreprocessor, Preprocessor
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+from ..base import Model, Pipeline
+from ..builder import PIPELINES
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+ Tasks.image_captioning, module_name=Pipelines.image_caption)
+class ImageCaptionPipeline(Pipeline):
+
+ def __init__(self,
+ model: Union[Model, str],
+                 preprocessor: Optional[Preprocessor] = None,
+ **kwargs):
+        assert isinstance(model, str) or isinstance(model, Model), \
+            'model must be a single str or Model'
+ if isinstance(model, str):
+ pipe_model = Model.from_pretrained(model)
+ elif isinstance(model, Model):
+ pipe_model = model
+ else:
+ raise NotImplementedError
+        if preprocessor is None and pipe_model:
+            preprocessor = OfaImageCaptionPreprocessor(
+                model_dir=pipe_model.model_dir)
+ super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+ return inputs
diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py
index adfa1d4c..df8dbbd9 100644
--- a/modelscope/pipelines/nlp/__init__.py
+++ b/modelscope/pipelines/nlp/__init__.py
@@ -1,7 +1,10 @@
+from .dialog_intent_prediction_pipeline import * # noqa F403
+from .dialog_modeling_pipeline import * # noqa F403
+from .dialog_state_tracking import * # noqa F403
+from .fill_mask_pipeline import * # noqa F403
+from .nli_pipeline import * # noqa F403
from .sentence_similarity_pipeline import * # noqa F403
+from .sentiment_classification_pipeline import * # noqa F403
from .sequence_classification_pipeline import * # noqa F403
-from .space.dialog_intent_prediction_pipeline import * # noqa F403
-from .space.dialog_modeling_pipeline import * # noqa F403
-from .space.dialog_state_tracking import * # noqa F403
from .text_generation_pipeline import * # noqa F403
from .word_segmentation_pipeline import * # noqa F403
diff --git a/modelscope/pipelines/nlp/space/dialog_intent_prediction_pipeline.py b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
similarity index 75%
rename from modelscope/pipelines/nlp/space/dialog_intent_prediction_pipeline.py
rename to modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
index 57245bdf..3fd38641 100644
--- a/modelscope/pipelines/nlp/space/dialog_intent_prediction_pipeline.py
+++ b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py
@@ -1,16 +1,18 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict
-from modelscope.models.nlp import DialogIntentModel
-from modelscope.preprocessors import DialogIntentPredictionPreprocessor
-from modelscope.utils.constant import Tasks
-from ...base import Input, Pipeline
-from ...builder import PIPELINES
+from ...metainfo import Pipelines
+from ...models.nlp import DialogIntentModel
+from ...preprocessors import DialogIntentPredictionPreprocessor
+from ...utils.constant import Tasks
+from ..base import Pipeline
+from ..builder import PIPELINES
__all__ = ['DialogIntentPredictionPipeline']
@PIPELINES.register_module(
- Tasks.dialog_intent_prediction, module_name=r'space-intent')
+ Tasks.dialog_intent_prediction,
+ module_name=Pipelines.dialog_intent_prediction)
class DialogIntentPredictionPipeline(Pipeline):
def __init__(self, model: DialogIntentModel,
diff --git a/modelscope/pipelines/nlp/space/dialog_modeling_pipeline.py b/modelscope/pipelines/nlp/dialog_modeling_pipeline.py
similarity index 89%
rename from modelscope/pipelines/nlp/space/dialog_modeling_pipeline.py
rename to modelscope/pipelines/nlp/dialog_modeling_pipeline.py
index afa352b6..778284de 100644
--- a/modelscope/pipelines/nlp/space/dialog_modeling_pipeline.py
+++ b/modelscope/pipelines/nlp/dialog_modeling_pipeline.py
@@ -3,14 +3,15 @@ from typing import Any, Dict, Optional
from modelscope.models.nlp import DialogModelingModel
from modelscope.preprocessors import DialogModelingPreprocessor
from modelscope.utils.constant import Tasks
-from ...base import Pipeline, Tensor
-from ...builder import PIPELINES
+from ...metainfo import Pipelines
+from ..base import Pipeline, Tensor
+from ..builder import PIPELINES
__all__ = ['DialogModelingPipeline']
@PIPELINES.register_module(
- Tasks.dialog_modeling, module_name=r'space-modeling')
+ Tasks.dialog_modeling, module_name=Pipelines.dialog_modeling)
class DialogModelingPipeline(Pipeline):
def __init__(self, model: DialogModelingModel,
diff --git a/modelscope/pipelines/nlp/dialog_state_tracking.py b/modelscope/pipelines/nlp/dialog_state_tracking.py
new file mode 100644
index 00000000..823248d2
--- /dev/null
+++ b/modelscope/pipelines/nlp/dialog_state_tracking.py
@@ -0,0 +1,45 @@
+from typing import Any, Dict
+
+from ...metainfo import Pipelines
+from ...models.nlp import DialogStateTrackingModel
+from ...preprocessors import DialogStateTrackingPreprocessor
+from ...utils.constant import Tasks
+from ..base import Pipeline
+from ..builder import PIPELINES
+
+__all__ = ['DialogStateTrackingPipeline']
+
+
+@PIPELINES.register_module(
+ Tasks.dialog_state_tracking, module_name=Pipelines.dialog_state_tracking)
+class DialogStateTrackingPipeline(Pipeline):
+
+ def __init__(self, model: DialogStateTrackingModel,
+ preprocessor: DialogStateTrackingPreprocessor, **kwargs):
+ """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
+
+ Args:
+ model (SequenceClassificationModel): a model instance
+ preprocessor (SequenceClassificationPreprocessor): a preprocessor instance
+ """
+
+ super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+ self.model = model
+ # self.tokenizer = preprocessor.tokenizer
+
+ def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+ """process the prediction results
+
+ Args:
+                inputs (Dict[str, Any]): the model prediction outputs, including 'pred'
+
+ Returns:
+ Dict[str, str]: the prediction results
+ """
+ import numpy as np
+ pred = inputs['pred']
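+        # take the indices of the maximum score as the predicted label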
+ pos = np.where(pred == np.max(pred))
+
+ result = {'pred': pred, 'label': pos[0]}
+
+ return result
diff --git a/modelscope/pipelines/nlp/fill_mask_pipeline.py b/modelscope/pipelines/nlp/fill_mask_pipeline.py
new file mode 100644
index 00000000..596d65f7
--- /dev/null
+++ b/modelscope/pipelines/nlp/fill_mask_pipeline.py
@@ -0,0 +1,107 @@
+from typing import Any, Dict, Optional, Union
+
+import torch
+
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.nlp.masked_language_model import MaskedLanguageModelBase
+from ...preprocessors import FillMaskPreprocessor
+from ...utils.constant import Tasks
+from ..base import Pipeline, Tensor
+from ..builder import PIPELINES
+
+__all__ = ['FillMaskPipeline']
+
+
+@PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask)
+class FillMaskPipeline(Pipeline):
+
+ def __init__(self,
+ model: Union[MaskedLanguageModelBase, str],
+ preprocessor: Optional[FillMaskPreprocessor] = None,
+                 first_sequence='sentence',
+ **kwargs):
+ """use `model` and `preprocessor` to create a nlp fill mask pipeline for prediction
+
+ Args:
+ model (MaskedLanguageModelBase): a model instance
+ preprocessor (FillMaskPreprocessor): a preprocessor instance
+ """
+ fill_mask_model = model if isinstance(
+ model, MaskedLanguageModelBase) else Model.from_pretrained(model)
+ assert fill_mask_model.config is not None
+
+ if preprocessor is None:
+ preprocessor = FillMaskPreprocessor(
+ fill_mask_model.model_dir,
+ first_sequence=first_sequence,
+ second_sequence=None)
+ fill_mask_model.eval()
+ super().__init__(
+ model=fill_mask_model, preprocessor=preprocessor, **kwargs)
+
+ self.preprocessor = preprocessor
+ self.tokenizer = preprocessor.tokenizer
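+        # hard-coded mask-token ids of the supported backbones: 103 is [MASK]
+        # in the BERT-style (sbert) vocab, 250001 is <mask> in the
+        # XLM-R-style (veco) vocab.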
+ self.mask_id = {'veco': 250001, 'sbert': 103}
+
+ self.rep_map = {
+ 'sbert': {
+ '[unused0]': '',
+ '[PAD]': '',
+ '[unused1]': '',
+ r' +': ' ',
+ '[SEP]': '',
+ '[unused2]': '',
+ '[CLS]': '',
+ '[UNK]': ''
+ },
+ 'veco': {
+                r' +': ' ',
+                # NOTE: XLM-R-style special tokens are assumed here for veco;
+                # adjust if the backbone tokenizer uses different ones.
+                '<s>': '',
+                '</s>': '',
+                '<pad>': '',
+                '<mask>': '',
+                '<unk>': ' '
+ }
+ }
+
+ def forward(self, inputs: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ with torch.no_grad():
+ return super().forward(inputs, **forward_params)
+
+ def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, Tensor]:
+ """process the prediction results
+
+ Args:
+                inputs (Dict[str, Tensor]): the model outputs, including 'logits' and 'input_ids'
+
+ Returns:
+ Dict[str, str]: the prediction results
+ """
+ import numpy as np
+ logits = inputs['logits'].detach().numpy()
+ input_ids = inputs['input_ids'].detach().numpy()
+ pred_ids = np.argmax(logits, axis=-1)
+ model_type = self.model.config.model_type
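+        # keep the original token ids everywhere except at mask positions,
+        # where the model's argmax predictions are substituted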
+ rst_ids = np.where(input_ids == self.mask_id[model_type], pred_ids,
+ input_ids)
+
+ def rep_tokens(string, rep_map):
+ for k, v in rep_map.items():
+ string = string.replace(k, v)
+ return string.strip()
+
+ pred_strings = []
+ for ids in rst_ids: # batch
+ # TODO vocab size is not stable
+
+ if self.model.config.vocab_size == 21128: # zh bert
+ pred_string = self.tokenizer.convert_ids_to_tokens(ids)
+ pred_string = ''.join(pred_string)
+ else:
+ pred_string = self.tokenizer.decode(ids)
+ pred_string = rep_tokens(pred_string, self.rep_map[model_type])
+ pred_strings.append(pred_string)
+
+ return {'text': pred_strings}
diff --git a/modelscope/pipelines/nlp/nli_pipeline.py b/modelscope/pipelines/nlp/nli_pipeline.py
new file mode 100644
index 00000000..49dc330f
--- /dev/null
+++ b/modelscope/pipelines/nlp/nli_pipeline.py
@@ -0,0 +1,72 @@
+import uuid
+from typing import Any, Dict, Union
+
+import numpy as np
+import torch
+
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.nlp import SbertForNLI
+from ...preprocessors import NLIPreprocessor
+from ...utils.constant import Tasks
+from ..base import Pipeline
+from ..builder import PIPELINES
+
+__all__ = ['NLIPipeline']
+
+
+@PIPELINES.register_module(Tasks.nli, module_name=Pipelines.nli)
+class NLIPipeline(Pipeline):
+
+ def __init__(self,
+ model: Union[SbertForNLI, str],
+ preprocessor: NLIPreprocessor = None,
+ first_sequence='first_sequence',
+ second_sequence='second_sequence',
+ **kwargs):
+ """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
+
+ Args:
+ model (SbertForNLI): a model instance
+ preprocessor (NLIPreprocessor): a preprocessor instance
+ """
+ assert isinstance(model, str) or isinstance(model, SbertForNLI), \
+ 'model must be a single str or SbertForNLI'
+ model = model if isinstance(
+ model, SbertForNLI) else Model.from_pretrained(model)
+ if preprocessor is None:
+ preprocessor = NLIPreprocessor(
+ model.model_dir,
+ first_sequence=first_sequence,
+ second_sequence=second_sequence)
+ model.eval()
+ super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+ assert len(model.id2label) > 0
+
+ def forward(self, inputs: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ with torch.no_grad():
+ return super().forward(inputs, **forward_params)
+
+ def postprocess(self,
+ inputs: Dict[str, Any],
+ topk: int = 5) -> Dict[str, str]:
+ """process the prediction results
+
+ Args:
+                inputs (Dict[str, Any]): the model outputs, including 'probabilities'
+
+ Returns:
+ Dict[str, str]: the prediction results
+ """
+
+ probs = inputs['probabilities'][0]
+ num_classes = probs.shape[0]
+ topk = min(topk, num_classes)
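+        # argpartition selects the top-k probabilities in O(n); argsort then
+        # orders those k indices by ascending probability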
+ top_indices = np.argpartition(probs, -topk)[-topk:]
+ cls_ids = top_indices[np.argsort(probs[top_indices])]
+ probs = probs[cls_ids].tolist()
+
+ cls_names = [self.model.id2label[cid] for cid in cls_ids]
+
+ return {'scores': probs, 'labels': cls_names}
diff --git a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
index 1b630c10..f6bcd72e 100644
--- a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
+++ b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
@@ -1,11 +1,13 @@
from typing import Any, Dict, Union
import numpy as np
+import torch
-from modelscope.models.nlp import SbertForSentenceSimilarity
-from modelscope.preprocessors import SequenceClassificationPreprocessor
-from modelscope.utils.constant import Tasks
+from ...metainfo import Pipelines
from ...models import Model
+from ...models.nlp import SbertForSentenceSimilarity
+from ...preprocessors import SequenceClassificationPreprocessor
+from ...utils.constant import Tasks
from ..base import Input, Pipeline
from ..builder import PIPELINES
@@ -13,13 +15,14 @@ __all__ = ['SentenceSimilarityPipeline']
@PIPELINES.register_module(
- Tasks.sentence_similarity,
- module_name=r'sbert-base-chinese-sentence-similarity')
+ Tasks.sentence_similarity, module_name=Pipelines.sentence_similarity)
class SentenceSimilarityPipeline(Pipeline):
def __init__(self,
- model: Union[SbertForSentenceSimilarity, str],
+ model: Union[Model, str],
preprocessor: SequenceClassificationPreprocessor = None,
+ first_sequence='first_sequence',
+ second_sequence='second_sequence',
**kwargs):
"""use `model` and `preprocessor` to create a nlp sentence similarity pipeline for prediction
@@ -35,14 +38,21 @@ class SentenceSimilarityPipeline(Pipeline):
if preprocessor is None:
preprocessor = SequenceClassificationPreprocessor(
sc_model.model_dir,
- first_sequence='first_sequence',
- second_sequence='second_sequence')
+ first_sequence=first_sequence,
+ second_sequence=second_sequence)
+ sc_model.eval()
super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
assert hasattr(self.model, 'id2label'), \
'id2label map should be initalizaed in init function.'
- def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+ def forward(self, inputs: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ with torch.no_grad():
+ return super().forward(inputs, **forward_params)
+
+ def postprocess(self, inputs: Dict[str, Any],
+ **postprocess_params) -> Dict[str, str]:
"""process the prediction results
Args:
diff --git a/modelscope/pipelines/nlp/sentiment_classification_pipeline.py b/modelscope/pipelines/nlp/sentiment_classification_pipeline.py
new file mode 100644
index 00000000..9291ed44
--- /dev/null
+++ b/modelscope/pipelines/nlp/sentiment_classification_pipeline.py
@@ -0,0 +1,77 @@
+import os
+import uuid
+from typing import Any, Dict, Union
+
+import json
+import numpy as np
+import torch
+
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.nlp import SbertForSentimentClassification
+from ...preprocessors import SentimentClassificationPreprocessor
+from ...utils.constant import Tasks
+from ..base import Input, Pipeline
+from ..builder import PIPELINES
+
+__all__ = ['SentimentClassificationPipeline']
+
+
+@PIPELINES.register_module(
+ Tasks.sentiment_classification,
+ module_name=Pipelines.sentiment_classification)
+class SentimentClassificationPipeline(Pipeline):
+
+ def __init__(self,
+ model: Union[SbertForSentimentClassification, str],
+ preprocessor: SentimentClassificationPreprocessor = None,
+ first_sequence='first_sequence',
+ second_sequence='second_sequence',
+ **kwargs):
+ """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
+
+ Args:
+ model (SbertForSentimentClassification): a model instance
+ preprocessor (SentimentClassificationPreprocessor): a preprocessor instance
+ """
+ assert isinstance(model, str) or isinstance(model, SbertForSentimentClassification), \
+ 'model must be a single str or SbertForSentimentClassification'
+ model = model if isinstance(
+ model,
+ SbertForSentimentClassification) else Model.from_pretrained(model)
+ if preprocessor is None:
+ preprocessor = SentimentClassificationPreprocessor(
+ model.model_dir,
+ first_sequence=first_sequence,
+ second_sequence=second_sequence)
+ model.eval()
+ super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+ assert len(model.id2label) > 0
+
+ def forward(self, inputs: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ with torch.no_grad():
+ return super().forward(inputs, **forward_params)
+
+ def postprocess(self,
+ inputs: Dict[str, Any],
+ topk: int = 5) -> Dict[str, str]:
+ """process the prediction results
+
+ Args:
+                inputs (Dict[str, Any]): the model outputs, including 'probabilities'
+
+ Returns:
+ Dict[str, str]: the prediction results
+ """
+
+ probs = inputs['probabilities'][0]
+ num_classes = probs.shape[0]
+ topk = min(topk, num_classes)
+ top_indices = np.argpartition(probs, -topk)[-topk:]
+ cls_ids = top_indices[np.argsort(probs[top_indices])]
+ probs = probs[cls_ids].tolist()
+
+ cls_names = [self.model.id2label[cid] for cid in cls_ids]
+
+ return {'scores': probs, 'labels': cls_names}
diff --git a/modelscope/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
index 1dbe2efd..43c81d60 100644
--- a/modelscope/pipelines/nlp/sequence_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
@@ -2,6 +2,7 @@ from typing import Any, Dict, Union
import numpy as np
+from modelscope.metainfo import Pipelines
from modelscope.models.nlp import BertForSequenceClassification
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.utils.constant import Tasks
@@ -13,7 +14,7 @@ __all__ = ['SequenceClassificationPipeline']
@PIPELINES.register_module(
- Tasks.text_classification, module_name=r'bert-sentiment-analysis')
+ Tasks.text_classification, module_name=Pipelines.sentiment_analysis)
class SequenceClassificationPipeline(Pipeline):
def __init__(self,
diff --git a/modelscope/pipelines/nlp/space/dialog_state_tracking.py b/modelscope/pipelines/nlp/space/dialog_state_tracking.py
deleted file mode 100644
index 4a943095..00000000
--- a/modelscope/pipelines/nlp/space/dialog_state_tracking.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from typing import Any, Dict, Optional
-
-from modelscope.models.nlp import DialogModelingModel
-from modelscope.preprocessors import DialogModelingPreprocessor
-from modelscope.utils.constant import Tasks
-from ...base import Pipeline, Tensor
-from ...builder import PIPELINES
-
-__all__ = ['DialogStateTrackingPipeline']
-
-
-@PIPELINES.register_module(
- Tasks.dialog_state_tracking, module_name=r'space-dst')
-class DialogStateTrackingPipeline(Pipeline):
-
- def __init__(self, model: DialogModelingModel,
- preprocessor: DialogModelingPreprocessor, **kwargs):
- """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
-
- Args:
- model (SequenceClassificationModel): a model instance
- preprocessor (SequenceClassificationPreprocessor): a preprocessor instance
- """
-
- super().__init__(model=model, preprocessor=preprocessor, **kwargs)
- self.model = model
- self.preprocessor = preprocessor
-
- def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
- """process the prediction results
-
- Args:
- inputs (Dict[str, Any]): _description_
-
- Returns:
- Dict[str, str]: the prediction results
- """
- sys_rsp = self.preprocessor.text_field.tokenizer.convert_ids_to_tokens(
- inputs['resp'])
- assert len(sys_rsp) > 2
- sys_rsp = sys_rsp[1:len(sys_rsp) - 1]
- # sys_rsp = self.preprocessor.text_field.tokenizer.
-
- inputs['sys'] = sys_rsp
-
- return inputs
diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py
index 881e7ea6..8f55cce0 100644
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -1,16 +1,20 @@
-from typing import Dict, Optional, Union
+from typing import Any, Dict, Optional, Union
-from modelscope.models import Model
-from modelscope.models.nlp import PalmForTextGeneration
-from modelscope.preprocessors import TextGenerationPreprocessor
-from modelscope.utils.constant import Tasks
+import torch
+
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.nlp import PalmForTextGeneration
+from ...preprocessors import TextGenerationPreprocessor
+from ...utils.constant import Tasks
from ..base import Pipeline, Tensor
from ..builder import PIPELINES
__all__ = ['TextGenerationPipeline']
-@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0')
+@PIPELINES.register_module(
+ Tasks.text_generation, module_name=Pipelines.text_generation)
class TextGenerationPipeline(Pipeline):
def __init__(self,
@@ -31,10 +35,17 @@ class TextGenerationPipeline(Pipeline):
model.tokenizer,
first_sequence='sentence',
second_sequence=None)
+ model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.tokenizer = model.tokenizer
- def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
+ def forward(self, inputs: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ with torch.no_grad():
+ return super().forward(inputs, **forward_params)
+
+ def postprocess(self, inputs: Dict[str, Tensor],
+ **postprocess_params) -> Dict[str, str]:
"""process the prediction results
Args:
diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
index 1cc08a38..9501efb7 100644
--- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py
+++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
@@ -1,9 +1,12 @@
from typing import Any, Dict, Optional, Union
-from modelscope.models import Model
-from modelscope.models.nlp import StructBertForTokenClassification
-from modelscope.preprocessors import TokenClassifcationPreprocessor
-from modelscope.utils.constant import Tasks
+import torch
+
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.nlp import SbertForTokenClassification
+from ...preprocessors import TokenClassifcationPreprocessor
+from ...utils.constant import Tasks
from ..base import Pipeline, Tensor
from ..builder import PIPELINES
@@ -11,12 +14,11 @@ __all__ = ['WordSegmentationPipeline']
@PIPELINES.register_module(
- Tasks.word_segmentation,
- module_name=r'structbert-chinese-word-segmentation')
+ Tasks.word_segmentation, module_name=Pipelines.word_segmentation)
class WordSegmentationPipeline(Pipeline):
def __init__(self,
- model: Union[StructBertForTokenClassification, str],
+ model: Union[SbertForTokenClassification, str],
preprocessor: Optional[TokenClassifcationPreprocessor] = None,
**kwargs):
"""use `model` and `preprocessor` to create a nlp word segmentation pipeline for prediction
@@ -27,15 +29,23 @@ class WordSegmentationPipeline(Pipeline):
"""
model = model if isinstance(
model,
- StructBertForTokenClassification) else Model.from_pretrained(model)
+ SbertForTokenClassification) else Model.from_pretrained(model)
if preprocessor is None:
preprocessor = TokenClassifcationPreprocessor(model.model_dir)
+ model.eval()
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
self.tokenizer = preprocessor.tokenizer
self.config = model.config
+ assert len(self.config.id2label) > 0
self.id2label = self.config.id2label
- def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+ def forward(self, inputs: Dict[str, Any],
+ **forward_params) -> Dict[str, Any]:
+ with torch.no_grad():
+ return super().forward(inputs, **forward_params)
+
+ def postprocess(self, inputs: Dict[str, Any],
+ **postprocess_params) -> Dict[str, str]:
"""process the prediction results
Args:
diff --git a/modelscope/pipelines/outputs.py b/modelscope/pipelines/outputs.py
index 15d8a995..a950fa69 100644
--- a/modelscope/pipelines/outputs.py
+++ b/modelscope/pipelines/outputs.py
@@ -45,6 +45,12 @@ TASK_OUTPUTS = {
Tasks.image_matting: ['output_png'],
Tasks.image_generation: ['output_png'],
+ # action recognition result for single video
+ # {
+ # "output_label": "abseiling"
+ # }
+ Tasks.action_recognition: ['output_label'],
+
# pose estimation result for single sample
# {
# "poses": np.array with shape [num_pose, num_keypoint, 3],
@@ -54,6 +60,13 @@ TASK_OUTPUTS = {
# }
Tasks.pose_estimation: ['poses', 'boxes'],
+ # ocr detection result for single sample
+ # {
+ # "det_polygons": np.array with shape [num_text, 8], each box is
+ # [x1, y1, x2, y2, x3, y3, x4, y4]
+ # }
+ Tasks.ocr_detection: ['det_polygons'],
+
# ============ nlp tasks ===================
# text classification result for single sample
@@ -69,6 +82,12 @@ TASK_OUTPUTS = {
# }
Tasks.text_generation: ['text'],
+ # fill mask result for single sample
+ # {
+ # "text": "this is the text which masks filled by model."
+ # }
+ Tasks.fill_mask: ['text'],
+
# word segmentation result for single sample
# {
# "output": "今天 天气 不错 , 适合 出去 游玩"
@@ -82,6 +101,20 @@ TASK_OUTPUTS = {
# }
Tasks.sentence_similarity: ['scores', 'labels'],
+ # sentiment classification result for single sample
+ # {
+ # "labels": ["happy", "sad", "calm", "angry"],
+ # "scores": [0.9, 0.1, 0.05, 0.05]
+ # }
+ Tasks.sentiment_classification: ['scores', 'labels'],
+
+ # nli result for single sample
+ # {
+ # "labels": ["happy", "sad", "calm", "angry"],
+ # "scores": [0.9, 0.1, 0.05, 0.05]
+ # }
+ Tasks.nli: ['scores', 'labels'],
+
# ============ audio tasks ===================
# audio processed for single file in PCM format
diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py
index 37c9c929..d034a7d4 100644
--- a/modelscope/pipelines/util.py
+++ b/modelscope/pipelines/util.py
@@ -2,8 +2,8 @@
import os.path as osp
from typing import List, Union
-from maas_hub.file_download import model_file_download
-
+from modelscope.hub.api import HubApi
+from modelscope.hub.file_download import model_file_download
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile
from modelscope.utils.logger import get_logger
@@ -20,31 +20,63 @@ def is_config_has_model(cfg_file):
return False
-def is_model_name(model: Union[str, List]):
- """ whether model is a valid modelhub path
+def is_official_hub_path(path: Union[str, List]):
+ """ Whether path is a official hub name or a valid local
+ path to official hub directory.
+ """
+
+ def is_official_hub_impl(path):
+ if osp.exists(path):
+ cfg_file = osp.join(path, ModelFile.CONFIGURATION)
+ return osp.exists(cfg_file)
+ else:
+ try:
+ _ = HubApi().get_model(path)
+ return True
+ except Exception:
+ return False
+
+ if isinstance(path, str):
+ return is_official_hub_impl(path)
+ else:
+ results = [is_official_hub_impl(m) for m in path]
+ all_true = all(results)
+ any_true = any(results)
+ if any_true and not all_true:
+ raise ValueError(
+                f'some models are hub addresses, some are not, model list: {path}'
+ )
+
+ return all_true
+
+
+def is_model(path: Union[str, List]):
+ """ whether path is a valid modelhub path and containing model config
"""
- def is_model_name_impl(model):
- if osp.exists(model):
- cfg_file = osp.join(model, ModelFile.CONFIGURATION)
+ def is_modelhub_path_impl(path):
+ if osp.exists(path):
+ cfg_file = osp.join(path, ModelFile.CONFIGURATION)
if osp.exists(cfg_file):
return is_config_has_model(cfg_file)
else:
return False
else:
try:
- cfg_file = model_file_download(model, ModelFile.CONFIGURATION)
+ cfg_file = model_file_download(path, ModelFile.CONFIGURATION)
return is_config_has_model(cfg_file)
except Exception:
return False
- if isinstance(model, str):
- return is_model_name_impl(model)
+ if isinstance(path, str):
+ return is_modelhub_path_impl(path)
else:
- results = [is_model_name_impl(m) for m in model]
+ results = [is_modelhub_path_impl(m) for m in path]
all_true = all(results)
any_true = any(results)
if any_true and not all_true:
- raise ValueError('some model are hub address, some are not')
+ raise ValueError(
+                f'some models are hub addresses, some are not, model list: {path}'
+ )
return all_true
diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py
index 7b67507a..fe68173a 100644
--- a/modelscope/preprocessors/__init__.py
+++ b/modelscope/preprocessors/__init__.py
@@ -1,12 +1,12 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-from .audio import LinearAECAndFbank
+# from .audio import LinearAECAndFbank
from .base import Preprocessor
-from .builder import PREPROCESSORS, build_preprocessor
+# from .builder import PREPROCESSORS, build_preprocessor
from .common import Compose
-from .image import LoadImage, load_image
from .nlp import * # noqa F403
from .space.dialog_intent_prediction_preprocessor import * # noqa F403
from .space.dialog_modeling_preprocessor import * # noqa F403
from .space.dialog_state_tracking_preprocessor import * # noqa F403
-from .text_to_speech import * # noqa F403
+
+# from .text_to_speech import * # noqa F403
diff --git a/modelscope/preprocessors/image.py b/modelscope/preprocessors/image.py
index 6bd8aed5..b2123fb7 100644
--- a/modelscope/preprocessors/image.py
+++ b/modelscope/preprocessors/image.py
@@ -5,11 +5,12 @@ from typing import Dict, Union
from PIL import Image, ImageOps
from modelscope.fileio import File
+from modelscope.metainfo import Preprocessors
from modelscope.utils.constant import Fields
from .builder import PREPROCESSORS
-@PREPROCESSORS.register_module(Fields.cv)
+@PREPROCESSORS.register_module(Fields.cv, Preprocessors.load_image)
class LoadImage:
"""Load an image from file or url.
Added or updated keys are "filename", "img", "img_shape",
diff --git a/modelscope/pipelines/multi_modal/image_caption_pipeline.py b/modelscope/preprocessors/multi_modal.py
similarity index 55%
rename from modelscope/pipelines/multi_modal/image_caption_pipeline.py
rename to modelscope/preprocessors/multi_modal.py
index 3e5f49d0..7c8f0fab 100644
--- a/modelscope/pipelines/multi_modal/image_caption_pipeline.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -1,32 +1,48 @@
-from typing import Any, Dict
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path as osp
+from typing import Any, Dict, Union
import numpy as np
import torch
from PIL import Image
-from modelscope.pipelines.base import Input
-from modelscope.preprocessors import load_image
-from modelscope.utils.constant import Tasks
-from modelscope.utils.logger import get_logger
-from ..base import Pipeline
-from ..builder import PIPELINES
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.metainfo import Preprocessors
+from modelscope.utils.constant import Fields, ModelFile
+from modelscope.utils.type_assert import type_assert
+from .base import Preprocessor
+from .builder import PREPROCESSORS
+from .image import load_image
-logger = get_logger()
+__all__ = [
+ 'OfaImageCaptionPreprocessor',
+]
-@PIPELINES.register_module(Tasks.image_captioning, module_name='ofa')
-class ImageCaptionPipeline(Pipeline):
- # TODO: refine using modelhub
- def __init__(self, model: str, bpe_dir: str):
- super().__init__()
- # turn on cuda if GPU is available
+@PREPROCESSORS.register_module(
+ Fields.multi_modal, module_name=Preprocessors.ofa_image_caption)
+class OfaImageCaptionPreprocessor(Preprocessor):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ """preprocess the data via the vocab.txt from the `model_dir` path
+
+ Args:
+ model_dir (str): model path
+ """
+ super().__init__(*args, **kwargs)
+
+ if osp.exists(model_dir):
+ local_model_dir = model_dir
+ else:
+ local_model_dir = snapshot_download(model_dir)
+ local_model = osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE)
+ bpe_dir = local_model_dir
+
from fairseq import checkpoint_utils, tasks, utils
from ofa.tasks.mm_tasks import CaptionTask
tasks.register_task('caption', CaptionTask)
- use_cuda = False
- # use fp16 only when GPU is available
- use_fp16 = False
+
overrides = {
'bpe_dir': bpe_dir,
'eval_cider': False,
@@ -35,21 +51,9 @@ class ImageCaptionPipeline(Pipeline):
'no_repeat_ngram_size': 3,
'seed': 7
}
- models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
- utils.split_paths(model), arg_overrides=overrides)
-
- # Move models to GPU
- for model in models:
- model.eval()
- if use_cuda:
- model.cuda()
- if use_fp16:
- model.half()
- model.prepare_for_inference_(cfg)
- self.models = models
- # Initialize generator
- self.generator = task.build_generator(models, cfg.generation)
-
+ model, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
+ utils.split_paths(local_model), arg_overrides=overrides)
+ del model
# Initialize transform
from torchvision import transforms
mean = [0.5, 0.5, 0.5]
@@ -69,7 +73,8 @@ class ImageCaptionPipeline(Pipeline):
self.eos_item = torch.LongTensor([task.src_dict.eos()])
self.pad_idx = task.src_dict.pad()
- def preprocess(self, input: Input) -> Dict[str, Any]:
+ @type_assert(object, (str, tuple, Image.Image))
+    def __call__(self, data: Union[str, tuple, Image.Image]) -> Dict[str, Any]:
def encode_text(text, length=None, append_bos=False, append_eos=False):
s = self.task.tgt_dict.encode_line(
@@ -84,11 +89,11 @@ class ImageCaptionPipeline(Pipeline):
s = torch.cat([s, self.eos_item])
return s
- if isinstance(input, Image.Image):
- patch_image = self.patch_resize_transform(input).unsqueeze(0)
+ if isinstance(data, Image.Image):
+ patch_image = self.patch_resize_transform(data).unsqueeze(0)
else:
patch_image = self.patch_resize_transform(
- load_image(input)).unsqueeze(0)
+ load_image(data)).unsqueeze(0)
patch_mask = torch.tensor([True])
text = 'what does the image describe?'
src_text = encode_text(
@@ -105,17 +110,3 @@ class ImageCaptionPipeline(Pipeline):
}
}
return sample
-
- def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
- from ofa.utils.eval_utils import eval_caption
-
- results, _ = eval_caption(self.task, self.generator, self.models,
- input)
- return {
- 'image_id': results[0]['image_id'],
- 'caption': results[0]['caption']
- }
-
- def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
- # What should we do here ?
- return inputs
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 9bcaa87c..f998da37 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -5,14 +5,17 @@ from typing import Any, Dict, Union
from transformers import AutoTokenizer
-from modelscope.utils.constant import Fields, InputFields
-from modelscope.utils.type_assert import type_assert
+from ..metainfo import Models, Preprocessors
+from ..utils.constant import Fields, InputFields
+from ..utils.type_assert import type_assert
from .base import Preprocessor
from .builder import PREPROCESSORS
__all__ = [
'Tokenize', 'SequenceClassificationPreprocessor',
- 'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor'
+ 'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor',
+ 'NLIPreprocessor', 'SentimentClassificationPreprocessor',
+ 'FillMaskPreprocessor'
]
@@ -31,7 +34,141 @@ class Tokenize(Preprocessor):
@PREPROCESSORS.register_module(
- Fields.nlp, module_name=r'bert-sequence-classification')
+ Fields.nlp, module_name=Preprocessors.nli_tokenizer)
+class NLIPreprocessor(Preprocessor):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ """preprocess the data via the vocab.txt from the `model_dir` path
+
+ Args:
+ model_dir (str): model path
+ """
+
+ super().__init__(*args, **kwargs)
+
+ from sofa import SbertTokenizer
+ self.model_dir: str = model_dir
+ self.first_sequence: str = kwargs.pop('first_sequence',
+ 'first_sequence')
+ self.second_sequence = kwargs.pop('second_sequence', 'second_sequence')
+ self.sequence_length = kwargs.pop('sequence_length', 128)
+
+ self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)
+
+ @type_assert(object, tuple)
+ def __call__(self, data: tuple) -> Dict[str, Any]:
+ """process the raw input data
+
+ Args:
+ data (tuple): [sentence1, sentence2]
+ sentence1 (str): a sentence
+ Example:
+ 'you are so handsome.'
+ sentence2 (str): a sentence
+ Example:
+ 'you are so beautiful.'
+ Returns:
+ Dict[str, Any]: the preprocessed data
+ """
+ sentence1, sentence2 = data
+ new_data = {
+ self.first_sequence: sentence1,
+ self.second_sequence: sentence2
+ }
+ # preprocess the data for the model input
+
+ rst = {
+ 'id': [],
+ 'input_ids': [],
+ 'attention_mask': [],
+ 'token_type_ids': []
+ }
+
+ max_seq_length = self.sequence_length
+
+ text_a = new_data[self.first_sequence]
+ text_b = new_data[self.second_sequence]
+ feature = self.tokenizer(
+ text_a,
+ text_b,
+ padding=False,
+ truncation=True,
+ max_length=max_seq_length)
+
+ rst['id'].append(new_data.get('id', str(uuid.uuid4())))
+ rst['input_ids'].append(feature['input_ids'])
+ rst['attention_mask'].append(feature['attention_mask'])
+ rst['token_type_ids'].append(feature['token_type_ids'])
+
+ return rst
+
+
+@PREPROCESSORS.register_module(
+ Fields.nlp, module_name=Preprocessors.sen_cls_tokenizer)
+class SentimentClassificationPreprocessor(Preprocessor):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ """preprocess the data via the vocab.txt from the `model_dir` path
+
+ Args:
+ model_dir (str): model path
+ """
+
+ super().__init__(*args, **kwargs)
+
+ from sofa import SbertTokenizer
+ self.model_dir: str = model_dir
+ self.first_sequence: str = kwargs.pop('first_sequence',
+ 'first_sequence')
+ self.second_sequence = kwargs.pop('second_sequence', 'second_sequence')
+ self.sequence_length = kwargs.pop('sequence_length', 128)
+
+ self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)
+
+ @type_assert(object, str)
+ def __call__(self, data: str) -> Dict[str, Any]:
+ """process the raw input data
+
+ Args:
+ data (str): a sentence
+ Example:
+ 'you are so handsome.'
+ Returns:
+ Dict[str, Any]: the preprocessed data
+ """
+
+ new_data = {self.first_sequence: data}
+ # preprocess the data for the model input
+
+ rst = {
+ 'id': [],
+ 'input_ids': [],
+ 'attention_mask': [],
+ 'token_type_ids': []
+ }
+
+ max_seq_length = self.sequence_length
+
+ text_a = new_data[self.first_sequence]
+
+ text_b = new_data.get(self.second_sequence, None)
+ feature = self.tokenizer(
+ text_a,
+ text_b,
+ padding='max_length',
+ truncation=True,
+ max_length=max_seq_length)
+
+ rst['id'].append(new_data.get('id', str(uuid.uuid4())))
+ rst['input_ids'].append(feature['input_ids'])
+ rst['attention_mask'].append(feature['attention_mask'])
+ rst['token_type_ids'].append(feature['token_type_ids'])
+
+ return rst
+
+
+@PREPROCESSORS.register_module(
+ Fields.nlp, module_name=Preprocessors.bert_seq_cls_tokenizer)
class SequenceClassificationPreprocessor(Preprocessor):
def __init__(self, model_dir: str, *args, **kwargs):
@@ -53,12 +190,12 @@ class SequenceClassificationPreprocessor(Preprocessor):
self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)
print(f'this is the tokenzier {self.tokenizer}')
- @type_assert(object, (str, tuple))
- def __call__(self, data: Union[str, tuple]) -> Dict[str, Any]:
+ @type_assert(object, (str, tuple, Dict))
+ def __call__(self, data: Union[str, tuple, Dict]) -> Dict[str, Any]:
"""process the raw input data
Args:
- data (str or tuple):
+            data (str, tuple or Dict):
sentence1 (str): a sentence
Example:
'you are so handsome.'
@@ -70,22 +207,31 @@ class SequenceClassificationPreprocessor(Preprocessor):
sentence2 (str): a sentence
Example:
'you are so beautiful.'
+ or
+ {field1: field_value1, field2: field_value2}
+ field1 (str): field name, default 'first_sequence'
+ field_value1 (str): a sentence
+ Example:
+ 'you are so handsome.'
+
+ field2 (str): field name, default 'second_sequence'
+ field_value2 (str): a sentence
+ Example:
+ 'you are so beautiful.'
Returns:
Dict[str, Any]: the preprocessed data
"""
-
- if not isinstance(data, tuple):
- data = (
- data,
- None,
- )
-
- sentence1, sentence2 = data
- new_data = {
- self.first_sequence: sentence1,
- self.second_sequence: sentence2
- }
+ if isinstance(data, str):
+ new_data = {self.first_sequence: data}
+ elif isinstance(data, tuple):
+ sentence1, sentence2 = data
+ new_data = {
+ self.first_sequence: sentence1,
+ self.second_sequence: sentence2
+ }
+ else:
+ new_data = data
# preprocess the data for the model input
@@ -115,7 +261,8 @@ class SequenceClassificationPreprocessor(Preprocessor):
return rst
-@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0')
+@PREPROCESSORS.register_module(
+ Fields.nlp, module_name=Preprocessors.palm_text_gen_tokenizer)
class TextGenerationPreprocessor(Preprocessor):
def __init__(self, model_dir: str, tokenizer, *args, **kwargs):
@@ -166,12 +313,66 @@ class TextGenerationPreprocessor(Preprocessor):
rst['input_ids'].append(feature['input_ids'])
rst['attention_mask'].append(feature['attention_mask'])
+ return {k: torch.tensor(v) for k, v in rst.items()}
+
+
+@PREPROCESSORS.register_module(Fields.nlp)
+class FillMaskPreprocessor(Preprocessor):
+
+ def __init__(self, model_dir: str, *args, **kwargs):
+ """preprocess the data via the vocab.txt from the `model_dir` path
+
+ Args:
+ model_dir (str): model path
+ """
+ super().__init__(*args, **kwargs)
+ from sofa.utils.backend import AutoTokenizer
+ self.model_dir = model_dir
+ self.first_sequence: str = kwargs.pop('first_sequence',
+ 'first_sequence')
+ self.sequence_length = kwargs.pop('sequence_length', 128)
+
+ self.tokenizer = AutoTokenizer.from_pretrained(
+ model_dir, use_fast=False)
+
+ @type_assert(object, str)
+ def __call__(self, data: str) -> Dict[str, Any]:
+ """process the raw input data
+
+ Args:
+ data (str): a sentence
+ Example:
+ 'you are so handsome.'
+
+ Returns:
+ Dict[str, Any]: the preprocessed data
+ """
+ import torch
+
+ new_data = {self.first_sequence: data}
+ # preprocess the data for the model input
+
+ rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []}
+
+ max_seq_length = self.sequence_length
+
+ text_a = new_data[self.first_sequence]
+ feature = self.tokenizer(
+ text_a,
+ padding='max_length',
+ truncation=True,
+ max_length=max_seq_length,
+ return_token_type_ids=True)
+
+ rst['input_ids'].append(feature['input_ids'])
+ rst['attention_mask'].append(feature['attention_mask'])
+ rst['token_type_ids'].append(feature['token_type_ids'])
return {k: torch.tensor(v) for k, v in rst.items()}
@PREPROCESSORS.register_module(
- Fields.nlp, module_name=r'bert-token-classification')
+ Fields.nlp, module_name=Preprocessors.token_cls_tokenizer)
class TokenClassifcationPreprocessor(Preprocessor):
def __init__(self, model_dir: str, *args, **kwargs):
@@ -199,6 +400,7 @@ class TokenClassifcationPreprocessor(Preprocessor):
Returns:
Dict[str, Any]: the preprocessed data
"""
+
# preprocess the data for the model input
text = data.replace(' ', '').strip()
diff --git a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
index c5a6b34c..733abf24 100644
--- a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
+++ b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
@@ -3,13 +3,12 @@
import os
from typing import Any, Dict
-from modelscope.preprocessors.space.fields.intent_field import \
- IntentBPETextField
-from modelscope.utils.config import Config
-from modelscope.utils.constant import Fields
-from modelscope.utils.type_assert import type_assert
+from ...utils.config import Config
+from ...utils.constant import Fields
+from ...utils.type_assert import type_assert
from ..base import Preprocessor
from ..builder import PREPROCESSORS
+from .fields.intent_field import IntentBPETextField
__all__ = ['DialogIntentPredictionPreprocessor']
diff --git a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py
index 5061ba35..b0758b40 100644
--- a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py
+++ b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py
@@ -1,16 +1,14 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
-import uuid
-from typing import Any, Dict, Union
-
-from modelscope.preprocessors.space.fields.gen_field import \
- MultiWOZBPETextField
-from modelscope.utils.config import Config
-from modelscope.utils.constant import Fields, InputFields
-from modelscope.utils.type_assert import type_assert
+from typing import Any, Dict
+
+from ...utils.config import Config
+from ...utils.constant import Fields
+from ...utils.type_assert import type_assert
from ..base import Preprocessor
from ..builder import PREPROCESSORS
+from .fields.gen_field import MultiWOZBPETextField
__all__ = ['DialogModelingPreprocessor']
diff --git a/modelscope/preprocessors/space/fields/dst_processors.py b/modelscope/preprocessors/space/fields/dst_processors.py
index 6d888bff..c5c81f66 100644
--- a/modelscope/preprocessors/space/fields/dst_processors.py
+++ b/modelscope/preprocessors/space/fields/dst_processors.py
@@ -154,14 +154,16 @@ utter3 = {
'User-2':
'I am looking for an expensive indian restaurant in the area of centre.',
'System-2':
- 'Might I recommend Saffron Brasserie? That is an expensive Indian restaurant in the center of town. I can book a table for you, if you like.',
+ 'Might I recommend Saffron Brasserie? That is an expensive Indian restaurant '
+ 'in the center of town. I can book a table for you, if you like.',
'Dialog_Act-2': {
'Restaurant-Recommend': [['area', 'center of town'],
['food', 'Indian'],
['name', 'Saffron Brasserie'],
['pricerange', 'expensive']]
},
- 'User-3': 'Sure thing, please book for 6 people at 19:30 on Saturday.'
+ 'User-3':
+ 'Sure thing, please book for 6 people at 19:30 on Saturday.'
}
history_states3 = [{}, {
@@ -346,7 +348,6 @@ history_states3 = [{}, {
class DSTProcessor(object):
-
ACTS_DICT = {
'taxi-depart': 'taxi-departure',
'taxi-dest': 'taxi-destination',
@@ -380,7 +381,8 @@ class DSTProcessor(object):
def _convert_inputs_to_utterances(self, inputs: dict,
history_states: list):
- """This method is to generate the utterances with user, sys, dialog_acts and metadata, while metadata is from the history_states or the output from the inference pipline"""
+ """This method is to generate the utterances with user, sys, dialog_acts and metadata,
+ while metadata is from the history_states or the output from the inference pipline"""
utterances = []
user_inputs = []
@@ -427,8 +429,8 @@ class DSTProcessor(object):
if isinstance(item, dict):
for a in item:
aa = a.lower().split('-')
- if aa[1] == 'inform' or aa[1] == 'recommend' or aa[
- 1] == 'select' or aa[1] == 'book':
+ if aa[1] == 'inform' or aa[1] == 'recommend' or \
+ aa[1] == 'select' or aa[1] == 'book':
for i in item[a]:
s = i[0].lower()
v = i[1].lower().strip()
@@ -443,7 +445,7 @@ class DSTProcessor(object):
if key not in s_dict:
s_dict[key] = list([v])
# ... Option 2: Keep last informed value
- #s_dict[key] = list([v])
+ # s_dict[key] = list([v])
return s_dict
@@ -454,26 +456,26 @@ class multiwoz22Processor(DSTProcessor):
super().__init__()
def normalize_time(self, text):
- text = re.sub('(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2',
+ text = re.sub(r'(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2',
text) # am/pm without space
- text = re.sub('(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3',
+ text = re.sub(r'(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3',
text) # am/pm short to long form
text = re.sub(
- '(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)',
+ r'(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)',
r'\1\2 \3:\4\5', text) # Missing separator
- text = re.sub('(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3',
+ text = re.sub(r'(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3',
text) # Wrong separator
- text = re.sub('(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)',
+ text = re.sub(r'(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)',
r'\1\2 \3:00\4', text) # normalize simple full hour time
- text = re.sub('(^| )(\d{1}:\d{2})', r'\g<1>0\2',
+ text = re.sub(r'(^| )(\d{1}:\d{2})', r'\g<1>0\2',
text) # Add missing leading 0
# Map 12 hour times to 24 hour times
- text = re.sub(
- '(\d{2})(:\d{2}) ?p\.?m\.?', lambda x: str(
- int(x.groups()[0]) + 12
- if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups(
- )[1], text)
- text = re.sub('(^| )24:(\d{2})', r'\g<1>00:\2',
+ text = \
+ re.sub(
+ r'(\d{2})(:\d{2}) ?p\.?m\.?',
+ lambda x: str(int(x.groups()[0]) + 12
+ if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups()[1], text)
+ text = re.sub(r'(^| )24:(\d{2})', r'\g<1>00:\2',
text) # Correct times that use 24 as hour
return text
@@ -508,8 +510,8 @@ class multiwoz22Processor(DSTProcessor):
if isinstance(acts[d][t]['dialog_act'], dict):
for a in acts[d][t]['dialog_act']:
aa = a.lower().split('-')
- if aa[1] == 'inform' or aa[1] == 'recommend' or aa[
- 1] == 'select' or aa[1] == 'book':
+ if aa[1] == 'inform' or aa[1] == 'recommend' \
+ or aa[1] == 'select' or aa[1] == 'book':
for i in acts[d][t]['dialog_act'][a]:
s = i[0].lower()
v = i[1].lower().strip()
@@ -524,7 +526,7 @@ class multiwoz22Processor(DSTProcessor):
if key not in s_dict:
s_dict[key] = list([v])
# ... Option 2: Keep last informed value
- #s_dict[key] = list([v])
+ # s_dict[key] = list([v])
return s_dict
# This should only contain label normalizations. All other mappings should
@@ -560,7 +562,7 @@ class multiwoz22Processor(DSTProcessor):
utt_lower = convert_to_unicode(utt).lower()
utt_lower = self.normalize_text(utt_lower)
utt_tok = [
- tok for tok in map(str.strip, re.split('(\W+)', utt_lower))
+ tok for tok in map(str.strip, re.split(r'(\W+)', utt_lower))
if len(tok) > 0
]
return utt_tok
@@ -582,7 +584,7 @@ class multiwoz22Processor(DSTProcessor):
find_pos = []
found = False
label_list = [
- item for item in map(str.strip, re.split('(\W+)', value_label))
+ item for item in map(str.strip, re.split(r'(\W+)', value_label))
if len(item) > 0
]
len_label = len(label_list)
@@ -633,11 +635,11 @@ class multiwoz22Processor(DSTProcessor):
def is_in_list(self, tok, value):
found = False
tok_list = [
- item for item in map(str.strip, re.split('(\W+)', tok))
+ item for item in map(str.strip, re.split(r'(\W+)', tok))
if len(item) > 0
]
value_list = [
- item for item in map(str.strip, re.split('(\W+)', value))
+ item for item in map(str.strip, re.split(r'(\W+)', value))
if len(item) > 0
]
tok_len = len(tok_list)
@@ -938,8 +940,8 @@ class multiwoz22Processor(DSTProcessor):
if slot not in diag_seen_slots_dict or value_label != diag_seen_slots_value_dict[
slot]:
print('(%s): %s, ' % (slot, value_label), end='')
- elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[
- slot] and class_type != 'copy_value' and class_type != 'inform':
+ elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[slot] \
+ and class_type != 'copy_value' and class_type != 'inform':
# If slot has seen before and its class type did not change, label this slot a not present,
# assuming that the slot has not actually been mentioned in this turn.
# Exceptions are copy_value and inform. If a seen slot has been tagged as copy_value or inform,
@@ -1262,7 +1264,7 @@ def convert_examples_to_features(examples,
def _get_start_end_pos(class_type, token_label_ids, max_seq_length):
if class_type == 'copy_value' and 1 not in token_label_ids:
- #logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.")
+ # logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.")
class_type = 'none'
start_pos = 0
end_pos = 0
diff --git a/modelscope/preprocessors/space/fields/gen_field.py b/modelscope/preprocessors/space/fields/gen_field.py
index 7012697f..49a30e8f 100644
--- a/modelscope/preprocessors/space/fields/gen_field.py
+++ b/modelscope/preprocessors/space/fields/gen_field.py
@@ -8,10 +8,10 @@ from itertools import chain
import numpy as np
-from modelscope.preprocessors.space.tokenizer import Tokenizer
-from modelscope.utils.nlp.space import ontology, utils
-from modelscope.utils.nlp.space.db_ops import MultiWozDB
-from modelscope.utils.nlp.space.utils import list2np
+from ....utils.nlp.space import ontology, utils
+from ....utils.nlp.space.db_ops import MultiWozDB
+from ....utils.nlp.space.utils import list2np
+from ..tokenizer import Tokenizer
class BPETextField(object):
diff --git a/modelscope/preprocessors/space/fields/intent_field.py b/modelscope/preprocessors/space/fields/intent_field.py
index 9907165e..35e1693c 100644
--- a/modelscope/preprocessors/space/fields/intent_field.py
+++ b/modelscope/preprocessors/space/fields/intent_field.py
@@ -14,10 +14,10 @@ import json
import numpy as np
from tqdm import tqdm
-from modelscope.preprocessors.space.tokenizer import Tokenizer
-from modelscope.utils.nlp.space import ontology, utils
-from modelscope.utils.nlp.space.scores import hierarchical_set_score
-from modelscope.utils.nlp.space.utils import list2np
+from ....utils.nlp.space import ontology, utils
+from ....utils.nlp.space.scores import hierarchical_set_score
+from ....utils.nlp.space.utils import list2np
+from ..tokenizer import Tokenizer
class BPETextField(object):
diff --git a/modelscope/preprocessors/text_to_speech.py b/modelscope/preprocessors/text_to_speech.py
index fd41b752..9d8af6fa 100644
--- a/modelscope/preprocessors/text_to_speech.py
+++ b/modelscope/preprocessors/text_to_speech.py
@@ -2,9 +2,8 @@
import io
from typing import Any, Dict, Union
-import ttsfrd
-
from modelscope.fileio import File
+from modelscope.metainfo import Preprocessors
from modelscope.models.audio.tts.frontend import GenericTtsFrontend
from modelscope.models.base import Model
from modelscope.utils.audio.tts_exceptions import * # noqa F403
@@ -12,11 +11,11 @@ from modelscope.utils.constant import Fields
from .base import Preprocessor
from .builder import PREPROCESSORS
-__all__ = ['TextToTacotronSymbols', 'text_to_tacotron_symbols']
+__all__ = ['TextToTacotronSymbols']
@PREPROCESSORS.register_module(
- Fields.audio, module_name=r'text_to_tacotron_symbols')
+ Fields.audio, module_name=Preprocessors.text_to_tacotron_symbols)
class TextToTacotronSymbols(Preprocessor):
"""extract tacotron symbols from text.
diff --git a/modelscope/preprocessors/video.py b/modelscope/preprocessors/video.py
new file mode 100644
index 00000000..262fdaa5
--- /dev/null
+++ b/modelscope/preprocessors/video.py
@@ -0,0 +1,232 @@
+import math
+import os
+import random
+
+import decord
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.utils.data
+import torch.utils.dlpack as dlpack
+import torchvision.transforms._transforms_video as transforms
+from decord import VideoReader
+from torchvision.transforms import Compose
+
+
+def ReadVideoData(cfg, video_path):
+ """ simple interface to load video frames from file
+
+ Args:
+ cfg (Config): The global config object.
+ video_path (str): video file path
+ """
+ data = _decode_video(cfg, video_path)
+ transform = kinetics400_tranform(cfg)
+ data_list = []
+ for i in range(data.size(0)):
+ for j in range(cfg.TEST.NUM_SPATIAL_CROPS):
+ transform.transforms[1].set_spatial_index(j)
+ data_list.append(transform(data[i]))
+ return torch.stack(data_list, dim=0)
+
+
+def kinetics400_tranform(cfg):
+ """
+    Configures the transform for the kinetics-400 dataset.
+ We apply controlled spatial cropping and normalization.
+ Args:
+ cfg (Config): The global config object.
+ """
+ resize_video = KineticsResizedCrop(
+ short_side_range=[cfg.DATA.TEST_SCALE, cfg.DATA.TEST_SCALE],
+ crop_size=cfg.DATA.TEST_CROP_SIZE,
+ num_spatial_crops=cfg.TEST.NUM_SPATIAL_CROPS)
+ std_transform_list = [
+ transforms.ToTensorVideo(), resize_video,
+ transforms.NormalizeVideo(
+ mean=cfg.DATA.MEAN, std=cfg.DATA.STD, inplace=True)
+ ]
+ return Compose(std_transform_list)
+
+
+def _interval_based_sampling(vid_length, vid_fps, target_fps, clip_idx,
+ num_clips, num_frames, interval, minus_interval):
+ """
+ Generates the frame index list using interval based sampling.
+ Args:
+ vid_length (int): the length of the whole video (valid selection range).
+ vid_fps (int): the original video fps
+ target_fps (int): the normalized video fps
+ clip_idx (int): -1 for random temporal sampling, and positive values for
+ sampling specific clip from the video
+ num_clips (int): the total clips to be sampled from each video.
+ combined with clip_idx, the sampled video is the "clip_idx-th"
+ video from "num_clips" videos.
+ num_frames (int): number of frames in each sampled clips.
+ interval (int): the interval to sample each frame.
+ minus_interval (bool): control the end index
+ Returns:
+ index (tensor): the sampled frame indexes
+ """
+ if num_frames == 1:
+ index = [random.randint(0, vid_length - 1)]
+ else:
+ # transform FPS
+ clip_length = num_frames * interval * vid_fps / target_fps
+
+ max_idx = max(vid_length - clip_length, 0)
+ start_idx = clip_idx * math.floor(max_idx / (num_clips - 1))
+ if minus_interval:
+ end_idx = start_idx + clip_length - interval
+ else:
+ end_idx = start_idx + clip_length - 1
+
+ index = torch.linspace(start_idx, end_idx, num_frames)
+ index = torch.clamp(index, 0, vid_length - 1).long()
+
+ return index
+
+
+def _decode_video_frames_list(cfg, frames_list, vid_fps):
+ """
+ Decodes the video given the numpy frames.
+ Args:
+ cfg (Config): The global config object.
+ frames_list (list): all frames for a video, the frames should be numpy array.
+ vid_fps (int): the fps of this video.
+ Returns:
+ frames (Tensor): video tensor data
+ """
+ assert isinstance(frames_list, list)
+ num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS
+
+ frame_list = []
+ for clip_idx in range(num_clips_per_video):
+ # for each clip in the video,
+ # a list is generated before decoding the specified frames from the video
+ list_ = _interval_based_sampling(
+ len(frames_list), vid_fps, cfg.DATA.TARGET_FPS, clip_idx,
+ num_clips_per_video, cfg.DATA.NUM_INPUT_FRAMES,
+ cfg.DATA.SAMPLING_RATE, cfg.DATA.MINUS_INTERVAL)
+ frames = None
+ frames = torch.from_numpy(
+ np.stack([frames_list[l_index] for l_index in list_.tolist()],
+ axis=0))
+ frame_list.append(frames)
+ frames = torch.stack(frame_list)
+ if num_clips_per_video == 1:
+ frames = frames.squeeze(0)
+
+ return frames
+
+
+def _decode_video(cfg, path):
+ """
+    Decodes the video from the given file path.
+    Args:
+        cfg (Config): The global config object.
+        path (str): video file path.
+ Returns:
+ frames (Tensor): video tensor data
+ """
+ vr = VideoReader(path)
+
+ num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS
+
+ frame_list = []
+ for clip_idx in range(num_clips_per_video):
+ # for each clip in the video,
+ # a list is generated before decoding the specified frames from the video
+ list_ = _interval_based_sampling(
+ len(vr), vr.get_avg_fps(), cfg.DATA.TARGET_FPS, clip_idx,
+ num_clips_per_video, cfg.DATA.NUM_INPUT_FRAMES,
+ cfg.DATA.SAMPLING_RATE, cfg.DATA.MINUS_INTERVAL)
+ frames = None
+ if path.endswith('.avi'):
+ append_list = torch.arange(0, list_[0], 4)
+ frames = dlpack.from_dlpack(
+ vr.get_batch(torch.cat([append_list,
+ list_])).to_dlpack()).clone()
+ frames = frames[append_list.shape[0]:]
+ else:
+ frames = dlpack.from_dlpack(
+ vr.get_batch(list_).to_dlpack()).clone()
+ frame_list.append(frames)
+ frames = torch.stack(frame_list)
+ if num_clips_per_video == 1:
+ frames = frames.squeeze(0)
+ del vr
+ return frames
+
+
+class KineticsResizedCrop(object):
+ """Perform resize and crop for kinetics-400 dataset
+ Args:
+        short_side_range (list): The range of the short side length. In inference, this should be [256, 256].
+ crop_size (int): The cropped size for frames.
+ num_spatial_crops (int): The number of the cropped spatial regions in each video.
+ """
+
+ def __init__(
+ self,
+ short_side_range,
+ crop_size,
+ num_spatial_crops=1,
+ ):
+ self.idx = -1
+ self.short_side_range = short_side_range
+ self.crop_size = int(crop_size)
+ self.num_spatial_crops = num_spatial_crops
+
+ def _get_controlled_crop(self, clip):
+ """Perform controlled crop for video tensor.
+ Args:
+ clip (Tensor): the video data, the shape is [T, C, H, W]
+ """
+ _, _, clip_height, clip_width = clip.shape
+
+ length = self.short_side_range[0]
+
+ if clip_height < clip_width:
+ new_clip_height = int(length)
+ new_clip_width = int(clip_width / clip_height * new_clip_height)
+ new_clip = torch.nn.functional.interpolate(
+ clip, size=(new_clip_height, new_clip_width), mode='bilinear')
+ else:
+ new_clip_width = int(length)
+ new_clip_height = int(clip_height / clip_width * new_clip_width)
+ new_clip = torch.nn.functional.interpolate(
+ clip, size=(new_clip_height, new_clip_width), mode='bilinear')
+ x_max = int(new_clip_width - self.crop_size)
+ y_max = int(new_clip_height - self.crop_size)
+ if self.num_spatial_crops == 1:
+ x = x_max // 2
+ y = y_max // 2
+ elif self.num_spatial_crops == 3:
+ if self.idx == 0:
+ if new_clip_width == length:
+ x = x_max // 2
+ y = 0
+ elif new_clip_height == length:
+ x = 0
+ y = y_max // 2
+ elif self.idx == 1:
+ x = x_max // 2
+ y = y_max // 2
+ elif self.idx == 2:
+ if new_clip_width == length:
+ x = x_max // 2
+ y = y_max
+ elif new_clip_height == length:
+ x = x_max
+ y = y_max // 2
+ return new_clip[:, :, y:y + self.crop_size, x:x + self.crop_size]
+
+ def set_spatial_index(self, idx):
+ """Set the spatial cropping index for controlled cropping..
+ Args:
+ idx (int): the spatial index. The value should be in [0, 1, 2], means [left, center, right], respectively.
+ """
+ self.idx = idx
+
+ def __call__(self, clip):
+ return self._get_controlled_crop(clip)
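For reference, a minimal usage sketch of the new ReadVideoData helper, assuming an easydict-style config that carries the fields read by the module above (all values below are illustrative, not the model's actual settings):

    from easydict import EasyDict

    from modelscope.preprocessors.video import ReadVideoData

    cfg = EasyDict({
        'DATA': {
            'TEST_SCALE': 256,
            'TEST_CROP_SIZE': 224,
            'MEAN': [0.45, 0.45, 0.45],
            'STD': [0.225, 0.225, 0.225],
            'TARGET_FPS': 30,
            'NUM_INPUT_FRAMES': 8,
            'SAMPLING_RATE': 8,
            'MINUS_INTERVAL': False,
        },
        'TEST': {
            # must be > 1 here, since interval sampling divides by (num_clips - 1)
            'NUM_ENSEMBLE_VIEWS': 2,
            'NUM_SPATIAL_CROPS': 3,
        },
    })

    data = ReadVideoData(cfg, 'data/test/videos/action_recognition_test_video.mp4')
    # roughly [num_views * num_spatial_crops, C, num_frames, crop, crop]
    print(data.shape)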
diff --git a/modelscope/pydatasets/config.py b/modelscope/pydatasets/config.py
new file mode 100644
index 00000000..e916b3ec
--- /dev/null
+++ b/modelscope/pydatasets/config.py
@@ -0,0 +1,22 @@
+import os
+from pathlib import Path
+
+# Cache location
+DEFAULT_CACHE_HOME = '~/.cache'
+CACHE_HOME = os.getenv('CACHE_HOME', DEFAULT_CACHE_HOME)
+DEFAULT_MS_CACHE_HOME = os.path.join(CACHE_HOME, 'modelscope/hub')
+MS_CACHE_HOME = os.path.expanduser(
+ os.getenv('MS_CACHE_HOME', DEFAULT_MS_CACHE_HOME))
+
+DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'datasets')
+MS_DATASETS_CACHE = Path(
+ os.getenv('MS_DATASETS_CACHE', DEFAULT_MS_DATASETS_CACHE))
+
+DOWNLOADED_DATASETS_DIR = 'downloads'
+DEFAULT_DOWNLOADED_DATASETS_PATH = os.path.join(MS_DATASETS_CACHE,
+ DOWNLOADED_DATASETS_DIR)
+DOWNLOADED_DATASETS_PATH = Path(
+ os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH))
+
+MS_HUB_ENDPOINT = os.environ.get('MS_HUB_ENDPOINT',
+ 'http://101.201.119.157:31752')
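The cache locations are layered through environment variables; a quick sketch of how an override propagates (paths are illustrative):

    import os

    # Set before importing the config module so the override is picked up.
    os.environ['MS_CACHE_HOME'] = '/data/ms_cache'

    from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH,
                                              MS_DATASETS_CACHE)
    print(MS_DATASETS_CACHE)         # /data/ms_cache/datasets
    print(DOWNLOADED_DATASETS_PATH)  # /data/ms_cache/datasets/downloads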
diff --git a/modelscope/pydatasets/py_dataset.py b/modelscope/pydatasets/py_dataset.py
index 78aedaa0..49137253 100644
--- a/modelscope/pydatasets/py_dataset.py
+++ b/modelscope/pydatasets/py_dataset.py
@@ -1,64 +1,81 @@
-from typing import (Any, Callable, Dict, List, Mapping, Optional, Sequence,
- Union)
+import os
+from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional,
+ Sequence, Union)
-from datasets import Dataset, load_dataset
+import numpy as np
+from datasets import Dataset
+from datasets import load_dataset as hf_load_dataset
+from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE
+from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES
+from datasets.utils.file_utils import (is_relative_path,
+ relative_to_absolute_path)
+from modelscope.pydatasets.config import MS_DATASETS_CACHE
+from modelscope.pydatasets.utils.ms_api import MsApi
from modelscope.utils.constant import Hubs
from modelscope.utils.logger import get_logger
logger = get_logger()
+def format_list(para) -> List:
+ if para is None:
+ para = []
+ elif isinstance(para, str):
+ para = [para]
+ elif len(set(para)) < len(para):
+ raise ValueError(f'List columns contains duplicates: {para}')
+ return para
+
+
class PyDataset:
_hf_ds = None # holds the underlying HuggingFace Dataset
"""A PyDataset backed by hugging face Dataset."""
- def __init__(self, hf_ds: Dataset):
+ def __init__(self, hf_ds: Dataset, target: Optional[str] = None):
self._hf_ds = hf_ds
- self.target = None
+ self.target = target
def __iter__(self):
- if isinstance(self._hf_ds, Dataset):
- for item in self._hf_ds:
- if self.target is not None:
- yield item[self.target]
- else:
- yield item
- else:
- for ds in self._hf_ds.values():
- for item in ds:
- if self.target is not None:
- yield item[self.target]
- else:
- yield item
+ for item in self._hf_ds:
+ if self.target is not None:
+ yield item[self.target]
+ else:
+ yield item
+
+ def __getitem__(self, key):
+ return self._hf_ds[key]
@classmethod
def from_hf_dataset(cls,
hf_ds: Dataset,
- target: str = None) -> 'PyDataset':
- dataset = cls(hf_ds)
- dataset.target = target
- return dataset
+ target: str = None) -> Union[dict, 'PyDataset']:
+ if isinstance(hf_ds, Dataset):
+ return cls(hf_ds, target)
+ if len(hf_ds.keys()) == 1:
+ return cls(next(iter(hf_ds.values())), target)
+ return {k: cls(v, target) for k, v in hf_ds.items()}
@staticmethod
- def load(path: Union[str, list],
- target: Optional[str] = None,
- version: Optional[str] = None,
- name: Optional[str] = None,
- split: Optional[str] = None,
- data_dir: Optional[str] = None,
- data_files: Optional[Union[str, Sequence[str],
- Mapping[str,
- Union[str,
- Sequence[str]]]]] = None,
- hub: Optional[Hubs] = None) -> 'PyDataset':
+ def load(
+ dataset_name: Union[str, list],
+ target: Optional[str] = None,
+ version: Optional[str] = None,
+ hub: Optional[Hubs] = Hubs.modelscope,
+ subset_name: Optional[str] = None,
+ split: Optional[str] = None,
+ data_dir: Optional[str] = None,
+ data_files: Optional[Union[str, Sequence[str],
+ Mapping[str, Union[str,
+ Sequence[str]]]]] = None
+ ) -> Union[dict, 'PyDataset']:
"""Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
Args:
- path (str): Path or name of the dataset.
+ dataset_name (str): Path or name of the dataset.
target (str, optional): Name of the column to output.
version (str, optional): Version of the dataset script to load:
- name (str, optional): Defining the subset_name of the dataset.
+ subset_name (str, optional): Defining the subset_name of the dataset.
            data_dir (str, optional): Defining the data_dir of the dataset configuration.
data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s).
split (str, optional): Which split of the data to load.
@@ -67,53 +84,302 @@ class PyDataset:
Returns:
PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset.
"""
- if Hubs.modelscope == hub:
- # TODO: parse data meta information from modelscope hub
- # and possibly download data files to local (and update path)
- print('getting data from modelscope hub')
- if isinstance(path, str):
- dataset = load_dataset(
- path,
- name=name,
+ if hub == Hubs.huggingface:
+ dataset = hf_load_dataset(
+ dataset_name,
+ name=subset_name,
revision=version,
split=split,
data_dir=data_dir,
data_files=data_files)
- elif isinstance(path, list):
+ return PyDataset.from_hf_dataset(dataset, target=target)
+ else:
+ return PyDataset._load_ms_dataset(
+ dataset_name,
+ target=target,
+ subset_name=subset_name,
+ version=version,
+ split=split,
+ data_dir=data_dir,
+ data_files=data_files)
+
+ @staticmethod
+ def _load_ms_dataset(
+ dataset_name: Union[str, list],
+ target: Optional[str] = None,
+ version: Optional[str] = None,
+ subset_name: Optional[str] = None,
+ split: Optional[str] = None,
+ data_dir: Optional[str] = None,
+ data_files: Optional[Union[str, Sequence[str],
+ Mapping[str, Union[str,
+ Sequence[str]]]]] = None
+ ) -> Union[dict, 'PyDataset']:
+ if isinstance(dataset_name, str):
+ use_hf = False
+ if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \
+ (os.path.isfile(dataset_name) and dataset_name.endswith('.py')):
+ use_hf = True
+ elif is_relative_path(dataset_name):
+ ms_api = MsApi()
+ dataset_scripts = ms_api.fetch_dataset_scripts(
+ dataset_name, version)
+ if 'py' in dataset_scripts: # dataset copied from hf datasets
+ dataset_name = dataset_scripts['py'][0]
+ use_hf = True
+ else:
+ raise FileNotFoundError(
+ f"Couldn't find a dataset script at {relative_to_absolute_path(dataset_name)} "
+ f'or any data file in the same directory.')
+
+ if use_hf:
+ dataset = hf_load_dataset(
+ dataset_name,
+ name=subset_name,
+ revision=version,
+ split=split,
+ data_dir=data_dir,
+ data_files=data_files,
+ cache_dir=MS_DATASETS_CACHE)
+ else:
+ # TODO load from ms datahub
+ raise NotImplementedError(
+ f'Dataset {dataset_name} load from modelscope datahub to be implemented in '
+ f'the future')
+ elif isinstance(dataset_name, list):
if target is None:
target = 'target'
- dataset = Dataset.from_dict({target: [p] for p in path})
+ dataset = Dataset.from_dict({target: dataset_name})
else:
raise TypeError('path must be a str or a list, but got'
- f' {type(path)}')
+ f' {type(dataset_name)}')
return PyDataset.from_hf_dataset(dataset, target=target)
+ def to_torch_dataset_with_processors(
+ self,
+ preprocessors: Union[Callable, List[Callable]],
+ columns: Union[str, List[str]] = None,
+ ):
+ preprocessor_list = preprocessors if isinstance(
+ preprocessors, list) else [preprocessors]
+
+ columns = format_list(columns)
+
+ columns = [
+ key for key in self._hf_ds.features.keys() if key in columns
+ ]
+ sample = next(iter(self._hf_ds))
+
+ sample_res = {k: np.array(sample[k]) for k in columns}
+ for processor in preprocessor_list:
+ sample_res.update(
+ {k: np.array(v)
+ for k, v in processor(sample).items()})
+
+ def is_numpy_number(value):
+ return np.issubdtype(value.dtype, np.integer) or np.issubdtype(
+ value.dtype, np.floating)
+
+ retained_columns = []
+ for k in sample_res.keys():
+ if not is_numpy_number(sample_res[k]):
+ logger.warning(
+ f'Data of column {k} is non-numeric, will be removed')
+ continue
+ retained_columns.append(k)
+
+ import torch
+
+ class MsIterableDataset(torch.utils.data.IterableDataset):
+
+ def __init__(self, dataset: Iterable):
+ super(MsIterableDataset).__init__()
+ self.dataset = dataset
+
+ def __iter__(self):
+ for item_dict in self.dataset:
+ res = {
+ k: np.array(item_dict[k])
+ for k in columns if k in retained_columns
+ }
+ for preprocessor in preprocessor_list:
+ res.update({
+ k: np.array(v)
+ for k, v in preprocessor(item_dict).items()
+ if k in retained_columns
+ })
+ yield res
+
+ return MsIterableDataset(self._hf_ds)
+
def to_torch_dataset(
self,
columns: Union[str, List[str]] = None,
- output_all_columns: bool = False,
+ preprocessors: Union[Callable, List[Callable]] = None,
**format_kwargs,
):
- self._hf_ds.reset_format()
- self._hf_ds.set_format(
- type='torch',
- columns=columns,
- output_all_columns=output_all_columns,
- format_kwargs=format_kwargs)
- return self._hf_ds
+ """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to
+ torch.utils.data.DataLoader.
+
+ Args:
+ preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process
+ every sample of the dataset. The output type of processors is dict, and each numeric field of the dict
+ will be used as a field of torch.utils.data.Dataset.
+ columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only). If the
+ preprocessor is None, the arg columns must have at least one column. If the `preprocessors` is not None,
+ the output fields of processors will also be added.
+ format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`.
+
+ Returns:
+            :class:`torch.utils.data.Dataset`
+
+ """
+ if not TORCH_AVAILABLE:
+ raise ImportError(
+ 'The function to_torch_dataset requires pytorch to be installed'
+ )
+ if preprocessors is not None:
+ return self.to_torch_dataset_with_processors(preprocessors)
+ else:
+ self._hf_ds.reset_format()
+ self._hf_ds.set_format(
+ type='torch', columns=columns, format_kwargs=format_kwargs)
+ return self._hf_ds
+
+ def to_tf_dataset_with_processors(
+ self,
+ batch_size: int,
+ shuffle: bool,
+ preprocessors: Union[Callable, List[Callable]],
+ drop_remainder: bool = None,
+ prefetch: bool = True,
+ label_cols: Union[str, List[str]] = None,
+ columns: Union[str, List[str]] = None,
+ ):
+ preprocessor_list = preprocessors if isinstance(
+ preprocessors, list) else [preprocessors]
+
+ label_cols = format_list(label_cols)
+ columns = format_list(columns)
+ cols_to_retain = list(set(label_cols + columns))
+ retained_columns = [
+ key for key in self._hf_ds.features.keys() if key in cols_to_retain
+ ]
+ import tensorflow as tf
+ tf_dataset = tf.data.Dataset.from_tensor_slices(
+ np.arange(len(self._hf_ds), dtype=np.int64))
+ if shuffle:
+ tf_dataset = tf_dataset.shuffle(buffer_size=len(self._hf_ds))
+
+ def func(i, return_dict=False):
+ i = int(i)
+ res = {k: np.array(self._hf_ds[i][k]) for k in retained_columns}
+ for preprocessor in preprocessor_list:
+ # TODO preprocessor output may have the same key
+ res.update({
+ k: np.array(v)
+ for k, v in preprocessor(self._hf_ds[i]).items()
+ })
+ if return_dict:
+ return res
+ return tuple(list(res.values()))
+
+ sample_res = func(0, True)
+
+ @tf.function(input_signature=[tf.TensorSpec(None, tf.int64)])
+ def fetch_function(i):
+ output = tf.numpy_function(
+ func,
+ inp=[i],
+ Tout=[
+ tf.dtypes.as_dtype(val.dtype)
+ for val in sample_res.values()
+ ],
+ )
+ return {key: output[i] for i, key in enumerate(sample_res)}
+
+ tf_dataset = tf_dataset.map(
+ fetch_function, num_parallel_calls=tf.data.AUTOTUNE)
+ if label_cols:
+
+ def split_features_and_labels(input_batch):
+ labels = {
+ key: tensor
+ for key, tensor in input_batch.items() if key in label_cols
+ }
+ if len(input_batch) == 1:
+ input_batch = next(iter(input_batch.values()))
+ if len(labels) == 1:
+ labels = next(iter(labels.values()))
+ return input_batch, labels
+
+ tf_dataset = tf_dataset.map(split_features_and_labels)
+
+ elif len(columns) == 1:
+ tf_dataset = tf_dataset.map(lambda x: next(iter(x.values())))
+ if batch_size > 1:
+ tf_dataset = tf_dataset.batch(
+ batch_size, drop_remainder=drop_remainder)
+
+ if prefetch:
+ tf_dataset = tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
+ return tf_dataset
def to_tf_dataset(
self,
- columns: Union[str, List[str]],
batch_size: int,
shuffle: bool,
- collate_fn: Callable,
+ preprocessors: Union[Callable, List[Callable]] = None,
+ columns: Union[str, List[str]] = None,
+ collate_fn: Callable = None,
drop_remainder: bool = None,
collate_fn_args: Dict[str, Any] = None,
label_cols: Union[str, List[str]] = None,
- dummy_labels: bool = False,
prefetch: bool = True,
):
+ """Create a tf.data.Dataset from the MS Dataset. This tf.data.Dataset can be passed to tf methods like
+ model.fit() or model.predict().
+
+ Args:
+ batch_size (int): Number of samples in a single batch.
+ shuffle(bool): Shuffle the dataset order.
+ preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process
+ every sample of the dataset. The output type of processors is dict, and each field of the dict will be
+                used as a field of the tf.data.Dataset. If the `preprocessors` is None, the `collate_fn`
+ shouldn't be None.
+ columns (str or List[str], default None): Dataset column(s) to be loaded. If the preprocessor is None,
+ the arg columns must have at least one column. If the `preprocessors` is not None, the output fields of
+ processors will also be added.
+ collate_fn(Callable, default None): A callable object used to collect lists of samples into a batch. If
+ the `preprocessors` is None, the `collate_fn` shouldn't be None.
+ drop_remainder(bool, default None): Drop the last incomplete batch when loading.
+            collate_fn_args (Dict, optional): A `dict` of arguments to be passed to the `collate_fn`.
+            label_cols (str or List[str], default None): Dataset column(s) to load as labels.
+ prefetch (bool, default True): Prefetch data.
+
+ Returns:
+ :class:`tf.data.Dataset`
+
+ """
+ if not TF_AVAILABLE:
+ raise ImportError(
+ 'The function to_tf_dataset requires Tensorflow to be installed.'
+ )
+ if preprocessors is not None:
+ return self.to_tf_dataset_with_processors(
+ batch_size,
+ shuffle,
+ preprocessors,
+ drop_remainder=drop_remainder,
+ prefetch=prefetch,
+ label_cols=label_cols,
+ columns=columns)
+
+ if collate_fn is None:
+ logger.error(
+                'The `preprocessors` and the `collate_fn` should not both be None.'
+ )
+ return None
self._hf_ds.reset_format()
return self._hf_ds.to_tf_dataset(
columns,
@@ -123,7 +389,6 @@ class PyDataset:
drop_remainder=drop_remainder,
collate_fn_args=collate_fn_args,
label_cols=label_cols,
- dummy_labels=dummy_labels,
prefetch=prefetch)
def to_hf_dataset(self) -> Dataset:
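For reference, a small usage sketch of the reworked PyDataset.load API; 'beans' mirrors the dataset used by test_image_matting.py further down, and the list form wraps plain values into a single-column dataset:

    from modelscope.pydatasets import PyDataset

    ds = PyDataset.load('beans', split='train', target='image')
    for i, sample in enumerate(ds):
        print(sample)
        if i >= 2:
            break

    # A plain list becomes a one-column dataset keyed by `target`.
    file_ds = PyDataset.load(['data/test/images/image_captioning.png'], target='image')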
diff --git a/modelscope/pydatasets/utils/__init__.py b/modelscope/pydatasets/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/pydatasets/utils/ms_api.py b/modelscope/pydatasets/utils/ms_api.py
new file mode 100644
index 00000000..04052cc4
--- /dev/null
+++ b/modelscope/pydatasets/utils/ms_api.py
@@ -0,0 +1,66 @@
+import os
+from collections import defaultdict
+from typing import Optional
+
+import requests
+
+from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH,
+ MS_HUB_ENDPOINT)
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class MsApi:
+
+ def __init__(self, endpoint=MS_HUB_ENDPOINT):
+ self.endpoint = endpoint
+
+ def list_datasets(self):
+ path = f'{self.endpoint}/api/v1/datasets'
+ headers = None
+ params = {}
+ r = requests.get(path, params=params, headers=headers)
+ r.raise_for_status()
+ dataset_list = r.json()['Data']
+ return [x['Name'] for x in dataset_list]
+
+ def fetch_dataset_scripts(self,
+ dataset_name: str,
+ version: Optional[str] = 'master',
+ force_download=False):
+ datahub_url = f'{self.endpoint}/api/v1/datasets?Query={dataset_name}'
+ r = requests.get(datahub_url)
+ r.raise_for_status()
+ dataset_list = r.json()['Data']
+ if len(dataset_list) == 0:
+ return None
+ dataset_id = dataset_list[0]['Id']
+ version = version or 'master'
+ datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}'
+ r = requests.get(datahub_url)
+ r.raise_for_status()
+ file_list = r.json()['Data']['Files']
+ cache_dir = os.path.join(DOWNLOADED_DATASETS_PATH, dataset_name,
+ version)
+ os.makedirs(cache_dir, exist_ok=True)
+ local_paths = defaultdict(list)
+ for file_info in file_list:
+ file_path = file_info['Path']
+ if file_path.endswith('.py'):
+ datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/files?' \
+ f'Revision={version}&Path={file_path}'
+ r = requests.get(datahub_url)
+ r.raise_for_status()
+ content = r.json()['Data']['Content']
+ local_path = os.path.join(cache_dir, file_path)
+ if os.path.exists(local_path) and not force_download:
+ logger.warning(
+ f"Reusing dataset {dataset_name}'s python file ({local_path})"
+ )
+ local_paths['py'].append(local_path)
+ continue
+ with open(local_path, 'w') as f:
+ f.writelines(content)
+ local_paths['py'].append(local_path)
+ return local_paths
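A hedged sketch of how the datahub client is used by _load_ms_dataset above; the dataset name is illustrative, and fetch_dataset_scripts returns None when nothing matches on the hub:

    from modelscope.pydatasets.utils.ms_api import MsApi

    api = MsApi()
    print(api.list_datasets())

    scripts = api.fetch_dataset_scripts('beans', version='master')
    if scripts and 'py' in scripts:
        print(scripts['py'])  # local paths of the cached loading script(s)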
diff --git a/modelscope/trainers/nlp/space/trainers/gen_trainer.py b/modelscope/trainers/nlp/space/trainers/gen_trainer.py
index a0cda25c..e09e2100 100644
--- a/modelscope/trainers/nlp/space/trainers/gen_trainer.py
+++ b/modelscope/trainers/nlp/space/trainers/gen_trainer.py
@@ -13,7 +13,7 @@ import torch
from tqdm import tqdm
from transformers.optimization import AdamW, get_linear_schedule_with_warmup
-import modelscope.utils.nlp.space.ontology as ontology
+from .....utils.nlp.space import ontology
from ..metrics.metrics_tracker import MetricsTracker
diff --git a/modelscope/trainers/nlp/space/trainers/intent_trainer.py b/modelscope/trainers/nlp/space/trainers/intent_trainer.py
index bd43e9a5..2c5081d7 100644
--- a/modelscope/trainers/nlp/space/trainers/intent_trainer.py
+++ b/modelscope/trainers/nlp/space/trainers/intent_trainer.py
@@ -14,9 +14,7 @@ import torch
from tqdm import tqdm
from transformers.optimization import AdamW, get_linear_schedule_with_warmup
-from modelscope.trainers.nlp.space.metrics.metrics_tracker import \
- MetricsTracker
-from modelscope.utils.nlp.space.args import str2bool
+from ..metrics.metrics_tracker import MetricsTracker
def get_logger(log_path, name='default'):
diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index d89f0496..cd232c6a 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -28,9 +28,13 @@ class Tasks(object):
image_editing = 'image-editing'
image_generation = 'image-generation'
image_matting = 'image-matting'
+ ocr_detection = 'ocr-detection'
+ action_recognition = 'action-recognition'
# nlp tasks
word_segmentation = 'word-segmentation'
+ nli = 'nli'
+ sentiment_classification = 'sentiment-classification'
sentiment_analysis = 'sentiment-analysis'
sentence_similarity = 'sentence-similarity'
text_classification = 'text-classification'
@@ -45,8 +49,7 @@ class Tasks(object):
dialog_state_tracking = 'dialog-state-tracking'
table_question_answering = 'table-question-answering'
feature_extraction = 'feature-extraction'
- sentence_similarity = 'sentence-similarity'
- fill_mask = 'fill-mask '
+ fill_mask = 'fill-mask'
summarization = 'summarization'
question_answering = 'question-answering'
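The new task constants plug into the existing pipeline factory; a short sketch using the model ids exercised by the tests added in this diff:

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    action = pipeline(Tasks.action_recognition,
                      model='damo/cv_TAdaConv_action-recognition')
    print(action('data/test/videos/action_recognition_test_video.mp4'))

    nli = pipeline(Tasks.nli, model='damo/nlp_structbert_nli_chinese-base')
    print(nli(input=('四川商务职业学院和四川财经职业学院哪个好?',
                     '四川商务职业学院商务管理在哪个校区?')))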
diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py
index 2f61b148..868e751b 100644
--- a/modelscope/utils/hub.py
+++ b/modelscope/utils/hub.py
@@ -1,14 +1,67 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
+import os.path as osp
+from typing import List, Optional, Union
-from maas_hub.constants import MODEL_ID_SEPARATOR
+from requests import HTTPError
+from modelscope.hub.file_download import model_file_download
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile
-# temp solution before the hub-cache is in place
-def get_model_cache_dir(model_id: str, branch: str = 'master'):
- model_id_expanded = model_id.replace('/',
- MODEL_ID_SEPARATOR) + '.' + branch
- default_cache_dir = os.path.expanduser(os.path.join('~/.cache', 'maas'))
- return os.getenv('MAAS_CACHE',
- os.path.join(default_cache_dir, 'hub', model_id_expanded))
+
+def create_model_if_not_exist(
+ api,
+ model_id: str,
+ chinese_name: str,
+ visibility: Optional[int] = 5, # 1-private, 5-public
+ license: Optional[str] = 'apache-2.0',
+ revision: Optional[str] = 'master'):
+ exists = True
+ try:
+ api.get_model(model_id=model_id, revision=revision)
+ except HTTPError:
+ exists = False
+ if exists:
+ print(f'model {model_id} already exists, skip creation.')
+ return False
+ else:
+ api.create_model(
+ model_id=model_id,
+ chinese_name=chinese_name,
+ visibility=visibility,
+ license=license)
+ print(f'model {model_id} successfully created.')
+ return True
+
+
+def read_config(model_id_or_path: str):
+ """ Read config from hub or local path
+
+ Args:
+ model_id_or_path (str): Model repo name or local directory path.
+
+ Return:
+ config (:obj:`Config`): config object
+ """
+ if not os.path.exists(model_id_or_path):
+ local_path = model_file_download(model_id_or_path,
+ ModelFile.CONFIGURATION)
+ else:
+ local_path = os.path.join(model_id_or_path, ModelFile.CONFIGURATION)
+
+ return Config.from_file(local_path)
+
+
+def auto_load(model: Union[str, List[str]]):
+ if isinstance(model, str):
+ if not osp.exists(model):
+ model = snapshot_download(model)
+ else:
+ model = [
+ snapshot_download(m) if not osp.exists(m) else m for m in model
+ ]
+
+ return model
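A minimal sketch of the new hub helpers, with placeholder credentials and model ids; read_config resolves configuration.json from a local directory or downloads it from the hub:

    from modelscope.hub.api import HubApi
    from modelscope.utils.hub import create_model_if_not_exist, read_config

    api = HubApi()
    api.login('user_name', 'password')   # placeholder credentials
    create_model_if_not_exist(api, 'my_org/my_model', chinese_name='示例模型')

    cfg = read_config('damo/cv_unet_image-matting')
    print(cfg)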
diff --git a/modelscope/utils/registry.py b/modelscope/utils/registry.py
index b26b899d..8009b084 100644
--- a/modelscope/utils/registry.py
+++ b/modelscope/utils/registry.py
@@ -78,7 +78,7 @@ class Registry(object):
f'{self._name}[{default_group}] and will '
'be overwritten')
logger.warning(f'{self._modules[default_group][module_name]}'
- 'to {module_cls}')
+ f'to {module_cls}')
# also register module in the default group for faster access
# only by module name
self._modules[default_group][module_name] = module_cls
diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py
index c8ea0442..95e63dba 100644
--- a/modelscope/utils/test_utils.py
+++ b/modelscope/utils/test_utils.py
@@ -2,6 +2,9 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
+import unittest
+
+from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE
TEST_LEVEL = 2
TEST_LEVEL_STR = 'TEST_LEVEL'
@@ -15,6 +18,18 @@ def test_level():
return TEST_LEVEL
+def require_tf(test_case):
+ if not TF_AVAILABLE:
+ test_case = unittest.skip('test requires TensorFlow')(test_case)
+ return test_case
+
+
+def require_torch(test_case):
+ if not TORCH_AVAILABLE:
+ test_case = unittest.skip('test requires PyTorch')(test_case)
+ return test_case
+
+
def set_test_level(level: int):
global TEST_LEVEL
TEST_LEVEL = level
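The new decorators skip tests when a framework is missing; a hypothetical test case showing how they compose with the existing test_level gate:

    import unittest

    from modelscope.utils.test_utils import require_torch, test_level


    class TorchOnlyTest(unittest.TestCase):  # hypothetical test case

        @require_torch
        @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
        def test_tensor_roundtrip(self):
            import torch
            self.assertEqual(torch.tensor([1]).item(), 1)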
diff --git a/requirements/audio.txt b/requirements/audio.txt
index 140836a8..c7b2b239 100644
--- a/requirements/audio.txt
+++ b/requirements/audio.txt
@@ -1,25 +1,25 @@
#tts
-h5py==2.10.0
-#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp36-cp36m-linux_x86_64.whl
-https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp37-cp37m-linux_x86_64.whl
-https://swap.oss-cn-hangzhou.aliyuncs.com/Jiaqi%2Fmaas%2Ftts%2Frequirements%2Fpytorch_wavelets-1.3.0-py3-none-any.whl?Expires=1685688388&OSSAccessKeyId=LTAI4Ffebq4d9jTVDwiSbY4L&Signature=jcQbg5EZ%2Bdys3%2F4BRn3srrKLdIg%3D
-#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp38-cp38-linux_x86_64.whl
-#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp39-cp39-linux_x86_64.whl
+h5py
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/pytorch_wavelets-1.3.0-py3-none-any.whl
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp36-cp36m-linux_x86_64.whl; python_version=='3.6'
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp37-cp37m-linux_x86_64.whl; python_version=='3.7'
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp38-cp38-linux_x86_64.whl; python_version=='3.8'
+https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp39-cp39-linux_x86_64.whl; python_version=='3.9'
inflect
-keras==2.2.4
+keras
librosa
lxml
matplotlib
nara_wpe
-numpy==1.18.*
-protobuf==3.20.*
+numpy
+protobuf>3,<=3.20
ptflops
PyWavelets>=1.0.0
-scikit-learn==0.23.2
+scikit-learn
sox
tensorboard
tensorflow==1.15.*
-torch==1.10.*
+torch
torchaudio
torchvision
tqdm
diff --git a/requirements/cv.txt b/requirements/cv.txt
index 66799b76..513dae99 100644
--- a/requirements/cv.txt
+++ b/requirements/cv.txt
@@ -1 +1,3 @@
+decord>=0.6.0
easydict
+tf_slim
diff --git a/requirements/nlp.txt b/requirements/nlp.txt
index eefb3c7d..bc0b3fcd 100644
--- a/requirements/nlp.txt
+++ b/requirements/nlp.txt
@@ -1,4 +1,5 @@
-https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl
-https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
+# https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
+http://ait-public.oss-cn-hangzhou-zmf.aliyuncs.com/jizhu/en_core_web_sm-2.3.1.tar.gz
+https://alinlp.alibaba-inc.com/pypi/sofa-1.0.3-py3-none-any.whl
spacy>=2.3.5
# python -m spacy download en_core_web_sm
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index e97352aa..6580de53 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -1,13 +1,16 @@
addict
datasets
easydict
-https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl
+filelock>=3.3.0
numpy
opencv-python-headless
Pillow>=6.2.0
pyyaml
requests
+requests==2.27.1
scipy
+setuptools==58.0.4
tokenizers<=0.10.3
+tqdm>=4.64.0
transformers<=4.16.2
yapf
diff --git a/tests/hub/__init__.py b/tests/hub/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/hub/test_hub_examples.py b/tests/hub/test_hub_examples.py
new file mode 100644
index 00000000..b63445af
--- /dev/null
+++ b/tests/hub/test_hub_examples.py
@@ -0,0 +1,33 @@
+import unittest
+
+from maas_hub.maas_api import MaasApi
+
+from modelscope.utils.hub import create_model_if_not_exist
+
+USER_NAME = 'maasadmin'
+PASSWORD = '12345678'
+
+
+class HubExampleTest(unittest.TestCase):
+
+ def setUp(self):
+ self.api = MaasApi()
+ # note this is temporary before official account management is ready
+ self.api.login(USER_NAME, PASSWORD)
+
+ @unittest.skip('to be used for local test only')
+ def test_example_model_creation(self):
+        # ATTENTION: change to proper model names before use
+ model_name = 'cv_unet_person-image-cartoon_compound-models'
+ model_chinese_name = '达摩卡通化模型'
+ model_org = 'damo'
+ model_id = '%s/%s' % (model_org, model_name)
+
+ created = create_model_if_not_exist(self.api, model_id,
+ model_chinese_name)
+ if not created:
+ print('!! NOT created since model already exists !!')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py
new file mode 100644
index 00000000..d44cd7c1
--- /dev/null
+++ b/tests/hub/test_hub_operation.py
@@ -0,0 +1,155 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import subprocess
+import tempfile
+import unittest
+import uuid
+
+from modelscope.hub.api import HubApi, ModelScopeConfig
+from modelscope.hub.file_download import model_file_download
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.hub.utils.utils import get_gitlab_domain
+
+USER_NAME = 'maasadmin'
+PASSWORD = '12345678'
+
+model_chinese_name = '达摩卡通化模型'
+model_org = 'unittest'
+DEFAULT_GIT_PATH = 'git'
+
+
+class GitError(Exception):
+ pass
+
+
+# TODO: migrate these git operations to a git library after the code is merged.
+def run_git_command(git_path, *args) -> subprocess.CompletedProcess:
+ response = subprocess.run([git_path, *args], capture_output=True)
+ try:
+ response.check_returncode()
+ return response.stdout.decode('utf8')
+ except subprocess.CalledProcessError as error:
+ raise GitError(error.stderr.decode('utf8'))
+
+
+# For a public repo the token can be None; for a private repo a token is required.
+def clone(local_dir: str, token: str, url: str):
+ url = url.replace('//', '//oauth2:%s@' % token)
+ clone_args = '-C %s clone %s' % (local_dir, url)
+ clone_args = clone_args.split(' ')
+ stdout = run_git_command(DEFAULT_GIT_PATH, *clone_args)
+ print('stdout: %s' % stdout)
+
+
+def push(local_dir: str, token: str, url: str):
+ url = url.replace('//', '//oauth2:%s@' % token)
+ push_args = '-C %s push %s' % (local_dir, url)
+ push_args = push_args.split(' ')
+ stdout = run_git_command(DEFAULT_GIT_PATH, *push_args)
+ print('stdout: %s' % stdout)
+
+
+sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx'
+download_model_file_name = 'mnist-12.onnx'
+
+
+class HubOperationTest(unittest.TestCase):
+
+ def setUp(self):
+ self.old_cwd = os.getcwd()
+ self.api = HubApi()
+ # note this is temporary before official account management is ready
+ self.api.login(USER_NAME, PASSWORD)
+ self.model_name = uuid.uuid4().hex
+ self.model_id = '%s/%s' % (model_org, self.model_name)
+ self.api.create_model(
+ model_id=self.model_id,
+ chinese_name=model_chinese_name,
+ visibility=5, # 1-private, 5-public
+ license='apache-2.0')
+
+ def tearDown(self):
+ os.chdir(self.old_cwd)
+ self.api.delete_model(model_id=self.model_id)
+
+ def test_model_repo_creation(self):
+ # change to proper model names before use
+ try:
+ info = self.api.get_model(model_id=self.model_id)
+ assert info['Name'] == self.model_name
+ except KeyError as ke:
+ if ke.args[0] == 'name':
+ print(f'model {self.model_name} already exists, ignore')
+ else:
+ raise
+
+ # Note that this can be done via git operation once model repo
+ # has been created. Git-Op is the RECOMMENDED model upload approach
+ def test_model_upload(self):
+ url = f'http://{get_gitlab_domain()}/{self.model_id}'
+ print(url)
+ temporary_dir = tempfile.mkdtemp()
+ os.chdir(temporary_dir)
+ cmd_args = 'clone %s' % url
+ cmd_args = cmd_args.split(' ')
+ out = run_git_command('git', *cmd_args)
+ print(out)
+ repo_dir = os.path.join(temporary_dir, self.model_name)
+ os.chdir(repo_dir)
+ os.system('touch file1')
+ os.system('git add file1')
+ os.system("git commit -m 'Test'")
+ token = ModelScopeConfig.get_token()
+ push(repo_dir, token, url)
+
+ def test_download_single_file(self):
+ url = f'http://{get_gitlab_domain()}/{self.model_id}'
+ print(url)
+ temporary_dir = tempfile.mkdtemp()
+ os.chdir(temporary_dir)
+ os.system('git clone %s' % url)
+ repo_dir = os.path.join(temporary_dir, self.model_name)
+ os.chdir(repo_dir)
+ os.system('wget %s' % sample_model_url)
+ os.system('git add .')
+ os.system("git commit -m 'Add file'")
+ token = ModelScopeConfig.get_token()
+ push(repo_dir, token, url)
+ assert os.path.exists(
+ os.path.join(temporary_dir, self.model_name,
+ download_model_file_name))
+ downloaded_file = model_file_download(
+ model_id=self.model_id, file_path=download_model_file_name)
+ mdtime1 = os.path.getmtime(downloaded_file)
+ # download again
+ downloaded_file = model_file_download(
+ model_id=self.model_id, file_path=download_model_file_name)
+ mdtime2 = os.path.getmtime(downloaded_file)
+ assert mdtime1 == mdtime2
+
+ def test_snapshot_download(self):
+ url = f'http://{get_gitlab_domain()}/{self.model_id}'
+ print(url)
+ temporary_dir = tempfile.mkdtemp()
+ os.chdir(temporary_dir)
+ os.system('git clone %s' % url)
+ repo_dir = os.path.join(temporary_dir, self.model_name)
+ os.chdir(repo_dir)
+ os.system('wget %s' % sample_model_url)
+ os.system('git add .')
+ os.system("git commit -m 'Add file'")
+ token = ModelScopeConfig.get_token()
+ push(repo_dir, token, url)
+ snapshot_path = snapshot_download(model_id=self.model_id)
+ downloaded_file_path = os.path.join(snapshot_path,
+ download_model_file_name)
+ assert os.path.exists(downloaded_file_path)
+ mdtime1 = os.path.getmtime(downloaded_file_path)
+ # download again
+ snapshot_path = snapshot_download(model_id=self.model_id)
+ mdtime2 = os.path.getmtime(downloaded_file_path)
+ assert mdtime1 == mdtime2
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/nlp/test_dialog_state_tracking.py b/tests/pipelines/nlp/test_dialog_state_tracking.py
index a6c989bd..41ef7981 100644
--- a/tests/pipelines/nlp/test_dialog_state_tracking.py
+++ b/tests/pipelines/nlp/test_dialog_state_tracking.py
@@ -1,8 +1,10 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import os.path as osp
+import tempfile
import unittest
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import DialogStateTrackingModel
from modelscope.pipelines import DialogStateTrackingPipeline, pipeline
@@ -10,50 +12,31 @@ from modelscope.preprocessors import DialogStateTrackingPreprocessor
from modelscope.utils.constant import Tasks
-class DialogIntentPredictionTest(unittest.TestCase):
- model_id = 'damo/nlp_space_dialog-intent-prediction'
- test_case = [
- 'How do I locate my card?',
- 'I still have not received my new card, I ordered over a week ago.'
- ]
+class DialogStateTrackingTest(unittest.TestCase):
+ model_id = 'damo/nlp_space_dialog-state-tracking'
+ test_case = {}
- @unittest.skip('test with snapshot_download')
def test_run(self):
- cache_path = snapshot_download(self.model_id)
- preprocessor = DialogIntentPredictionPreprocessor(model_dir=cache_path)
- model = DialogIntentModel(
- model_dir=cache_path,
- text_field=preprocessor.text_field,
- config=preprocessor.config)
-
- pipelines = [
- DialogIntentPredictionPipeline(
- model=model, preprocessor=preprocessor),
- pipeline(
- task=Tasks.dialog_intent_prediction,
- model=model,
- preprocessor=preprocessor)
- ]
-
- for my_pipeline, item in list(zip(pipelines, self.test_case)):
- print(my_pipeline(item))
-
- def test_run_with_model_from_modelhub(self):
- # model = Model.from_pretrained(self.model_id)
- # preprocessor = DialogIntentPredictionPreprocessor(
- # model_dir=model.model_dir)
- #
+ # cache_path = ''
+ # cache_path = snapshot_download(self.model_id)
+
+ # preprocessor = DialogStateTrackingPreprocessor(model_dir=cache_path)
+ # model = DialogStateTrackingModel(
+ # model_dir=cache_path,
+ # text_field=preprocessor.text_field,
+ # config=preprocessor.config)
# pipelines = [
- # DialogIntentPredictionPipeline(
- # model=model, preprocessor=preprocessor),
+ # DialogStateTrackingPipeline(model=model, preprocessor=preprocessor),
# pipeline(
- # task=Tasks.dialog_intent_prediction,
+ # task=Tasks.dialog_modeling,
# model=model,
# preprocessor=preprocessor)
# ]
- #
- # for my_pipeline, item in list(zip(pipelines, self.test_case)):
- # print(my_pipeline(item))
+
+ print('jizhu test')
+
+ @unittest.skip('test with snapshot_download')
+ def test_run_with_model_from_modelhub(self):
pass
diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py
new file mode 100644
index 00000000..b524ca18
--- /dev/null
+++ b/tests/pipelines/test_action_recognition.py
@@ -0,0 +1,58 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+# !/usr/bin/env python
+import os.path as osp
+import shutil
+import tempfile
+import unittest
+
+import cv2
+
+from modelscope.fileio import File
+from modelscope.pipelines import pipeline
+from modelscope.pydatasets import PyDataset
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class ActionRecognitionTest(unittest.TestCase):
+
+ def setUp(self) -> None:
+ self.model_id = 'damo/cv_TAdaConv_action-recognition'
+
+ @unittest.skip('deprecated, download model from model hub instead')
+ def test_run_with_direct_file_download(self):
+ model_path = 'https://aquila2-online-models.oss-cn-shanghai.aliyuncs.com/maas_test/pytorch_model.pt'
+ config_path = 'https://aquila2-online-models.oss-cn-shanghai.aliyuncs.com/maas_test/configuration.json'
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ model_file = osp.join(tmp_dir, ModelFile.TORCH_MODEL_FILE)
+ with open(model_file, 'wb') as ofile1:
+ ofile1.write(File.read(model_path))
+ config_file = osp.join(tmp_dir, ModelFile.CONFIGURATION)
+ with open(config_file, 'wb') as ofile2:
+ ofile2.write(File.read(config_path))
+ recognition_pipeline = pipeline(
+ Tasks.action_recognition, model=tmp_dir)
+ result = recognition_pipeline(
+ 'data/test/videos/action_recognition_test_video.mp4')
+ print(f'recognition output: {result}.')
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_modelhub(self):
+ recognition_pipeline = pipeline(
+ Tasks.action_recognition, model=self.model_id)
+ result = recognition_pipeline(
+ 'data/test/videos/action_recognition_test_video.mp4')
+
+ print(f'recognition output: {result}.')
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_modelhub_default_model(self):
+ recognition_pipeline = pipeline(Tasks.action_recognition)
+ result = recognition_pipeline(
+ 'data/test/videos/action_recognition_test_video.mp4')
+
+ print(f'recognition output: {result}.')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/nlp/test_dialog_intent_prediction.py b/tests/pipelines/test_dialog_intent_prediction.py
similarity index 96%
rename from tests/pipelines/nlp/test_dialog_intent_prediction.py
rename to tests/pipelines/test_dialog_intent_prediction.py
index 0ec4e1e7..97cdbb3d 100644
--- a/tests/pipelines/nlp/test_dialog_intent_prediction.py
+++ b/tests/pipelines/test_dialog_intent_prediction.py
@@ -1,8 +1,7 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import DialogIntentModel
from modelscope.pipelines import DialogIntentPredictionPipeline, pipeline
diff --git a/tests/pipelines/nlp/test_dialog_modeling.py b/tests/pipelines/test_dialog_modeling.py
similarity index 98%
rename from tests/pipelines/nlp/test_dialog_modeling.py
rename to tests/pipelines/test_dialog_modeling.py
index 7d4da8fe..f606ba49 100644
--- a/tests/pipelines/nlp/test_dialog_modeling.py
+++ b/tests/pipelines/test_dialog_modeling.py
@@ -4,8 +4,7 @@ import os.path as osp
import tempfile
import unittest
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import DialogModelingModel
from modelscope.pipelines import DialogModelingPipeline, pipeline
diff --git a/tests/pipelines/test_fill_mask.py b/tests/pipelines/test_fill_mask.py
new file mode 100644
index 00000000..49c5dc8a
--- /dev/null
+++ b/tests/pipelines/test_fill_mask.py
@@ -0,0 +1,129 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.models import Model
+from modelscope.models.nlp import StructBertForMaskedLM, VecoForMaskedLM
+from modelscope.pipelines import FillMaskPipeline, pipeline
+from modelscope.preprocessors import FillMaskPreprocessor
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class FillMaskTest(unittest.TestCase):
+ model_id_sbert = {
+ 'zh': 'damo/nlp_structbert_fill-mask_chinese-large',
+ 'en': 'damo/nlp_structbert_fill-mask_english-large'
+ }
+ model_id_veco = 'damo/nlp_veco_fill-mask-large'
+
+ ori_texts = {
+ 'zh':
+ '段誉轻挥折扇,摇了摇头,说道:“你师父是你的师父,你师父可不是我的师父。'
+ '你师父差得动你,你师父可差不动我。',
+ 'en':
+ 'Everything in what you call reality is really just a reflection of your '
+ 'consciousness. Your whole universe is just a mirror reflection of your story.'
+ }
+
+ test_inputs = {
+ 'zh':
+ '段誉轻[MASK]折扇,摇了摇[MASK],[MASK]道:“你师父是你的[MASK][MASK],你'
+ '师父可不是[MASK]的师父。你师父差得动你,你师父可[MASK]不动我。',
+ 'en':
+ 'Everything in [MASK] you call reality is really [MASK] a reflection of your '
+ '[MASK]. Your [MASK] universe is just a mirror [MASK] of your story.'
+ }
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_by_direct_model_download(self):
+ # sbert
+ for language in ['zh', 'en']:
+ model_dir = snapshot_download(self.model_id_sbert[language])
+ preprocessor = FillMaskPreprocessor(
+ model_dir, first_sequence='sentence', second_sequence=None)
+ model = StructBertForMaskedLM(model_dir)
+ pipeline1 = FillMaskPipeline(model, preprocessor)
+ pipeline2 = pipeline(
+ Tasks.fill_mask, model=model, preprocessor=preprocessor)
+ ori_text = self.ori_texts[language]
+ test_input = self.test_inputs[language]
+ print(
+ f'\nori_text: {ori_text}\ninput: {test_input}\npipeline1: '
+ f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n'
+ )
+
+ # veco
+ model_dir = snapshot_download(self.model_id_veco)
+ preprocessor = FillMaskPreprocessor(
+ model_dir, first_sequence='sentence', second_sequence=None)
+ model = VecoForMaskedLM(model_dir)
+ pipeline1 = FillMaskPipeline(model, preprocessor)
+ pipeline2 = pipeline(
+ Tasks.fill_mask, model=model, preprocessor=preprocessor)
+ for language in ['zh', 'en']:
+ ori_text = self.ori_texts[language]
+ test_input = self.test_inputs[language].replace('[MASK]', '')
+ print(
+ f'\nori_text: {ori_text}\ninput: {test_input}\npipeline1: '
+ f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n'
+ )
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub(self):
+ # sbert
+ for language in ['zh', 'en']:
+ print(self.model_id_sbert[language])
+ model = Model.from_pretrained(self.model_id_sbert[language])
+ preprocessor = FillMaskPreprocessor(
+ model.model_dir,
+ first_sequence='sentence',
+ second_sequence=None)
+ pipeline_ins = pipeline(
+ task=Tasks.fill_mask, model=model, preprocessor=preprocessor)
+ print(
+ f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: '
+ f'{pipeline_ins(self.test_inputs[language])}\n')
+
+ # veco
+ model = Model.from_pretrained(self.model_id_veco)
+ preprocessor = FillMaskPreprocessor(
+ model.model_dir, first_sequence='sentence', second_sequence=None)
+ pipeline_ins = pipeline(
+ Tasks.fill_mask, model=model, preprocessor=preprocessor)
+ for language in ['zh', 'en']:
+ ori_text = self.ori_texts[language]
+ test_input = self.test_inputs[language].replace('[MASK]', '')
+ print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
+ f'{pipeline_ins(test_input)}\n')
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_name(self):
+ # veco
+ pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_veco)
+ for language in ['zh', 'en']:
+ ori_text = self.ori_texts[language]
+ test_input = self.test_inputs[language].replace('[MASK]', '')
+ print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
+ f'{pipeline_ins(test_input)}\n')
+
+ # structBert
+ language = 'zh'
+ pipeline_ins = pipeline(
+ task=Tasks.fill_mask, model=self.model_id_sbert[language])
+ print(
+ f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: '
+ f'{pipeline_ins(self.test_inputs[language])}\n')
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_with_default_model(self):
+ pipeline_ins = pipeline(task=Tasks.fill_mask)
+ language = 'en'
+ ori_text = self.ori_texts[language]
+ test_input = self.test_inputs[language].replace('[MASK]', '')
+ print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: '
+ f'{pipeline_ins(test_input)}\n')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_image_captioning.py b/tests/pipelines/test_image_captioning.py
index 74a65806..5fa6ff49 100644
--- a/tests/pipelines/test_image_captioning.py
+++ b/tests/pipelines/test_image_captioning.py
@@ -1,10 +1,7 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
-import os
-import tempfile
import unittest
-from modelscope.fileio import File
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level
@@ -12,23 +9,13 @@ from modelscope.utils.test_utils import test_level
class ImageCaptionTest(unittest.TestCase):
- @unittest.skip('skip before model is restored in model hub')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run(self):
- model = 'https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt'
-
- os.system(
- 'wget https://jirenmr.oss-cn-zhangjiakou.aliyuncs.com/ofa/BPE.zip'
- )
- os.system('unzip BPE.zip')
- bpe_dir = './BPE'
-
- with tempfile.NamedTemporaryFile('wb', suffix='.pb') as ofile:
- ofile.write(File.read(model))
- img_captioning = pipeline(
- Tasks.image_captioning, model=ofile.name, bpe_dir=bpe_dir)
-
- result = img_captioning('data/test/images/image_matting.png')
- print(result['caption'])
+ img_captioning = pipeline(
+ Tasks.image_captioning,
+ model='damo/ofa_image-caption_coco_large_en')
+ result = img_captioning('data/test/images/image_captioning.png')
+ print(result['caption'])
if __name__ == '__main__':
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index 6e102d00..1b547e14 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -10,7 +10,6 @@ from modelscope.fileio import File
from modelscope.pipelines import pipeline
from modelscope.pydatasets import PyDataset
from modelscope.utils.constant import ModelFile, Tasks
-from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.test_utils import test_level
@@ -18,11 +17,6 @@ class ImageMattingTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/cv_unet_image-matting'
- # switch to False if downloading everytime is not desired
- purge_cache = True
- if purge_cache:
- shutil.rmtree(
- get_model_cache_dir(self.model_id), ignore_errors=True)
@unittest.skip('deprecated, download model from model hub instead')
def test_run_with_direct_file_download(self):
@@ -58,7 +52,7 @@ class ImageMattingTest(unittest.TestCase):
cv2.imwrite('result.png', result['output_png'])
print(f'Output written to {osp.abspath("result.png")}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_modelhub_default_model(self):
img_matting = pipeline(Tasks.image_matting)
@@ -66,6 +60,17 @@ class ImageMattingTest(unittest.TestCase):
cv2.imwrite('result.png', result['output_png'])
print(f'Output written to {osp.abspath("result.png")}')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_with_modelscope_dataset(self):
+ dataset = PyDataset.load('beans', split='train', target='image')
+ img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+ result = img_matting(dataset)
+ for i in range(10):
+ cv2.imwrite(f'result_{i}.png', next(result)['output_png'])
+ print(
+ f'Output written to dir: {osp.dirname(osp.abspath("result_0.png"))}'
+ )
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/pipelines/test_nli.py b/tests/pipelines/test_nli.py
new file mode 100644
index 00000000..0c8da8b4
--- /dev/null
+++ b/tests/pipelines/test_nli.py
@@ -0,0 +1,52 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.models import Model
+from modelscope.models.nlp import SbertForNLI
+from modelscope.pipelines import NLIPipeline, pipeline
+from modelscope.preprocessors import NLIPreprocessor
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class NLITest(unittest.TestCase):
+ model_id = 'damo/nlp_structbert_nli_chinese-base'
+ sentence1 = '四川商务职业学院和四川财经职业学院哪个好?'
+ sentence2 = '四川商务职业学院商务管理在哪个校区?'
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_with_direct_file_download(self):
+ cache_path = snapshot_download(self.model_id)
+ tokenizer = NLIPreprocessor(cache_path)
+ model = SbertForNLI(cache_path, tokenizer=tokenizer)
+ pipeline1 = NLIPipeline(model, preprocessor=tokenizer)
+ pipeline2 = pipeline(Tasks.nli, model=model, preprocessor=tokenizer)
+ print(f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n'
+            f'pipeline1: {pipeline1(input=(self.sentence1, self.sentence2))}')
+ print()
+ print(
+ f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n'
+            f'pipeline2: {pipeline2(input=(self.sentence1, self.sentence2))}')
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub(self):
+ model = Model.from_pretrained(self.model_id)
+ tokenizer = NLIPreprocessor(model.model_dir)
+ pipeline_ins = pipeline(
+ task=Tasks.nli, model=model, preprocessor=tokenizer)
+ print(pipeline_ins(input=(self.sentence1, self.sentence2)))
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_name(self):
+ pipeline_ins = pipeline(task=Tasks.nli, model=self.model_id)
+ print(pipeline_ins(input=(self.sentence1, self.sentence2)))
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_default_model(self):
+ pipeline_ins = pipeline(task=Tasks.nli)
+ print(pipeline_ins(input=(self.sentence1, self.sentence2)))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_ocr_detection.py b/tests/pipelines/test_ocr_detection.py
new file mode 100644
index 00000000..986961b7
--- /dev/null
+++ b/tests/pipelines/test_ocr_detection.py
@@ -0,0 +1,37 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path as osp
+import shutil
+import sys
+import tempfile
+import unittest
+from typing import Any, Dict, List, Tuple, Union
+
+import cv2
+import numpy as np
+import PIL
+
+from modelscope.pipelines import pipeline
+from modelscope.pipelines.base import Pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class OCRDetectionTest(unittest.TestCase):
+
+ def setUp(self) -> None:
+ self.model_id = 'damo/cv_resnet18_ocr-detection-line-level_damo'
+ self.test_image = 'data/test/images/ocr_detection.jpg'
+
+ def pipeline_inference(self, pipeline: Pipeline, input_location: str):
+ result = pipeline(input_location)
+ print('ocr detection results: ')
+ print(result)
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_modelhub_default_model(self):
+ ocr_detection = pipeline(Tasks.ocr_detection)
+ self.pipeline_inference(ocr_detection, self.test_image)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py
index ed912b1c..f47ca008 100644
--- a/tests/pipelines/test_person_image_cartoon.py
+++ b/tests/pipelines/test_person_image_cartoon.py
@@ -42,7 +42,7 @@ class ImageCartoonTest(unittest.TestCase):
img_cartoon = pipeline(Tasks.image_generation, model=self.model_id)
self.pipeline_inference(img_cartoon, self.test_image)
- @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_modelhub_default_model(self):
img_cartoon = pipeline(Tasks.image_generation)
self.pipeline_inference(img_cartoon, self.test_image)
diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py
index ac2ff4fb..df38593f 100644
--- a/tests/pipelines/test_sentence_similarity.py
+++ b/tests/pipelines/test_sentence_similarity.py
@@ -2,14 +2,12 @@
import shutil
import unittest
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import SbertForSentenceSimilarity
from modelscope.pipelines import SentenceSimilarityPipeline, pipeline
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.test_utils import test_level
@@ -18,14 +16,7 @@ class SentenceSimilarityTest(unittest.TestCase):
sentence1 = '今天气温比昨天高么?'
sentence2 = '今天湿度比昨天高么?'
- def setUp(self) -> None:
- # switch to False if downloading everytime is not desired
- purge_cache = True
- if purge_cache:
- shutil.rmtree(
- get_model_cache_dir(self.model_id), ignore_errors=True)
-
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run(self):
cache_path = snapshot_download(self.model_id)
tokenizer = SequenceClassificationPreprocessor(cache_path)
@@ -41,7 +32,7 @@ class SentenceSimilarityTest(unittest.TestCase):
f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n'
f'pipeline1: {pipeline2(input=(self.sentence1, self.sentence2))}')
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_model_from_modelhub(self):
model = Model.from_pretrained(self.model_id)
tokenizer = SequenceClassificationPreprocessor(model.model_dir)
@@ -57,7 +48,7 @@ class SentenceSimilarityTest(unittest.TestCase):
task=Tasks.sentence_similarity, model=self.model_id)
print(pipeline_ins(input=(self.sentence1, self.sentence2)))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_default_model(self):
pipeline_ins = pipeline(task=Tasks.sentence_similarity)
print(pipeline_ins(input=(self.sentence1, self.sentence2)))
diff --git a/tests/pipelines/test_sentiment_classification.py b/tests/pipelines/test_sentiment_classification.py
new file mode 100644
index 00000000..0ba22d5c
--- /dev/null
+++ b/tests/pipelines/test_sentiment_classification.py
@@ -0,0 +1,58 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.models import Model
+from modelscope.models.nlp import SbertForSentimentClassification
+from modelscope.pipelines import SentimentClassificationPipeline, pipeline
+from modelscope.preprocessors import SentimentClassificationPreprocessor
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class SentimentClassificationTest(unittest.TestCase):
+ model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base'
+ sentence1 = '启动的时候很大声音,然后就会听到1.2秒的卡察的声音,类似齿轮摩擦的声音'
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_with_direct_file_download(self):
+ cache_path = snapshot_download(self.model_id)
+ tokenizer = SentimentClassificationPreprocessor(cache_path)
+ model = SbertForSentimentClassification(
+ cache_path, tokenizer=tokenizer)
+ pipeline1 = SentimentClassificationPipeline(
+ model, preprocessor=tokenizer)
+ pipeline2 = pipeline(
+ Tasks.sentiment_classification,
+ model=model,
+ preprocessor=tokenizer)
+ print(f'sentence1: {self.sentence1}\n'
+              f'pipeline1: {pipeline1(input=self.sentence1)}')
+ print()
+ print(f'sentence1: {self.sentence1}\n'
+              f'pipeline2: {pipeline2(input=self.sentence1)}')
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_from_modelhub(self):
+ model = Model.from_pretrained(self.model_id)
+ tokenizer = SentimentClassificationPreprocessor(model.model_dir)
+ pipeline_ins = pipeline(
+ task=Tasks.sentiment_classification,
+ model=model,
+ preprocessor=tokenizer)
+ print(pipeline_ins(input=self.sentence1))
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_model_name(self):
+ pipeline_ins = pipeline(
+ task=Tasks.sentiment_classification, model=self.model_id)
+ print(pipeline_ins(input=self.sentence1))
+
+ @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ def test_run_with_default_model(self):
+ pipeline_ins = pipeline(task=Tasks.sentiment_classification)
+ print(pipeline_ins(input=self.sentence1))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py
index 8b5c9468..1b070fda 100644
--- a/tests/pipelines/test_speech_signal_process.py
+++ b/tests/pipelines/test_speech_signal_process.py
@@ -3,9 +3,10 @@ import shutil
import unittest
from modelscope.fileio import File
+from modelscope.metainfo import Pipelines
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-from modelscope.utils.hub import get_model_cache_dir
+from modelscope.utils.test_utils import test_level
NEAREND_MIC_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/nearend_mic.wav'
FAREND_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/farend_speech.wav'
@@ -30,14 +31,10 @@ class SpeechSignalProcessTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/speech_dfsmn_aec_psm_16k'
- # switch to False if downloading everytime is not desired
- purge_cache = True
- if purge_cache:
- shutil.rmtree(
- get_model_cache_dir(self.model_id), ignore_errors=True)
# A temporary hack to provide c++ lib. Download it first.
download(AEC_LIB_URL, AEC_LIB_FILE)
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run(self):
download(NEAREND_MIC_URL, NEAREND_MIC_FILE)
download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE)
@@ -48,7 +45,7 @@ class SpeechSignalProcessTest(unittest.TestCase):
aec = pipeline(
Tasks.speech_signal_process,
model=self.model_id,
- pipeline_name=r'speech_dfsmn_aec_psm_16k')
+ pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
aec(input, output_path='output.wav')
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index 01fdd29b..9e5f15b9 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -1,17 +1,12 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import shutil
import unittest
-import zipfile
-from pathlib import Path
-from modelscope.fileio import File
from modelscope.models import Model
-from modelscope.models.nlp import BertForSequenceClassification
from modelscope.pipelines import SequenceClassificationPipeline, pipeline
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.pydatasets import PyDataset
from modelscope.utils.constant import Hubs, Tasks
-from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.test_utils import test_level
@@ -19,11 +14,6 @@ class SequenceClassificationTest(unittest.TestCase):
def setUp(self) -> None:
self.model_id = 'damo/bert-base-sst2'
- # switch to False if downloading everytime is not desired
- purge_cache = True
- if purge_cache:
- shutil.rmtree(
- get_model_cache_dir(self.model_id), ignore_errors=True)
def predict(self, pipeline_ins: SequenceClassificationPipeline):
from easynlp.appzoo import load_dataset
@@ -44,31 +34,6 @@ class SequenceClassificationTest(unittest.TestCase):
break
print(r)
- @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
- def test_run(self):
- model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
- '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
- cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
- cache_path = Path(cache_path_str)
-
- if not cache_path.exists():
- cache_path.parent.mkdir(parents=True, exist_ok=True)
- cache_path.touch(exist_ok=True)
- with cache_path.open('wb') as ofile:
- ofile.write(File.read(model_url))
-
- with zipfile.ZipFile(cache_path_str, 'r') as zipf:
- zipf.extractall(cache_path.parent)
- path = r'.cache/easynlp/'
- model = BertForSequenceClassification(path)
- preprocessor = SequenceClassificationPreprocessor(
- path, first_sequence='sentence', second_sequence=None)
- pipeline1 = SequenceClassificationPipeline(model, preprocessor)
- self.predict(pipeline1)
- pipeline2 = pipeline(
- Tasks.text_classification, model=model, preprocessor=preprocessor)
- print(pipeline2('Hello world!'))
-
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_from_modelhub(self):
model = Model.from_pretrained(self.model_id)
@@ -86,18 +51,26 @@ class SequenceClassificationTest(unittest.TestCase):
task=Tasks.text_classification, model=self.model_id)
result = text_classification(
PyDataset.load(
- 'glue', name='sst2', target='sentence', hub=Hubs.huggingface))
+ 'glue',
+ subset_name='sst2',
+ split='train',
+ target='sentence',
+ hub=Hubs.huggingface))
self.printDataset(result)
- @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_default_model(self):
text_classification = pipeline(task=Tasks.text_classification)
result = text_classification(
PyDataset.load(
- 'glue', name='sst2', target='sentence', hub=Hubs.huggingface))
+ 'glue',
+ subset_name='sst2',
+ split='train',
+ target='sentence',
+ hub=Hubs.huggingface))
self.printDataset(result)
- @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_dataset(self):
model = Model.from_pretrained(self.model_id)
preprocessor = SequenceClassificationPreprocessor(
@@ -105,9 +78,21 @@ class SequenceClassificationTest(unittest.TestCase):
text_classification = pipeline(
Tasks.text_classification, model=model, preprocessor=preprocessor)
# loaded from huggingface dataset
- # TODO: rename parameter as dataset_name and subset_name
dataset = PyDataset.load(
- 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)
+ 'glue',
+ subset_name='sst2',
+ split='train',
+ target='sentence',
+ hub=Hubs.huggingface)
+ result = text_classification(dataset)
+ self.printDataset(result)
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_run_with_modelscope_dataset(self):
+ text_classification = pipeline(task=Tasks.text_classification)
+ # loaded from modelscope dataset
+ dataset = PyDataset.load(
+ 'squad', split='train', target='context', hub=Hubs.modelscope)
result = text_classification(dataset)
self.printDataset(result)
diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py
index fbdd165f..9df3b8bb 100644
--- a/tests/pipelines/test_text_generation.py
+++ b/tests/pipelines/test_text_generation.py
@@ -1,8 +1,7 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import PalmForTextGeneration
from modelscope.pipelines import TextGenerationPipeline, pipeline
@@ -69,7 +68,7 @@ class TextGenerationTest(unittest.TestCase):
pipeline_ins = pipeline(task=Tasks.text_generation, model=model_id)
print(pipeline_ins(input))
- @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_default_model(self):
pipeline_ins = pipeline(task=Tasks.text_generation)
print(pipeline_ins(self.input_zh))
diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py
index c9b988a1..e92047d6 100644
--- a/tests/pipelines/test_text_to_speech.py
+++ b/tests/pipelines/test_text_to_speech.py
@@ -1,7 +1,5 @@
-import time
import unittest
-import json
import tensorflow as tf
# NOTICE: Tensorflow 1.15 seems not so compatible with pytorch.
# A segmentation fault may be raise by pytorch cpp library
@@ -10,20 +8,20 @@ import tensorflow as tf
import torch
from scipy.io.wavfile import write
-from modelscope.fileio import File
-from modelscope.models import Model, build_model
-from modelscope.models.audio.tts.am import SambertNetHifi16k
-from modelscope.models.audio.tts.vocoder import AttrDict, Hifigan16k
+from modelscope.metainfo import Pipelines, Preprocessors
+from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.preprocessors import build_preprocessor
-from modelscope.utils.constant import Fields, InputFields, Tasks
+from modelscope.utils.constant import Fields
from modelscope.utils.logger import get_logger
+from modelscope.utils.test_utils import test_level
logger = get_logger()
class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_pipeline(self):
lang_type = 'pinyin'
text = '明天天气怎么样'
@@ -32,7 +30,7 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):
voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo'
cfg_preprocessor = dict(
- type='text_to_tacotron_symbols',
+ type=Preprocessors.text_to_tacotron_symbols,
model_name=preprocessor_model_id,
lang_type=lang_type)
preprocessor = build_preprocessor(cfg_preprocessor, Fields.audio)
@@ -45,7 +43,7 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):
self.assertTrue(voc is not None)
sambert_tts = pipeline(
- pipeline_name='tts-sambert-hifigan-16k',
+ pipeline_name=Pipelines.sambert_hifigan_16k_tts,
config_file='',
model=[am, voc],
preprocessor=preprocessor)
diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py
index 4ec2bf29..d33e4bdb 100644
--- a/tests/pipelines/test_word_segmentation.py
+++ b/tests/pipelines/test_word_segmentation.py
@@ -2,14 +2,12 @@
import shutil
import unittest
-from maas_hub.snapshot_download import snapshot_download
-
+from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
-from modelscope.models.nlp import StructBertForTokenClassification
+from modelscope.models.nlp import SbertForTokenClassification
from modelscope.pipelines import WordSegmentationPipeline, pipeline
from modelscope.preprocessors import TokenClassifcationPreprocessor
from modelscope.utils.constant import Tasks
-from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.test_utils import test_level
@@ -17,19 +15,11 @@ class WordSegmentationTest(unittest.TestCase):
model_id = 'damo/nlp_structbert_word-segmentation_chinese-base'
sentence = '今天天气不错,适合出去游玩'
- def setUp(self) -> None:
- # switch to False if downloading everytime is not desired
- purge_cache = True
- if purge_cache:
- shutil.rmtree(
- get_model_cache_dir(self.model_id), ignore_errors=True)
-
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_by_direct_model_download(self):
cache_path = snapshot_download(self.model_id)
tokenizer = TokenClassifcationPreprocessor(cache_path)
- model = StructBertForTokenClassification(
- cache_path, tokenizer=tokenizer)
+ model = SbertForTokenClassification(cache_path, tokenizer=tokenizer)
pipeline1 = WordSegmentationPipeline(model, preprocessor=tokenizer)
pipeline2 = pipeline(
Tasks.word_segmentation, model=model, preprocessor=tokenizer)
@@ -46,13 +36,13 @@ class WordSegmentationTest(unittest.TestCase):
task=Tasks.word_segmentation, model=model, preprocessor=tokenizer)
print(pipeline_ins(input=self.sentence))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_model_name(self):
pipeline_ins = pipeline(
task=Tasks.word_segmentation, model=self.model_id)
print(pipeline_ins(input=self.sentence))
- @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_default_model(self):
pipeline_ins = pipeline(task=Tasks.word_segmentation)
print(pipeline_ins(input=self.sentence))
diff --git a/tests/preprocessors/test_image.py b/tests/preprocessors/test_image.py
index 21ae780e..4d66c171 100644
--- a/tests/preprocessors/test_image.py
+++ b/tests/preprocessors/test_image.py
@@ -5,7 +5,6 @@ import unittest
import PIL
from modelscope.preprocessors import load_image
-from modelscope.utils.logger import get_logger
class ImagePreprocessorTest(unittest.TestCase):
diff --git a/tests/preprocessors/test_text_to_speech.py b/tests/preprocessors/test_text_to_speech.py
index 18b66987..fd2473fd 100644
--- a/tests/preprocessors/test_text_to_speech.py
+++ b/tests/preprocessors/test_text_to_speech.py
@@ -1,6 +1,7 @@
import shutil
import unittest
+from modelscope.metainfo import Preprocessors
from modelscope.preprocessors import build_preprocessor
from modelscope.utils.constant import Fields, InputFields
from modelscope.utils.logger import get_logger
@@ -14,7 +15,7 @@ class TtsPreprocessorTest(unittest.TestCase):
lang_type = 'pinyin'
text = '今天天气不错,我们去散步吧。'
cfg = dict(
- type='text_to_tacotron_symbols',
+ type=Preprocessors.text_to_tacotron_symbols,
model_name='damo/speech_binary_tts_frontend_resource',
lang_type=lang_type)
preprocessor = build_preprocessor(cfg, Fields.audio)
diff --git a/tests/pydatasets/test_py_dataset.py b/tests/pydatasets/test_py_dataset.py
index 7accd814..e84f240a 100644
--- a/tests/pydatasets/test_py_dataset.py
+++ b/tests/pydatasets/test_py_dataset.py
@@ -2,42 +2,112 @@ import unittest
import datasets as hfdata
+from modelscope.models import Model
+from modelscope.preprocessors import SequenceClassificationPreprocessor
+from modelscope.preprocessors.base import Preprocessor
from modelscope.pydatasets import PyDataset
+from modelscope.utils.constant import Hubs
+from modelscope.utils.test_utils import require_tf, require_torch, test_level
-class PyDatasetTest(unittest.TestCase):
+class ImgPreprocessor(Preprocessor):
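+    """Minimal test-only preprocessor: reads the image at a configurable path field and resizes it."""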
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.path_field = kwargs.pop('image_path', 'image_path')
+ self.width = kwargs.pop('width', 'width')
+        self.height = kwargs.pop('height', 'height')
- def setUp(self):
- # ds1 initialized from in memory json
- self.json_data = {
- 'dummy': [{
- 'a': i,
- 'x': i * 10,
- 'c': i * 100
- } for i in range(1, 11)]
+ def __call__(self, data):
+ import cv2
+ image_path = data.get(self.path_field)
+ if not image_path:
+ return None
+ img = cv2.imread(image_path)
+        # cv2.resize takes dsize as (width, height); fall back to 128x128 when the record has no size fields
+        return {
+            'image':
+            cv2.resize(img,
+                       (data.get(self.width, 128), data.get(self.height, 128)))
+        }
- hfds1 = hfdata.Dataset.from_dict(self.json_data)
- self.ds1 = PyDataset.from_hf_dataset(hfds1)
- # ds2 initialized from hg hub
- hfds2 = hfdata.load_dataset(
- 'glue', 'mrpc', revision='2.0.0', split='train')
- self.ds2 = PyDataset.from_hf_dataset(hfds2)
- def tearDown(self):
- pass
+class PyDatasetTest(unittest.TestCase):
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ def test_ds_basic(self):
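+        # PyDataset.load should yield the same records as datasets.load_dataset for the same hub dataset.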
+ ms_ds_full = PyDataset.load('squad')
+ ms_ds_full_hf = hfdata.load_dataset('squad')
+ ms_ds_train = PyDataset.load('squad', split='train')
+ ms_ds_train_hf = hfdata.load_dataset('squad', split='train')
+ ms_image_train = PyDataset.from_hf_dataset(
+ hfdata.load_dataset('beans', split='train'))
+ self.assertEqual(ms_ds_full['train'][0], ms_ds_full_hf['train'][0])
+ self.assertEqual(ms_ds_full['validation'][0],
+ ms_ds_full_hf['validation'][0])
+ self.assertEqual(ms_ds_train[0], ms_ds_train_hf[0])
+ print(next(iter(ms_ds_full['train'])))
+ print(next(iter(ms_ds_train)))
+ print(next(iter(ms_image_train)))
+
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ @require_torch
+ def test_to_torch_dataset_text(self):
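+        # Tokenize the 'context' field with the model's own preprocessor, then iterate via a torch DataLoader.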
+ model_id = 'damo/bert-base-sst2'
+ nlp_model = Model.from_pretrained(model_id)
+ preprocessor = SequenceClassificationPreprocessor(
+ nlp_model.model_dir,
+ first_sequence='context',
+ second_sequence=None)
+ ms_ds_train = PyDataset.load('squad', split='train')
+ pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
+ import torch
+ dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
+ print(next(iter(dataloader)))
- def test_to_hf_dataset(self):
- hfds = self.ds1.to_hf_dataset()
- hfds1 = hfdata.Dataset.from_dict(self.json_data)
- self.assertEqual(hfds.data, hfds1.data)
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ @require_tf
+ def test_to_tf_dataset_text(self):
+ import tensorflow as tf
+ tf.compat.v1.enable_eager_execution()
+ model_id = 'damo/bert-base-sst2'
+ nlp_model = Model.from_pretrained(model_id)
+ preprocessor = SequenceClassificationPreprocessor(
+ nlp_model.model_dir,
+ first_sequence='context',
+ second_sequence=None)
+ ms_ds_train = PyDataset.load('squad', split='train')
+ tf_dataset = ms_ds_train.to_tf_dataset(
+ batch_size=5,
+ shuffle=True,
+ preprocessors=preprocessor,
+ drop_remainder=True)
+ print(next(iter(tf_dataset)))
- # simple map function
- hfds = hfds.map(lambda e: {'new_feature': e['dummy']['a']})
- self.assertEqual(len(hfds['new_feature']), 10)
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ @require_torch
+ def test_to_torch_dataset_img(self):
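+        # Reuse the minimal ImgPreprocessor above to turn each record's image file path into a fixed-size array.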
+ ms_image_train = PyDataset.from_hf_dataset(
+ hfdata.load_dataset('beans', split='train'))
+ pt_dataset = ms_image_train.to_torch_dataset(
+ preprocessors=ImgPreprocessor(
+ image_path='image_file_path', label='labels'))
+ import torch
+ dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
+ print(next(iter(dataloader)))
- hfds2 = self.ds2.to_hf_dataset()
- self.assertTrue(hfds2[0]['sentence1'].startswith('Amrozi'))
+ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ @require_tf
+ def test_to_tf_dataset_img(self):
+ import tensorflow as tf
+ tf.compat.v1.enable_eager_execution()
+ ms_image_train = PyDataset.load('beans', split='train')
+ tf_dataset = ms_image_train.to_tf_dataset(
+ batch_size=5,
+ shuffle=True,
+ preprocessors=ImgPreprocessor(image_path='image_file_path'),
+ drop_remainder=True,
+ label_cols='labels')
+ print(next(iter(tf_dataset)))
if __name__ == '__main__':
diff --git a/tests/run.py b/tests/run.py
index a904ba8e..38c5a897 100644
--- a/tests/run.py
+++ b/tests/run.py
@@ -61,7 +61,7 @@ if __name__ == '__main__':
parser.add_argument(
'--test_dir', default='tests', help='directory to be tested')
parser.add_argument(
- '--level', default=0, help='2 -- all, 1 -- p1, 0 -- p0')
+ '--level', default=0, type=int, help='2 -- all, 1 -- p1, 0 -- p0')
args = parser.parse_args()
set_test_level(args.level)
logger.info(f'TEST LEVEL: {test_level()}')
diff --git a/tests/utils/test_hub_operation.py b/tests/utils/test_hub_operation.py
deleted file mode 100644
index f432a60c..00000000
--- a/tests/utils/test_hub_operation.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import os.path as osp
-import unittest
-
-from maas_hub.maas_api import MaasApi
-from maas_hub.repository import Repository
-
-USER_NAME = 'maasadmin'
-PASSWORD = '12345678'
-
-
-class HubOperationTest(unittest.TestCase):
-
- def setUp(self):
- self.api = MaasApi()
- # note this is temporary before official account management is ready
- self.api.login(USER_NAME, PASSWORD)
-
- @unittest.skip('to be used for local test only')
- def test_model_repo_creation(self):
- # change to proper model names before use
- model_name = 'cv_unet_person-image-cartoon_compound-models'
- model_chinese_name = '达摩卡通化模型'
- model_org = 'damo'
- try:
- self.api.create_model(
- owner=model_org,
- name=model_name,
- chinese_name=model_chinese_name,
- visibility=5, # 1-private, 5-public
- license='apache-2.0')
- # TODO: support proper name duplication checking
- except KeyError as ke:
- if ke.args[0] == 'name':
- print(f'model {self.model_name} already exists, ignore')
- else:
- raise
-
- # Note that this can be done via git operation once model repo
- # has been created. Git-Op is the RECOMMENDED model upload approach
- @unittest.skip('to be used for local test only')
- def test_model_upload(self):
- local_path = '/path/to/local/model/directory'
- assert osp.exists(local_path), 'Local model directory not exist.'
- repo = Repository(local_dir=local_path)
- repo.push_to_hub(commit_message='Upload model files')
-
-
-if __name__ == '__main__':
- unittest.main()