diff --git a/data/test/images/image_captioning.png b/data/test/images/image_captioning.png new file mode 100644 index 00000000..de3f1918 --- /dev/null +++ b/data/test/images/image_captioning.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af83a94899a6d23339c3ecc5c4c58c57c835af57b531a2f4c50461184f820141 +size 603621 diff --git a/data/test/images/ocr_detection.jpg b/data/test/images/ocr_detection.jpg new file mode 100644 index 00000000..c347810e --- /dev/null +++ b/data/test/images/ocr_detection.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c8435db5583400be5d11a2c17910c96133b462c8a99ccaf0e19f4aac34e0a94 +size 141149 diff --git a/data/test/videos/action_recognition_test_video.mp4 b/data/test/videos/action_recognition_test_video.mp4 new file mode 100644 index 00000000..9197b770 --- /dev/null +++ b/data/test/videos/action_recognition_test_video.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24dc4237b1197321ee8486bb983fa01fd47e2b4afdb3c2df24229e5f2bd20119 +size 1475924 diff --git a/modelscope/pipelines/nlp/space/__init__.py b/modelscope/hub/__init__.py similarity index 100% rename from modelscope/pipelines/nlp/space/__init__.py rename to modelscope/hub/__init__.py diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py new file mode 100644 index 00000000..104eafbd --- /dev/null +++ b/modelscope/hub/api.py @@ -0,0 +1,264 @@ +import os +import pickle +import subprocess +from http.cookiejar import CookieJar +from os.path import expanduser +from typing import List, Optional, Tuple, Union + +import requests + +from modelscope.utils.logger import get_logger +from .constants import LOGGER_NAME +from .errors import NotExistError, is_ok, raise_on_error +from .utils.utils import get_endpoint, model_id_to_group_owner_name + +logger = get_logger() + + +class HubApi: + + def __init__(self, endpoint=None): + self.endpoint = endpoint if endpoint is not None else get_endpoint() + + def login( + self, + user_name: str, + password: str, + ) -> Tuple[str, CookieJar]: + """ + Login with username and password + + Args: + user_name(`str`): user name on modelscope + password(`str`): password + + Returns: + gitlab token: to access private repos + cookies: to authenticate yourself to ModelScope open-api + + + You only have to login once within 30 days.
+ + + TODO: handle cookies expire + + """ + path = f'{self.endpoint}/api/v1/login' + r = requests.post( + path, json={ + 'username': user_name, + 'password': password + }) + r.raise_for_status() + d = r.json() + raise_on_error(d) + + token = d['Data']['AccessToken'] + cookies = r.cookies + + # save token and cookie + ModelScopeConfig.save_token(token) + ModelScopeConfig.save_cookies(cookies) + ModelScopeConfig.write_to_git_credential(user_name, password) + + return d['Data']['AccessToken'], cookies + + def create_model(self, model_id: str, chinese_name: str, visibility: int, + license: str) -> str: + """ + Create model repo at ModelScopeHub + + Args: + model_id:(`str`): The model id + chinese_name(`str`): chinese name of the model + visibility(`int`): visibility of the model(1-private, 3-internal, 5-public) + license(`str`): license of the model, candidates can be found at: TBA + + Returns: + name of the model created + + + model_id = {owner}/{name} + + """ + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise ValueError('Token does not exist, please login first.') + + path = f'{self.endpoint}/api/v1/models' + owner_or_group, name = model_id_to_group_owner_name(model_id) + r = requests.post( + path, + json={ + 'Path': owner_or_group, + 'Name': name, + 'ChineseName': chinese_name, + 'Visibility': visibility, + 'License': license + }, + cookies=cookies) + r.raise_for_status() + raise_on_error(r.json()) + d = r.json() + return d['Data']['Name'] + + def delete_model(self, model_id): + """_summary_ + + Args: + model_id (str): The model id. + + model_id = {owner}/{name} + + """ + cookies = ModelScopeConfig.get_cookies() + path = f'{self.endpoint}/api/v1/models/{model_id}' + + r = requests.delete(path, cookies=cookies) + r.raise_for_status() + raise_on_error(r.json()) + + def get_model_url(self, model_id): + return f'{self.endpoint}/api/v1/models/{model_id}.git' + + def get_model( + self, + model_id: str, + revision: str = 'master', + ) -> str: + """ + Get model information at modelscope_hub + + Args: + model_id(`str`): The model id. + revision(`str`): revision of model + Returns: + The model details information. 
+ Raises: + NotExistError: If the model is not exist, will throw NotExistError + + model_id = {owner}/{name} + + """ + cookies = ModelScopeConfig.get_cookies() + owner_or_group, name = model_id_to_group_owner_name(model_id) + path = f'{self.endpoint}/api/v1/models/{owner_or_group}/{name}?{revision}' + + r = requests.get(path, cookies=cookies) + if r.status_code == 200: + if is_ok(r.json()): + return r.json()['Data'] + else: + raise NotExistError(r.json()['Message']) + else: + r.raise_for_status() + + def get_model_branches_and_tags( + self, + model_id: str, + ) -> Tuple[List[str], List[str]]: + cookies = ModelScopeConfig.get_cookies() + + path = f'{self.endpoint}/api/v1/models/{model_id}/revisions' + r = requests.get(path, cookies=cookies) + r.raise_for_status() + d = r.json() + raise_on_error(d) + info = d['Data'] + branches = [x['Revision'] for x in info['RevisionMap']['Branches'] + ] if info['RevisionMap']['Branches'] else [] + tags = [x['Revision'] for x in info['RevisionMap']['Tags'] + ] if info['RevisionMap']['Tags'] else [] + return branches, tags + + def get_model_files( + self, + model_id: str, + revision: Optional[str] = 'master', + root: Optional[str] = None, + recursive: Optional[str] = False, + use_cookies: Union[bool, CookieJar] = False) -> List[dict]: + + cookies = None + if isinstance(use_cookies, CookieJar): + cookies = use_cookies + elif use_cookies: + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise ValueError('Token does not exist, please login first.') + + path = f'{self.endpoint}/api/v1/models/{model_id}/repo/files?Revision={revision}&Recursive={recursive}' + if root is not None: + path = path + f'&Root={root}' + + r = requests.get(path, cookies=cookies) + + r.raise_for_status() + d = r.json() + raise_on_error(d) + + files = [] + for file in d['Data']['Files']: + if file['Name'] == '.gitignore' or file['Name'] == '.gitattributes': + continue + + files.append(file) + return files + + +class ModelScopeConfig: + path_credential = expanduser('~/.modelscope/credentials') + os.makedirs(path_credential, exist_ok=True) + + @classmethod + def save_cookies(cls, cookies: CookieJar): + with open(os.path.join(cls.path_credential, 'cookies'), 'wb+') as f: + pickle.dump(cookies, f) + + @classmethod + def get_cookies(cls): + try: + with open(os.path.join(cls.path_credential, 'cookies'), 'rb') as f: + return pickle.load(f) + except FileNotFoundError: + logger.warn("Auth token does not exist, you'll get authentication \ + error when downloading private model files. Please login first" + ) + + @classmethod + def save_token(cls, token: str): + with open(os.path.join(cls.path_credential, 'token'), 'w+') as f: + f.write(token) + + @classmethod + def get_token(cls) -> Optional[str]: + """ + Get token or None if not existent. + + Returns: + `str` or `None`: The token, `None` if it doesn't exist. 
+ + """ + token = None + try: + with open(os.path.join(cls.path_credential, 'token'), 'r') as f: + token = f.read() + except FileNotFoundError: + pass + return token + + @staticmethod + def write_to_git_credential(username: str, password: str): + with subprocess.Popen( + 'git credential-store store'.split(), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) as process: + input_username = f'username={username.lower()}' + input_password = f'password={password}' + + process.stdin.write( + f'url={get_endpoint()}\n{input_username}\n{input_password}\n\n' + .encode('utf-8')) + process.stdin.flush() diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py new file mode 100644 index 00000000..a38f9afb --- /dev/null +++ b/modelscope/hub/constants.py @@ -0,0 +1,8 @@ +MODELSCOPE_URL_SCHEME = 'http://' +DEFAULT_MODELSCOPE_DOMAIN = '101.201.119.157:32330' +DEFAULT_MODELSCOPE_GITLAB_DOMAIN = '101.201.119.157:31102' + +DEFAULT_MODELSCOPE_GROUP = 'damo' +MODEL_ID_SEPARATOR = '/' + +LOGGER_NAME = 'ModelScopeHub' diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py new file mode 100644 index 00000000..13ea709f --- /dev/null +++ b/modelscope/hub/errors.py @@ -0,0 +1,30 @@ +class NotExistError(Exception): + pass + + +class RequestError(Exception): + pass + + +def is_ok(rsp): + """ Check the request is ok + + Args: + rsp (_type_): The request response body + Failed: {'Code': 10010101004, 'Message': 'get model info failed, err: unauthorized permission', + 'RequestId': '', 'Success': False} + Success: {'Code': 200, 'Data': {}, 'Message': 'success', 'RequestId': '', 'Success': True} + """ + return rsp['Code'] == 200 and rsp['Success'] + + +def raise_on_error(rsp): + """If response error, raise exception + + Args: + rsp (_type_): The server response + """ + if rsp['Code'] == 200 and rsp['Success']: + return True + else: + raise RequestError(rsp['Message']) diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py new file mode 100644 index 00000000..e5c64f1c --- /dev/null +++ b/modelscope/hub/file_download.py @@ -0,0 +1,254 @@ +import copy +import fnmatch +import logging +import os +import sys +import tempfile +import time +from functools import partial +from hashlib import sha256 +from pathlib import Path +from typing import BinaryIO, Dict, Optional, Union +from uuid import uuid4 + +import json +import requests +from filelock import FileLock +from requests.exceptions import HTTPError +from tqdm import tqdm + +from modelscope import __version__ +from modelscope.utils.logger import get_logger +from .api import HubApi, ModelScopeConfig +from .constants import (DEFAULT_MODELSCOPE_GROUP, LOGGER_NAME, + MODEL_ID_SEPARATOR) +from .errors import NotExistError, RequestError, raise_on_error +from .utils.caching import ModelFileSystemCache +from .utils.utils import (get_cache_dir, get_endpoint, + model_id_to_group_owner_name) + +SESSION_ID = uuid4().hex +logger = get_logger() + + +def model_file_download( + model_id: str, + file_path: str, + revision: Optional[str] = 'master', + cache_dir: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, + local_files_only: Optional[bool] = False, +) -> Optional[str]: # pragma: no cover + """ + Download from a given URL and cache it if it's not already present in the + local cache. + + Given a URL, this function looks for the corresponding file in the local + cache. If it's not there, download it. Then return the path to the cached + file. 
+ + Args: + model_id (`str`): + The model to whom the file to be downloaded belongs. + file_path(`str`): + Path of the file to be downloaded, relative to the root of model repo + revision(`str`, *optional*): + revision of the model file to be downloaded. + Can be any of a branch, tag or commit hash, default to `master` + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + user_agent (`dict`, `str`, *optional*): + The user-agent info in the form of a dictionary or a string. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + if `False`, download the file anyway even it exists + + Returns: + Local path (string) of file or if networking is off, last version of + file cached on disk. + + + + Raises the following errors: + + - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + if `use_auth_token=True` and the token cannot be found. + - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) + if ETag cannot be determined. + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + + + """ + if cache_dir is None: + cache_dir = get_cache_dir() + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + group_or_owner, name = model_id_to_group_owner_name(model_id) + + cache = ModelFileSystemCache(cache_dir, group_or_owner, name) + + # if local_files_only is `True` and the file already exists in cached_path + # return the cached path + if local_files_only: + cached_file_path = cache.get_file_by_path(file_path) + if cached_file_path is not None: + logger.warning( + "File exists in local cache, but we're not sure it's up to date" + ) + return cached_file_path + else: + raise ValueError( + 'Cannot find the requested files in the cached path and outgoing' + ' traffic has been disabled. To enable model look-ups and downloads' + " online, set 'local_files_only' to False.") + + _api = HubApi() + headers = {'user-agent': http_user_agent(user_agent=user_agent, )} + branches, tags = _api.get_model_branches_and_tags(model_id) + file_to_download_info = None + is_commit_id = False + if revision in branches or revision in tags: # The revision is version or tag, + # we need to confirm the version is up to date + # we need to get the file list to check if the lateast version is cached, if so return, otherwise download + model_files = _api.get_model_files( + model_id=model_id, + revision=revision, + recursive=True, + ) + + for model_file in model_files: + if model_file['Type'] == 'tree': + continue + + if model_file['Path'] == file_path: + model_file['Branch'] = revision + if cache.exists(model_file): + return cache.get_file_by_info(model_file) + else: + file_to_download_info = model_file + + if file_to_download_info is None: + raise NotExistError('The file path: %s not exist in: %s' % + (file_path, model_id)) + else: # the revision is commit id. + cached_file_path = cache.get_file_by_path_and_commit_id( + file_path, revision) + if cached_file_path is not None: + logger.info('The specified file is in cache, skip downloading!') + return cached_file_path # the file is in cache. 
+ is_commit_id = True + # we need to download again + # TODO: skip using JWT for authorization, use cookie instead + cookies = ModelScopeConfig.get_cookies() + url_to_download = get_file_download_url(model_id, file_path, revision) + file_to_download_info = { + 'Path': file_path, + 'Revision': + revision if is_commit_id else file_to_download_info['Revision'] + } + # Prevent parallel downloads of the same file with a lock. + lock_path = cache.get_root_location() + '.lock' + + with FileLock(lock_path): + temp_file_name = next(tempfile._get_candidate_names()) + http_get_file( + url_to_download, + cache_dir, + temp_file_name, + headers=headers, + cookies=None if cookies is None else cookies.get_dict()) + return cache.put_file(file_to_download_info, + os.path.join(cache_dir, temp_file_name)) + + +def http_user_agent(user_agent: Union[Dict, str, None] = None, ) -> str: + """Formats a user-agent string with basic info about a request. + + Args: + user_agent (`str`, `dict`, *optional*): + The user agent info in the form of a dictionary or a single string. + + Returns: + The formatted user-agent string. + """ + ua = f'modelscope/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}' + + if isinstance(user_agent, dict): + ua = '; '.join(f'{k}/{v}' for k, v in user_agent.items()) + elif isinstance(user_agent, str): + ua = user_agent + return ua + + +def get_file_download_url(model_id: str, file_path: str, revision: str): + """ + Format file download url according to `model_id`, `revision` and `file_path`. + e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`, + the resulted download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md + """ + download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}' + return download_url_template.format( + endpoint=get_endpoint(), + model_id=model_id, + revision=revision, + file_path=file_path, + ) + + +def http_get_file( + url: str, + local_dir: str, + file_name: str, + cookies: Dict[str, str], + headers: Optional[Dict[str, str]] = None, +): + """ + Download remote file. Do not gobble up errors. 
This method is only used by snapshot_download, since the behavior is quite different from single file download + TODO: consolidate with http_get_file() to avoid duplicate code + + Args: + url(`str`): + actual download url of the file + local_dir(`str`): + local directory where the downloaded file is stored + file_name(`str`): + name of the file stored in `local_dir` + cookies(`Dict[str, str]`): + cookies used to authenticate the user, which is used for downloading private repos + headers(`Optional[Dict[str, str]] = None`): + http headers to carry necessary info when requesting the remote file + + """ + temp_file_manager = partial( + tempfile.NamedTemporaryFile, mode='wb', dir=local_dir, delete=False) + + with temp_file_manager() as temp_file: + logger.info('downloading %s to %s', url, temp_file.name) + headers = copy.deepcopy(headers) + + r = requests.get(url, stream=True, headers=headers, cookies=cookies) + r.raise_for_status() + + content_length = r.headers.get('Content-Length') + total = int(content_length) if content_length is not None else None + + progress = tqdm( + unit='B', + unit_scale=True, + unit_divisor=1024, + total=total, + initial=0, + desc='Downloading', + ) + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + + logger.info('storing %s in cache at %s', url, local_dir) + os.replace(temp_file.name, os.path.join(local_dir, file_name)) diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py new file mode 100644 index 00000000..5f079105 --- /dev/null +++ b/modelscope/hub/git.py @@ -0,0 +1,80 @@ +from typing import Union + +from modelscope.utils.logger import get_logger +from .constants import LOGGER_NAME +from .utils._subprocess import run_subprocess + +logger = get_logger() + + +def git_clone( + local_dir: str, + repo_url: str, +): + # TODO: use "git clone" or "git lfs clone" according to git version + # TODO: print stderr when subprocess fails + run_subprocess( + f'git clone {repo_url}'.split(), + local_dir, + True, + ) + + +def git_checkout( + local_dir: str, + revision: str, +): + run_subprocess(f'git checkout {revision}'.split(), local_dir) + + +def git_add(local_dir: str, ): + run_subprocess( + 'git add .'.split(), + local_dir, + True, + ) + + +def git_commit(local_dir: str, commit_message: str): + run_subprocess( + 'git commit -v -m'.split() + [commit_message], + local_dir, + True, + ) + + +def git_push(local_dir: str, branch: str): + # check current branch + cur_branch = git_current_branch(local_dir) + if cur_branch != branch: + logger.error( + "You're trying to push to a different branch, please double check") + return + + run_subprocess( + f'git push origin {branch}'.split(), + local_dir, + True, + ) + + +def git_current_branch(local_dir: str) -> Union[str, None]: + """ + Get current branch name + + Args: + local_dir(`str`): local model repo directory + + Returns: + branch name you're currently on + """ + try: + process = run_subprocess( + 'git rev-parse --abbrev-ref HEAD'.split(), + local_dir, + True, + ) + + return str(process.stdout).strip() + except Exception as e: + raise e diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py new file mode 100644 index 00000000..6367f903 --- /dev/null +++ b/modelscope/hub/repository.py @@ -0,0 +1,173 @@ +import os +import subprocess +from pathlib import Path +from typing import Optional, Union + +from modelscope.utils.logger import
get_logger +from .api import ModelScopeConfig +from .constants import MODELSCOPE_URL_SCHEME +from .git import git_add, git_checkout, git_clone, git_commit, git_push +from .utils._subprocess import run_subprocess +from .utils.utils import get_gitlab_domain + +logger = get_logger() + + +class Repository: + + def __init__( + self, + local_dir: str, + clone_from: Optional[str] = None, + auth_token: Optional[str] = None, + private: Optional[bool] = False, + revision: Optional[str] = 'master', + ): + """ + Instantiate a Repository object by cloning the remote ModelScopeHub repo + Args: + local_dir(`str`): + local directory to store the model files + clone_from(`Optional[str] = None`): + model id in ModelScope-hub from which to git clone + You should ignore this parameter when `local_dir` is already a git repo + auth_token(`Optional[str]`): + token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter + as the token is already saved when you log in the first time + private(`Optional[bool]`): + whether the model is private, default to False + revision(`Optional[str]`): + revision of the model you want to clone from. Can be any of a branch, tag or commit hash + """ + logger.info('Instantiating Repository object...') + + # Create local directory if not exist + os.makedirs(local_dir, exist_ok=True) + self.local_dir = os.path.join(os.getcwd(), local_dir) + + self.private = private + + # Check git and git-lfs installation + self.check_git_versions() + + # Retrieve auth token + if not private and isinstance(auth_token, str): + logger.warning( + 'cloning a public repo with a token, which will be ignored') + self.token = None + else: + if isinstance(auth_token, str): + self.token = auth_token + else: + self.token = ModelScopeConfig.get_token() + + if self.token is None: + raise EnvironmentError( + 'Token does not exist, the clone will fail for private repo.' + ' Please login first.') + + # git clone + if clone_from is not None: + self.model_id = clone_from + logger.info('cloning model repo to %s ...', self.local_dir) + git_clone(self.local_dir, self.get_repo_url()) + else: + if is_git_repo(self.local_dir): + logger.debug('[Repository] is a valid git repo') + else: + raise ValueError( + 'If not specifying `clone_from`, you need to pass Repository a' + ' valid git clone.') + + # git checkout + if isinstance(revision, str) and revision != 'master': + git_checkout(self.local_dir, revision) + + def push_to_hub(self, + commit_message: str, + revision: Optional[str] = 'master'): + """ + Push changes to the hub + + Args: + commit_message(`str`): + commit message describing the changes, it's mandatory + revision(`Optional[str]`): + remote branch you want to push to, default to `master` + + + The function complains when local and remote branch are different, please be careful + + + """ + git_add(self.local_dir) + git_commit(self.local_dir, commit_message) + + logger.info('Pushing changes to repo...') + git_push(self.local_dir, revision) + + # TODO: if git push fails, how to retry? + + def check_git_versions(self): + """ + Checks that `git` and `git-lfs` can be run. + + Raises: + `EnvironmentError`: if `git` or `git-lfs` are not installed.
+ """ + try: + git_version = run_subprocess('git --version'.split(), + self.local_dir).stdout.strip() + except FileNotFoundError: + raise EnvironmentError( + 'Looks like you do not have git installed, please install.') + + try: + lfs_version = run_subprocess('git-lfs --version'.split(), + self.local_dir).stdout.strip() + except FileNotFoundError: + raise EnvironmentError( + 'Looks like you do not have git-lfs installed, please install.' + ' You can install from https://git-lfs.github.com/.' + ' Then run `git lfs install` (you only have to do this once).') + logger.info(git_version + '\n' + lfs_version) + + def get_repo_url(self) -> str: + """ + Get repo url to clone, according whether the repo is private or not + """ + url = None + + if self.private: + url = f'{MODELSCOPE_URL_SCHEME}oauth2:{self.token}@{get_gitlab_domain()}/{self.model_id}' + else: + url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{self.model_id}' + + if not url: + raise ValueError( + 'Empty repo url, please check clone_from parameter') + + logger.debug('url to clone: %s', str(url)) + + return url + + +def is_git_repo(folder: Union[str, Path]) -> bool: + """ + Check if the folder is the root or part of a git repository + + Args: + folder (`str`): + The folder in which to run the command. + + Returns: + `bool`: `True` if the repository is part of a repository, `False` + otherwise. + """ + folder_exists = os.path.exists(os.path.join(folder, '.git')) + git_branch = subprocess.run( + 'git branch'.split(), + cwd=folder, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + return folder_exists and git_branch.returncode == 0 diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py new file mode 100644 index 00000000..90d850f4 --- /dev/null +++ b/modelscope/hub/snapshot_download.py @@ -0,0 +1,125 @@ +import os +import tempfile +from glob import glob +from pathlib import Path +from typing import Dict, Optional, Union + +from modelscope.utils.logger import get_logger +from .api import HubApi, ModelScopeConfig +from .constants import DEFAULT_MODELSCOPE_GROUP, MODEL_ID_SEPARATOR +from .errors import NotExistError, RequestError, raise_on_error +from .file_download import (get_file_download_url, http_get_file, + http_user_agent) +from .utils.caching import ModelFileSystemCache +from .utils.utils import get_cache_dir, model_id_to_group_owner_name + +logger = get_logger() + + +def snapshot_download(model_id: str, + revision: Optional[str] = 'master', + cache_dir: Union[str, Path, None] = None, + user_agent: Optional[Union[Dict, str]] = None, + local_files_only: Optional[bool] = False, + private: Optional[bool] = False) -> str: + """Download all files of a repo. + Downloads a whole snapshot of a repo's files at the specified revision. This + is useful when you want all files from a repo, because you don't know which + ones you will need a priori. All files are nested inside a folder in order + to keep their actual filename relative to that folder. + + An alternative would be to just clone a repo but this would require that the + user always has git and git-lfs installed, and properly configured. + Args: + model_id (`str`): + A user or an organization name and a repo name separated by a `/`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. NOTE: currently only branch and tag name is supported + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. 
+ user_agent (`str`, `dict`, *optional*): + The user-agent info in the form of a dictionary or a string. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + Returns: + Local folder path (string) of repo snapshot + + + Raises the following errors: + - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + if `use_auth_token=True` and the token cannot be found. + - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if + ETag cannot be determined. + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + + """ + + if cache_dir is None: + cache_dir = get_cache_dir() + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + group_or_owner, name = model_id_to_group_owner_name(model_id) + + cache = ModelFileSystemCache(cache_dir, group_or_owner, name) + if local_files_only: + if len(cache.cached_files) == 0: + raise ValueError( + 'Cannot find the requested files in the cached path and outgoing' + ' traffic has been disabled. To enable model look-ups and downloads' + " online, set 'local_files_only' to False.") + logger.warn('We can not confirm the cached file is for revision: %s' + % revision) + return cache.get_root_location( + ) # we can not confirm the cached file is for snapshot 'revision' + else: + # make headers + headers = {'user-agent': http_user_agent(user_agent=user_agent, )} + _api = HubApi() + # get file list from model repo + branches, tags = _api.get_model_branches_and_tags(model_id) + if revision not in branches and revision not in tags: + raise NotExistError('The specified branch or tag : %s not exist!' + % revision) + + model_files = _api.get_model_files( + model_id=model_id, + revision=revision, + recursive=True, + use_cookies=private) + + cookies = None + if private: + cookies = ModelScopeConfig.get_cookies() + + for model_file in model_files: + if model_file['Type'] == 'tree': + continue + # check model_file is exist in cache, if exist, skip download, otherwise download + if cache.exists(model_file): + logger.info( + 'The specified file is in cache, skip downloading!') + continue + + # get download url + url = get_file_download_url( + model_id=model_id, + file_path=model_file['Path'], + revision=revision) + + # First download to /tmp + http_get_file( + url=url, + local_dir=tempfile.gettempdir(), + file_name=model_file['Name'], + headers=headers, + cookies=None if cookies is None else cookies.get_dict()) + # put file to cache + cache.put_file( + model_file, + os.path.join(tempfile.gettempdir(), model_file['Name'])) + + return os.path.join(cache.get_root_location()) diff --git a/tests/pipelines/nlp/__init__.py b/modelscope/hub/utils/__init__.py similarity index 100% rename from tests/pipelines/nlp/__init__.py rename to modelscope/hub/utils/__init__.py diff --git a/modelscope/hub/utils/_subprocess.py b/modelscope/hub/utils/_subprocess.py new file mode 100644 index 00000000..77e9fc48 --- /dev/null +++ b/modelscope/hub/utils/_subprocess.py @@ -0,0 +1,40 @@ +import subprocess +from typing import List + + +def run_subprocess(command: List[str], + folder: str, + check=True, + **kwargs) -> subprocess.CompletedProcess: + """ + Method to run subprocesses. Calling this will capture the `stderr` and `stdout`, + please call `subprocess.run` manually in case you would like for them not to + be captured. 
+ + Args: + command (`List[str]`): + The command to execute as a list of strings. + folder (`str`): + The folder in which to run the command. + check (`bool`, *optional*, defaults to `True`): + Setting `check` to `True` will raise a `subprocess.CalledProcessError` + when the subprocess has a non-zero exit code. + kwargs (`Dict[str]`): + Keyword arguments to be passed to the `subprocess.run` underlying command. + + Returns: + `subprocess.CompletedProcess`: The completed process. + """ + if isinstance(command, str): + raise ValueError( + '`run_subprocess` should be called with a list of strings.') + + return subprocess.run( + command, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + check=check, + encoding='utf-8', + cwd=folder, + **kwargs, + ) diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py new file mode 100644 index 00000000..ac258385 --- /dev/null +++ b/modelscope/hub/utils/caching.py @@ -0,0 +1,294 @@ +import hashlib +import logging +import os +import pickle +import tempfile +import time +from shutil import move, rmtree + +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class FileSystemCache(object): + KEY_FILE_NAME = '.msc' + """Local file cache. + """ + + def __init__( + self, + cache_root_location: str, + **kwargs, + ): + """ + Parameters + ---------- + cache_location: str + The root location to store files. + """ + os.makedirs(cache_root_location, exist_ok=True) + self.cache_root_location = cache_root_location + self.load_cache() + + def get_root_location(self): + return self.cache_root_location + + def load_cache(self): + """Read set of stored blocks from file + Args: + owner(`str`): individual or group username at modelscope, can be empty for official models + name(`str`): name of the model + Returns: + The model details information. + Raises: + NotExistError: If the model is not exist, will throw NotExistError + TODO: Error based error code. + + model_id = {owner}/{name} + + """ + self.cached_files = [] + cache_keys_file_path = os.path.join(self.cache_root_location, + FileSystemCache.KEY_FILE_NAME) + if os.path.exists(cache_keys_file_path): + with open(cache_keys_file_path, 'rb') as f: + self.cached_files = pickle.load(f) + + def save_cached_files(self): + """Save cache metadata.""" + # save new meta to tmp and move to KEY_FILE_NAME + cache_keys_file_path = os.path.join(self.cache_root_location, + FileSystemCache.KEY_FILE_NAME) + # TODO: Sync file write + fd, fn = tempfile.mkstemp() + with open(fd, 'wb') as f: + pickle.dump(self.cached_files, f) + move(fn, cache_keys_file_path) + + def get_file(self, key): + """Check the key is in the cache, if exist, return the file, otherwise return None. + Args: + key(`str`): The cache key. + Returns: + If file exist, return the cached file location, otherwise None. + Raises: + None + + model_id = {owner}/{name} + + """ + pass + + def put_file(self, key, location): + """Put file to the cache, + Args: + key(`str`): The cache key + location(`str`): Location of the file, we will move the file to cache. + Returns: + The cached file path of the file. + Raises: + None + + model_id = {owner}/{name} + + """ + pass + + def remove_key(self, key): + """Remove cache key in index, The file is removed manually + + Args: + key (dict): The cache key. 
+ """ + self.cached_files.remove(key) + self.save_cached_files() + + def exists(self, key): + for cache_file in self.cached_files: + if cache_file == key: + return True + + return False + + def clear_cache(self): + """Remove all files and metadat from the cache + + In the case of multiple cache locations, this clears only the last one, + which is assumed to be the read/write one. + """ + rmtree(self.cache_root_location) + self.load_cache() + + def hash_name(self, key): + return hashlib.sha256(key.encode()).hexdigest() + + +class ModelFileSystemCache(FileSystemCache): + """Local cache file layout + cache_root/owner/model_name/|individual cached files + |.mk: file, The cache index file + Save only one version for each file. + """ + + def __init__(self, cache_root, owner, name): + """Put file to the cache + Args: + cache_root(`str`): The modelscope local cache root(default: ~/.modelscope/cache/models/) + owner(`str`): The model owner. + name('str'): The name of the model + branch('str'): The branch of model + tag('str'): The tag of model + Returns: + Raises: + None + + model_id = {owner}/{name} + + """ + super().__init__(os.path.join(cache_root, owner, name)) + + def get_file_by_path(self, file_path): + """Retrieve the cache if there is file match the path. + Args: + file_path (str): The file path in the model. + Returns: + path: the full path of the file. + """ + for cached_file in self.cached_files: + if file_path == cached_file['Path']: + cached_file_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(cached_file_path): + return cached_file_path + else: + self.remove_key(cached_file) + + return None + + def get_file_by_path_and_commit_id(self, file_path, commit_id): + """Retrieve the cache if there is file match the path. + Args: + file_path (str): The file path in the model. + commit_id (str): The commit id of the file + Returns: + path: the full path of the file. + """ + for cached_file in self.cached_files: + if file_path == cached_file['Path'] and \ + (cached_file['Revision'].startswith(commit_id) or commit_id.startswith(cached_file['Revision'])): + cached_file_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(cached_file_path): + return cached_file_path + else: + self.remove_key(cached_file) + + return None + + def get_file_by_info(self, model_file_info): + """Check if exist cache file. + + Args: + model_file_info (ModelFileInfo): The file information of the file. + + Returns: + _type_: _description_ + """ + cache_key = self.__get_cache_key(model_file_info) + for cached_file in self.cached_files: + if cached_file == cache_key: + orig_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(orig_path): + return orig_path + else: + self.remove_key(cached_file) + + return None + + def __get_cache_key(self, model_file_info): + cache_key = { + 'Path': model_file_info['Path'], + 'Revision': model_file_info['Revision'], # commit id + } + return cache_key + + def exists(self, model_file_info): + """Check the file is cached or not. 
+ + Args: + model_file_info (CachedFileInfo): The cached file info + + Returns: + bool: If exists return True otherwise False + """ + key = self.__get_cache_key(model_file_info) + is_exists = False + for cached_key in self.cached_files: + if cached_key['Path'] == key['Path'] and ( + cached_key['Revision'].startswith(key['Revision']) + or key['Revision'].startswith(cached_key['Revision'])): + is_exists = True + file_path = os.path.join(self.cache_root_location, + model_file_info['Path']) + if is_exists: + if os.path.exists(file_path): + return True + else: + self.remove_key( + model_file_info) # sameone may manual delete the file + return False + + def remove_if_exists(self, model_file_info): + """We in cache, remove it. + + Args: + model_file_info (ModelFileInfo): The model file information from server. + """ + for cached_file in self.cached_files: + if cached_file['Path'] == model_file_info['Path']: + self.remove_key(cached_file) + file_path = os.path.join(self.cache_root_location, + cached_file['Path']) + if os.path.exists(file_path): + os.remove(file_path) + + def put_file(self, model_file_info, model_file_location): + """Put model on model_file_location to cache, the model first download to /tmp, and move to cache. + + Args: + model_file_info (str): The file description returned by get_model_files + sample: + { + "CommitMessage": "add model\n", + "CommittedDate": 1654857567, + "CommitterName": "mulin.lyh", + "IsLFS": false, + "Mode": "100644", + "Name": "resnet18.pth", + "Path": "resnet18.pth", + "Revision": "09b68012b27de0048ba74003690a890af7aff192", + "Size": 46827520, + "Type": "blob" + } + model_file_location (str): The location of the temporary file. + Raises: + NotImplementedError: _description_ + + Returns: + str: The location of the cached file. + """ + self.remove_if_exists(model_file_info) # backup old revision + cache_key = self.__get_cache_key(model_file_info) + cache_full_path = os.path.join( + self.cache_root_location, + cache_key['Path']) # Branch and Tag do not have same name. 
+ cache_file_dir = os.path.dirname(cache_full_path) + if not os.path.exists(cache_file_dir): + os.makedirs(cache_file_dir, exist_ok=True) + # We can't make operation transaction + move(model_file_location, cache_full_path) + self.cached_files.append(cache_key) + self.save_cached_files() + return cache_full_path diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py new file mode 100644 index 00000000..d0704de8 --- /dev/null +++ b/modelscope/hub/utils/utils.py @@ -0,0 +1,39 @@ +import os + +from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, + DEFAULT_MODELSCOPE_GITLAB_DOMAIN, + DEFAULT_MODELSCOPE_GROUP, + MODEL_ID_SEPARATOR, + MODELSCOPE_URL_SCHEME) + + +def model_id_to_group_owner_name(model_id): + if MODEL_ID_SEPARATOR in model_id: + group_or_owner = model_id.split(MODEL_ID_SEPARATOR)[0] + name = model_id.split(MODEL_ID_SEPARATOR)[1] + else: + group_or_owner = DEFAULT_MODELSCOPE_GROUP + name = model_id + return group_or_owner, name + + +def get_cache_dir(): + """ + cache dir precedence: + function parameter > enviroment > ~/.cache/modelscope/hub + """ + default_cache_dir = os.path.expanduser( + os.path.join('~/.cache', 'modelscope')) + return os.getenv('MODELSCOPE_CACHE', os.path.join(default_cache_dir, + 'hub')) + + +def get_endpoint(): + modelscope_domain = os.getenv('MODELSCOPE_DOMAIN', + DEFAULT_MODELSCOPE_DOMAIN) + return MODELSCOPE_URL_SCHEME + modelscope_domain + + +def get_gitlab_domain(): + return os.getenv('MODELSCOPE_GITLAB_DOMAIN', + DEFAULT_MODELSCOPE_GITLAB_DOMAIN) diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py new file mode 100644 index 00000000..f89b7b27 --- /dev/null +++ b/modelscope/metainfo.py @@ -0,0 +1,104 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + + +class Models(object): + """ Names for different models. + + Holds the standard model name to use for identifying different model. + This should be used to register models. + + Model name should only contain model info but not task info. + """ + # vision models + + # nlp models + bert = 'bert' + palm = 'palm-v2' + structbert = 'structbert' + veco = 'veco' + + # audio models + sambert_hifi_16k = 'sambert-hifi-16k' + generic_tts_frontend = 'generic-tts-frontend' + hifigan16k = 'hifigan16k' + + # multi-modal models + ofa = 'ofa' + + +class Pipelines(object): + """ Names for different pipelines. + + Holds the standard pipline name to use for identifying different pipeline. + This should be used to register pipelines. + + For pipeline which support different models and implements the common function, we + should use task name for this pipeline. + For pipeline which suuport only one model, we should use ${Model}-${Task} as its name. 
+ """ + # vision tasks + image_matting = 'unet-image-matting' + person_image_cartoon = 'unet-person-image-cartoon' + ocr_detection = 'resnet18-ocr-detection' + action_recognition = 'TAdaConv_action-recognition' + + # nlp tasks + sentence_similarity = 'sentence-similarity' + word_segmentation = 'word-segmentation' + text_generation = 'text-generation' + sentiment_analysis = 'sentiment-analysis' + sentiment_classification = 'sentiment-classification' + fill_mask = 'fill-mask' + nli = 'nli' + dialog_intent_prediction = 'dialog-intent-prediction' + dialog_modeling = 'dialog-modeling' + dialog_state_tracking = 'dialog_state_tracking' + + # audio tasks + sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts' + speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k' + + # multi-modal tasks + image_caption = 'image-caption' + + +class Trainers(object): + """ Names for different trainer. + + Holds the standard trainer name to use for identifying different trainer. + This should be used to register trainers. + + For a general Trainer, you can use easynlp-trainer/ofa-trainer/sofa-trainer. + For a model specific Trainer, you can use ${ModelName}-${Task}-trainer. + """ + + default = 'Trainer' + + +class Preprocessors(object): + """ Names for different preprocessor. + + Holds the standard preprocessor name to use for identifying different preprocessor. + This should be used to register preprocessors. + + For a general preprocessor, just use the function name as preprocessor name such as + resize-image, random-crop + For a model-specific preprocessor, use ${modelname}-${fuction} + """ + + # cv preprocessor + load_image = 'load-image' + + # nlp preprocessor + bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer' + palm_text_gen_tokenizer = 'palm-text-gen-tokenizer' + token_cls_tokenizer = 'token-cls-tokenizer' + nli_tokenizer = 'nli-tokenizer' + sen_cls_tokenizer = 'sen-cls-tokenizer' + + # audio preprocessor + linear_aec_fbank = 'linear-aec-fbank' + text_to_tacotron_symbols = 'text-to-tacotron-symbols' + + # multi-modal + ofa_image_caption = 'ofa-image-caption' diff --git a/modelscope/models/__init__.py b/modelscope/models/__init__.py index 7d70e6ca..d3423a3f 100644 --- a/modelscope/models/__init__.py +++ b/modelscope/models/__init__.py @@ -1,7 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
-from .audio.tts.am import SambertNetHifi16k -from .audio.tts.vocoder import Hifigan16k +# from .audio.tts.am import SambertNetHifi16k +# from .audio.tts.vocoder import Hifigan16k from .base import Model from .builder import MODELS, build_model -from .nlp import BertForSequenceClassification, SbertForSentenceSimilarity +# from .multi_model import OfaForImageCaptioning +from .nlp import (BertForSequenceClassification, SbertForNLI, + SbertForSentenceSimilarity, SbertForSentimentClassification, + SbertForTokenClassification, StructBertForMaskedLM, + VecoForMaskedLM) diff --git a/modelscope/models/audio/tts/am/sambert_hifi_16k.py b/modelscope/models/audio/tts/am/sambert_hifi_16k.py index 2db9abc6..fc6d519a 100644 --- a/modelscope/models/audio/tts/am/sambert_hifi_16k.py +++ b/modelscope/models/audio/tts/am/sambert_hifi_16k.py @@ -6,6 +6,7 @@ import numpy as np import tensorflow as tf from sklearn.preprocessing import MultiLabelBinarizer +from modelscope.metainfo import Models from modelscope.models.base import Model from modelscope.models.builder import MODELS from modelscope.utils.constant import ModelFile, Tasks @@ -17,7 +18,7 @@ __all__ = ['SambertNetHifi16k'] def multi_label_symbol_to_sequence(my_classes, my_symbol): - one_hot = MultiLabelBinarizer(my_classes) + one_hot = MultiLabelBinarizer(classes=my_classes) tokens = my_symbol.strip().split(' ') sequences = [] for token in tokens: @@ -26,7 +27,8 @@ def multi_label_symbol_to_sequence(my_classes, my_symbol): return one_hot.fit_transform(sequences) -@MODELS.register_module(Tasks.text_to_speech, module_name=r'sambert_hifi_16k') +@MODELS.register_module( + Tasks.text_to_speech, module_name=Models.sambert_hifi_16k) class SambertNetHifi16k(Model): def __init__(self, diff --git a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py index ed34143f..757e4db9 100644 --- a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py +++ b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py @@ -2,8 +2,7 @@ import os import zipfile from typing import Any, Dict, List -import ttsfrd - +from modelscope.metainfo import Models from modelscope.models.base import Model from modelscope.models.builder import MODELS from modelscope.utils.audio.tts_exceptions import ( @@ -15,11 +14,12 @@ __all__ = ['GenericTtsFrontend'] @MODELS.register_module( - Tasks.text_to_speech, module_name=r'generic_tts_frontend') + Tasks.text_to_speech, module_name=Models.generic_tts_frontend) class GenericTtsFrontend(Model): def __init__(self, model_dir='.', lang_type='pinyin', *args, **kwargs): super().__init__(model_dir, *args, **kwargs) + import ttsfrd frontend = ttsfrd.TtsFrontendEngine() zip_file = os.path.join(model_dir, 'resource.zip') self._res_path = os.path.join(model_dir, 'resource') diff --git a/modelscope/models/audio/tts/vocoder/hifigan16k.py b/modelscope/models/audio/tts/vocoder/hifigan16k.py index 0d917dbe..b3fd9cf6 100644 --- a/modelscope/models/audio/tts/vocoder/hifigan16k.py +++ b/modelscope/models/audio/tts/vocoder/hifigan16k.py @@ -10,6 +10,7 @@ import numpy as np import torch from scipy.io.wavfile import write +from modelscope.metainfo import Models from modelscope.models.base import Model from modelscope.models.builder import MODELS from modelscope.utils.audio.tts_exceptions import \ @@ -36,7 +37,7 @@ class AttrDict(dict): self.__dict__ = self -@MODELS.register_module(Tasks.text_to_speech, module_name=r'hifigan16k') 
+@MODELS.register_module(Tasks.text_to_speech, module_name=Models.hifigan16k) class Hifigan16k(Model): def __init__(self, model_dir, *args, **kwargs): diff --git a/modelscope/models/audio/tts/vocoder/models/models.py b/modelscope/models/audio/tts/vocoder/models/models.py index 83fc7dc2..c46a9204 100755 --- a/modelscope/models/audio/tts/vocoder/models/models.py +++ b/modelscope/models/audio/tts/vocoder/models/models.py @@ -3,7 +3,6 @@ from distutils.version import LooseVersion import torch import torch.nn as nn import torch.nn.functional as F -from pytorch_wavelets import DWT1DForward from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm @@ -357,6 +356,7 @@ class MultiScaleDiscriminator(torch.nn.Module): DiscriminatorS(), DiscriminatorS(), ]) + from pytorch_wavelets import DWT1DForward self.meanpools = nn.ModuleList( [DWT1DForward(wave='db3', J=1), DWT1DForward(wave='db3', J=1)]) diff --git a/modelscope/models/base.py b/modelscope/models/base.py index ab0d22cc..cb6d2b0e 100644 --- a/modelscope/models/base.py +++ b/modelscope/models/base.py @@ -4,12 +4,13 @@ import os.path as osp from abc import ABC, abstractmethod from typing import Dict, Union -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.builder import build_model from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile -from modelscope.utils.hub import get_model_cache_dir +from modelscope.utils.logger import get_logger + +logger = get_logger() Tensor = Union['torch.Tensor', 'tf.Tensor'] @@ -47,21 +48,25 @@ class Model(ABC): if osp.exists(model_name_or_path): local_model_dir = model_name_or_path else: - cache_path = get_model_cache_dir(model_name_or_path) - local_model_dir = cache_path if osp.exists( - cache_path) else snapshot_download(model_name_or_path) - # else: - # raise ValueError( - # 'Remote model repo {model_name_or_path} does not exists') - + local_model_dir = snapshot_download(model_name_or_path) + logger.info(f'initialize model from {local_model_dir}') cfg = Config.from_file( osp.join(local_model_dir, ModelFile.CONFIGURATION)) task_name = cfg.task model_cfg = cfg.model + assert hasattr( + cfg, 'pipeline'), 'pipeline config is missing from config file.' + pipeline_cfg = cfg.pipeline # TODO @wenmeng.zwm may should manually initialize model after model building if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): model_cfg.type = model_cfg.model_type + model_cfg.model_dir = local_model_dir + for k, v in kwargs.items(): model_cfg.k = v - return build_model(model_cfg, task_name) + model = build_model(model_cfg, task_name) + + # dynamically add pipeline info to model for pipeline inference + model.pipeline = pipeline_cfg + return model diff --git a/modelscope/models/cv/action_recognition/__init__.py b/modelscope/models/cv/action_recognition/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/models/cv/action_recognition/models.py b/modelscope/models/cv/action_recognition/models.py new file mode 100644 index 00000000..e85b6d81 --- /dev/null +++ b/modelscope/models/cv/action_recognition/models.py @@ -0,0 +1,91 @@ +import torch +import torch.nn as nn + +from .tada_convnext import TadaConvNeXt + + +class BaseVideoModel(nn.Module): + """ + Standard video model. 
+ The model is divided into the backbone and the head, where the backbone + extracts features and the head performs classification. + + The backbones can be defined in model/base/backbone.py or anywhere else + as long as the backbone is registered by the BACKBONE_REGISTRY. + The heads can be defined in model/module_zoo/heads/ or anywhere else + as long as the head is registered by the HEAD_REGISTRY. + + The registries automatically finds the registered modules and construct + the base video model. + """ + + def __init__(self, cfg): + """ + Args: + cfg (Config): global config object. + """ + super(BaseVideoModel, self).__init__() + # the backbone is created according to meta-architectures + # defined in models/base/backbone.py + self.backbone = TadaConvNeXt(cfg) + + # the head is created according to the heads + # defined in models/module_zoo/heads + self.head = BaseHead(cfg) + + def forward(self, x): + x = self.backbone(x) + x = self.head(x) + return x + + +class BaseHead(nn.Module): + """ + Constructs base head. + """ + + def __init__( + self, + cfg, + ): + """ + Args: + cfg (Config): global config object. + """ + super(BaseHead, self).__init__() + self.cfg = cfg + dim = cfg.VIDEO.BACKBONE.NUM_OUT_FEATURES + num_classes = cfg.VIDEO.HEAD.NUM_CLASSES + dropout_rate = cfg.VIDEO.HEAD.DROPOUT_RATE + activation_func = cfg.VIDEO.HEAD.ACTIVATION + self._construct_head(dim, num_classes, dropout_rate, activation_func) + + def _construct_head(self, dim, num_classes, dropout_rate, activation_func): + self.global_avg_pool = nn.AdaptiveAvgPool3d(1) + + if dropout_rate > 0.0: + self.dropout = nn.Dropout(dropout_rate) + + self.out = nn.Linear(dim, num_classes, bias=True) + + if activation_func == 'softmax': + self.activation = nn.Softmax(dim=-1) + elif activation_func == 'sigmoid': + self.activation = nn.Sigmoid() + else: + raise NotImplementedError('{} is not supported as an activation' + 'function.'.format(activation_func)) + + def forward(self, x): + if len(x.shape) == 5: + x = self.global_avg_pool(x) + # (N, C, T, H, W) -> (N, T, H, W, C). + x = x.permute((0, 2, 3, 4, 1)) + if hasattr(self, 'dropout'): + out = self.dropout(x) + else: + out = x + out = self.out(out) + out = self.activation(out) + out = out.view(out.shape[0], -1) + return out, x.view(x.shape[0], -1) diff --git a/modelscope/models/cv/action_recognition/tada_convnext.py b/modelscope/models/cv/action_recognition/tada_convnext.py new file mode 100644 index 00000000..379b5271 --- /dev/null +++ b/modelscope/models/cv/action_recognition/tada_convnext.py @@ -0,0 +1,472 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.utils import _pair, _triple + + +def drop_path(x, drop_prob: float = 0., training: bool = False): + """ + From https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py. + Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for + changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + """ + if drop_prob == 0. 
or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0], ) + (1, ) * ( + x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = keep_prob + torch.rand( + shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(keep_prob) * random_tensor + return output + + +class DropPath(nn.Module): + """ + From https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py. + Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + """ + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +class TadaConvNeXt(nn.Module): + r""" ConvNeXt + A PyTorch impl of : `A ConvNet for the 2020s` - + https://arxiv.org/pdf/2201.03545.pdf + + Args: + in_chans (int): Number of input image channels. Default: 3 + num_classes (int): Number of classes for classification head. Default: 1000 + depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] + dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] + drop_path_rate (float): Stochastic depth rate. Default: 0. + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. + head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1. + """ + + def __init__( + self, cfg + # in_chans=3, num_classes=1000, + # depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., + # layer_scale_init_value=1e-6, head_init_scale=1., + ): + super().__init__() + in_chans = cfg.VIDEO.BACKBONE.NUM_INPUT_CHANNELS + dims = cfg.VIDEO.BACKBONE.NUM_FILTERS + drop_path_rate = cfg.VIDEO.BACKBONE.DROP_PATH + depths = cfg.VIDEO.BACKBONE.DEPTH + layer_scale_init_value = cfg.VIDEO.BACKBONE.LARGE_SCALE_INIT_VALUE + stem_t_kernel_size = cfg.VIDEO.BACKBONE.STEM.T_KERNEL_SIZE if hasattr( + cfg.VIDEO.BACKBONE.STEM, 'T_KERNEL_SIZE') else 2 + t_stride = cfg.VIDEO.BACKBONE.STEM.T_STRIDE if hasattr( + cfg.VIDEO.BACKBONE.STEM, 'T_STRIDE') else 2 + + self.downsample_layers = nn.ModuleList( + ) # stem and 3 intermediate downsampling conv layers + stem = nn.Sequential( + nn.Conv3d( + in_chans, + dims[0], + kernel_size=(stem_t_kernel_size, 4, 4), + stride=(t_stride, 4, 4), + padding=((stem_t_kernel_size - 1) // 2, 0, 0)), + LayerNorm(dims[0], eps=1e-6, data_format='channels_first')) + self.downsample_layers.append(stem) + for i in range(3): + downsample_layer = nn.Sequential( + LayerNorm(dims[i], eps=1e-6, data_format='channels_first'), + nn.Conv3d( + dims[i], + dims[i + 1], + kernel_size=(1, 2, 2), + stride=(1, 2, 2)), + ) + self.downsample_layers.append(downsample_layer) + + self.stages = nn.ModuleList( + ) # 4 feature resolution stages, each consisting of multiple residual blocks + dp_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] + cur = 0 + for i in range(4): + stage = nn.Sequential(*[ + TAdaConvNeXtBlock( + cfg, + dim=dims[i], + drop_path=dp_rates[cur + j], + layer_scale_init_value=layer_scale_init_value) + for j in range(depths[i]) + ]) + self.stages.append(stage) + cur += depths[i] + + self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer + + def forward_features(self, x): + for i in range(4): + x = self.downsample_layers[i](x) + x = self.stages[i](x) + return self.norm(x.mean( + [-3, -2, -1])) # global average pooling, (N, C, H, W) -> (N, C) + + def forward(self, x): + if isinstance(x, dict): + x = 
x['video'] + x = self.forward_features(x) + return x + + def get_num_layers(self): + return 12, 0 + + +class ConvNeXtBlock(nn.Module): + r""" ConvNeXt Block. There are two equivalent implementations: + (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) + (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back + We use (2) as we find it slightly faster in PyTorch + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. + """ + + def __init__(self, cfg, dim, drop_path=0., layer_scale_init_value=1e-6): + super().__init__() + self.dwconv = nn.Conv3d( + dim, dim, kernel_size=(1, 7, 7), padding=(0, 3, 3), + groups=dim) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear( + dim, + 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.pwconv2 = nn.Linear(4 * dim, dim) + self.gamma = nn.Parameter( + layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath( + drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x) + x = x.permute(0, 2, 3, 4, 1) # (N, C, T, H, W) -> (N, T, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 4, 1, 2, 3) # (N, T, H, W, C) -> (N, C, T, H, W) + + x = input + self.drop_path(x) + return x + + +class LayerNorm(nn.Module): + r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. + The ordering of the dimensions in the inputs. channels_last corresponds to inputs with + shape (batch_size, height, width, channels) while channels_first corresponds to inputs + with shape (batch_size, channels, height, width). + """ + + def __init__(self, + normalized_shape, + eps=1e-6, + data_format='channels_last'): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.data_format = data_format + if self.data_format not in ['channels_last', 'channels_first']: + raise NotImplementedError + self.normalized_shape = (normalized_shape, ) + + def forward(self, x): + if self.data_format == 'channels_last': + return F.layer_norm(x, self.normalized_shape, self.weight, + self.bias, self.eps) + elif self.data_format == 'channels_first': + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None, None] * x + self.bias[:, None, None, + None] + return x + + +class TAdaConvNeXtBlock(nn.Module): + r""" ConvNeXt Block. There are two equivalent implementations: + (1) DwConv -> LayerNorm (channels_fi rst) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) + (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back + We use (2) as we find it slightly faster in PyTorch + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 
+ """ + + def __init__(self, cfg, dim, drop_path=0., layer_scale_init_value=1e-6): + super().__init__() + layer_scale_init_value = float(layer_scale_init_value) + self.dwconv = TAdaConv2d( + dim, + dim, + kernel_size=(1, 7, 7), + padding=(0, 3, 3), + groups=dim, + cal_dim='cout') + route_func_type = cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_TYPE + if route_func_type == 'normal': + self.dwconv_rf = RouteFuncMLP( + c_in=dim, + ratio=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_R, + kernels=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_K, + with_bias_cal=self.dwconv.bias is not None) + elif route_func_type == 'normal_lngelu': + self.dwconv_rf = RouteFuncMLPLnGelu( + c_in=dim, + ratio=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_R, + kernels=cfg.VIDEO.BACKBONE.BRANCH.ROUTE_FUNC_K, + with_bias_cal=self.dwconv.bias is not None) + else: + raise ValueError( + 'Unknown route_func_type: {}'.format(route_func_type)) + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear( + dim, + 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.pwconv2 = nn.Linear(4 * dim, dim) + self.gamma = nn.Parameter( + layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath( + drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x, self.dwconv_rf(x)) + x = x.permute(0, 2, 3, 4, 1) # (N, C, T, H, W) -> (N, T, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 4, 1, 2, 3) # (N, T, H, W, C) -> (N, C, T, H, W) + + x = input + self.drop_path(x) + return x + + +class RouteFuncMLPLnGelu(nn.Module): + """ + The routing function for generating the calibration weights. + """ + + def __init__(self, + c_in, + ratio, + kernels, + with_bias_cal=False, + bn_eps=1e-5, + bn_mmt=0.1): + """ + Args: + c_in (int): number of input channels. + ratio (int): reduction ratio for the routing function. + kernels (list): temporal kernel size of the stacked 1D convolutions + """ + super(RouteFuncMLPLnGelu, self).__init__() + self.c_in = c_in + self.with_bias_cal = with_bias_cal + self.avgpool = nn.AdaptiveAvgPool3d((None, 1, 1)) + self.globalpool = nn.AdaptiveAvgPool3d(1) + self.g = nn.Conv3d( + in_channels=c_in, + out_channels=c_in, + kernel_size=1, + padding=0, + ) + self.a = nn.Conv3d( + in_channels=c_in, + out_channels=int(c_in // ratio), + kernel_size=[kernels[0], 1, 1], + padding=[kernels[0] // 2, 0, 0], + ) + # self.bn = nn.BatchNorm3d(int(c_in//ratio), eps=bn_eps, momentum=bn_mmt) + self.ln = LayerNorm( + int(c_in // ratio), eps=1e-6, data_format='channels_first') + self.gelu = nn.GELU() + # self.relu = nn.ReLU(inplace=True) + self.b = nn.Conv3d( + in_channels=int(c_in // ratio), + out_channels=c_in, + kernel_size=[kernels[1], 1, 1], + padding=[kernels[1] // 2, 0, 0], + bias=False) + self.b.skip_init = True + self.b.weight.data.zero_() # to make sure the initial values + # for the output is 1. + if with_bias_cal: + self.b_bias = nn.Conv3d( + in_channels=int(c_in // ratio), + out_channels=c_in, + kernel_size=[kernels[1], 1, 1], + padding=[kernels[1] // 2, 0, 0], + bias=False) + self.b_bias.skip_init = True + self.b_bias.weight.data.zero_() # to make sure the initial values + # for the output is 1. 
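    # Editor's note (illustrative sketch, not part of the original diff): this
    # routing function emits one calibration tensor per frame, shaped
    # (N, C, T, 1, 1).  Because self.b / self.b_bias are zero-initialised and
    # forward() adds 1, the calibration starts out as the identity, so a
    # freshly initialised TAdaConv2d behaves like a plain depthwise conv.
    # A shape-check sketch with made-up sizes, assuming the classes defined in
    # this file are importable and torch is installed:
    #
    #   import torch
    #   rf = RouteFuncMLPLnGelu(c_in=96, ratio=4, kernels=[3, 3],
    #                           with_bias_cal=True)
    #   conv = TAdaConv2d(96, 96, kernel_size=(1, 7, 7), padding=(0, 3, 3),
    #                     groups=96, cal_dim='cout')
    #   x = torch.randn(2, 96, 8, 56, 56)        # (N, C, T, H, W)
    #   w_alpha, b_alpha = rf(x)                 # each (2, 96, 8, 1, 1), ~1.0
    #   y = conv(x, [w_alpha, b_alpha])          # (2, 96, 8, 56, 56)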
+ + def forward(self, x): + g = self.globalpool(x) + x = self.avgpool(x) + x = self.a(x + self.g(g)) + # x = self.bn(x) + # x = self.relu(x) + x = self.ln(x) + x = self.gelu(x) + if self.with_bias_cal: + return [self.b(x) + 1, self.b_bias(x) + 1] + else: + return self.b(x) + 1 + + +class TAdaConv2d(nn.Module): + """ + Performs temporally adaptive 2D convolution. + Currently, only application on 5D tensors is supported, which makes TAdaConv2d + essentially a 3D convolution with temporal kernel size of 1. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + cal_dim='cin'): + super(TAdaConv2d, self).__init__() + """ + Args: + in_channels (int): number of input channels. + out_channels (int): number of output channels. + kernel_size (list): kernel size of TAdaConv2d. + stride (list): stride for the convolution in TAdaConv2d. + padding (list): padding for the convolution in TAdaConv2d. + dilation (list): dilation of the convolution in TAdaConv2d. + groups (int): number of groups for TAdaConv2d. + bias (bool): whether to use bias in TAdaConv2d. + calibration_mode (str): calibrated dimension in TAdaConv2d. + Supported input "cin", "cout". + """ + + kernel_size = _triple(kernel_size) + stride = _triple(stride) + padding = _triple(padding) + dilation = _triple(dilation) + + assert kernel_size[0] == 1 + assert stride[0] == 1 + assert padding[0] == 0 + assert dilation[0] == 1 + assert cal_dim in ['cin', 'cout'] + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.cal_dim = cal_dim + + # base weights (W_b) + self.weight = nn.Parameter( + torch.Tensor(1, 1, out_channels, in_channels // groups, + kernel_size[1], kernel_size[2])) + if bias: + self.bias = nn.Parameter(torch.Tensor(1, 1, out_channels)) + else: + self.register_parameter('bias', None) + + nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + nn.init.uniform_(self.bias, -bound, bound) + + def forward(self, x, alpha): + """ + Args: + x (tensor): feature to perform convolution on. + alpha (tensor): calibration weight for the base weights. 
+ W_t = alpha_t * W_b + """ + if isinstance(alpha, list): + w_alpha, b_alpha = alpha[0], alpha[1] + else: + w_alpha = alpha + b_alpha = None + _, _, c_out, c_in, kh, kw = self.weight.size() + b, c_in, t, h, w = x.size() + x = x.permute(0, 2, 1, 3, 4).reshape(1, -1, h, w) + + if self.cal_dim == 'cin': + # w_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, 1, C, H(1), W(1) + # corresponding to calibrating the input channel + weight = (w_alpha.permute(0, 2, 1, 3, 4).unsqueeze(2) + * self.weight).reshape(-1, c_in // self.groups, kh, kw) + elif self.cal_dim == 'cout': + # w_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, C, 1, H(1), W(1) + # corresponding to calibrating the input channel + weight = (w_alpha.permute(0, 2, 1, 3, 4).unsqueeze(3) + * self.weight).reshape(-1, c_in // self.groups, kh, kw) + + bias = None + if self.bias is not None: + if b_alpha is not None: + # b_alpha: B, C, T, H(1), W(1) -> B, T, C, H(1), W(1) -> B, T, C + bias = (b_alpha.permute(0, 2, 1, 3, 4).squeeze() + * self.bias).reshape(-1) + else: + bias = self.bias.repeat(b, t, 1).reshape(-1) + output = F.conv2d( + x, + weight=weight, + bias=bias, + stride=self.stride[1:], + padding=self.padding[1:], + dilation=self.dilation[1:], + groups=self.groups * b * t) + + output = output.view(b, t, c_out, output.size(-2), + output.size(-1)).permute(0, 2, 1, 3, 4) + + return output + + def __repr__(self): + return f'TAdaConv2d({self.in_channels}, {self.out_channels}, kernel_size={self.kernel_size}, ' +\ + f"stride={self.stride}, padding={self.padding}, bias={self.bias is not None}, cal_dim=\"{self.cal_dim}\")" diff --git a/modelscope/models/multi_model/__init__.py b/modelscope/models/multi_model/__init__.py new file mode 100644 index 00000000..02e8d6ab --- /dev/null +++ b/modelscope/models/multi_model/__init__.py @@ -0,0 +1 @@ +from .image_captioning_model import OfaForImageCaptioning diff --git a/modelscope/models/multi_model/image_captioning_model.py b/modelscope/models/multi_model/image_captioning_model.py new file mode 100644 index 00000000..79ab2b5f --- /dev/null +++ b/modelscope/models/multi_model/image_captioning_model.py @@ -0,0 +1,80 @@ +import os.path as osp +from typing import Any, Dict + +from PIL import Image + +from modelscope.metainfo import Models +from modelscope.utils.constant import ModelFile, Tasks +from ..base import Model +from ..builder import MODELS + +__all__ = ['OfaForImageCaptioning'] + + +@MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa) +class OfaForImageCaptioning(Model): + + def __init__(self, model_dir, *args, **kwargs): + super().__init__(model_dir=model_dir, *args, **kwargs) + ckpt_name = ModelFile.TORCH_MODEL_FILE + local_model = osp.join(model_dir, ckpt_name) + bpe_dir = model_dir + # turn on cuda if GPU is available + from fairseq import checkpoint_utils, tasks, utils + from ofa.tasks.mm_tasks import CaptionTask + from ofa.utils.eval_utils import eval_caption + self.eval_caption = eval_caption + + tasks.register_task('caption', CaptionTask) + use_cuda = kwargs['use_cuda'] if 'use_cuda' in kwargs else False + use_fp16 = kwargs[ + 'use_fp16'] if 'use_fp16' in kwargs and use_cuda else False + overrides = { + 'bpe_dir': bpe_dir, + 'eval_cider': False, + 'beam': 5, + 'max_len_b': 16, + 'no_repeat_ngram_size': 3, + 'seed': 7 + } + models, cfg, task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(local_model), arg_overrides=overrides) + + # Move models to GPU + for model in models: + model.eval() + if use_cuda: + model.cuda() + if use_fp16: 
+ model.half() + model.prepare_for_inference_(cfg) + self.models = models + # Initialize generator + self.generator = task.build_generator(models, cfg.generation) + + # Initialize transform + from torchvision import transforms + mean = [0.5, 0.5, 0.5] + std = [0.5, 0.5, 0.5] + + self.patch_resize_transform = transforms.Compose([ + lambda image: image.convert('RGB'), + transforms.Resize( + (cfg.task.patch_image_size, cfg.task.patch_image_size), + interpolation=Image.BICUBIC), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + self.task = task + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + results, _ = self.eval_caption(self.task, self.generator, self.models, + input) + return { + 'image_id': results[0]['image_id'], + 'caption': results[0]['caption'] + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + # What should we do here ? + return inputs diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index e62ab404..49cbd053 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -1,6 +1,9 @@ from .bert_for_sequence_classification import * # noqa F403 +from .masked_language_model import * # noqa F403 from .palm_for_text_generation import * # noqa F403 +from .sbert_for_nli import * # noqa F403 from .sbert_for_sentence_similarity import * # noqa F403 +from .sbert_for_sentiment_classification import * # noqa F403 from .sbert_for_token_classification import * # noqa F403 from .space.dialog_intent_prediction_model import * # noqa F403 from .space.dialog_modeling_model import * # noqa F403 diff --git a/modelscope/models/nlp/bert_for_sequence_classification.py b/modelscope/models/nlp/bert_for_sequence_classification.py index a3cc4b68..7d85fa28 100644 --- a/modelscope/models/nlp/bert_for_sequence_classification.py +++ b/modelscope/models/nlp/bert_for_sequence_classification.py @@ -4,6 +4,7 @@ from typing import Any, Dict import json import numpy as np +from modelscope.metainfo import Models from modelscope.utils.constant import Tasks from ..base import Model from ..builder import MODELS @@ -11,8 +12,7 @@ from ..builder import MODELS __all__ = ['BertForSequenceClassification'] -@MODELS.register_module( - Tasks.text_classification, module_name=r'bert-sentiment-analysis') +@MODELS.register_module(Tasks.text_classification, module_name=Models.bert) class BertForSequenceClassification(Model): def __init__(self, model_dir: str, *args, **kwargs): diff --git a/modelscope/models/nlp/masked_language_model.py b/modelscope/models/nlp/masked_language_model.py new file mode 100644 index 00000000..bb255c9c --- /dev/null +++ b/modelscope/models/nlp/masked_language_model.py @@ -0,0 +1,63 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +from ...metainfo import Models +from ...utils.constant import Tasks +from ..base import Model, Tensor +from ..builder import MODELS + +__all__ = ['StructBertForMaskedLM', 'VecoForMaskedLM'] + + +class MaskedLanguageModelBase(Model): + + def __init__(self, model_dir: str, *args, **kwargs): + super().__init__(model_dir, *args, **kwargs) + self.model = self.build_model() + + def build_model(self): + raise NotImplementedError() + + def train(self): + return self.model.train() + + def eval(self): + return self.model.eval() + + @property + def config(self): + if hasattr(self.model, 'config'): + return self.model.config + return None + + def forward(self, input: Dict[str, Tensor]) -> Dict[str, np.ndarray]: + """return the result by the model 
+ + Args: + input (Dict[str, Any]): the preprocessed data + + Returns: + Dict[str, np.ndarray]: results + """ + rst = self.model( + input_ids=input['input_ids'], + attention_mask=input['attention_mask'], + token_type_ids=input['token_type_ids']) + return {'logits': rst['logits'], 'input_ids': input['input_ids']} + + +@MODELS.register_module(Tasks.fill_mask, module_name=Models.structbert) +class StructBertForMaskedLM(MaskedLanguageModelBase): + + def build_model(self): + from sofa import SbertForMaskedLM + return SbertForMaskedLM.from_pretrained(self.model_dir) + + +@MODELS.register_module(Tasks.fill_mask, module_name=Models.veco) +class VecoForMaskedLM(MaskedLanguageModelBase): + + def build_model(self): + from sofa import VecoForMaskedLM + return VecoForMaskedLM.from_pretrained(self.model_dir) diff --git a/modelscope/models/nlp/palm_for_text_generation.py b/modelscope/models/nlp/palm_for_text_generation.py index e5799feb..f6c15387 100644 --- a/modelscope/models/nlp/palm_for_text_generation.py +++ b/modelscope/models/nlp/palm_for_text_generation.py @@ -1,13 +1,14 @@ from typing import Dict -from modelscope.utils.constant import Tasks +from ...metainfo import Models +from ...utils.constant import Tasks from ..base import Model, Tensor from ..builder import MODELS __all__ = ['PalmForTextGeneration'] -@MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0') +@MODELS.register_module(Tasks.text_generation, module_name=Models.palm) class PalmForTextGeneration(Model): def __init__(self, model_dir: str, *args, **kwargs): @@ -19,13 +20,18 @@ class PalmForTextGeneration(Model): default loader to load model weights, by default None. """ super().__init__(model_dir, *args, **kwargs) - self.model_dir = model_dir from sofa.models.palm_v2 import PalmForConditionalGeneration, Translator model = PalmForConditionalGeneration.from_pretrained(model_dir) self.tokenizer = model.tokenizer self.generator = Translator(model) + def train(self): + return self.generator.train() + + def eval(self): + return self.generator.eval() + def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: """return the result by the model diff --git a/modelscope/models/nlp/sbert_for_nli.py b/modelscope/models/nlp/sbert_for_nli.py new file mode 100644 index 00000000..a5a76b34 --- /dev/null +++ b/modelscope/models/nlp/sbert_for_nli.py @@ -0,0 +1,23 @@ +from ...metainfo import Models +from ...utils.constant import Tasks +from ..builder import MODELS +from .sbert_for_sequence_classification import \ + SbertForSequenceClassificationBase + +__all__ = ['SbertForNLI'] + + +@MODELS.register_module(Tasks.nli, module_name=Models.structbert) +class SbertForNLI(SbertForSequenceClassificationBase): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the text generation model from the `model_dir` path. + + Args: + model_dir (str): the model path. + model_cls (Optional[Any], optional): model loader, if None, use the + default loader to load model weights, by default None. 
+ """ + super().__init__( + model_dir, *args, model_args={'num_labels': 3}, **kwargs) + assert self.model.config.num_labels == 3 diff --git a/modelscope/models/nlp/sbert_for_sentence_similarity.py b/modelscope/models/nlp/sbert_for_sentence_similarity.py index 98daac92..25c38a2e 100644 --- a/modelscope/models/nlp/sbert_for_sentence_similarity.py +++ b/modelscope/models/nlp/sbert_for_sentence_similarity.py @@ -1,46 +1,15 @@ -import os -from typing import Any, Dict - -import json -import numpy as np -import torch -from sofa import SbertModel -from sofa.models.sbert.modeling_sbert import SbertPreTrainedModel -from torch import nn - +from modelscope.metainfo import Models from modelscope.utils.constant import Tasks -from ..base import Model, Tensor from ..builder import MODELS +from .sbert_for_sequence_classification import \ + SbertForSequenceClassificationBase __all__ = ['SbertForSentenceSimilarity'] -class SbertTextClassifier(SbertPreTrainedModel): - - def __init__(self, config): - super().__init__(config) - self.num_labels = config.num_labels - self.config = config - self.encoder = SbertModel(config, add_pooling_layer=True) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - self.classifier = nn.Linear(config.hidden_size, config.num_labels) - - def forward(self, input_ids=None, token_type_ids=None): - outputs = self.encoder( - input_ids, - token_type_ids=token_type_ids, - return_dict=None, - ) - pooled_output = outputs[1] - pooled_output = self.dropout(pooled_output) - logits = self.classifier(pooled_output) - return logits - - @MODELS.register_module( - Tasks.sentence_similarity, - module_name=r'sbert-base-chinese-sentence-similarity') -class SbertForSentenceSimilarity(Model): + Tasks.sentence_similarity, module_name=Models.structbert) +class SbertForSentenceSimilarity(SbertForSequenceClassificationBase): def __init__(self, model_dir: str, *args, **kwargs): """initialize the sentence similarity model from the `model_dir` path. @@ -50,39 +19,7 @@ class SbertForSentenceSimilarity(Model): model_cls (Optional[Any], optional): model loader, if None, use the default loader to load model weights, by default None. 
""" - super().__init__(model_dir, *args, **kwargs) + super().__init__( + model_dir, *args, model_args={'num_labels': 2}, **kwargs) self.model_dir = model_dir - - self.model = SbertTextClassifier.from_pretrained( - model_dir, num_labels=2) - self.model.eval() - self.label_path = os.path.join(self.model_dir, 'label_mapping.json') - with open(self.label_path) as f: - self.label_mapping = json.load(f) - self.id2label = {idx: name for name, idx in self.label_mapping.items()} - - def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]: - """return the result by the model - - Args: - input (Dict[str, Any]): the preprocessed data - - Returns: - Dict[str, np.ndarray]: results - Example: - { - 'predictions': array([1]), # lable 0-negative 1-positive - 'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32), - 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value - } - """ - input_ids = torch.tensor(input['input_ids'], dtype=torch.long) - token_type_ids = torch.tensor( - input['token_type_ids'], dtype=torch.long) - with torch.no_grad(): - logits = self.model(input_ids, token_type_ids) - probs = logits.softmax(-1).numpy() - pred = logits.argmax(-1).numpy() - logits = logits.numpy() - res = {'predictions': pred, 'probabilities': probs, 'logits': logits} - return res + assert self.model.config.num_labels == 2 diff --git a/modelscope/models/nlp/sbert_for_sentiment_classification.py b/modelscope/models/nlp/sbert_for_sentiment_classification.py new file mode 100644 index 00000000..72fb92f0 --- /dev/null +++ b/modelscope/models/nlp/sbert_for_sentiment_classification.py @@ -0,0 +1,24 @@ +from modelscope.metainfo import Models +from modelscope.utils.constant import Tasks +from ..builder import MODELS +from .sbert_for_sequence_classification import \ + SbertForSequenceClassificationBase + +__all__ = ['SbertForSentimentClassification'] + + +@MODELS.register_module( + Tasks.sentiment_classification, module_name=Models.structbert) +class SbertForSentimentClassification(SbertForSequenceClassificationBase): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the text generation model from the `model_dir` path. + + Args: + model_dir (str): the model path. + model_cls (Optional[Any], optional): model loader, if None, use the + default loader to load model weights, by default None. 
+ """ + super().__init__( + model_dir, *args, model_args={'num_labels': 2}, **kwargs) + assert self.model.config.num_labels == 2 diff --git a/modelscope/models/nlp/sbert_for_sequence_classification.py b/modelscope/models/nlp/sbert_for_sequence_classification.py new file mode 100644 index 00000000..861b6fe2 --- /dev/null +++ b/modelscope/models/nlp/sbert_for_sequence_classification.py @@ -0,0 +1,71 @@ +import os +from typing import Any, Dict + +import json +import numpy as np +import torch +from sofa.models.sbert.modeling_sbert import SbertModel, SbertPreTrainedModel +from torch import nn + +from ..base import Model + + +class SbertTextClassfier(SbertPreTrainedModel): + + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.config = config + self.encoder = SbertModel(config, add_pooling_layer=True) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, config.num_labels) + + def forward(self, input_ids=None, token_type_ids=None): + outputs = self.encoder( + input_ids, + token_type_ids=token_type_ids, + return_dict=None, + ) + pooled_output = outputs[1] + pooled_output = self.dropout(pooled_output) + logits = self.classifier(pooled_output) + return {'logits': logits} + + +class SbertForSequenceClassificationBase(Model): + + def __init__(self, model_dir: str, model_args=None, *args, **kwargs): + super().__init__(model_dir, *args, **kwargs) + if model_args is None: + model_args = {} + self.model = SbertTextClassfier.from_pretrained( + model_dir, **model_args) + self.id2label = {} + self.label_path = os.path.join(self.model_dir, 'label_mapping.json') + if os.path.exists(self.label_path): + with open(self.label_path) as f: + self.label_mapping = json.load(f) + self.id2label = { + idx: name + for name, idx in self.label_mapping.items() + } + + def train(self): + return self.model.train() + + def eval(self): + return self.model.eval() + + def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]: + input_ids = torch.tensor(input['input_ids'], dtype=torch.long) + token_type_ids = torch.tensor( + input['token_type_ids'], dtype=torch.long) + return self.model.forward(input_ids, token_type_ids) + + def postprocess(self, input, **kwargs): + logits = input['logits'] + probs = logits.softmax(-1).numpy() + pred = logits.argmax(-1).numpy() + logits = logits.numpy() + res = {'predictions': pred, 'probabilities': probs, 'logits': logits} + return res diff --git a/modelscope/models/nlp/sbert_for_token_classification.py b/modelscope/models/nlp/sbert_for_token_classification.py index b918dc37..fd175033 100644 --- a/modelscope/models/nlp/sbert_for_token_classification.py +++ b/modelscope/models/nlp/sbert_for_token_classification.py @@ -2,19 +2,17 @@ from typing import Any, Dict, Union import numpy as np import torch -from sofa import SbertConfig, SbertForTokenClassification +from modelscope.metainfo import Models from modelscope.utils.constant import Tasks from ..base import Model, Tensor from ..builder import MODELS -__all__ = ['StructBertForTokenClassification'] +__all__ = ['SbertForTokenClassification'] -@MODELS.register_module( - Tasks.word_segmentation, - module_name=r'structbert-chinese-word-segmentation') -class StructBertForTokenClassification(Model): +@MODELS.register_module(Tasks.word_segmentation, module_name=Models.structbert) +class SbertForTokenClassification(Model): def __init__(self, model_dir: str, *args, **kwargs): """initialize the word segmentation model from the `model_dir` path. 
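# Editor's note (illustration, not part of the original diff): with this
# refactor the Sbert classification models return raw logits from forward()
# and defer softmax/argmax to postprocess(), mirroring
# SbertForSequenceClassificationBase.postprocess above.  A minimal,
# self-contained sketch of that contract using a dummy logits tensor (the
# values are placeholders):
import torch

outputs = {'logits': torch.tensor([[-0.54, 1.50]])}    # what forward() returns
probs = outputs['logits'].softmax(-1).numpy()           # computed in postprocess()
preds = outputs['logits'].argmax(-1).numpy()
print({'predictions': preds, 'probabilities': probs})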
@@ -26,9 +24,16 @@ class StructBertForTokenClassification(Model): """ super().__init__(model_dir, *args, **kwargs) self.model_dir = model_dir - self.model = SbertForTokenClassification.from_pretrained( + import sofa + self.model = sofa.SbertForTokenClassification.from_pretrained( self.model_dir) - self.config = SbertConfig.from_pretrained(self.model_dir) + self.config = sofa.SbertConfig.from_pretrained(self.model_dir) + + def train(self): + return self.model.train() + + def eval(self): + return self.model.eval() def forward(self, input: Dict[str, Any]) -> Dict[str, Union[str, np.ndarray]]: @@ -47,10 +52,12 @@ class StructBertForTokenClassification(Model): } """ input_ids = torch.tensor(input['input_ids']).unsqueeze(0) - output = self.model(input_ids) - logits = output.logits + return {**self.model(input_ids), 'text': input['text']} + + def postprocess(self, input: Dict[str, Tensor], + **kwargs) -> Dict[str, Tensor]: + logits = input['logits'] pred = torch.argmax(logits[0], dim=-1) pred = pred.numpy() - rst = {'predictions': pred, 'logits': logits, 'text': input['text']} return rst diff --git a/modelscope/models/nlp/space/dialog_intent_prediction_model.py b/modelscope/models/nlp/space/dialog_intent_prediction_model.py index 3ea500e5..a5d94376 100644 --- a/modelscope/models/nlp/space/dialog_intent_prediction_model.py +++ b/modelscope/models/nlp/space/dialog_intent_prediction_model.py @@ -1,11 +1,10 @@ import os from typing import Any, Dict -from modelscope.preprocessors.space.fields.intent_field import \ - IntentBPETextField -from modelscope.trainers.nlp.space.trainers.intent_trainer import IntentTrainer -from modelscope.utils.config import Config -from modelscope.utils.constant import Tasks +from ....preprocessors.space.fields.intent_field import IntentBPETextField +from ....trainers.nlp.space.trainers.intent_trainer import IntentTrainer +from ....utils.config import Config +from ....utils.constant import Tasks from ...base import Model, Tensor from ...builder import MODELS from .model.generator import Generator @@ -14,8 +13,7 @@ from .model.model_base import ModelBase __all__ = ['DialogIntentModel'] -@MODELS.register_module( - Tasks.dialog_intent_prediction, module_name=r'space-intent') +@MODELS.register_module(Tasks.dialog_intent_prediction, module_name=r'space') class DialogIntentModel(Model): def __init__(self, model_dir: str, *args, **kwargs): diff --git a/modelscope/models/nlp/space/dialog_modeling_model.py b/modelscope/models/nlp/space/dialog_modeling_model.py index bae8a822..4a34f132 100644 --- a/modelscope/models/nlp/space/dialog_modeling_model.py +++ b/modelscope/models/nlp/space/dialog_modeling_model.py @@ -1,11 +1,10 @@ import os from typing import Any, Dict, Optional -from modelscope.preprocessors.space.fields.gen_field import \ - MultiWOZBPETextField -from modelscope.trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer -from modelscope.utils.config import Config -from modelscope.utils.constant import Tasks +from ....preprocessors.space.fields.gen_field import MultiWOZBPETextField +from ....trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer +from ....utils.config import Config +from ....utils.constant import Tasks from ...base import Model, Tensor from ...builder import MODELS from .model.generator import Generator @@ -14,7 +13,7 @@ from .model.model_base import ModelBase __all__ = ['DialogModelingModel'] -@MODELS.register_module(Tasks.dialog_modeling, module_name=r'space-modeling') +@MODELS.register_module(Tasks.dialog_modeling, module_name=r'space') 
class DialogModelingModel(Model): def __init__(self, model_dir: str, *args, **kwargs): diff --git a/modelscope/models/nlp/space/dialog_state_tracking.py b/modelscope/models/nlp/space/dialog_state_tracking.py index 4b1c44d3..e94c59b0 100644 --- a/modelscope/models/nlp/space/dialog_state_tracking.py +++ b/modelscope/models/nlp/space/dialog_state_tracking.py @@ -11,7 +11,7 @@ from .model.model_base import ModelBase __all__ = ['DialogStateTrackingModel'] -@MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space-dst') +@MODELS.register_module(Tasks.dialog_state_tracking, module_name=r'space') class DialogStateTrackingModel(Model): def __init__(self, model_dir: str, *args, **kwargs): diff --git a/modelscope/models/nlp/space/model/gen_unified_transformer.py b/modelscope/models/nlp/space/model/gen_unified_transformer.py index c076cce4..0f1b1a83 100644 --- a/modelscope/models/nlp/space/model/gen_unified_transformer.py +++ b/modelscope/models/nlp/space/model/gen_unified_transformer.py @@ -3,8 +3,7 @@ IntentUnifiedTransformer """ import torch -from modelscope.models.nlp.space.model.unified_transformer import \ - UnifiedTransformer +from .unified_transformer import UnifiedTransformer class GenUnifiedTransformer(UnifiedTransformer): diff --git a/modelscope/models/nlp/space/model/intent_unified_transformer.py b/modelscope/models/nlp/space/model/intent_unified_transformer.py index 646a8044..b9c699d7 100644 --- a/modelscope/models/nlp/space/model/intent_unified_transformer.py +++ b/modelscope/models/nlp/space/model/intent_unified_transformer.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -from modelscope.utils.nlp.space.criterions import compute_kl_loss +from .....utils.nlp.space.criterions import compute_kl_loss from .unified_transformer import UnifiedTransformer diff --git a/modelscope/models/nlp/space/model/unified_transformer.py b/modelscope/models/nlp/space/model/unified_transformer.py index a25bc7f4..2636553d 100644 --- a/modelscope/models/nlp/space/model/unified_transformer.py +++ b/modelscope/models/nlp/space/model/unified_transformer.py @@ -7,10 +7,9 @@ import torch import torch.nn as nn import torch.nn.functional as F -from modelscope.models.nlp.space.model.model_base import ModelBase -from modelscope.models.nlp.space.modules.embedder import Embedder -from modelscope.models.nlp.space.modules.transformer_block import \ - TransformerBlock +from ..modules.embedder import Embedder +from ..modules.transformer_block import TransformerBlock +from .model_base import ModelBase class UnifiedTransformer(ModelBase): diff --git a/modelscope/models/nlp/space/modules/transformer_block.py b/modelscope/models/nlp/space/modules/transformer_block.py index 1a0565d6..5b6c79a5 100644 --- a/modelscope/models/nlp/space/modules/transformer_block.py +++ b/modelscope/models/nlp/space/modules/transformer_block.py @@ -5,9 +5,8 @@ TransformerBlock class. 
import torch import torch.nn as nn -from modelscope.models.nlp.space.modules.feedforward import FeedForward -from modelscope.models.nlp.space.modules.multihead_attention import \ - MultiheadAttention +from .feedforward import FeedForward +from .multihead_attention import MultiheadAttention class TransformerBlock(nn.Module): diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py index 6e2645de..962b2245 100644 --- a/modelscope/pipelines/__init__.py +++ b/modelscope/pipelines/__init__.py @@ -1,7 +1,4 @@ -from .audio import LinearAECPipeline +# from .audio import LinearAECPipeline from .base import Pipeline from .builder import pipeline -from .cv import * # noqa F403 -from .multi_modal import * # noqa F403 from .nlp import * # noqa F403 -from .nlp.space import * # noqa F403 diff --git a/modelscope/pipelines/audio/linear_aec_pipeline.py b/modelscope/pipelines/audio/linear_aec_pipeline.py index 528d8d47..70562b19 100644 --- a/modelscope/pipelines/audio/linear_aec_pipeline.py +++ b/modelscope/pipelines/audio/linear_aec_pipeline.py @@ -7,6 +7,7 @@ import scipy.io.wavfile as wav import torch import yaml +from modelscope.metainfo import Pipelines from modelscope.preprocessors.audio import LinearAECAndFbank from modelscope.utils.constant import ModelFile, Tasks from ..base import Pipeline @@ -39,7 +40,8 @@ def initialize_config(module_cfg): @PIPELINES.register_module( - Tasks.speech_signal_process, module_name=r'speech_dfsmn_aec_psm_16k') + Tasks.speech_signal_process, + module_name=Pipelines.speech_dfsmn_aec_psm_16k) class LinearAECPipeline(Pipeline): r"""AEC Inference Pipeline only support 16000 sample rate. diff --git a/modelscope/pipelines/audio/text_to_speech_pipeline.py b/modelscope/pipelines/audio/text_to_speech_pipeline.py index ecd9daac..22586d3e 100644 --- a/modelscope/pipelines/audio/text_to_speech_pipeline.py +++ b/modelscope/pipelines/audio/text_to_speech_pipeline.py @@ -3,6 +3,7 @@ from typing import Any, Dict, List import numpy as np +from modelscope.metainfo import Pipelines from modelscope.models import Model from modelscope.models.audio.tts.am import SambertNetHifi16k from modelscope.models.audio.tts.vocoder import Hifigan16k @@ -15,7 +16,7 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline'] @PIPELINES.register_module( - Tasks.text_to_speech, module_name=r'tts-sambert-hifigan-16k') + Tasks.text_to_speech, module_name=Pipelines.sambert_hifigan_16k_tts) class TextToSpeechSambertHifigan16kPipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index e266d21c..7e32f543 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -4,19 +4,17 @@ import os.path as osp from abc import ABC, abstractmethod from typing import Any, Dict, Generator, List, Union -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.base import Model from modelscope.preprocessors import Preprocessor from modelscope.pydatasets import PyDataset from modelscope.utils.config import Config -from modelscope.utils.hub import get_model_cache_dir from modelscope.utils.logger import get_logger from .outputs import TASK_OUTPUTS -from .util import is_model_name +from .util import is_model, is_official_hub_path Tensor = Union['torch.Tensor', 'tf.Tensor'] -Input = Union[str, tuple, dict, PyDataset, 'PIL.Image.Image', 'numpy.ndarray'] +Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray'] InputModel = Union[str, 
Model] output_keys = [ @@ -29,14 +27,10 @@ class Pipeline(ABC): def initiate_single_model(self, model): logger.info(f'initiate model from {model}') - # TODO @wenmeng.zwm replace model.startswith('damo/') with get_model - if isinstance(model, str) and model.startswith('damo/'): - if not osp.exists(model): - cache_path = get_model_cache_dir(model) - model = cache_path if osp.exists( - cache_path) else snapshot_download(model) - return Model.from_pretrained(model) if is_model_name( - model) else model + if isinstance(model, str) and is_official_hub_path(model): + model = snapshot_download( + model) if not osp.exists(model) else model + return Model.from_pretrained(model) if is_model(model) else model elif isinstance(model, Model): return model else: @@ -104,7 +98,7 @@ class Pipeline(ABC): def _process_single(self, input: Input, *args, **post_kwargs) -> Dict[str, Any]: - out = self.preprocess(input, **post_kwargs) + out = self.preprocess(input) out = self.forward(out) out = self.postprocess(out, **post_kwargs) self._check_output(out) diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 6e2c791d..cff1801d 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -1,33 +1,49 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import os.path as osp from typing import List, Union +from modelscope.metainfo import Pipelines from modelscope.models.base import Model from modelscope.utils.config import Config, ConfigDict -from modelscope.utils.constant import Tasks +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.hub import read_config from modelscope.utils.registry import Registry, build_from_cfg from .base import Pipeline +from .util import is_official_hub_path PIPELINES = Registry('pipelines') DEFAULT_MODEL_FOR_PIPELINE = { # TaskName: (pipeline_module_name, model_repo) Tasks.word_segmentation: - ('structbert-chinese-word-segmentation', + (Pipelines.word_segmentation, 'damo/nlp_structbert_word-segmentation_chinese-base'), Tasks.sentence_similarity: - ('sbert-base-chinese-sentence-similarity', + (Pipelines.sentence_similarity, 'damo/nlp_structbert_sentence-similarity_chinese-base'), Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'), - Tasks.text_classification: - ('bert-sentiment-analysis', 'damo/bert-base-sst2'), - Tasks.text_generation: ('palm2.0', + Tasks.nli: (Pipelines.nli, 'damo/nlp_structbert_nli_chinese-base'), + Tasks.sentiment_classification: + (Pipelines.sentiment_classification, + 'damo/nlp_structbert_sentiment-classification_chinese-base'), + Tasks.text_classification: ('bert-sentiment-analysis', + 'damo/bert-base-sst2'), + Tasks.image_matting: (Pipelines.image_matting, + 'damo/cv_unet_image-matting'), + Tasks.text_classification: (Pipelines.sentiment_analysis, + 'damo/bert-base-sst2'), + Tasks.text_generation: (Pipelines.text_generation, 'damo/nlp_palm2.0_text-generation_chinese-base'), - Tasks.image_captioning: ('ofa', None), + Tasks.image_captioning: (Pipelines.image_caption, + 'damo/ofa_image-caption_coco_large_en'), Tasks.image_generation: - ('person-image-cartoon', + (Pipelines.person_image_cartoon, 'damo/cv_unet_person-image-cartoon_compound-models'), + Tasks.ocr_detection: (Pipelines.ocr_detection, + 'damo/cv_resnet18_ocr-detection-line-level_damo'), + Tasks.fill_mask: (Pipelines.fill_mask, 'damo/nlp_veco_fill-mask-large'), + Tasks.action_recognition: (Pipelines.action_recognition, + 'damo/cv_TAdaConv_action-recognition'), } @@ -84,30 +100,40 @@ def pipeline(task: str = 
None, if task is None and pipeline_name is None: raise ValueError('task or pipeline_name is required') + assert isinstance(model, (type(None), str, Model, list)), \ + f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}' + if pipeline_name is None: # get default pipeline for this task if isinstance(model, str) \ or (isinstance(model, list) and isinstance(model[0], str)): - - # if is_model_name(model): - if (isinstance(model, str) and model.startswith('damo/')) \ - or (isinstance(model, list) and model[0].startswith('damo/')) \ - or (isinstance(model, str) and osp.exists(model)): - # TODO @wenmeng.zwm add support when model is a str of modelhub address - # read pipeline info from modelhub configuration file. - pipeline_name, default_model_repo = get_default_pipeline_info( - task) + if is_official_hub_path(model): + # read config file from hub and parse + cfg = read_config(model) if isinstance( + model, str) else read_config(model[0]) + assert hasattr( + cfg, + 'pipeline'), 'pipeline config is missing from config file.' + pipeline_name = cfg.pipeline.type else: + # used for test case, when model is str and is not hub path pipeline_name = get_pipeline_by_model_name(task, model) + elif isinstance(model, Model) or \ + (isinstance(model, list) and isinstance(model[0], Model)): + # get pipeline info from Model object + first_model = model[0] if isinstance(model, list) else model + if not hasattr(first_model, 'pipeline'): + # model is instantiated by user, we should parse config again + cfg = read_config(first_model.model_dir) + assert hasattr( + cfg, + 'pipeline'), 'pipeline config is missing from config file.' + first_model.pipeline = cfg.pipeline + pipeline_name = first_model.pipeline.type else: pipeline_name, default_model_repo = get_default_pipeline_info(task) - - if model is None: model = default_model_repo - assert isinstance(model, (type(None), str, Model, list)), \ - f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}' - cfg = ConfigDict(type=pipeline_name, model=model) if kwargs: diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py index 79c85c19..68d875ec 100644 --- a/modelscope/pipelines/cv/__init__.py +++ b/modelscope/pipelines/cv/__init__.py @@ -1,2 +1,4 @@ +from .action_recognition_pipeline import ActionRecognitionPipeline from .image_cartoon_pipeline import ImageCartoonPipeline from .image_matting_pipeline import ImageMattingPipeline +from .ocr_detection_pipeline import OCRDetectionPipeline diff --git a/modelscope/pipelines/cv/action_recognition_pipeline.py b/modelscope/pipelines/cv/action_recognition_pipeline.py new file mode 100644 index 00000000..845f8f9a --- /dev/null +++ b/modelscope/pipelines/cv/action_recognition_pipeline.py @@ -0,0 +1,65 @@ +import math +import os.path as osp +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.action_recognition.models import BaseVideoModel +from modelscope.pipelines.base import Input +from modelscope.preprocessors.video import ReadVideoData +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger +from ..base import Pipeline +from ..builder import PIPELINES + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.action_recognition, module_name=Pipelines.action_recognition) +class ActionRecognitionPipeline(Pipeline): + + 
def __init__(self, model: str): + super().__init__(model=model) + model_path = osp.join(self.model, ModelFile.TORCH_MODEL_FILE) + logger.info(f'loading model from {model_path}') + config_path = osp.join(self.model, ModelFile.CONFIGURATION) + logger.info(f'loading config from {config_path}') + self.cfg = Config.from_file(config_path) + self.infer_model = BaseVideoModel(cfg=self.cfg).cuda() + self.infer_model.eval() + self.infer_model.load_state_dict(torch.load(model_path)['model_state']) + self.label_mapping = self.cfg.label_mapping + logger.info('load model done') + + def preprocess(self, input: Input) -> Dict[str, Any]: + if isinstance(input, str): + video_input_data = ReadVideoData(self.cfg, input).cuda() + else: + raise TypeError(f'input should be a str,' + f' but got {type(input)}') + result = {'video_data': video_input_data} + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + pred = self.perform_inference(input['video_data']) + output_label = self.label_mapping[str(pred)] + return {'output_label': output_label} + + @torch.no_grad() + def perform_inference(self, data, max_bsz=4): + iter_num = math.ceil(data.size(0) / max_bsz) + preds_list = [] + for i in range(iter_num): + preds_list.append( + self.infer_model(data[i * max_bsz:(i + 1) * max_bsz])[0]) + pred = torch.cat(preds_list, dim=0) + return pred.mean(dim=0).argmax().item() + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/cv/image_cartoon_pipeline.py b/modelscope/pipelines/cv/image_cartoon_pipeline.py index d253eaf5..717336e9 100644 --- a/modelscope/pipelines/cv/image_cartoon_pipeline.py +++ b/modelscope/pipelines/cv/image_cartoon_pipeline.py @@ -6,6 +6,7 @@ import numpy as np import PIL import tensorflow as tf +from modelscope.metainfo import Pipelines from modelscope.models.cv.cartoon.facelib.facer import FaceAna from modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans import ( get_reference_facial_points, warp_and_crop_face) @@ -25,7 +26,7 @@ logger = get_logger() @PIPELINES.register_module( - Tasks.image_generation, module_name='person-image-cartoon') + Tasks.image_generation, module_name=Pipelines.person_image_cartoon) class ImageCartoonPipeline(Pipeline): def __init__(self, model: str): diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py index 0c60dfa7..b3e27e4b 100644 --- a/modelscope/pipelines/cv/image_matting_pipeline.py +++ b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -5,6 +5,7 @@ import cv2 import numpy as np import PIL +from modelscope.metainfo import Pipelines from modelscope.pipelines.base import Input from modelscope.preprocessors import load_image from modelscope.utils.constant import ModelFile, Tasks @@ -16,7 +17,7 @@ logger = get_logger() @PIPELINES.register_module( - Tasks.image_matting, module_name=Tasks.image_matting) + Tasks.image_matting, module_name=Pipelines.image_matting) class ImageMattingPipeline(Pipeline): def __init__(self, model: str): diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py new file mode 100644 index 00000000..0502fe36 --- /dev/null +++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py @@ -0,0 +1,168 @@ +import math +import os +import os.path as osp +import sys +from typing import Any, Dict, List, Tuple, Union + +import cv2 +import numpy as np +import PIL +import tensorflow as tf +import tf_slim as slim + +from modelscope.metainfo import Pipelines 
+from modelscope.pipelines.base import Input +from modelscope.preprocessors import load_image +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger +from ..base import Pipeline +from ..builder import PIPELINES +from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 +tf.compat.v1.disable_eager_execution() + +logger = get_logger() + +# constant +RBOX_DIM = 5 +OFFSET_DIM = 6 +WORD_POLYGON_DIM = 8 +OFFSET_VARIANCE = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + +FLAGS = tf.app.flags.FLAGS +tf.app.flags.DEFINE_float('node_threshold', 0.4, + 'Confidence threshold for nodes') +tf.app.flags.DEFINE_float('link_threshold', 0.6, + 'Confidence threshold for links') + + +@PIPELINES.register_module( + Tasks.ocr_detection, module_name=Pipelines.ocr_detection) +class OCRDetectionPipeline(Pipeline): + + def __init__(self, model: str): + super().__init__(model=model) + model_path = osp.join( + osp.join(self.model, ModelFile.TF_CHECKPOINT_FOLDER), + 'checkpoint-80000') + + config = tf.ConfigProto(allow_soft_placement=True) + config.gpu_options.allow_growth = True + self._session = tf.Session(config=config) + global_step = tf.get_variable( + 'global_step', [], + initializer=tf.constant_initializer(0), + dtype=tf.int64, + trainable=False) + variable_averages = tf.train.ExponentialMovingAverage( + 0.997, global_step) + self.input_images = tf.placeholder( + tf.float32, shape=[1, 1024, 1024, 3], name='input_images') + self.output = {} + + # detector + detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector() + all_maps = detector.build_model(self.input_images, is_training=False) + + # decode local predictions + all_nodes, all_links, all_reg = [], [], [] + for i, maps in enumerate(all_maps): + cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2] + reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE) + + cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2])) + + lnk_prob_pos = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, :2]) + lnk_prob_mut = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, 2:]) + lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1) + + all_nodes.append(cls_prob) + all_links.append(lnk_prob) + all_reg.append(reg_maps) + + # decode segments and links + image_size = tf.shape(self.input_images)[1:3] + segments, group_indices, segment_counts, _ = ops.decode_segments_links_python( + image_size, + all_nodes, + all_links, + all_reg, + anchor_sizes=list(detector.anchor_sizes)) + + # combine segments + combined_rboxes, combined_counts = ops.combine_segments_python( + segments, group_indices, segment_counts) + self.output['combined_rboxes'] = combined_rboxes + self.output['combined_counts'] = combined_counts + + with self._session.as_default() as sess: + logger.info(f'loading model from {model_path}') + # load model + model_loader = tf.train.Saver( + variable_averages.variables_to_restore()) + model_loader.restore(sess, model_path) + + def preprocess(self, input: Input) -> Dict[str, Any]: + if isinstance(input, str): + img = np.array(load_image(input)) + elif isinstance(input, PIL.Image.Image): + img = np.array(input.convert('RGB')) + elif isinstance(input, np.ndarray): + if len(input.shape) == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + img = input[:, :, ::-1] # in rgb order + else: + raise TypeError(f'input should be either str, PIL.Image,' + f' np.array, but got {type(input)}') + h, w, c = img.shape + img_pad = np.zeros((max(h, w), max(h, w), 3), dtype=np.float32) + img_pad[:h, :w, 
:] = img + + resize_size = 1024 + img_pad_resize = cv2.resize(img_pad, (resize_size, resize_size)) + img_pad_resize = cv2.cvtColor(img_pad_resize, cv2.COLOR_RGB2BGR) + img_pad_resize = img_pad_resize - np.array([123.68, 116.78, 103.94], + dtype=np.float32) + + resize_size = tf.stack([resize_size, resize_size]) + orig_size = tf.stack([max(h, w), max(h, w)]) + self.output['orig_size'] = orig_size + self.output['resize_size'] = resize_size + + result = {'img': np.expand_dims(img_pad_resize, axis=0)} + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + with self._session.as_default(): + feed_dict = {self.input_images: input['img']} + sess_outputs = self._session.run(self.output, feed_dict=feed_dict) + return sess_outputs + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + rboxes = inputs['combined_rboxes'][0] + count = inputs['combined_counts'][0] + rboxes = rboxes[:count, :] + + # convert rboxes to polygons and find its coordinates on the original image + orig_h, orig_w = inputs['orig_size'] + resize_h, resize_w = inputs['resize_size'] + polygons = utils.rboxes_to_polygons(rboxes) + scale_y = float(orig_h) / float(resize_h) + scale_x = float(orig_w) / float(resize_w) + + # confine polygons inside image + polygons[:, ::2] = np.maximum( + 0, np.minimum(polygons[:, ::2] * scale_x, orig_w - 1)) + polygons[:, 1::2] = np.maximum( + 0, np.minimum(polygons[:, 1::2] * scale_y, orig_h - 1)) + polygons = np.round(polygons).astype(np.int32) + + # nms + dt_n9 = [o + [utils.cal_width(o)] for o in polygons.tolist()] + dt_nms = utils.nms_python(dt_n9) + dt_polygons = np.array([o[:8] for o in dt_nms]) + + result = {'det_polygons': dt_polygons} + return result diff --git a/modelscope/pipelines/cv/ocr_utils/__init__.py b/modelscope/pipelines/cv/ocr_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py b/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py new file mode 100644 index 00000000..50b8ba02 --- /dev/null +++ b/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py @@ -0,0 +1,158 @@ +import tensorflow as tf +import tf_slim as slim + +from . import ops, resnet18_v1, resnet_utils + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + +# constants +OFFSET_DIM = 6 + +N_LOCAL_LINKS = 8 +N_CROSS_LINKS = 4 +N_SEG_CLASSES = 2 +N_LNK_CLASSES = 4 + +POS_LABEL = 1 +NEG_LABEL = 0 + + +class SegLinkDetector(): + + def __init__(self): + self.anchor_sizes = [6., 11.84210526, 23.68421053, 45., 90., 150.] 
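        # Editor's note (not part of the original diff): one anchor size per
        # detection scale.  build_model() below attaches a detection
        # classifier to six backbone feature maps (conv3_3, conv4_3, fc7,
        # conv8_2, conv9_2, conv10_2), and OCRDetectionPipeline passes these
        # anchor sizes on to ops.decode_segments_links_python() when decoding
        # segments and links back into image coordinates.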
+ + def _detection_classifier(self, + maps, + ksize, + weight_decay, + cross_links=False, + scope=None): + + with tf.variable_scope(scope): + seg_depth = N_SEG_CLASSES + if cross_links: + lnk_depth = N_LNK_CLASSES * (N_LOCAL_LINKS + N_CROSS_LINKS) + else: + lnk_depth = N_LNK_CLASSES * N_LOCAL_LINKS + reg_depth = OFFSET_DIM + map_depth = maps.get_shape()[3] + inter_maps, inter_relu = ops.conv2d( + maps, map_depth, 256, 1, 1, 'SAME', scope='conv_inter') + + dir_maps, dir_relu = ops.conv2d( + inter_relu, 256, 2, ksize, 1, 'SAME', scope='conv_dir') + cen_maps, cen_relu = ops.conv2d( + inter_relu, 256, 2, ksize, 1, 'SAME', scope='conv_cen') + pol_maps, pol_relu = ops.conv2d( + inter_relu, 256, 8, ksize, 1, 'SAME', scope='conv_pol') + concat_relu = tf.concat([dir_relu, cen_relu, pol_relu], axis=-1) + _, lnk_embedding = ops.conv_relu( + concat_relu, 12, 256, 1, 1, scope='lnk_embedding') + lnk_maps, lnk_relu = ops.conv2d( + inter_relu + lnk_embedding, + 256, + lnk_depth, + ksize, + 1, + 'SAME', + scope='conv_lnk') + + char_seg_maps, char_seg_relu = ops.conv2d( + inter_relu, + 256, + seg_depth, + ksize, + 1, + 'SAME', + scope='conv_char_cls') + char_reg_maps, char_reg_relu = ops.conv2d( + inter_relu, + 256, + reg_depth, + ksize, + 1, + 'SAME', + scope='conv_char_reg') + concat_char_relu = tf.concat([char_seg_relu, char_reg_relu], + axis=-1) + _, char_embedding = ops.conv_relu( + concat_char_relu, 8, 256, 1, 1, scope='conv_char_embedding') + seg_maps, seg_relu = ops.conv2d( + inter_relu + char_embedding, + 256, + seg_depth, + ksize, + 1, + 'SAME', + scope='conv_cls') + reg_maps, reg_relu = ops.conv2d( + inter_relu + char_embedding, + 256, + reg_depth, + ksize, + 1, + 'SAME', + scope='conv_reg') + + return seg_relu, lnk_relu, reg_relu + + def _build_cnn(self, images, weight_decay, is_training): + with slim.arg_scope( + resnet18_v1.resnet_arg_scope(weight_decay=weight_decay)): + logits, end_points = resnet18_v1.resnet_v1_18( + images, is_training=is_training, scope='resnet_v1_18') + + outputs = { + 'conv3_3': end_points['pool1'], + 'conv4_3': end_points['pool2'], + 'fc7': end_points['pool3'], + 'conv8_2': end_points['pool4'], + 'conv9_2': end_points['pool5'], + 'conv10_2': end_points['pool6'], + } + return outputs + + def build_model(self, images, is_training=True, scope=None): + + weight_decay = 5e-4 # FLAGS.weight_decay + cnn_outputs = self._build_cnn(images, weight_decay, is_training) + det_0 = self._detection_classifier( + cnn_outputs['conv3_3'], + 3, + weight_decay, + cross_links=False, + scope='dete_0') + det_1 = self._detection_classifier( + cnn_outputs['conv4_3'], + 3, + weight_decay, + cross_links=True, + scope='dete_1') + det_2 = self._detection_classifier( + cnn_outputs['fc7'], + 3, + weight_decay, + cross_links=True, + scope='dete_2') + det_3 = self._detection_classifier( + cnn_outputs['conv8_2'], + 3, + weight_decay, + cross_links=True, + scope='dete_3') + det_4 = self._detection_classifier( + cnn_outputs['conv9_2'], + 3, + weight_decay, + cross_links=True, + scope='dete_4') + det_5 = self._detection_classifier( + cnn_outputs['conv10_2'], + 3, + weight_decay, + cross_links=True, + scope='dete_5') + outputs = [det_0, det_1, det_2, det_3, det_4, det_5] + return outputs diff --git a/modelscope/pipelines/cv/ocr_utils/ops.py b/modelscope/pipelines/cv/ocr_utils/ops.py new file mode 100644 index 00000000..2bc8a8bf --- /dev/null +++ b/modelscope/pipelines/cv/ocr_utils/ops.py @@ -0,0 +1,1098 @@ +import math +import os +import shutil +import uuid + +import cv2 +import numpy as np +import tensorflow 
as tf
+
+from . import utils
+
+if tf.__version__ >= '2.0':
+    tf = tf.compat.v1
+
+FLAGS = tf.app.flags.FLAGS
+tf.app.flags.DEFINE_string('weight_init_method', 'xavier',
+                           'Weight initialization method')
+
+# constants
+OFFSET_DIM = 6
+RBOX_DIM = 5
+
+N_LOCAL_LINKS = 8
+N_CROSS_LINKS = 4
+N_SEG_CLASSES = 2
+N_LNK_CLASSES = 4
+
+MATCH_STATUS_POS = 1
+MATCH_STATUS_NEG = -1
+MATCH_STATUS_IGNORE = 0
+MUT_LABEL = 3
+POS_LABEL = 1
+NEG_LABEL = 0
+
+N_DET_LAYERS = 6
+
+
+def load_oplib(lib_name):
+    """
+    Load TensorFlow operator library.
+    """
+    # use absolute path so that ops.py can be called from other directory
+    lib_path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        'lib{0}.so'.format(lib_name))
+    # duplicate the library under a random new name so that a running
+    # program is not interrupted when the original library is updated
+    lib_copy_path = '/tmp/lib{0}_{1}.so'.format(
+        str(uuid.uuid4())[:8], lib_name)
+    shutil.copyfile(lib_path, lib_copy_path)
+    oplib = tf.load_op_library(lib_copy_path)
+    return oplib
+
+
+def _nn_variable(name, shape, init_method, collection=None, **kwargs):
+    """
+    Create or reuse a variable
+    ARGS
+        name: variable name
+        shape: variable shape
+        init_method: 'zero', 'kaiming', 'xavier', or (mean, std)
+        collection: if not None, add the variable to this collection
+        kwargs: extra parameters passed to tf.get_variable
+    RETURN
+        var: a new or existing variable
+    """
+    if init_method == 'zero':
+        initializer = tf.constant_initializer(0.0)
+    elif init_method == 'kaiming':
+        if len(shape) == 4:  # convolutional filters
+            kh, kw, n_in = shape[:3]
+            init_std = math.sqrt(2.0 / (kh * kw * n_in))
+        elif len(shape) == 2:  # linear weights
+            n_in, n_out = shape
+            init_std = math.sqrt(1.0 / n_out)
+        else:
+            raise ValueError('Unsupported shape: {}'.format(shape))
+        initializer = tf.truncated_normal_initializer(0.0, init_std)
+    elif init_method == 'xavier':
+        # glorot/xavier normal initialization for both conv and linear shapes
+        initializer = tf.keras.initializers.glorot_normal()
+    elif isinstance(init_method, tuple):
+        assert len(init_method) == 2
+        initializer = tf.truncated_normal_initializer(init_method[0],
+                                                      init_method[1])
+    else:
+        raise ValueError(
+            'Unsupported weight initialization method: {}'.format(init_method))
+
+    var = tf.get_variable(name, shape=shape, initializer=initializer, **kwargs)
+    if collection is not None:
+        tf.add_to_collection(collection, var)
+
+    return var
+
+
+def conv2d(x,
+           n_in,
+           n_out,
+           ksize,
+           stride=1,
+           padding='SAME',
+           weight_init=None,
+           bias=True,
+           relu=False,
+           scope=None,
+           **kwargs):
+    weight_init = weight_init or FLAGS.weight_init_method
+    trainable = kwargs.get('trainable', True)
+    # input_dim = n_in
+    if padding == 'SAME':
+        # explicit symmetric zero-padding, followed by a 'VALID' convolution below
+        in_height = x.get_shape()[1]
+        in_width = x.get_shape()[2]
+        if in_height % stride == 0:
+            pad_along_height = max(ksize - stride, 0)
+        else:
+            pad_along_height = max(ksize - (in_height % stride), 0)
+        if in_width % stride == 0:
+            pad_along_width = max(ksize - stride, 0)
+        else:
+            pad_along_width = max(ksize - (in_width % stride), 0)
+        pad_bottom = pad_along_height // 2
+        pad_top = pad_along_height - pad_bottom
+        pad_right = pad_along_width // 2
+        pad_left = pad_along_width - pad_right
+        paddings = tf.constant([[0, 0], [pad_top, pad_bottom],
+                                [pad_left, pad_right], [0, 0]])
+        input_padded = tf.pad(x, paddings, 'CONSTANT')
+    else:
+        input_padded = x
+
+    with tf.variable_scope(scope or 'conv2d'):
+        # convolution
+        kernel = _nn_variable(
+            'weight', [ksize, ksize, n_in, n_out],
+            weight_init,
+            collection='weights' if trainable else
None, + **kwargs) + yc = tf.nn.conv2d( + input_padded, kernel, [1, stride, stride, 1], padding='VALID') + # add bias + if bias is True: + bias = _nn_variable( + 'bias', [n_out], + 'zero', + collection='biases' if trainable else None, + **kwargs) + yb = tf.nn.bias_add(yc, bias) + # apply ReLU + y = yb + if relu is True: + y = tf.nn.relu(yb) + return yb, y + + +def group_conv2d_relu(x, + n_in, + n_out, + ksize, + stride=1, + group=4, + padding='SAME', + weight_init=None, + bias=True, + relu=False, + name='group_conv2d', + **kwargs): + group_axis = len(x.get_shape()) - 1 + splits = tf.split(x, [int(n_in / group)] * group, group_axis) + + conv_list = [] + for i in range(group): + conv_split, relu_split = conv2d( + splits[i], + n_in / group, + n_out / group, + ksize=ksize, + stride=stride, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + scope='%s_%d' % (name, i)) + conv_list.append(conv_split) + conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat') + relu = tf.nn.relu(conv) + return conv, relu + + +def group_conv2d_bn_relu(x, + n_in, + n_out, + ksize, + stride=1, + group=4, + padding='SAME', + weight_init=None, + bias=True, + relu=False, + name='group_conv2d', + **kwargs): + group_axis = len(x.get_shape()) - 1 + splits = tf.split(x, [int(n_in / group)] * group, group_axis) + + conv_list = [] + for i in range(group): + conv_split, relu_split = conv2d( + splits[i], + n_in / group, + n_out / group, + ksize=ksize, + stride=stride, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + scope='%s_%d' % (name, i)) + conv_list.append(conv_split) + conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat') + with tf.variable_scope(name + '_bn'): + bn = tf.layers.batch_normalization( + conv, momentum=0.9, epsilon=1e-5, scale=True, training=True) + relu = tf.nn.relu(bn) + return conv, relu + + +def next_conv(x, + n_in, + n_out, + ksize, + stride=1, + group=4, + padding='SAME', + weight_init=None, + bias=True, + relu=False, + name='next_conv2d', + **kwargs): + conv_a, relu_a = conv_relu( + x, + n_in, + n_in / 2, + ksize=1, + stride=1, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + scope=name + '_a', + **kwargs) + + conv_b, relu_b = group_conv2d_relu( + relu_a, + n_in / 2, + n_out / 2, + ksize=ksize, + stride=stride, + group=group, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + name=name + '_b', + **kwargs) + + conv_c, relu_c = conv_relu( + relu_b, + n_out / 2, + n_out, + ksize=1, + stride=1, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + scope=name + '_c', + **kwargs) + + return conv_c, relu_c + + +def next_conv_bn(x, + n_in, + n_out, + ksize, + stride=1, + group=4, + padding='SAME', + weight_init=None, + bias=True, + relu=False, + name='next_conv2d', + **kwargs): + conv_a, relu_a = conv_bn_relu( + x, + n_in, + n_in / 2, + ksize=1, + stride=1, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + scope=name + '_a', + **kwargs) + + conv_b, relu_b = group_conv2d_bn_relu( + relu_a, + n_in / 2, + n_out / 2, + ksize=ksize, + stride=stride, + group=group, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + name=name + '_b', + **kwargs) + + conv_c, relu_c = conv_bn_relu( + relu_b, + n_out / 2, + n_out, + ksize=1, + stride=1, + padding=padding, + weight_init=weight_init, + bias=bias, + relu=relu, + scope=name + '_c', + **kwargs) + + return conv_c, relu_c + + +def conv2d_ori(x, + n_in, + n_out, + ksize, + 
stride=1, + padding='SAME', + weight_init=None, + bias=True, + relu=False, + scope=None, + **kwargs): + weight_init = weight_init or FLAGS.weight_init_method + trainable = kwargs.get('trainable', True) + + with tf.variable_scope(scope or 'conv2d'): + # convolution + kernel = _nn_variable( + 'weight', [ksize, ksize, n_in, n_out], + weight_init, + collection='weights' if trainable else None, + **kwargs) + y = tf.nn.conv2d(x, kernel, [1, stride, stride, 1], padding=padding) + # add bias + if bias is True: + bias = _nn_variable( + 'bias', [n_out], + 'zero', + collection='biases' if trainable else None, + **kwargs) + y = tf.nn.bias_add(y, bias) + # apply ReLU + if relu is True: + y = tf.nn.relu(y) + return y + + +def conv_relu(*args, **kwargs): + kwargs['relu'] = True + if 'scope' not in kwargs: + kwargs['scope'] = 'conv_relu' + return conv2d(*args, **kwargs) + + +def conv_bn_relu(*args, **kwargs): + kwargs['relu'] = True + if 'scope' not in kwargs: + kwargs['scope'] = 'conv_relu' + conv, relu = conv2d(*args, **kwargs) + with tf.variable_scope(kwargs['scope'] + '_bn'): + bn = tf.layers.batch_normalization( + conv, momentum=0.9, epsilon=1e-5, scale=True, training=True) + bn_relu = tf.nn.relu(bn) + return bn, bn_relu + + +def conv_relu_ori(*args, **kwargs): + kwargs['relu'] = True + if 'scope' not in kwargs: + kwargs['scope'] = 'conv_relu' + return conv2d_ori(*args, **kwargs) + + +def atrous_conv2d(x, + n_in, + n_out, + ksize, + dilation, + padding='SAME', + weight_init=None, + bias=True, + relu=False, + scope=None, + **kwargs): + weight_init = weight_init or FLAGS.weight_init_method + trainable = kwargs.get('trainable', True) + with tf.variable_scope(scope or 'atrous_conv2d'): + # atrous convolution + kernel = _nn_variable( + 'weight', [ksize, ksize, n_in, n_out], + weight_init, + collection='weights' if trainable else None, + **kwargs) + y = tf.nn.atrous_conv2d(x, kernel, dilation, padding=padding) + # add bias + if bias is True: + bias = _nn_variable( + 'bias', [n_out], + 'zero', + collection='biases' if trainable else None, + **kwargs) + y = tf.nn.bias_add(y, bias) + # apply ReLU + if relu is True: + y = tf.nn.relu(y) + return y + + +def avg_pool(x, ksize, stride, padding='SAME', scope=None): + with tf.variable_scope(scope or 'avg_pool'): + y = tf.nn.avg_pool(x, [1, ksize, ksize, 1], [1, stride, stride, 1], + padding) + return y + + +def max_pool(x, ksize, stride, padding='SAME', scope=None): + with tf.variable_scope(scope or 'max_pool'): + y = tf.nn.max_pool(x, [1, ksize, ksize, 1], [1, stride, stride, 1], + padding) + return y + + +def score_loss(gt_labels, match_scores, n_classes): + """ + Classification loss + ARGS + gt_labels: int32 [n] + match_scores: [n, n_classes] + RETURN + loss + """ + embeddings = tf.one_hot(tf.cast(gt_labels, tf.int64), n_classes, 1.0, 0.0) + losses = tf.nn.softmax_cross_entropy_with_logits(match_scores, embeddings) + return tf.reduce_sum(losses) + + +def smooth_l1_loss(offsets, gt_offsets, scope=None): + """ + Smooth L1 loss between offsets and encoded_gt + ARGS + offsets: [m?, 5], predicted offsets for one example + gt_offsets: [m?, 5], correponding groundtruth offsets + RETURN + loss: scalar + """ + with tf.variable_scope(scope or 'smooth_l1_loss'): + gt_offsets = tf.stop_gradient(gt_offsets) + diff = tf.abs(offsets - gt_offsets) + lesser_mask = tf.cast(tf.less(diff, 1.0), tf.float32) + larger_mask = 1.0 - lesser_mask + losses1 = (0.5 * tf.square(diff)) * lesser_mask + losses2 = (diff - 0.5) * larger_mask + return tf.reduce_sum(losses1 + losses2, 1) + + +def 
polygon_to_rboxe(polygon): + x1 = polygon[0] + y1 = polygon[1] + x2 = polygon[2] + y2 = polygon[3] + x3 = polygon[4] + y3 = polygon[5] + x4 = polygon[6] + y4 = polygon[7] + c_x = (x1 + x2 + x3 + x4) / 4 + c_y = (y1 + y2 + y3 + y4) / 4 + w1 = point_dist(x1, y1, x2, y2) + w2 = point_dist(x3, y3, x4, y4) + h1 = point_line_dist(c_x, c_y, x1, y1, x2, y2) + h2 = point_line_dist(c_x, c_y, x3, y3, x4, y4) + h = h1 + h2 + w = (w1 + w2) / 2 + theta1 = np.arctan2(y2 - y1, x2 - x1) + theta2 = np.arctan2(y3 - y4, x3 - x4) + theta = (theta1 + theta2) / 2 + return np.array([c_x, c_y, w, h, theta]) + + +def point_dist(x1, y1, x2, y2): + return np.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) + + +def point_line_dist(px, py, x1, y1, x2, y2): + eps = 1e-6 + dx = x2 - x1 + dy = y2 - y1 + div = np.sqrt(dx * dx + dy * dy) + eps + dist = np.abs(px * dy - py * dx + x2 * y1 - y2 * x1) / div + return dist + + +def get_combined_polygon(rboxes, resize_size): + image_w = resize_size[1] + image_h = resize_size[0] + img = np.zeros((image_h, image_w, 3), np.uint8) + for i in range(rboxes.shape[0]): + segment = np.reshape( + np.array(utils.rboxes_to_polygons(rboxes)[i, :], np.int32), + (-1, 1, 2)) + cv2.drawContours(img, [segment], 0, (255, 255, 255), -1) + img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(img2gray, 127, 255, cv2.THRESH_BINARY) + im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, + cv2.CHAIN_APPROX_SIMPLE) + if len(contours) > 0: + cnt = contours[0] + max_area = cv2.contourArea(cnt) + # get max_area + for cont in contours: + if cv2.contourArea(cont) > max_area: + cnt = cont + max_area = cv2.contourArea(cont) + rect = cv2.minAreaRect(cnt) + combined_polygon = np.array(cv2.boxPoints(rect)).reshape(-1) + else: + combined_polygon = np.array([0, 0, 0, 0, 0, 0, 0, 0]) + + return combined_polygon + + +def combine_segs(segs): + segs = np.asarray(segs) + assert segs.ndim == 2, 'invalid segs ndim' + assert segs.shape[-1] == 6, 'invalid segs shape' + + if len(segs) == 1: + cx = segs[0, 0] + cy = segs[0, 1] + w = segs[0, 2] + h = segs[0, 3] + theta_sin = segs[0, 4] + theta_cos = segs[0, 5] + theta = np.arctan2(theta_sin, theta_cos) + return np.array([cx, cy, w, h, theta]) + + # find the best straight line fitting all center points: y = kx + b + cxs = segs[:, 0] + cys = segs[:, 1] + + theta_coss = segs[:, 4] + theta_sins = segs[:, 5] + + bar_theta = np.arctan2(theta_sins.sum(), theta_coss.sum()) + k = np.tan(bar_theta) + b = np.mean(cys - k * cxs) + + proj_xs = (k * cys + cxs - k * b) / (k**2 + 1) + proj_ys = (k * k * cys + k * cxs + b) / (k**2 + 1) + proj_points = np.stack((proj_xs, proj_ys), -1) + + # find the max distance + max_dist = -1 + idx1 = -1 + idx2 = -1 + + for i in range(len(proj_points)): + point1 = proj_points[i, :] + for j in range(i + 1, len(proj_points)): + point2 = proj_points[j, :] + dist = np.sqrt(np.sum((point1 - point2)**2)) + if dist > max_dist: + idx1 = i + idx2 = j + max_dist = dist + assert idx1 >= 0 and idx2 >= 0 + # the bbox: bcx, bcy, bw, bh, average_theta + seg1 = segs[idx1, :] + seg2 = segs[idx2, :] + bcx, bcy = (seg1[:2] + seg2[:2]) / 2.0 + bh = np.mean(segs[:, 3]) + bw = max_dist + (seg1[2] + seg2[2]) / 2.0 + return bcx, bcy, bw, bh, bar_theta + + +def combine_segments_batch(segments_batch, group_indices_batch, + segment_counts_batch): + batch_size = 1 + combined_rboxes_batch = [] + combined_counts_batch = [] + for image_id in range(batch_size): + group_count = segment_counts_batch[image_id] + segments = segments_batch[image_id, :, :] 
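+        # The rest of this loop gathers the image's segments by their group
+        # index and merges each group with combine_segs() above into a single
+        # rotated box (cx, cy, w, h, theta); images that yield fewer combined
+        # boxes than the batch maximum are zero-padded afterwards so that the
+        # arrays returned through tf.py_func stay rectangular.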
+ group_indices = group_indices_batch[image_id, :] + combined_rboxes = [] + for i in range(group_count): + segments_group = segments[np.where(group_indices == i)[0], :] + if segments_group.shape[0] > 0: + combined_rbox = combine_segs(segments_group) + combined_rboxes.append(combined_rbox) + combined_rboxes_batch.append(combined_rboxes) + combined_counts_batch.append(len(combined_rboxes)) + + max_count = np.max(combined_counts_batch) + for image_id in range(batch_size): + if not combined_counts_batch[image_id] == max_count: + combined_rboxes_pad = (max_count - combined_counts_batch[image_id] + ) * [RBOX_DIM * [0.0]] + combined_rboxes_batch[image_id] = np.vstack( + (combined_rboxes_batch[image_id], + np.array(combined_rboxes_pad))) + + return np.asarray(combined_rboxes_batch, + np.float32), np.asarray(combined_counts_batch, np.int32) + + +# combine_segments rewrite in python version +def combine_segments_python(segments, group_indices, segment_counts): + combined_rboxes, combined_counts = tf.py_func( + combine_segments_batch, [segments, group_indices, segment_counts], + [tf.float32, tf.int32]) + return combined_rboxes, combined_counts + + +# decode_segments_links rewrite in python version +def get_coord(offsets, map_size, offsets_defaults): + if offsets < offsets_defaults[1][0]: + l_idx = 0 + x = offsets % map_size[0][1] + y = offsets // map_size[0][1] + elif offsets < offsets_defaults[2][0]: + l_idx = 1 + x = (offsets - offsets_defaults[1][0]) % map_size[1][1] + y = (offsets - offsets_defaults[1][0]) // map_size[1][1] + elif offsets < offsets_defaults[3][0]: + l_idx = 2 + x = (offsets - offsets_defaults[2][0]) % map_size[2][1] + y = (offsets - offsets_defaults[2][0]) // map_size[2][1] + elif offsets < offsets_defaults[4][0]: + l_idx = 3 + x = (offsets - offsets_defaults[3][0]) % map_size[3][1] + y = (offsets - offsets_defaults[3][0]) // map_size[3][1] + elif offsets < offsets_defaults[5][0]: + l_idx = 4 + x = (offsets - offsets_defaults[4][0]) % map_size[4][1] + y = (offsets - offsets_defaults[4][0]) // map_size[4][1] + else: + l_idx = 5 + x = (offsets - offsets_defaults[5][0]) % map_size[5][1] + y = (offsets - offsets_defaults[5][0]) // map_size[5][1] + + return l_idx, x, y + + +def get_coord_link(offsets, map_size, offsets_defaults): + if offsets < offsets_defaults[1][1]: + offsets_node = offsets // N_LOCAL_LINKS + link_idx = offsets % N_LOCAL_LINKS + else: + offsets_node = (offsets - offsets_defaults[1][1]) // ( + N_LOCAL_LINKS + N_CROSS_LINKS) + offsets_defaults[1][0] + link_idx = (offsets - offsets_defaults[1][1]) % ( + N_LOCAL_LINKS + N_CROSS_LINKS) + l_idx, x, y = get_coord(offsets_node, map_size, offsets_defaults) + return l_idx, x, y, link_idx + + +def is_valid_coord(l_idx, x, y, map_size): + w = map_size[l_idx][1] + h = map_size[l_idx][0] + return x >= 0 and x < w and y >= 0 and y < h + + +def get_neighbours(l_idx, x, y, map_size, offsets_defaults): + if l_idx == 0: + coord = [(0, x - 1, y - 1), (0, x, y - 1), (0, x + 1, y - 1), + (0, x - 1, y), (0, x + 1, y), (0, x - 1, y + 1), + (0, x, y + 1), (0, x + 1, y + 1)] + else: + coord = [(l_idx, x - 1, y - 1), + (l_idx, x, y - 1), (l_idx, x + 1, y - 1), (l_idx, x - 1, y), + (l_idx, x + 1, y), (l_idx, x - 1, y + 1), (l_idx, x, y + 1), + (l_idx, x + 1, y + 1), (l_idx - 1, 2 * x, 2 * y), + (l_idx - 1, 2 * x + 1, 2 * y), (l_idx - 1, 2 * x, 2 * y + 1), + (l_idx - 1, 2 * x + 1, 2 * y + 1)] + neighbours_offsets = [] + link_idx = 0 + for nl_idx, nx, ny in coord: + if is_valid_coord(nl_idx, nx, ny, map_size): + neighbours_offset_node = 
offsets_defaults[nl_idx][ + 0] + map_size[nl_idx][1] * ny + nx + if l_idx == 0: + neighbours_offset_link = offsets_defaults[l_idx][1] + ( + map_size[l_idx][1] * y + x) * N_LOCAL_LINKS + link_idx + else: + off_tmp = (map_size[l_idx][1] * y + x) * ( + N_LOCAL_LINKS + N_CROSS_LINKS) + neighbours_offset_link = offsets_defaults[l_idx][ + 1] + off_tmp + link_idx + neighbours_offsets.append( + [neighbours_offset_node, neighbours_offset_link, link_idx]) + link_idx += 1 + # [node_offsets, link_offsets, link_idx(0-7/11)] + return neighbours_offsets + + +def decode_segments_links_python(image_size, all_nodes, all_links, all_reg, + anchor_sizes): + batch_size = 1 # FLAGS.test_batch_size + # offsets = 12285 #768 + all_nodes_flat = tf.concat( + [tf.reshape(o, [batch_size, -1, N_SEG_CLASSES]) for o in all_nodes], + axis=1) + all_links_flat = tf.concat( + [tf.reshape(o, [batch_size, -1, N_LNK_CLASSES]) for o in all_links], + axis=1) + all_reg_flat = tf.concat( + [tf.reshape(o, [batch_size, -1, OFFSET_DIM]) for o in all_reg], axis=1) + segments, group_indices, segment_counts, group_indices_all = tf.py_func( + decode_batch, [ + all_nodes_flat, all_links_flat, all_reg_flat, image_size, + tf.constant(anchor_sizes) + ], [tf.float32, tf.int32, tf.int32, tf.int32]) + return segments, group_indices, segment_counts, group_indices_all + + +def decode_segments_links_train(image_size, all_nodes, all_links, all_reg, + anchor_sizes): + batch_size = FLAGS.train_batch_size + # offsets = 12285 #768 + all_nodes_flat = tf.concat( + [tf.reshape(o, [batch_size, -1, N_SEG_CLASSES]) for o in all_nodes], + axis=1) + all_links_flat = tf.concat( + [tf.reshape(o, [batch_size, -1, N_LNK_CLASSES]) for o in all_links], + axis=1) + all_reg_flat = tf.concat( + [tf.reshape(o, [batch_size, -1, OFFSET_DIM]) for o in all_reg], axis=1) + segments, group_indices, segment_counts, group_indices_all = tf.py_func( + decode_batch, [ + all_nodes_flat, all_links_flat, all_reg_flat, image_size, + tf.constant(anchor_sizes) + ], [tf.float32, tf.int32, tf.int32, tf.int32]) + return segments, group_indices, segment_counts, group_indices_all + + +def decode_batch(all_nodes, all_links, all_reg, image_size, anchor_sizes): + batch_size = all_nodes.shape[0] + batch_segments = [] + batch_group_indices = [] + batch_segments_counts = [] + batch_group_indices_all = [] + for image_id in range(batch_size): + image_node_scores = all_nodes[image_id, :, :] + image_link_scores = all_links[image_id, :, :] + image_reg = all_reg[image_id, :, :] + image_segments, image_group_indices, image_segments_counts, image_group_indices_all = decode_image( + image_node_scores, image_link_scores, image_reg, image_size, + anchor_sizes) + batch_segments.append(image_segments) + batch_group_indices.append(image_group_indices) + batch_segments_counts.append(image_segments_counts) + batch_group_indices_all.append(image_group_indices_all) + max_count = np.max(batch_segments_counts) + for image_id in range(batch_size): + if not batch_segments_counts[image_id] == max_count: + batch_segments_pad = (max_count - batch_segments_counts[image_id] + ) * [OFFSET_DIM * [0.0]] + batch_segments[image_id] = np.vstack( + (batch_segments[image_id], np.array(batch_segments_pad))) + batch_group_indices[image_id] = np.hstack( + (batch_group_indices[image_id], + np.array( + (max_count - batch_segments_counts[image_id]) * [-1]))) + return np.asarray(batch_segments, np.float32), np.asarray( + batch_group_indices, + np.int32), np.asarray(batch_segments_counts, + np.int32), np.asarray(batch_group_indices_all, + 
np.int32) + + +def decode_image(image_node_scores, image_link_scores, image_reg, image_size, + anchor_sizes): + map_size = [] + offsets_defaults = [] + offsets_default_node = 0 + offsets_default_link = 0 + for i in range(N_DET_LAYERS): + offsets_defaults.append([offsets_default_node, offsets_default_link]) + map_size.append(image_size // (2**(2 + i))) + offsets_default_node += map_size[i][0] * map_size[i][1] + if i == 0: + offsets_default_link += map_size[i][0] * map_size[i][ + 1] * N_LOCAL_LINKS + else: + offsets_default_link += map_size[i][0] * map_size[i][1] * ( + N_LOCAL_LINKS + N_CROSS_LINKS) + + image_group_indices_all = decode_image_by_join(image_node_scores, + image_link_scores, + FLAGS.node_threshold, + FLAGS.link_threshold, + map_size, offsets_defaults) + image_group_indices_all -= 1 + image_group_indices = image_group_indices_all[np.where( + image_group_indices_all >= 0)[0]] + image_segments_counts = len(image_group_indices) + # convert image_reg to segments with scores(OFFSET_DIM+1) + image_segments = np.zeros((image_segments_counts, OFFSET_DIM), + dtype=np.float32) + for i, offsets in enumerate(np.where(image_group_indices_all >= 0)[0]): + encoded_cx = image_reg[offsets, 0] + encoded_cy = image_reg[offsets, 1] + encoded_width = image_reg[offsets, 2] + encoded_height = image_reg[offsets, 3] + encoded_theta_cos = image_reg[offsets, 4] + encoded_theta_sin = image_reg[offsets, 5] + + l_idx, x, y = get_coord(offsets, map_size, offsets_defaults) + rs = anchor_sizes[l_idx] + eps = 1e-6 + image_segments[i, 0] = encoded_cx * rs + (2**(2 + l_idx)) * (x + 0.5) + image_segments[i, 1] = encoded_cy * rs + (2**(2 + l_idx)) * (y + 0.5) + image_segments[i, 2] = np.exp(encoded_width) * rs - eps + image_segments[i, 3] = np.exp(encoded_height) * rs - eps + image_segments[i, 4] = encoded_theta_cos + image_segments[i, 5] = encoded_theta_sin + + return image_segments, image_group_indices, image_segments_counts, image_group_indices_all + + +def decode_image_by_join(node_scores, link_scores, node_threshold, + link_threshold, map_size, offsets_defaults): + node_mask = node_scores[:, POS_LABEL] >= node_threshold + link_mask = link_scores[:, POS_LABEL] >= link_threshold + group_mask = np.zeros_like(node_mask, np.int32) - 1 + offsets_pos = np.where(node_mask == 1)[0] + + def find_parent(point): + return group_mask[point] + + def set_parent(point, parent): + group_mask[point] = parent + + def is_root(point): + return find_parent(point) == -1 + + def find_root(point): + root = point + update_parent = False + while not is_root(root): + root = find_parent(root) + update_parent = True + + # for acceleration of find_root + if update_parent: + set_parent(point, root) + + return root + + def join(p1, p2): + root1 = find_root(p1) + root2 = find_root(p2) + + if root1 != root2: + set_parent(root1, root2) + + def get_all(): + root_map = {} + + def get_index(root): + if root not in root_map: + root_map[root] = len(root_map) + 1 + return root_map[root] + + mask = np.zeros_like(node_mask, dtype=np.int32) + for i, point in enumerate(offsets_pos): + point_root = find_root(point) + bbox_idx = get_index(point_root) + mask[point] = bbox_idx + return mask + + # join by link + pos_link = 0 + for i, offsets in enumerate(offsets_pos): + l_idx, x, y = get_coord(offsets, map_size, offsets_defaults) + neighbours = get_neighbours(l_idx, x, y, map_size, offsets_defaults) + for n_idx, noffsets in enumerate(neighbours): + link_value = link_mask[noffsets[1]] + node_cls = node_mask[noffsets[0]] + if link_value and node_cls: + pos_link += 
1 + join(offsets, noffsets[0]) + # print(pos_link) + mask = get_all() + return mask + + +def get_link_mask(node_mask, offsets_defaults, link_max): + link_mask = np.zeros_like(link_max) + link_mask[0:offsets_defaults[1][1]] = np.tile( + node_mask[0:offsets_defaults[1][0]], + (N_LOCAL_LINKS, 1)).transpose().reshape(offsets_defaults[1][1]) + link_mask[offsets_defaults[1][1]:offsets_defaults[2][1]] = np.tile( + node_mask[offsets_defaults[1][0]:offsets_defaults[2][0]], + (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( + (offsets_defaults[2][1] - offsets_defaults[1][1])) + link_mask[offsets_defaults[2][1]:offsets_defaults[3][1]] = np.tile( + node_mask[offsets_defaults[2][0]:offsets_defaults[3][0]], + (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( + (offsets_defaults[3][1] - offsets_defaults[2][1])) + link_mask[offsets_defaults[3][1]:offsets_defaults[4][1]] = np.tile( + node_mask[offsets_defaults[3][0]:offsets_defaults[4][0]], + (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( + (offsets_defaults[4][1] - offsets_defaults[3][1])) + link_mask[offsets_defaults[4][1]:offsets_defaults[5][1]] = np.tile( + node_mask[offsets_defaults[4][0]:offsets_defaults[5][0]], + (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( + (offsets_defaults[5][1] - offsets_defaults[4][1])) + link_mask[offsets_defaults[5][1]:] = np.tile( + node_mask[offsets_defaults[5][0]:], + (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( + (len(link_mask) - offsets_defaults[5][1])) + + return link_mask + + +def get_link8(link_scores_raw, map_size): + # link[i-1] -local- start -16- end -cross- link[i] + link8_mask = np.zeros((link_scores_raw.shape[0])) + for i in range(N_DET_LAYERS): + if i == 0: + offsets_start = map_size[i][0] * map_size[i][1] * N_LOCAL_LINKS + offsets_end = map_size[i][0] * map_size[i][1] * ( + N_LOCAL_LINKS + 16) + offsets_link = map_size[i][0] * map_size[i][1] * ( + N_LOCAL_LINKS + 16) + link8_mask[:offsets_start] = 1 + else: + offsets_start = offsets_link + map_size[i][0] * map_size[i][ + 1] * N_LOCAL_LINKS + offsets_end = offsets_link + map_size[i][0] * map_size[i][1] * ( + N_LOCAL_LINKS + 16) + offsets_link_pre = offsets_link + offsets_link += map_size[i][0] * map_size[i][1] * ( + N_LOCAL_LINKS + 16 + N_CROSS_LINKS) + link8_mask[offsets_link_pre:offsets_start] = 1 + link8_mask[offsets_end:offsets_link] = 1 + return link_scores_raw[np.where(link8_mask > 0)[0], :] + + +def decode_image_by_mutex(node_scores, link_scores, node_threshold, + link_threshold, map_size, offsets_defaults): + node_mask = node_scores[:, POS_LABEL] >= node_threshold + link_pos = link_scores[:, POS_LABEL] + link_mut = link_scores[:, MUT_LABEL] + link_max = np.max(np.vstack((link_pos, link_mut)), axis=0) + + offsets_pos_list = np.where(node_mask == 1)[0].tolist() + + link_mask_th = link_max >= link_threshold + link_mask = get_link_mask(node_mask, offsets_defaults, link_max) + offsets_link_max = np.argsort(-(link_max * link_mask * link_mask_th)) + offsets_link_max = offsets_link_max[:len(offsets_pos_list) * 8] + + group_mask = np.zeros_like(node_mask, dtype=np.int32) - 1 + mutex_mask = len(node_mask) * [[]] + + def find_parent(point): + return group_mask[point] + + def set_parent(point, parent): + group_mask[point] = parent + + def set_mutex_constraint(point, mutex_point_list): + mutex_mask[point] = mutex_point_list + + def find_mutex_constraint(point): + mutex_point_list = mutex_mask[point] + # update mutex_point_list + mutex_point_list_new = [] + if not mutex_point_list == []: + for mutex_point in 
mutex_point_list: + if not is_root(mutex_point): + mutex_point = find_root(mutex_point) + if mutex_point not in mutex_point_list_new: + mutex_point_list_new.append(mutex_point) + set_mutex_constraint(point, mutex_point_list_new) + return mutex_point_list_new + + def combine_mutex_constraint(point, parent): + mutex_point_list = find_mutex_constraint(point) + mutex_parent_list = find_mutex_constraint(parent) + for mutex_point in mutex_point_list: + if not is_root(mutex_point): + mutex_point = find_root(mutex_point) + if mutex_point not in mutex_parent_list: + mutex_parent_list.append(mutex_point) + set_mutex_constraint(parent, mutex_parent_list) + + def add_mutex_constraint(p1, p2): + mutex_point_list1 = find_mutex_constraint(p1) + mutex_point_list2 = find_mutex_constraint(p2) + + if p1 not in mutex_point_list2: + mutex_point_list2.append(p1) + if p2 not in mutex_point_list1: + mutex_point_list1.append(p2) + set_mutex_constraint(p1, mutex_point_list1) + set_mutex_constraint(p2, mutex_point_list2) + + def is_root(point): + return find_parent(point) == -1 + + def find_root(point): + root = point + update_parent = False + while not is_root(root): + root = find_parent(root) + update_parent = True + + # for acceleration of find_root + if update_parent: + set_parent(point, root) + + return root + + def join(p1, p2): + root1 = find_root(p1) + root2 = find_root(p2) + + if root1 != root2 and (root1 not in find_mutex_constraint(root2)): + set_parent(root1, root2) + combine_mutex_constraint(root1, root2) + + def disjoin(p1, p2): + root1 = find_root(p1) + root2 = find_root(p2) + + if root1 != root2: + add_mutex_constraint(root1, root2) + + def get_all(): + root_map = {} + + def get_index(root): + if root not in root_map: + root_map[root] = len(root_map) + 1 + return root_map[root] + + mask = np.zeros_like(node_mask, dtype=np.int32) + for _, point in enumerate(offsets_pos_list): + point_root = find_root(point) + bbox_idx = get_index(point_root) + mask[point] = bbox_idx + return mask + + # join by link + pos_link = 0 + mut_link = 0 + for _, offsets_link in enumerate(offsets_link_max): + l_idx, x, y, link_idx = get_coord_link(offsets_link, map_size, + offsets_defaults) + offsets = offsets_defaults[l_idx][0] + map_size[l_idx][1] * y + x + if offsets in offsets_pos_list: + neighbours = get_neighbours(l_idx, x, y, map_size, + offsets_defaults) + if not len(np.where(np.array(neighbours)[:, + 2] == link_idx)[0]) == 0: + noffsets = neighbours[np.where( + np.array(neighbours)[:, 2] == link_idx)[0][0]] + link_pos_value = link_pos[noffsets[1]] + link_mut_value = link_mut[noffsets[1]] + node_cls = node_mask[noffsets[0]] + if node_cls and (link_pos_value > link_mut_value): + pos_link += 1 + join(offsets, noffsets[0]) + elif node_cls and (link_pos_value < link_mut_value): + mut_link += 1 + disjoin(offsets, noffsets[0]) + + mask = get_all() + return mask diff --git a/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py b/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py new file mode 100644 index 00000000..6371d4e5 --- /dev/null +++ b/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py @@ -0,0 +1,432 @@ +"""Contains definitions for the original form of Residual Networks. +The 'v1' residual networks (ResNets) implemented in this module were proposed +by: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 +Other variants were introduced in: +[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. 
arXiv: 1603.05027 +The networks defined in this module utilize the bottleneck building block of +[1] with projection shortcuts only for increasing depths. They employ batch +normalization *after* every weight layer. This is the architecture used by +MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and +ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1' +architecture and the alternative 'v2' architecture of [2] which uses batch +normalization *before* every weight layer in the so-called full pre-activation +units. +Typical use: + from tensorflow.contrib.slim.nets import resnet_v1 +ResNet-101 for image classification into 1000 classes: + # inputs has shape [batch, 224, 224, 3] + with slim.arg_scope(resnet_v1.resnet_arg_scope()): + net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False) +ResNet-101 for semantic segmentation into 21 classes: + # inputs has shape [batch, 513, 513, 3] + with slim.arg_scope(resnet_v1.resnet_arg_scope()): + net, end_points = resnet_v1.resnet_v1_101(inputs, + 21, + is_training=False, + global_pool=False, + output_stride=16) +""" +import tensorflow as tf +import tf_slim as slim + +from . import resnet_utils + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + +resnet_arg_scope = resnet_utils.resnet_arg_scope + + +@slim.add_arg_scope +def basicblock(inputs, + depth, + depth_bottleneck, + stride, + rate=1, + outputs_collections=None, + scope=None): + """Bottleneck residual unit variant with BN after convolutions. + This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for + its definition. Note that we use here the bottleneck variant which has an + extra bottleneck layer. + When putting together two consecutive ResNet blocks that use this unit, one + should use stride = 2 in the last unit of the first block. + Args: + inputs: A tensor of size [batch, height, width, channels]. + depth: The depth of the ResNet unit output. + depth_bottleneck: The depth of the bottleneck layers. + stride: The ResNet unit's stride. Determines the amount of downsampling of + the units output compared to its input. + rate: An integer, rate for atrous convolution. + outputs_collections: Collection to add the ResNet unit output. + scope: Optional variable_scope. + Returns: + The ResNet unit's output. + """ + with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: + depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) + if depth == depth_in: + shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') + else: + shortcut = slim.conv2d( + inputs, + depth, [1, 1], + stride=stride, + activation_fn=None, + scope='shortcut') + + residual = resnet_utils.conv2d_same( + inputs, depth, 3, stride, rate=rate, scope='conv1') + residual = resnet_utils.conv2d_same( + residual, depth, 3, 1, rate=rate, scope='conv2') + + output = tf.nn.relu(residual + shortcut) + + return slim.utils.collect_named_outputs(outputs_collections, + sc.original_name_scope, output) + + +@slim.add_arg_scope +def bottleneck(inputs, + depth, + depth_bottleneck, + stride, + rate=1, + outputs_collections=None, + scope=None): + """Bottleneck residual unit variant with BN after convolutions. + This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for + its definition. Note that we use here the bottleneck variant which has an + extra bottleneck layer. + When putting together two consecutive ResNet blocks that use this unit, one + should use stride = 2 in the last unit of the first block. 
+ Args: + inputs: A tensor of size [batch, height, width, channels]. + depth: The depth of the ResNet unit output. + depth_bottleneck: The depth of the bottleneck layers. + stride: The ResNet unit's stride. Determines the amount of downsampling of + the units output compared to its input. + rate: An integer, rate for atrous convolution. + outputs_collections: Collection to add the ResNet unit output. + scope: Optional variable_scope. + Returns: + The ResNet unit's output. + """ + with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: + depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) + if depth == depth_in: + shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') + else: + shortcut = slim.conv2d( + inputs, + depth, [1, 1], + stride=stride, + activation_fn=None, + scope='shortcut') + + residual = slim.conv2d( + inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1') + residual = resnet_utils.conv2d_same( + residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') + residual = slim.conv2d( + residual, + depth, [1, 1], + stride=1, + activation_fn=None, + scope='conv3') + + output = tf.nn.relu(shortcut + residual) + + return slim.utils.collect_named_outputs(outputs_collections, + sc.original_name_scope, output) + + +def resnet_v1(inputs, + blocks, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + include_root_block=True, + spatial_squeeze=True, + reuse=None, + scope=None): + """Generator for v1 ResNet models. + This function generates a family of ResNet v1 models. See the resnet_v1_*() + methods for specific model instantiations, obtained by selecting different + block instantiations that produce ResNets of various depths. + Training for image classification on Imagenet is usually done with [224, 224] + inputs, resulting in [7, 7] feature maps at the output of the last ResNet + block for the ResNets defined in [1] that have nominal stride equal to 32. + However, for dense prediction tasks we advise that one uses inputs with + spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In + this case the feature maps at the ResNet output will have spatial shape + [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] + and corners exactly aligned with the input image corners, which greatly + facilitates alignment of the features to the image. Using as input [225, 225] + images results in [8, 8] feature maps at the output of the last ResNet block. + For dense prediction tasks, the ResNet needs to run in fully-convolutional + (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all + have nominal stride equal to 32 and a good choice in FCN mode is to use + output_stride=16 in order to increase the density of the computed features at + small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + blocks: A list of length equal to the number of ResNet blocks. Each element + is a resnet_utils.Block object describing the units in the block. + num_classes: Number of predicted classes for classification tasks. If None + we return the features before the logit layer. + is_training: whether is training or not. + global_pool: If True, we perform global average pooling before computing the + logits. Set to True for image classification, False for dense prediction. + output_stride: If None, then the output will be computed at the nominal + network stride. 
If output_stride is not None, it specifies the requested + ratio of input to output spatial resolution. + include_root_block: If True, include the initial convolution followed by + max-pooling, if False excludes it. + spatial_squeeze: if True, logits is of shape [B, C], if false logits is + of shape [B, 1, 1, C], where B is batch_size and C is number of classes. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + Returns: + net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. + If global_pool is False, then height_out and width_out are reduced by a + factor of output_stride compared to the respective height_in and width_in, + else both height_out and width_out equal one. If num_classes is None, then + net is the output of the last ResNet block, potentially after global + average pooling. If num_classes is not None, net contains the pre-softmax + activations. + end_points: A dictionary from components of the network to the corresponding + activation. + Raises: + ValueError: If the target output_stride is not valid. + """ + with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: + end_points_collection = sc.name + '_end_points' + with slim.arg_scope( + [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense], + outputs_collections=end_points_collection): + with slim.arg_scope([slim.batch_norm], is_training=is_training): + net = inputs + if include_root_block: + if output_stride is not None: + if output_stride % 4 != 0: + raise ValueError( + 'The output_stride needs to be a multiple of 4.' + ) + output_stride /= 4 + net = resnet_utils.conv2d_same( + net, 64, 7, stride=2, scope='conv1') + net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]]) + net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') + + net = slim.utils.collect_named_outputs( + end_points_collection, 'pool2', net) + + net = resnet_utils.stack_blocks_dense(net, blocks, + output_stride) + + end_points = slim.utils.convert_collection_to_dict( + end_points_collection) + + end_points['pool1'] = end_points['resnet_v1_18/block2/unit_2'] + end_points['pool2'] = end_points['resnet_v1_18/block3/unit_2'] + end_points['pool3'] = end_points['resnet_v1_18/block4/unit_2'] + end_points['pool4'] = end_points['resnet_v1_18/block5/unit_2'] + end_points['pool5'] = end_points['resnet_v1_18/block6/unit_2'] + end_points['pool6'] = net + + return net, end_points + + +resnet_v1.default_image_size = 224 + + +def resnet_v1_18(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_18'): + """ResNet-18 model of [1]. 
See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block('block1', basicblock, + [(64, 64, 1)] + [(64, 64, 1)]), + resnet_utils.Block('block2', basicblock, + [(128, 128, 1)] + [(128, 128, 1)]), + resnet_utils.Block('block3', basicblock, + [(256, 256, 2)] + [(256, 256, 1)]), + resnet_utils.Block('block4', basicblock, + [(512, 512, 2)] + [(512, 512, 1)]), + resnet_utils.Block('block5', basicblock, + [(256, 256, 2)] + [(256, 256, 1)]), + resnet_utils.Block('block6', basicblock, + [(256, 256, 2)] + [(256, 256, 1)]), + resnet_utils.Block('block7', basicblock, + [(256, 256, 2)] + [(256, 256, 1)]), + ] + return resnet_v1( + inputs, + blocks, + num_classes, + is_training, + global_pool=global_pool, + output_stride=output_stride, + include_root_block=True, + spatial_squeeze=spatial_squeeze, + reuse=reuse, + scope=scope) + + +resnet_v1_18.default_image_size = resnet_v1.default_image_size + + +def resnet_v1_50(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_50'): + """ResNet-50 model of [1]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block('block1', bottleneck, + [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block('block2', bottleneck, + [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block('block3', bottleneck, + [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), + resnet_utils.Block('block4', bottleneck, + [(2048, 512, 1)] * 3 + [(2048, 512, 2)]), + resnet_utils.Block('block5', bottleneck, + [(1024, 256, 1)] * 2 + [(1024, 256, 2)]), + resnet_utils.Block('block6', bottleneck, [(1024, 256, 1)] * 2), + ] + return resnet_v1( + inputs, + blocks, + num_classes, + is_training, + global_pool=global_pool, + output_stride=output_stride, + include_root_block=True, + spatial_squeeze=spatial_squeeze, + reuse=reuse, + scope=scope) + + +resnet_v1_50.default_image_size = resnet_v1.default_image_size + + +def resnet_v1_101(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_101'): + """ResNet-101 model of [1]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block('block1', bottleneck, + [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block('block2', bottleneck, + [(512, 128, 1)] * 3 + [(512, 128, 2)]), + resnet_utils.Block('block3', bottleneck, + [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), + resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3) + ] + return resnet_v1( + inputs, + blocks, + num_classes, + is_training, + global_pool=global_pool, + output_stride=output_stride, + include_root_block=True, + spatial_squeeze=spatial_squeeze, + reuse=reuse, + scope=scope) + + +resnet_v1_101.default_image_size = resnet_v1.default_image_size + + +def resnet_v1_152(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_152'): + """ResNet-152 model of [1]. 
See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block('block1', bottleneck, + [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block('block2', bottleneck, + [(512, 128, 1)] * 7 + [(512, 128, 2)]), + resnet_utils.Block('block3', bottleneck, + [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3) + ] + return resnet_v1( + inputs, + blocks, + num_classes, + is_training, + global_pool=global_pool, + output_stride=output_stride, + include_root_block=True, + spatial_squeeze=spatial_squeeze, + reuse=reuse, + scope=scope) + + +resnet_v1_152.default_image_size = resnet_v1.default_image_size + + +def resnet_v1_200(inputs, + num_classes=None, + is_training=True, + global_pool=True, + output_stride=None, + spatial_squeeze=True, + reuse=None, + scope='resnet_v1_200'): + """ResNet-200 model of [2]. See resnet_v1() for arg and return description.""" + blocks = [ + resnet_utils.Block('block1', bottleneck, + [(256, 64, 1)] * 2 + [(256, 64, 2)]), + resnet_utils.Block('block2', bottleneck, + [(512, 128, 1)] * 23 + [(512, 128, 2)]), + resnet_utils.Block('block3', bottleneck, + [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), + resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3) + ] + return resnet_v1( + inputs, + blocks, + num_classes, + is_training, + global_pool=global_pool, + output_stride=output_stride, + include_root_block=True, + spatial_squeeze=spatial_squeeze, + reuse=reuse, + scope=scope) + + +resnet_v1_200.default_image_size = resnet_v1.default_image_size + +if __name__ == '__main__': + input = tf.placeholder(tf.float32, shape=(None, 224, 224, 3), name='input') + with slim.arg_scope(resnet_arg_scope()) as sc: + logits = resnet_v1_50(input) diff --git a/modelscope/pipelines/cv/ocr_utils/resnet_utils.py b/modelscope/pipelines/cv/ocr_utils/resnet_utils.py new file mode 100644 index 00000000..e0e240c8 --- /dev/null +++ b/modelscope/pipelines/cv/ocr_utils/resnet_utils.py @@ -0,0 +1,231 @@ +"""Contains building blocks for various versions of Residual Networks. +Residual networks (ResNets) were proposed in: + Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 +More variants were introduced in: + Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016 +We can obtain different ResNet variants by changing the network depth, width, +and form of residual unit. This module implements the infrastructure for +building them. Concrete ResNet units and full ResNet networks are implemented in +the accompanying resnet_v1.py and resnet_v2.py modules. +Compared to https://github.com/KaimingHe/deep-residual-networks, in the current +implementation we subsample the output activations in the last residual unit of +each block, instead of subsampling the input activations in the first residual +unit of each block. The two implementations give identical results but our +implementation is more memory efficient. +""" + +import collections + +import tensorflow as tf +import tf_slim as slim + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): + """A named tuple describing a ResNet block. + Its parts are: + scope: The scope of the `Block`. + unit_fn: The ResNet unit function which takes as input a `Tensor` and + returns another `Tensor` with the output of the ResNet unit. 
+ args: A list of length equal to the number of units in the `Block`. The list + contains one (depth, depth_bottleneck, stride) tuple for each unit in the + block to serve as argument to unit_fn. + """ + + +def subsample(inputs, factor, scope=None): + """Subsamples the input along the spatial dimensions. + Args: + inputs: A `Tensor` of size [batch, height_in, width_in, channels]. + factor: The subsampling factor. + scope: Optional variable_scope. + Returns: + output: A `Tensor` of size [batch, height_out, width_out, channels] with the + input, either intact (if factor == 1) or subsampled (if factor > 1). + """ + if factor == 1: + return inputs + else: + return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) + + +def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): + """Strided 2-D convolution with 'SAME' padding. + When stride > 1, then we do explicit zero-padding, followed by conv2d with + 'VALID' padding. + Note that + net = conv2d_same(inputs, num_outputs, 3, stride=stride) + is equivalent to + net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME') + net = subsample(net, factor=stride) + whereas + net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME') + is different when the input's height or width is even, which is why we add the + current function. For more details, see ResnetUtilsTest.testConv2DSameEven(). + Args: + inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. + num_outputs: An integer, the number of output filters. + kernel_size: An int with the kernel_size of the filters. + stride: An integer, the output stride. + rate: An integer, rate for atrous convolution. + scope: Scope. + Returns: + output: A 4-D tensor of size [batch, height_out, width_out, channels] with + the convolution output. + """ + if stride == 1: + return slim.conv2d( + inputs, + num_outputs, + kernel_size, + stride=1, + rate=rate, + padding='SAME', + scope=scope) + else: + kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) + pad_total = kernel_size_effective - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + inputs = tf.pad( + inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) + return slim.conv2d( + inputs, + num_outputs, + kernel_size, + stride=stride, + rate=rate, + padding='VALID', + scope=scope) + + +@slim.add_arg_scope +def stack_blocks_dense(net, + blocks, + output_stride=None, + outputs_collections=None): + """Stacks ResNet `Blocks` and controls output feature density. + First, this function creates scopes for the ResNet in the form of + 'block_name/unit_1', 'block_name/unit_2', etc. + Second, this function allows the user to explicitly control the ResNet + output_stride, which is the ratio of the input to output spatial resolution. + This is useful for dense prediction tasks such as semantic segmentation or + object detection. + Most ResNets consist of 4 ResNet blocks and subsample the activations by a + factor of 2 when transitioning between consecutive ResNet blocks. This results + to a nominal ResNet output_stride equal to 8. If we set the output_stride to + half the nominal network stride (e.g., output_stride=4), then we compute + responses twice. + Control of the output feature density is implemented by atrous convolution. + Args: + net: A `Tensor` of size [batch, height, width, channels]. + blocks: A list of length equal to the number of ResNet `Blocks`. Each + element is a ResNet `Block` object describing the units in the `Block`. 
+ output_stride: If `None`, then the output will be computed at the nominal + network stride. If output_stride is not `None`, it specifies the requested + ratio of input to output spatial resolution, which needs to be equal to + the product of unit strides from the start up to some level of the ResNet. + For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, + then valid values for the output_stride are 1, 2, 6, 24 or None (which + is equivalent to output_stride=24). + outputs_collections: Collection to add the ResNet block outputs. + Returns: + net: Output tensor with stride equal to the specified output_stride. + Raises: + ValueError: If the target output_stride is not valid. + """ + # The current_stride variable keeps track of the effective stride of the + # activations. This allows us to invoke atrous convolution whenever applying + # the next residual unit would result in the activations having stride larger + # than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + for block in blocks: + with tf.variable_scope(block.scope, 'block', [net]): + for i, unit in enumerate(block.args): + if output_stride is not None and current_stride > output_stride: + raise ValueError( + 'The target output_stride cannot be reached.') + + with tf.variable_scope( + 'unit_%d' % (i + 1), values=[net]) as sc: + unit_depth, unit_depth_bottleneck, unit_stride = unit + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. + if output_stride is not None and current_stride == output_stride: + net = block.unit_fn( + net, + depth=unit_depth, + depth_bottleneck=unit_depth_bottleneck, + stride=1, + rate=rate) + rate *= unit_stride + + else: + net = block.unit_fn( + net, + depth=unit_depth, + depth_bottleneck=unit_depth_bottleneck, + stride=unit_stride, + rate=1) + current_stride *= unit_stride + net = slim.utils.collect_named_outputs( + outputs_collections, sc.name, net) + + if output_stride is not None and current_stride != output_stride: + raise ValueError('The target output_stride cannot be reached.') + + return net + + +def resnet_arg_scope(weight_decay=0.0001, + batch_norm_decay=0.997, + batch_norm_epsilon=1e-5, + batch_norm_scale=True): + """Defines the default ResNet arg scope. + TODO(gpapan): The batch-normalization related default values above are + appropriate for use in conjunction with the reference ResNet models + released at https://github.com/KaimingHe/deep-residual-networks. When + training ResNets from scratch, they might need to be tuned. + Args: + weight_decay: The weight decay to use for regularizing the model. + batch_norm_decay: The moving average decay when estimating layer activation + statistics in batch normalization. + batch_norm_epsilon: Small constant to prevent division by zero when + normalizing activations by their variance in batch normalization. + batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the + activations in the batch normalization layer. + Returns: + An `arg_scope` to use for the resnet models. 
+ """ + batch_norm_params = { + 'decay': batch_norm_decay, + 'epsilon': batch_norm_epsilon, + 'scale': batch_norm_scale, + 'updates_collections': tf.GraphKeys.UPDATE_OPS, + } + + with slim.arg_scope( + [slim.conv2d], + weights_regularizer=slim.l2_regularizer(weight_decay), + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=tf.nn.relu, + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params): + with slim.arg_scope([slim.batch_norm], **batch_norm_params): + # The following implies padding='SAME' for pool1, which makes feature + # alignment easier for dense prediction tasks. This is also used in + # https://github.com/facebook/fb.resnet.torch. However the accompanying + # code of 'Deep Residual Learning for Image Recognition' uses + # padding='VALID' for pool1. You can switch to that choice by setting + # slim.arg_scope([slim.max_pool2d], padding='VALID'). + with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: + return arg_sc diff --git a/modelscope/pipelines/cv/ocr_utils/utils.py b/modelscope/pipelines/cv/ocr_utils/utils.py new file mode 100644 index 00000000..be8e3371 --- /dev/null +++ b/modelscope/pipelines/cv/ocr_utils/utils.py @@ -0,0 +1,108 @@ +import cv2 +import numpy as np + + +def rboxes_to_polygons(rboxes): + """ + Convert rboxes to polygons + ARGS + `rboxes`: [n, 5] + RETURN + `polygons`: [n, 8] + """ + + theta = rboxes[:, 4:5] + cxcy = rboxes[:, :2] + half_w = rboxes[:, 2:3] / 2. + half_h = rboxes[:, 3:4] / 2. + v1 = np.hstack([np.cos(theta) * half_w, np.sin(theta) * half_w]) + v2 = np.hstack([-np.sin(theta) * half_h, np.cos(theta) * half_h]) + p1 = cxcy - v1 - v2 + p2 = cxcy + v1 - v2 + p3 = cxcy + v1 + v2 + p4 = cxcy - v1 + v2 + polygons = np.hstack([p1, p2, p3, p4]) + return polygons + + +def cal_width(box): + pd1 = point_dist(box[0], box[1], box[2], box[3]) + pd2 = point_dist(box[4], box[5], box[6], box[7]) + return (pd1 + pd2) / 2 + + +def point_dist(x1, y1, x2, y2): + return np.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) + + +def draw_polygons(img, polygons): + for p in polygons.tolist(): + p = [int(o) for o in p] + cv2.line(img, (p[0], p[1]), (p[2], p[3]), (0, 255, 0), 1) + cv2.line(img, (p[2], p[3]), (p[4], p[5]), (0, 255, 0), 1) + cv2.line(img, (p[4], p[5]), (p[6], p[7]), (0, 255, 0), 1) + cv2.line(img, (p[6], p[7]), (p[0], p[1]), (0, 255, 0), 1) + return img + + +def nms_python(boxes): + boxes = sorted(boxes, key=lambda x: -x[8]) + nms_flag = [True] * len(boxes) + for i, a in enumerate(boxes): + if not nms_flag[i]: + continue + else: + for j, b in enumerate(boxes): + if not j > i: + continue + if not nms_flag[j]: + continue + score_a = a[8] + score_b = b[8] + rbox_a = polygon2rbox(a[:8]) + rbox_b = polygon2rbox(b[:8]) + if point_in_rbox(rbox_a[:2], rbox_b) or point_in_rbox( + rbox_b[:2], rbox_a): + if score_a > score_b: + nms_flag[j] = False + boxes_nms = [] + for i, box in enumerate(boxes): + if nms_flag[i]: + boxes_nms.append(box) + return boxes_nms + + +def point_in_rbox(c, rbox): + cx0, cy0 = c[0], c[1] + cx1, cy1 = rbox[0], rbox[1] + w, h = rbox[2], rbox[3] + theta = rbox[4] + dist_x = np.abs((cx1 - cx0) * np.cos(theta) + (cy1 - cy0) * np.sin(theta)) + dist_y = np.abs(-(cx1 - cx0) * np.sin(theta) + (cy1 - cy0) * np.cos(theta)) + return ((dist_x < w / 2.0) and (dist_y < h / 2.0)) + + +def polygon2rbox(polygon): + x1, x2, x3, x4 = polygon[0], polygon[2], polygon[4], polygon[6] + y1, y2, y3, y4 = polygon[1], polygon[3], polygon[5], polygon[7] + c_x = (x1 + x2 + x3 + x4) / 4 + c_y = (y1 + y2 + y3 + y4) / 4 + 
w1 = point_dist(x1, y1, x2, y2) + w2 = point_dist(x3, y3, x4, y4) + h1 = point_line_dist(c_x, c_y, x1, y1, x2, y2) + h2 = point_line_dist(c_x, c_y, x3, y3, x4, y4) + h = h1 + h2 + w = (w1 + w2) / 2 + theta1 = np.arctan2(y2 - y1, x2 - x1) + theta2 = np.arctan2(y3 - y4, x3 - x4) + theta = (theta1 + theta2) / 2.0 + return [c_x, c_y, w, h, theta] + + +def point_line_dist(px, py, x1, y1, x2, y2): + eps = 1e-6 + dx = x2 - x1 + dy = y2 - y1 + div = np.sqrt(dx * dx + dy * dy) + eps + dist = np.abs(px * dy - py * dx + x2 * y1 - y2 * x1) / div + return dist diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py index b1ee121c..b7402b93 100644 --- a/modelscope/pipelines/multi_modal/__init__.py +++ b/modelscope/pipelines/multi_modal/__init__.py @@ -1 +1 @@ -from .image_caption_pipeline import ImageCaptionPipeline +from .image_captioning_pipeline import ImageCaptionPipeline diff --git a/modelscope/pipelines/multi_modal/image_captioning_pipeline.py b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py new file mode 100644 index 00000000..9f32caf4 --- /dev/null +++ b/modelscope/pipelines/multi_modal/image_captioning_pipeline.py @@ -0,0 +1,35 @@ +from typing import Any, Dict, Union + +from modelscope.metainfo import Pipelines +from modelscope.preprocessors import OfaImageCaptionPreprocessor, Preprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from ..base import Model, Pipeline +from ..builder import PIPELINES + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.image_captioning, module_name=Pipelines.image_caption) +class ImageCaptionPipeline(Pipeline): + + def __init__(self, + model: Union[Model, str], + preprocessor: [Preprocessor] = None, + **kwargs): + super().__init__() + assert isinstance(model, str) or isinstance(model, Model), \ + 'model must be a single str or OfaForImageCaptioning' + if isinstance(model, str): + pipe_model = Model.from_pretrained(model) + elif isinstance(model, Model): + pipe_model = model + else: + raise NotImplementedError + if preprocessor is None and pipe_model: + preprocessor = OfaImageCaptionPreprocessor(model_dir=model) + super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py index adfa1d4c..df8dbbd9 100644 --- a/modelscope/pipelines/nlp/__init__.py +++ b/modelscope/pipelines/nlp/__init__.py @@ -1,7 +1,10 @@ +from .dialog_intent_prediction_pipeline import * # noqa F403 +from .dialog_modeling_pipeline import * # noqa F403 +from .dialog_state_tracking import * # noqa F403 +from .fill_mask_pipeline import * # noqa F403 +from .nli_pipeline import * # noqa F403 from .sentence_similarity_pipeline import * # noqa F403 +from .sentiment_classification_pipeline import * # noqa F403 from .sequence_classification_pipeline import * # noqa F403 -from .space.dialog_intent_prediction_pipeline import * # noqa F403 -from .space.dialog_modeling_pipeline import * # noqa F403 -from .space.dialog_state_tracking import * # noqa F403 from .text_generation_pipeline import * # noqa F403 from .word_segmentation_pipeline import * # noqa F403 diff --git a/modelscope/pipelines/nlp/space/dialog_intent_prediction_pipeline.py b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py similarity index 75% rename from modelscope/pipelines/nlp/space/dialog_intent_prediction_pipeline.py 
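The rotated-box helpers added in ocr_utils/utils.py above are inverses of each other up to floating-point error. A minimal sketch, not part of the patch, assuming the module is importable at the path introduced by this diff:

import numpy as np
from modelscope.pipelines.cv.ocr_utils.utils import polygon2rbox, rboxes_to_polygons

# One rotated box as [cx, cy, w, h, theta]; convert to the 8-value polygon and back.
rbox = np.array([[50.0, 30.0, 40.0, 20.0, np.pi / 6]])
polygon = rboxes_to_polygons(rbox)[0]        # [x1, y1, x2, y2, x3, y3, x4, y4]
recovered = polygon2rbox(polygon.tolist())   # approximately [50, 30, 40, 20, pi/6]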
rename to modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py index 57245bdf..3fd38641 100644 --- a/modelscope/pipelines/nlp/space/dialog_intent_prediction_pipeline.py +++ b/modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py @@ -1,16 +1,18 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict -from modelscope.models.nlp import DialogIntentModel -from modelscope.preprocessors import DialogIntentPredictionPreprocessor -from modelscope.utils.constant import Tasks -from ...base import Input, Pipeline -from ...builder import PIPELINES +from ...metainfo import Pipelines +from ...models.nlp import DialogIntentModel +from ...preprocessors import DialogIntentPredictionPreprocessor +from ...utils.constant import Tasks +from ..base import Pipeline +from ..builder import PIPELINES __all__ = ['DialogIntentPredictionPipeline'] @PIPELINES.register_module( - Tasks.dialog_intent_prediction, module_name=r'space-intent') + Tasks.dialog_intent_prediction, + module_name=Pipelines.dialog_intent_prediction) class DialogIntentPredictionPipeline(Pipeline): def __init__(self, model: DialogIntentModel, diff --git a/modelscope/pipelines/nlp/space/dialog_modeling_pipeline.py b/modelscope/pipelines/nlp/dialog_modeling_pipeline.py similarity index 89% rename from modelscope/pipelines/nlp/space/dialog_modeling_pipeline.py rename to modelscope/pipelines/nlp/dialog_modeling_pipeline.py index afa352b6..778284de 100644 --- a/modelscope/pipelines/nlp/space/dialog_modeling_pipeline.py +++ b/modelscope/pipelines/nlp/dialog_modeling_pipeline.py @@ -3,14 +3,15 @@ from typing import Any, Dict, Optional from modelscope.models.nlp import DialogModelingModel from modelscope.preprocessors import DialogModelingPreprocessor from modelscope.utils.constant import Tasks -from ...base import Pipeline, Tensor -from ...builder import PIPELINES +from ...metainfo import Pipelines +from ..base import Pipeline, Tensor +from ..builder import PIPELINES __all__ = ['DialogModelingPipeline'] @PIPELINES.register_module( - Tasks.dialog_modeling, module_name=r'space-modeling') + Tasks.dialog_modeling, module_name=Pipelines.dialog_modeling) class DialogModelingPipeline(Pipeline): def __init__(self, model: DialogModelingModel, diff --git a/modelscope/pipelines/nlp/dialog_state_tracking.py b/modelscope/pipelines/nlp/dialog_state_tracking.py new file mode 100644 index 00000000..823248d2 --- /dev/null +++ b/modelscope/pipelines/nlp/dialog_state_tracking.py @@ -0,0 +1,45 @@ +from typing import Any, Dict + +from ...metainfo import Pipelines +from ...models.nlp import DialogStateTrackingModel +from ...preprocessors import DialogStateTrackingPreprocessor +from ...utils.constant import Tasks +from ..base import Pipeline +from ..builder import PIPELINES + +__all__ = ['DialogStateTrackingPipeline'] + + +@PIPELINES.register_module( + Tasks.dialog_state_tracking, module_name=Pipelines.dialog_state_tracking) +class DialogStateTrackingPipeline(Pipeline): + + def __init__(self, model: DialogStateTrackingModel, + preprocessor: DialogStateTrackingPreprocessor, **kwargs): + """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction + + Args: + model (SequenceClassificationModel): a model instance + preprocessor (SequenceClassificationPreprocessor): a preprocessor instance + """ + + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + self.model = model + # self.tokenizer = preprocessor.tokenizer + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """process the 
prediction results + + Args: + inputs (Dict[str, Any]): _description_ + + Returns: + Dict[str, str]: the prediction results + """ + import numpy as np + pred = inputs['pred'] + pos = np.where(pred == np.max(pred)) + + result = {'pred': pred, 'label': pos[0]} + + return result diff --git a/modelscope/pipelines/nlp/fill_mask_pipeline.py b/modelscope/pipelines/nlp/fill_mask_pipeline.py new file mode 100644 index 00000000..596d65f7 --- /dev/null +++ b/modelscope/pipelines/nlp/fill_mask_pipeline.py @@ -0,0 +1,107 @@ +from typing import Any, Dict, Optional, Union + +import torch + +from ...metainfo import Pipelines +from ...models import Model +from ...models.nlp.masked_language_model import MaskedLanguageModelBase +from ...preprocessors import FillMaskPreprocessor +from ...utils.constant import Tasks +from ..base import Pipeline, Tensor +from ..builder import PIPELINES + +__all__ = ['FillMaskPipeline'] + + +@PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask) +class FillMaskPipeline(Pipeline): + + def __init__(self, + model: Union[MaskedLanguageModelBase, str], + preprocessor: Optional[FillMaskPreprocessor] = None, + first_sequence='sentense', + **kwargs): + """use `model` and `preprocessor` to create a nlp fill mask pipeline for prediction + + Args: + model (MaskedLanguageModelBase): a model instance + preprocessor (FillMaskPreprocessor): a preprocessor instance + """ + fill_mask_model = model if isinstance( + model, MaskedLanguageModelBase) else Model.from_pretrained(model) + assert fill_mask_model.config is not None + + if preprocessor is None: + preprocessor = FillMaskPreprocessor( + fill_mask_model.model_dir, + first_sequence=first_sequence, + second_sequence=None) + fill_mask_model.eval() + super().__init__( + model=fill_mask_model, preprocessor=preprocessor, **kwargs) + + self.preprocessor = preprocessor + self.tokenizer = preprocessor.tokenizer + self.mask_id = {'veco': 250001, 'sbert': 103} + + self.rep_map = { + 'sbert': { + '[unused0]': '', + '[PAD]': '', + '[unused1]': '', + r' +': ' ', + '[SEP]': '', + '[unused2]': '', + '[CLS]': '', + '[UNK]': '' + }, + 'veco': { + r' +': ' ', + '': '', + '': '', + '': '', + '': '', + '': ' ' + } + } + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, Tensor]: + """process the prediction results + + Args: + inputs (Dict[str, Any]): _description_ + + Returns: + Dict[str, str]: the prediction results + """ + import numpy as np + logits = inputs['logits'].detach().numpy() + input_ids = inputs['input_ids'].detach().numpy() + pred_ids = np.argmax(logits, axis=-1) + model_type = self.model.config.model_type + rst_ids = np.where(input_ids == self.mask_id[model_type], pred_ids, + input_ids) + + def rep_tokens(string, rep_map): + for k, v in rep_map.items(): + string = string.replace(k, v) + return string.strip() + + pred_strings = [] + for ids in rst_ids: # batch + # TODO vocab size is not stable + + if self.model.config.vocab_size == 21128: # zh bert + pred_string = self.tokenizer.convert_ids_to_tokens(ids) + pred_string = ''.join(pred_string) + else: + pred_string = self.tokenizer.decode(ids) + pred_string = rep_tokens(pred_string, self.rep_map[model_type]) + pred_strings.append(pred_string) + + return {'text': pred_strings} diff --git a/modelscope/pipelines/nlp/nli_pipeline.py b/modelscope/pipelines/nlp/nli_pipeline.py new file mode 100644 index 
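The masked-token substitution in FillMaskPipeline.postprocess above reduces to a single vectorised np.where. A minimal sketch, not part of the patch, with hypothetical token ids; the mask id 103 and vocab size 21128 are the sbert values used by the pipeline:

import numpy as np

mask_id = 103                                   # sbert [MASK] id from the pipeline's mask_id map
input_ids = np.array([[101, 2769, 103, 102]])   # hypothetical encoded sentence with one mask
logits = np.random.randn(1, 4, 21128)           # [batch, sequence, vocab]
pred_ids = np.argmax(logits, axis=-1)
# Masked positions take the model's argmax prediction; all others keep the input id.
rst_ids = np.where(input_ids == mask_id, pred_ids, input_ids)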
00000000..49dc330f --- /dev/null +++ b/modelscope/pipelines/nlp/nli_pipeline.py @@ -0,0 +1,72 @@ +import uuid +from typing import Any, Dict, Union + +import numpy as np +import torch + +from ...metainfo import Pipelines +from ...models import Model +from ...models.nlp import SbertForNLI +from ...preprocessors import NLIPreprocessor +from ...utils.constant import Tasks +from ..base import Pipeline +from ..builder import PIPELINES + +__all__ = ['NLIPipeline'] + + +@PIPELINES.register_module(Tasks.nli, module_name=Pipelines.nli) +class NLIPipeline(Pipeline): + + def __init__(self, + model: Union[SbertForNLI, str], + preprocessor: NLIPreprocessor = None, + first_sequence='first_sequence', + second_sequence='second_sequence', + **kwargs): + """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction + + Args: + model (SbertForNLI): a model instance + preprocessor (NLIPreprocessor): a preprocessor instance + """ + assert isinstance(model, str) or isinstance(model, SbertForNLI), \ + 'model must be a single str or SbertForNLI' + model = model if isinstance( + model, SbertForNLI) else Model.from_pretrained(model) + if preprocessor is None: + preprocessor = NLIPreprocessor( + model.model_dir, + first_sequence=first_sequence, + second_sequence=second_sequence) + model.eval() + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + assert len(model.id2label) > 0 + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, + inputs: Dict[str, Any], + topk: int = 5) -> Dict[str, str]: + """process the prediction results + + Args: + inputs (Dict[str, Any]): _description_ + + Returns: + Dict[str, str]: the prediction results + """ + + probs = inputs['probabilities'][0] + num_classes = probs.shape[0] + topk = min(topk, num_classes) + top_indices = np.argpartition(probs, -topk)[-topk:] + cls_ids = top_indices[np.argsort(probs[top_indices])] + probs = probs[cls_ids].tolist() + + cls_names = [self.model.id2label[cid] for cid in cls_ids] + + return {'scores': probs, 'labels': cls_names} diff --git a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py index 1b630c10..f6bcd72e 100644 --- a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py +++ b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py @@ -1,11 +1,13 @@ from typing import Any, Dict, Union import numpy as np +import torch -from modelscope.models.nlp import SbertForSentenceSimilarity -from modelscope.preprocessors import SequenceClassificationPreprocessor -from modelscope.utils.constant import Tasks +from ...metainfo import Pipelines from ...models import Model +from ...models.nlp import SbertForSentenceSimilarity +from ...preprocessors import SequenceClassificationPreprocessor +from ...utils.constant import Tasks from ..base import Input, Pipeline from ..builder import PIPELINES @@ -13,13 +15,14 @@ __all__ = ['SentenceSimilarityPipeline'] @PIPELINES.register_module( - Tasks.sentence_similarity, - module_name=r'sbert-base-chinese-sentence-similarity') + Tasks.sentence_similarity, module_name=Pipelines.sentence_similarity) class SentenceSimilarityPipeline(Pipeline): def __init__(self, - model: Union[SbertForSentenceSimilarity, str], + model: Union[Model, str], preprocessor: SequenceClassificationPreprocessor = None, + first_sequence='first_sequence', + second_sequence='second_sequence', **kwargs): """use 
`model` and `preprocessor` to create a nlp sentence similarity pipeline for prediction @@ -35,14 +38,21 @@ class SentenceSimilarityPipeline(Pipeline): if preprocessor is None: preprocessor = SequenceClassificationPreprocessor( sc_model.model_dir, - first_sequence='first_sequence', - second_sequence='second_sequence') + first_sequence=first_sequence, + second_sequence=second_sequence) + sc_model.eval() super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) assert hasattr(self.model, 'id2label'), \ 'id2label map should be initalizaed in init function.' - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Any], + **postprocess_params) -> Dict[str, str]: """process the prediction results Args: diff --git a/modelscope/pipelines/nlp/sentiment_classification_pipeline.py b/modelscope/pipelines/nlp/sentiment_classification_pipeline.py new file mode 100644 index 00000000..9291ed44 --- /dev/null +++ b/modelscope/pipelines/nlp/sentiment_classification_pipeline.py @@ -0,0 +1,77 @@ +import os +import uuid +from typing import Any, Dict, Union + +import json +import numpy as np +import torch + +from ...metainfo import Pipelines +from ...models import Model +from ...models.nlp import SbertForSentimentClassification +from ...preprocessors import SentimentClassificationPreprocessor +from ...utils.constant import Tasks +from ..base import Input, Pipeline +from ..builder import PIPELINES + +__all__ = ['SentimentClassificationPipeline'] + + +@PIPELINES.register_module( + Tasks.sentiment_classification, + module_name=Pipelines.sentiment_classification) +class SentimentClassificationPipeline(Pipeline): + + def __init__(self, + model: Union[SbertForSentimentClassification, str], + preprocessor: SentimentClassificationPreprocessor = None, + first_sequence='first_sequence', + second_sequence='second_sequence', + **kwargs): + """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction + + Args: + model (SbertForSentimentClassification): a model instance + preprocessor (SentimentClassificationPreprocessor): a preprocessor instance + """ + assert isinstance(model, str) or isinstance(model, SbertForSentimentClassification), \ + 'model must be a single str or SbertForSentimentClassification' + model = model if isinstance( + model, + SbertForSentimentClassification) else Model.from_pretrained(model) + if preprocessor is None: + preprocessor = SentimentClassificationPreprocessor( + model.model_dir, + first_sequence=first_sequence, + second_sequence=second_sequence) + model.eval() + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + assert len(model.id2label) > 0 + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, + inputs: Dict[str, Any], + topk: int = 5) -> Dict[str, str]: + """process the prediction results + + Args: + inputs (Dict[str, Any]): _description_ + + Returns: + Dict[str, str]: the prediction results + """ + + probs = inputs['probabilities'][0] + num_classes = probs.shape[0] + topk = min(topk, num_classes) + top_indices = np.argpartition(probs, -topk)[-topk:] + cls_ids = top_indices[np.argsort(probs[top_indices])] + probs = probs[cls_ids].tolist() + + cls_names = [self.model.id2label[cid] for cid in 
cls_ids] + + return {'scores': probs, 'labels': cls_names} diff --git a/modelscope/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py index 1dbe2efd..43c81d60 100644 --- a/modelscope/pipelines/nlp/sequence_classification_pipeline.py +++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py @@ -2,6 +2,7 @@ from typing import Any, Dict, Union import numpy as np +from modelscope.metainfo import Pipelines from modelscope.models.nlp import BertForSequenceClassification from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.utils.constant import Tasks @@ -13,7 +14,7 @@ __all__ = ['SequenceClassificationPipeline'] @PIPELINES.register_module( - Tasks.text_classification, module_name=r'bert-sentiment-analysis') + Tasks.text_classification, module_name=Pipelines.sentiment_analysis) class SequenceClassificationPipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/nlp/space/dialog_state_tracking.py b/modelscope/pipelines/nlp/space/dialog_state_tracking.py deleted file mode 100644 index 4a943095..00000000 --- a/modelscope/pipelines/nlp/space/dialog_state_tracking.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Any, Dict, Optional - -from modelscope.models.nlp import DialogModelingModel -from modelscope.preprocessors import DialogModelingPreprocessor -from modelscope.utils.constant import Tasks -from ...base import Pipeline, Tensor -from ...builder import PIPELINES - -__all__ = ['DialogStateTrackingPipeline'] - - -@PIPELINES.register_module( - Tasks.dialog_state_tracking, module_name=r'space-dst') -class DialogStateTrackingPipeline(Pipeline): - - def __init__(self, model: DialogModelingModel, - preprocessor: DialogModelingPreprocessor, **kwargs): - """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction - - Args: - model (SequenceClassificationModel): a model instance - preprocessor (SequenceClassificationPreprocessor): a preprocessor instance - """ - - super().__init__(model=model, preprocessor=preprocessor, **kwargs) - self.model = model - self.preprocessor = preprocessor - - def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]: - """process the prediction results - - Args: - inputs (Dict[str, Any]): _description_ - - Returns: - Dict[str, str]: the prediction results - """ - sys_rsp = self.preprocessor.text_field.tokenizer.convert_ids_to_tokens( - inputs['resp']) - assert len(sys_rsp) > 2 - sys_rsp = sys_rsp[1:len(sys_rsp) - 1] - # sys_rsp = self.preprocessor.text_field.tokenizer. 
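The top-k scoring used by both NLIPipeline and SentimentClassificationPipeline above boils down to an argpartition followed by an argsort over only the selected entries. A minimal sketch, not part of the patch, with made-up probabilities; note the result is ordered from lowest to highest score, matching the pipelines:

import numpy as np

probs = np.array([0.05, 0.6, 0.1, 0.2, 0.05])
topk = min(3, probs.shape[0])
top_indices = np.argpartition(probs, -topk)[-topk:]     # unordered indices of the 3 largest
cls_ids = top_indices[np.argsort(probs[top_indices])]   # ascending by probability
print(cls_ids.tolist(), probs[cls_ids].tolist())        # [2, 3, 1] [0.1, 0.2, 0.6]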
- - inputs['sys'] = sys_rsp - - return inputs diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index 881e7ea6..8f55cce0 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -1,16 +1,20 @@ -from typing import Dict, Optional, Union +from typing import Any, Dict, Optional, Union -from modelscope.models import Model -from modelscope.models.nlp import PalmForTextGeneration -from modelscope.preprocessors import TextGenerationPreprocessor -from modelscope.utils.constant import Tasks +import torch + +from ...metainfo import Pipelines +from ...models import Model +from ...models.nlp import PalmForTextGeneration +from ...preprocessors import TextGenerationPreprocessor +from ...utils.constant import Tasks from ..base import Pipeline, Tensor from ..builder import PIPELINES __all__ = ['TextGenerationPipeline'] -@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0') +@PIPELINES.register_module( + Tasks.text_generation, module_name=Pipelines.text_generation) class TextGenerationPipeline(Pipeline): def __init__(self, @@ -31,10 +35,17 @@ class TextGenerationPipeline(Pipeline): model.tokenizer, first_sequence='sentence', second_sequence=None) + model.eval() super().__init__(model=model, preprocessor=preprocessor, **kwargs) self.tokenizer = model.tokenizer - def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]: + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Tensor], + **postprocess_params) -> Dict[str, str]: """process the prediction results Args: diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py index 1cc08a38..9501efb7 100644 --- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py +++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py @@ -1,9 +1,12 @@ from typing import Any, Dict, Optional, Union -from modelscope.models import Model -from modelscope.models.nlp import StructBertForTokenClassification -from modelscope.preprocessors import TokenClassifcationPreprocessor -from modelscope.utils.constant import Tasks +import torch + +from ...metainfo import Pipelines +from ...models import Model +from ...models.nlp import SbertForTokenClassification +from ...preprocessors import TokenClassifcationPreprocessor +from ...utils.constant import Tasks from ..base import Pipeline, Tensor from ..builder import PIPELINES @@ -11,12 +14,11 @@ __all__ = ['WordSegmentationPipeline'] @PIPELINES.register_module( - Tasks.word_segmentation, - module_name=r'structbert-chinese-word-segmentation') + Tasks.word_segmentation, module_name=Pipelines.word_segmentation) class WordSegmentationPipeline(Pipeline): def __init__(self, - model: Union[StructBertForTokenClassification, str], + model: Union[SbertForTokenClassification, str], preprocessor: Optional[TokenClassifcationPreprocessor] = None, **kwargs): """use `model` and `preprocessor` to create a nlp word segmentation pipeline for prediction @@ -27,15 +29,23 @@ class WordSegmentationPipeline(Pipeline): """ model = model if isinstance( model, - StructBertForTokenClassification) else Model.from_pretrained(model) + SbertForTokenClassification) else Model.from_pretrained(model) if preprocessor is None: preprocessor = TokenClassifcationPreprocessor(model.model_dir) + 
model.eval() super().__init__(model=model, preprocessor=preprocessor, **kwargs) self.tokenizer = preprocessor.tokenizer self.config = model.config + assert len(self.config.id2label) > 0 self.id2label = self.config.id2label - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Any], + **postprocess_params) -> Dict[str, str]: """process the prediction results Args: diff --git a/modelscope/pipelines/outputs.py b/modelscope/pipelines/outputs.py index 15d8a995..a950fa69 100644 --- a/modelscope/pipelines/outputs.py +++ b/modelscope/pipelines/outputs.py @@ -45,6 +45,12 @@ TASK_OUTPUTS = { Tasks.image_matting: ['output_png'], Tasks.image_generation: ['output_png'], + # action recognition result for single video + # { + # "output_label": "abseiling" + # } + Tasks.action_recognition: ['output_label'], + # pose estimation result for single sample # { # "poses": np.array with shape [num_pose, num_keypoint, 3], @@ -54,6 +60,13 @@ TASK_OUTPUTS = { # } Tasks.pose_estimation: ['poses', 'boxes'], + # ocr detection result for single sample + # { + # "det_polygons": np.array with shape [num_text, 8], each box is + # [x1, y1, x2, y2, x3, y3, x4, y4] + # } + Tasks.ocr_detection: ['det_polygons'], + # ============ nlp tasks =================== # text classification result for single sample @@ -69,6 +82,12 @@ TASK_OUTPUTS = { # } Tasks.text_generation: ['text'], + # fill mask result for single sample + # { + # "text": "this is the text which masks filled by model." + # } + Tasks.fill_mask: ['text'], + # word segmentation result for single sample # { # "output": "今天 天气 不错 , 适合 出去 游玩" @@ -82,6 +101,20 @@ TASK_OUTPUTS = { # } Tasks.sentence_similarity: ['scores', 'labels'], + # sentiment classification result for single sample + # { + # "labels": ["happy", "sad", "calm", "angry"], + # "scores": [0.9, 0.1, 0.05, 0.05] + # } + Tasks.sentiment_classification: ['scores', 'labels'], + + # nli result for single sample + # { + # "labels": ["happy", "sad", "calm", "angry"], + # "scores": [0.9, 0.1, 0.05, 0.05] + # } + Tasks.nli: ['scores', 'labels'], + # ============ audio tasks =================== # audio processed for single file in PCM format diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py index 37c9c929..d034a7d4 100644 --- a/modelscope/pipelines/util.py +++ b/modelscope/pipelines/util.py @@ -2,8 +2,8 @@ import os.path as osp from typing import List, Union -from maas_hub.file_download import model_file_download - +from modelscope.hub.api import HubApi +from modelscope.hub.file_download import model_file_download from modelscope.utils.config import Config from modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger @@ -20,31 +20,63 @@ def is_config_has_model(cfg_file): return False -def is_model_name(model: Union[str, List]): - """ whether model is a valid modelhub path +def is_official_hub_path(path: Union[str, List]): + """ Whether path is a official hub name or a valid local + path to official hub directory. 
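The TASK_OUTPUTS entries added above document the keys each task's pipeline is expected to return, so they can double as a lightweight contract check. A minimal sketch, not part of the patch; the result dict is hypothetical:

from modelscope.pipelines.outputs import TASK_OUTPUTS
from modelscope.utils.constant import Tasks

result = {'scores': [0.9, 0.1], 'labels': ['entailment', 'contradiction']}  # hypothetical pipeline output
expected_keys = TASK_OUTPUTS[Tasks.nli]             # ['scores', 'labels'] per the table above
assert all(key in result for key in expected_keys)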
+ """ + + def is_official_hub_impl(path): + if osp.exists(path): + cfg_file = osp.join(path, ModelFile.CONFIGURATION) + return osp.exists(cfg_file) + else: + try: + _ = HubApi().get_model(path) + return True + except Exception: + return False + + if isinstance(path, str): + return is_official_hub_impl(path) + else: + results = [is_official_hub_impl(m) for m in path] + all_true = all(results) + any_true = any(results) + if any_true and not all_true: + raise ValueError( + f'some model are hub address, some are not, model list: {path}' + ) + + return all_true + + +def is_model(path: Union[str, List]): + """ whether path is a valid modelhub path and containing model config """ - def is_model_name_impl(model): - if osp.exists(model): - cfg_file = osp.join(model, ModelFile.CONFIGURATION) + def is_modelhub_path_impl(path): + if osp.exists(path): + cfg_file = osp.join(path, ModelFile.CONFIGURATION) if osp.exists(cfg_file): return is_config_has_model(cfg_file) else: return False else: try: - cfg_file = model_file_download(model, ModelFile.CONFIGURATION) + cfg_file = model_file_download(path, ModelFile.CONFIGURATION) return is_config_has_model(cfg_file) except Exception: return False - if isinstance(model, str): - return is_model_name_impl(model) + if isinstance(path, str): + return is_modelhub_path_impl(path) else: - results = [is_model_name_impl(m) for m in model] + results = [is_modelhub_path_impl(m) for m in path] all_true = all(results) any_true = any(results) if any_true and not all_true: - raise ValueError('some model are hub address, some are not') + raise ValueError( + f'some models are hub address, some are not, model list: {path}' + ) return all_true diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index 7b67507a..fe68173a 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -1,12 +1,12 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from .audio import LinearAECAndFbank +# from .audio import LinearAECAndFbank from .base import Preprocessor -from .builder import PREPROCESSORS, build_preprocessor +# from .builder import PREPROCESSORS, build_preprocessor from .common import Compose -from .image import LoadImage, load_image from .nlp import * # noqa F403 from .space.dialog_intent_prediction_preprocessor import * # noqa F403 from .space.dialog_modeling_preprocessor import * # noqa F403 from .space.dialog_state_tracking_preprocessor import * # noqa F403 -from .text_to_speech import * # noqa F403 + +# from .text_to_speech import * # noqa F403 diff --git a/modelscope/preprocessors/image.py b/modelscope/preprocessors/image.py index 6bd8aed5..b2123fb7 100644 --- a/modelscope/preprocessors/image.py +++ b/modelscope/preprocessors/image.py @@ -5,11 +5,12 @@ from typing import Dict, Union from PIL import Image, ImageOps from modelscope.fileio import File +from modelscope.metainfo import Preprocessors from modelscope.utils.constant import Fields from .builder import PREPROCESSORS -@PREPROCESSORS.register_module(Fields.cv) +@PREPROCESSORS.register_module(Fields.cv, Preprocessors.load_image) class LoadImage: """Load an image from file or url. 
Added or updated keys are "filename", "img", "img_shape", diff --git a/modelscope/pipelines/multi_modal/image_caption_pipeline.py b/modelscope/preprocessors/multi_modal.py similarity index 55% rename from modelscope/pipelines/multi_modal/image_caption_pipeline.py rename to modelscope/preprocessors/multi_modal.py index 3e5f49d0..7c8f0fab 100644 --- a/modelscope/pipelines/multi_modal/image_caption_pipeline.py +++ b/modelscope/preprocessors/multi_modal.py @@ -1,32 +1,48 @@ -from typing import Any, Dict +# Copyright (c) Alibaba, Inc. and its affiliates. +import os.path as osp +from typing import Any, Dict, Union import numpy as np import torch from PIL import Image -from modelscope.pipelines.base import Input -from modelscope.preprocessors import load_image -from modelscope.utils.constant import Tasks -from modelscope.utils.logger import get_logger -from ..base import Pipeline -from ..builder import PIPELINES +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Preprocessors +from modelscope.utils.constant import Fields, ModelFile +from modelscope.utils.type_assert import type_assert +from .base import Preprocessor +from .builder import PREPROCESSORS +from .image import load_image -logger = get_logger() +__all__ = [ + 'OfaImageCaptionPreprocessor', +] -@PIPELINES.register_module(Tasks.image_captioning, module_name='ofa') -class ImageCaptionPipeline(Pipeline): - # TODO: refine using modelhub - def __init__(self, model: str, bpe_dir: str): - super().__init__() - # turn on cuda if GPU is available +@PREPROCESSORS.register_module( + Fields.multi_modal, module_name=Preprocessors.ofa_image_caption) +class OfaImageCaptionPreprocessor(Preprocessor): + + def __init__(self, model_dir: str, *args, **kwargs): + """preprocess the data via the vocab.txt from the `model_dir` path + + Args: + model_dir (str): model path + """ + super().__init__(*args, **kwargs) + + if osp.exists(model_dir): + local_model_dir = model_dir + else: + local_model_dir = snapshot_download(model_dir) + local_model = osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE) + bpe_dir = local_model_dir + from fairseq import checkpoint_utils, tasks, utils from ofa.tasks.mm_tasks import CaptionTask tasks.register_task('caption', CaptionTask) - use_cuda = False - # use fp16 only when GPU is available - use_fp16 = False + overrides = { 'bpe_dir': bpe_dir, 'eval_cider': False, @@ -35,21 +51,9 @@ class ImageCaptionPipeline(Pipeline): 'no_repeat_ngram_size': 3, 'seed': 7 } - models, cfg, task = checkpoint_utils.load_model_ensemble_and_task( - utils.split_paths(model), arg_overrides=overrides) - - # Move models to GPU - for model in models: - model.eval() - if use_cuda: - model.cuda() - if use_fp16: - model.half() - model.prepare_for_inference_(cfg) - self.models = models - # Initialize generator - self.generator = task.build_generator(models, cfg.generation) - + model, cfg, task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(local_model), arg_overrides=overrides) + del model # Initialize transform from torchvision import transforms mean = [0.5, 0.5, 0.5] @@ -69,7 +73,8 @@ class ImageCaptionPipeline(Pipeline): self.eos_item = torch.LongTensor([task.src_dict.eos()]) self.pad_idx = task.src_dict.pad() - def preprocess(self, input: Input) -> Dict[str, Any]: + @type_assert(object, (str, tuple, Image.Image)) + def __call__(self, data: Union[str, tuple]) -> Dict[str, Any]: def encode_text(text, length=None, append_bos=False, append_eos=False): s = self.task.tgt_dict.encode_line( @@ -84,11 
+89,11 @@ class ImageCaptionPipeline(Pipeline): s = torch.cat([s, self.eos_item]) return s - if isinstance(input, Image.Image): - patch_image = self.patch_resize_transform(input).unsqueeze(0) + if isinstance(data, Image.Image): + patch_image = self.patch_resize_transform(data).unsqueeze(0) else: patch_image = self.patch_resize_transform( - load_image(input)).unsqueeze(0) + load_image(data)).unsqueeze(0) patch_mask = torch.tensor([True]) text = 'what does the image describe?' src_text = encode_text( @@ -105,17 +110,3 @@ class ImageCaptionPipeline(Pipeline): } } return sample - - def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: - from ofa.utils.eval_utils import eval_caption - - results, _ = eval_caption(self.task, self.generator, self.models, - input) - return { - 'image_id': results[0]['image_id'], - 'caption': results[0]['caption'] - } - - def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - # What should we do here ? - return inputs diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py index 9bcaa87c..f998da37 100644 --- a/modelscope/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -5,14 +5,17 @@ from typing import Any, Dict, Union from transformers import AutoTokenizer -from modelscope.utils.constant import Fields, InputFields -from modelscope.utils.type_assert import type_assert +from ..metainfo import Models, Preprocessors +from ..utils.constant import Fields, InputFields +from ..utils.type_assert import type_assert from .base import Preprocessor from .builder import PREPROCESSORS __all__ = [ 'Tokenize', 'SequenceClassificationPreprocessor', - 'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor' + 'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor', + 'NLIPreprocessor', 'SentimentClassificationPreprocessor', + 'FillMaskPreprocessor' ] @@ -31,7 +34,141 @@ class Tokenize(Preprocessor): @PREPROCESSORS.register_module( - Fields.nlp, module_name=r'bert-sequence-classification') + Fields.nlp, module_name=Preprocessors.nli_tokenizer) +class NLIPreprocessor(Preprocessor): + + def __init__(self, model_dir: str, *args, **kwargs): + """preprocess the data via the vocab.txt from the `model_dir` path + + Args: + model_dir (str): model path + """ + + super().__init__(*args, **kwargs) + + from sofa import SbertTokenizer + self.model_dir: str = model_dir + self.first_sequence: str = kwargs.pop('first_sequence', + 'first_sequence') + self.second_sequence = kwargs.pop('second_sequence', 'second_sequence') + self.sequence_length = kwargs.pop('sequence_length', 128) + + self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir) + + @type_assert(object, tuple) + def __call__(self, data: tuple) -> Dict[str, Any]: + """process the raw input data + + Args: + data (tuple): [sentence1, sentence2] + sentence1 (str): a sentence + Example: + 'you are so handsome.' + sentence2 (str): a sentence + Example: + 'you are so beautiful.' 
+ Returns: + Dict[str, Any]: the preprocessed data + """ + sentence1, sentence2 = data + new_data = { + self.first_sequence: sentence1, + self.second_sequence: sentence2 + } + # preprocess the data for the model input + + rst = { + 'id': [], + 'input_ids': [], + 'attention_mask': [], + 'token_type_ids': [] + } + + max_seq_length = self.sequence_length + + text_a = new_data[self.first_sequence] + text_b = new_data[self.second_sequence] + feature = self.tokenizer( + text_a, + text_b, + padding=False, + truncation=True, + max_length=max_seq_length) + + rst['id'].append(new_data.get('id', str(uuid.uuid4()))) + rst['input_ids'].append(feature['input_ids']) + rst['attention_mask'].append(feature['attention_mask']) + rst['token_type_ids'].append(feature['token_type_ids']) + + return rst + + +@PREPROCESSORS.register_module( + Fields.nlp, module_name=Preprocessors.sen_cls_tokenizer) +class SentimentClassificationPreprocessor(Preprocessor): + + def __init__(self, model_dir: str, *args, **kwargs): + """preprocess the data via the vocab.txt from the `model_dir` path + + Args: + model_dir (str): model path + """ + + super().__init__(*args, **kwargs) + + from sofa import SbertTokenizer + self.model_dir: str = model_dir + self.first_sequence: str = kwargs.pop('first_sequence', + 'first_sequence') + self.second_sequence = kwargs.pop('second_sequence', 'second_sequence') + self.sequence_length = kwargs.pop('sequence_length', 128) + + self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir) + + @type_assert(object, str) + def __call__(self, data: str) -> Dict[str, Any]: + """process the raw input data + + Args: + data (str): a sentence + Example: + 'you are so handsome.' + Returns: + Dict[str, Any]: the preprocessed data + """ + + new_data = {self.first_sequence: data} + # preprocess the data for the model input + + rst = { + 'id': [], + 'input_ids': [], + 'attention_mask': [], + 'token_type_ids': [] + } + + max_seq_length = self.sequence_length + + text_a = new_data[self.first_sequence] + + text_b = new_data.get(self.second_sequence, None) + feature = self.tokenizer( + text_a, + text_b, + padding='max_length', + truncation=True, + max_length=max_seq_length) + + rst['id'].append(new_data.get('id', str(uuid.uuid4()))) + rst['input_ids'].append(feature['input_ids']) + rst['attention_mask'].append(feature['attention_mask']) + rst['token_type_ids'].append(feature['token_type_ids']) + + return rst + + +@PREPROCESSORS.register_module( + Fields.nlp, module_name=Preprocessors.bert_seq_cls_tokenizer) class SequenceClassificationPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): @@ -53,12 +190,12 @@ class SequenceClassificationPreprocessor(Preprocessor): self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir) print(f'this is the tokenzier {self.tokenizer}') - @type_assert(object, (str, tuple)) - def __call__(self, data: Union[str, tuple]) -> Dict[str, Any]: + @type_assert(object, (str, tuple, Dict)) + def __call__(self, data: Union[str, tuple, Dict]) -> Dict[str, Any]: """process the raw input data Args: - data (str or tuple): + data (str or tuple, Dict): sentence1 (str): a sentence Example: 'you are so handsome.' @@ -70,22 +207,31 @@ class SequenceClassificationPreprocessor(Preprocessor): sentence2 (str): a sentence Example: 'you are so beautiful.' + or + {field1: field_value1, field2: field_value2} + field1 (str): field name, default 'first_sequence' + field_value1 (str): a sentence + Example: + 'you are so handsome.' 
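Both of the new sofa-based preprocessors above return the same dict-of-lists feature layout, so the downstream models can consume them uniformly. A minimal sketch, not part of the patch; the model directory is hypothetical and must contain the sbert vocabulary:

from modelscope.preprocessors import SentimentClassificationPreprocessor

preprocessor = SentimentClassificationPreprocessor('/path/to/sbert_model_dir')  # hypothetical path
features = preprocessor('the service was excellent')
# features -> {'id': [...], 'input_ids': [[...]], 'attention_mask': [[...]], 'token_type_ids': [[...]]}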
+ + field2 (str): field name, default 'second_sequence' + field_value2 (str): a sentence + Example: + 'you are so beautiful.' Returns: Dict[str, Any]: the preprocessed data """ - - if not isinstance(data, tuple): - data = ( - data, - None, - ) - - sentence1, sentence2 = data - new_data = { - self.first_sequence: sentence1, - self.second_sequence: sentence2 - } + if isinstance(data, str): + new_data = {self.first_sequence: data} + elif isinstance(data, tuple): + sentence1, sentence2 = data + new_data = { + self.first_sequence: sentence1, + self.second_sequence: sentence2 + } + else: + new_data = data # preprocess the data for the model input @@ -115,7 +261,8 @@ class SequenceClassificationPreprocessor(Preprocessor): return rst -@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0') +@PREPROCESSORS.register_module( + Fields.nlp, module_name=Preprocessors.palm_text_gen_tokenizer) class TextGenerationPreprocessor(Preprocessor): def __init__(self, model_dir: str, tokenizer, *args, **kwargs): @@ -166,12 +313,66 @@ class TextGenerationPreprocessor(Preprocessor): rst['input_ids'].append(feature['input_ids']) rst['attention_mask'].append(feature['attention_mask']) + return {k: torch.tensor(v) for k, v in rst.items()} + + +@PREPROCESSORS.register_module(Fields.nlp) +class FillMaskPreprocessor(Preprocessor): + + def __init__(self, model_dir: str, *args, **kwargs): + """preprocess the data via the vocab.txt from the `model_dir` path + + Args: + model_dir (str): model path + """ + super().__init__(*args, **kwargs) + from sofa.utils.backend import AutoTokenizer + self.model_dir = model_dir + self.first_sequence: str = kwargs.pop('first_sequence', + 'first_sequence') + self.sequence_length = kwargs.pop('sequence_length', 128) + + self.tokenizer = AutoTokenizer.from_pretrained( + model_dir, use_fast=False) + + @type_assert(object, str) + def __call__(self, data: str) -> Dict[str, Any]: + """process the raw input data + + Args: + data (str): a sentence + Example: + 'you are so handsome.' 
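As the widened type hint above shows, SequenceClassificationPreprocessor.__call__ now dispatches on three input shapes. A minimal sketch, not part of the patch, assuming preprocessor is an already constructed SequenceClassificationPreprocessor instance:

preprocessor('you are so handsome.')                                # single sentence
preprocessor(('you are so handsome.', 'you are so beautiful.'))     # sentence pair
preprocessor({'first_sequence': 'you are so handsome.',
              'second_sequence': 'you are so beautiful.'})          # explicit field dict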
+ + Returns: + Dict[str, Any]: the preprocessed data + """ + import torch + + new_data = {self.first_sequence: data} + # preprocess the data for the model input + + rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []} + + max_seq_length = self.sequence_length + + text_a = new_data[self.first_sequence] + feature = self.tokenizer( + text_a, + padding='max_length', + truncation=True, + max_length=max_seq_length, + return_token_type_ids=True) + + rst['input_ids'].append(feature['input_ids']) + rst['attention_mask'].append(feature['attention_mask']) + rst['token_type_ids'].append(feature['token_type_ids']) return {k: torch.tensor(v) for k, v in rst.items()} @PREPROCESSORS.register_module( - Fields.nlp, module_name=r'bert-token-classification') + Fields.nlp, module_name=Preprocessors.token_cls_tokenizer) class TokenClassifcationPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): @@ -199,6 +400,7 @@ class TokenClassifcationPreprocessor(Preprocessor): Returns: Dict[str, Any]: the preprocessed data """ + # preprocess the data for the model input text = data.replace(' ', '').strip() diff --git a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py index c5a6b34c..733abf24 100644 --- a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py +++ b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py @@ -3,13 +3,12 @@ import os from typing import Any, Dict -from modelscope.preprocessors.space.fields.intent_field import \ - IntentBPETextField -from modelscope.utils.config import Config -from modelscope.utils.constant import Fields -from modelscope.utils.type_assert import type_assert +from ...utils.config import Config +from ...utils.constant import Fields +from ...utils.type_assert import type_assert from ..base import Preprocessor from ..builder import PREPROCESSORS +from .fields.intent_field import IntentBPETextField __all__ = ['DialogIntentPredictionPreprocessor'] diff --git a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py index 5061ba35..b0758b40 100644 --- a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py +++ b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py @@ -1,16 +1,14 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os -import uuid -from typing import Any, Dict, Union - -from modelscope.preprocessors.space.fields.gen_field import \ - MultiWOZBPETextField -from modelscope.utils.config import Config -from modelscope.utils.constant import Fields, InputFields -from modelscope.utils.type_assert import type_assert +from typing import Any, Dict + +from ...utils.config import Config +from ...utils.constant import Fields +from ...utils.type_assert import type_assert from ..base import Preprocessor from ..builder import PREPROCESSORS +from .fields.gen_field import MultiWOZBPETextField __all__ = ['DialogModelingPreprocessor'] diff --git a/modelscope/preprocessors/space/fields/dst_processors.py b/modelscope/preprocessors/space/fields/dst_processors.py index 6d888bff..c5c81f66 100644 --- a/modelscope/preprocessors/space/fields/dst_processors.py +++ b/modelscope/preprocessors/space/fields/dst_processors.py @@ -154,14 +154,16 @@ utter3 = { 'User-2': 'I am looking for an expensive indian restaurant in the area of centre.', 'System-2': - 'Might I recommend Saffron Brasserie? 
That is an expensive Indian restaurant in the center of town. I can book a table for you, if you like.', + 'Might I recommend Saffron Brasserie? That is an expensive Indian restaurant ' + 'in the center of town. I can book a table for you, if you like.', 'Dialog_Act-2': { 'Restaurant-Recommend': [['area', 'center of town'], ['food', 'Indian'], ['name', 'Saffron Brasserie'], ['pricerange', 'expensive']] }, - 'User-3': 'Sure thing, please book for 6 people at 19:30 on Saturday.' + 'User-3': + 'Sure thing, please book for 6 people at 19:30 on Saturday.' } history_states3 = [{}, { @@ -346,7 +348,6 @@ history_states3 = [{}, { class DSTProcessor(object): - ACTS_DICT = { 'taxi-depart': 'taxi-departure', 'taxi-dest': 'taxi-destination', @@ -380,7 +381,8 @@ class DSTProcessor(object): def _convert_inputs_to_utterances(self, inputs: dict, history_states: list): - """This method is to generate the utterances with user, sys, dialog_acts and metadata, while metadata is from the history_states or the output from the inference pipline""" + """This method is to generate the utterances with user, sys, dialog_acts and metadata, + while metadata is from the history_states or the output from the inference pipline""" utterances = [] user_inputs = [] @@ -427,8 +429,8 @@ class DSTProcessor(object): if isinstance(item, dict): for a in item: aa = a.lower().split('-') - if aa[1] == 'inform' or aa[1] == 'recommend' or aa[ - 1] == 'select' or aa[1] == 'book': + if aa[1] == 'inform' or aa[1] == 'recommend' or \ + aa[1] == 'select' or aa[1] == 'book': for i in item[a]: s = i[0].lower() v = i[1].lower().strip() @@ -443,7 +445,7 @@ class DSTProcessor(object): if key not in s_dict: s_dict[key] = list([v]) # ... Option 2: Keep last informed value - #s_dict[key] = list([v]) + # s_dict[key] = list([v]) return s_dict @@ -454,26 +456,26 @@ class multiwoz22Processor(DSTProcessor): super().__init__() def normalize_time(self, text): - text = re.sub('(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2', + text = re.sub(r'(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2', text) # am/pm without space - text = re.sub('(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3', + text = re.sub(r'(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3', text) # am/pm short to long form text = re.sub( - '(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)', + r'(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)', r'\1\2 \3:\4\5', text) # Missing separator - text = re.sub('(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3', + text = re.sub(r'(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3', text) # Wrong separator - text = re.sub('(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)', + text = re.sub(r'(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)', r'\1\2 \3:00\4', text) # normalize simple full hour time - text = re.sub('(^| )(\d{1}:\d{2})', r'\g<1>0\2', + text = re.sub(r'(^| )(\d{1}:\d{2})', r'\g<1>0\2', text) # Add missing leading 0 # Map 12 hour times to 24 hour times - text = re.sub( - '(\d{2})(:\d{2}) ?p\.?m\.?', lambda x: str( - int(x.groups()[0]) + 12 - if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups( - )[1], text) - text = re.sub('(^| )24:(\d{2})', r'\g<1>00:\2', + text = \ + re.sub( + r'(\d{2})(:\d{2}) ?p\.?m\.?', + lambda x: str(int(x.groups()[0]) + 12 + if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups()[1], text) + text = re.sub(r'(^| )24:(\d{2})', r'\g<1>00:\2', text) # Correct times that use 24 as hour return text @@ -508,8 +510,8 @@ class multiwoz22Processor(DSTProcessor): if isinstance(acts[d][t]['dialog_act'], dict): for a in 
acts[d][t]['dialog_act']: aa = a.lower().split('-') - if aa[1] == 'inform' or aa[1] == 'recommend' or aa[ - 1] == 'select' or aa[1] == 'book': + if aa[1] == 'inform' or aa[1] == 'recommend' \ + or aa[1] == 'select' or aa[1] == 'book': for i in acts[d][t]['dialog_act'][a]: s = i[0].lower() v = i[1].lower().strip() @@ -524,7 +526,7 @@ class multiwoz22Processor(DSTProcessor): if key not in s_dict: s_dict[key] = list([v]) # ... Option 2: Keep last informed value - #s_dict[key] = list([v]) + # s_dict[key] = list([v]) return s_dict # This should only contain label normalizations. All other mappings should @@ -560,7 +562,7 @@ class multiwoz22Processor(DSTProcessor): utt_lower = convert_to_unicode(utt).lower() utt_lower = self.normalize_text(utt_lower) utt_tok = [ - tok for tok in map(str.strip, re.split('(\W+)', utt_lower)) + tok for tok in map(str.strip, re.split(r'(\W+)', utt_lower)) if len(tok) > 0 ] return utt_tok @@ -582,7 +584,7 @@ class multiwoz22Processor(DSTProcessor): find_pos = [] found = False label_list = [ - item for item in map(str.strip, re.split('(\W+)', value_label)) + item for item in map(str.strip, re.split(r'(\W+)', value_label)) if len(item) > 0 ] len_label = len(label_list) @@ -633,11 +635,11 @@ class multiwoz22Processor(DSTProcessor): def is_in_list(self, tok, value): found = False tok_list = [ - item for item in map(str.strip, re.split('(\W+)', tok)) + item for item in map(str.strip, re.split(r'(\W+)', tok)) if len(item) > 0 ] value_list = [ - item for item in map(str.strip, re.split('(\W+)', value)) + item for item in map(str.strip, re.split(r'(\W+)', value)) if len(item) > 0 ] tok_len = len(tok_list) @@ -938,8 +940,8 @@ class multiwoz22Processor(DSTProcessor): if slot not in diag_seen_slots_dict or value_label != diag_seen_slots_value_dict[ slot]: print('(%s): %s, ' % (slot, value_label), end='') - elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[ - slot] and class_type != 'copy_value' and class_type != 'inform': + elif slot in diag_seen_slots_dict and class_type == diag_seen_slots_dict[slot] \ + and class_type != 'copy_value' and class_type != 'inform': # If slot has seen before and its class type did not change, label this slot a not present, # assuming that the slot has not actually been mentioned in this turn. # Exceptions are copy_value and inform. If a seen slot has been tagged as copy_value or inform, @@ -1262,7 +1264,7 @@ def convert_examples_to_features(examples, def _get_start_end_pos(class_type, token_label_ids, max_seq_length): if class_type == 'copy_value' and 1 not in token_label_ids: - #logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.") + # logger.warn("copy_value label, but token_label not detected. 
Setting label to 'none'.") class_type = 'none' start_pos = 0 end_pos = 0 diff --git a/modelscope/preprocessors/space/fields/gen_field.py b/modelscope/preprocessors/space/fields/gen_field.py index 7012697f..49a30e8f 100644 --- a/modelscope/preprocessors/space/fields/gen_field.py +++ b/modelscope/preprocessors/space/fields/gen_field.py @@ -8,10 +8,10 @@ from itertools import chain import numpy as np -from modelscope.preprocessors.space.tokenizer import Tokenizer -from modelscope.utils.nlp.space import ontology, utils -from modelscope.utils.nlp.space.db_ops import MultiWozDB -from modelscope.utils.nlp.space.utils import list2np +from ....utils.nlp.space import ontology, utils +from ....utils.nlp.space.db_ops import MultiWozDB +from ....utils.nlp.space.utils import list2np +from ..tokenizer import Tokenizer class BPETextField(object): diff --git a/modelscope/preprocessors/space/fields/intent_field.py b/modelscope/preprocessors/space/fields/intent_field.py index 9907165e..35e1693c 100644 --- a/modelscope/preprocessors/space/fields/intent_field.py +++ b/modelscope/preprocessors/space/fields/intent_field.py @@ -14,10 +14,10 @@ import json import numpy as np from tqdm import tqdm -from modelscope.preprocessors.space.tokenizer import Tokenizer -from modelscope.utils.nlp.space import ontology, utils -from modelscope.utils.nlp.space.scores import hierarchical_set_score -from modelscope.utils.nlp.space.utils import list2np +from ....utils.nlp.space import ontology, utils +from ....utils.nlp.space.scores import hierarchical_set_score +from ....utils.nlp.space.utils import list2np +from ..tokenizer import Tokenizer class BPETextField(object): diff --git a/modelscope/preprocessors/text_to_speech.py b/modelscope/preprocessors/text_to_speech.py index fd41b752..9d8af6fa 100644 --- a/modelscope/preprocessors/text_to_speech.py +++ b/modelscope/preprocessors/text_to_speech.py @@ -2,9 +2,8 @@ import io from typing import Any, Dict, Union -import ttsfrd - from modelscope.fileio import File +from modelscope.metainfo import Preprocessors from modelscope.models.audio.tts.frontend import GenericTtsFrontend from modelscope.models.base import Model from modelscope.utils.audio.tts_exceptions import * # noqa F403 @@ -12,11 +11,11 @@ from modelscope.utils.constant import Fields from .base import Preprocessor from .builder import PREPROCESSORS -__all__ = ['TextToTacotronSymbols', 'text_to_tacotron_symbols'] +__all__ = ['TextToTacotronSymbols'] @PREPROCESSORS.register_module( - Fields.audio, module_name=r'text_to_tacotron_symbols') + Fields.audio, module_name=Preprocessors.text_to_tacotron_symbols) class TextToTacotronSymbols(Preprocessor): """extract tacotron symbols from text. diff --git a/modelscope/preprocessors/video.py b/modelscope/preprocessors/video.py new file mode 100644 index 00000000..262fdaa5 --- /dev/null +++ b/modelscope/preprocessors/video.py @@ -0,0 +1,232 @@ +import math +import os +import random + +import decord +import numpy as np +import torch +import torch.nn as nn +import torch.utils.data +import torch.utils.dlpack as dlpack +import torchvision.transforms._transforms_video as transforms +from decord import VideoReader +from torchvision.transforms import Compose + + +def ReadVideoData(cfg, video_path): + """ simple interface to load video frames from file + + Args: + cfg (Config): The global config object. 
+ video_path (str): video file path + """ + data = _decode_video(cfg, video_path) + transform = kinetics400_tranform(cfg) + data_list = [] + for i in range(data.size(0)): + for j in range(cfg.TEST.NUM_SPATIAL_CROPS): + transform.transforms[1].set_spatial_index(j) + data_list.append(transform(data[i])) + return torch.stack(data_list, dim=0) + + +def kinetics400_tranform(cfg): + """ + Configs the transform for the kinetics-400 dataset. + We apply controlled spatial cropping and normalization. + Args: + cfg (Config): The global config object. + """ + resize_video = KineticsResizedCrop( + short_side_range=[cfg.DATA.TEST_SCALE, cfg.DATA.TEST_SCALE], + crop_size=cfg.DATA.TEST_CROP_SIZE, + num_spatial_crops=cfg.TEST.NUM_SPATIAL_CROPS) + std_transform_list = [ + transforms.ToTensorVideo(), resize_video, + transforms.NormalizeVideo( + mean=cfg.DATA.MEAN, std=cfg.DATA.STD, inplace=True) + ] + return Compose(std_transform_list) + + +def _interval_based_sampling(vid_length, vid_fps, target_fps, clip_idx, + num_clips, num_frames, interval, minus_interval): + """ + Generates the frame index list using interval based sampling. + Args: + vid_length (int): the length of the whole video (valid selection range). + vid_fps (int): the original video fps + target_fps (int): the normalized video fps + clip_idx (int): -1 for random temporal sampling, and positive values for + sampling specific clip from the video + num_clips (int): the total clips to be sampled from each video. + combined with clip_idx, the sampled video is the "clip_idx-th" + video from "num_clips" videos. + num_frames (int): number of frames in each sampled clips. + interval (int): the interval to sample each frame. + minus_interval (bool): control the end index + Returns: + index (tensor): the sampled frame indexes + """ + if num_frames == 1: + index = [random.randint(0, vid_length - 1)] + else: + # transform FPS + clip_length = num_frames * interval * vid_fps / target_fps + + max_idx = max(vid_length - clip_length, 0) + start_idx = clip_idx * math.floor(max_idx / (num_clips - 1)) + if minus_interval: + end_idx = start_idx + clip_length - interval + else: + end_idx = start_idx + clip_length - 1 + + index = torch.linspace(start_idx, end_idx, num_frames) + index = torch.clamp(index, 0, vid_length - 1).long() + + return index + + +def _decode_video_frames_list(cfg, frames_list, vid_fps): + """ + Decodes the video given the numpy frames. + Args: + cfg (Config): The global config object. + frames_list (list): all frames for a video, the frames should be numpy array. + vid_fps (int): the fps of this video. + Returns: + frames (Tensor): video tensor data + """ + assert isinstance(frames_list, list) + num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS + + frame_list = [] + for clip_idx in range(num_clips_per_video): + # for each clip in the video, + # a list is generated before decoding the specified frames from the video + list_ = _interval_based_sampling( + len(frames_list), vid_fps, cfg.DATA.TARGET_FPS, clip_idx, + num_clips_per_video, cfg.DATA.NUM_INPUT_FRAMES, + cfg.DATA.SAMPLING_RATE, cfg.DATA.MINUS_INTERVAL) + frames = None + frames = torch.from_numpy( + np.stack([frames_list[l_index] for l_index in list_.tolist()], + axis=0)) + frame_list.append(frames) + frames = torch.stack(frame_list) + if num_clips_per_video == 1: + frames = frames.squeeze(0) + + return frames + + +def _decode_video(cfg, path): + """ + Decodes the video given the numpy frames. + Args: + path (str): video file path. 
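_interval_based_sampling above is easiest to follow with concrete numbers. A minimal sketch, not part of the patch, assuming the function is imported directly from modelscope.preprocessors.video; with 8 frames per clip, an interval of 8, and matching source and target fps, clip_length = 8 * 8 * 30 / 30 = 64 frames, so two clips of a 300-frame video start at frames 0 and 236:

from modelscope.preprocessors.video import _interval_based_sampling

idx0 = _interval_based_sampling(vid_length=300, vid_fps=30, target_fps=30,
                                clip_idx=0, num_clips=2, num_frames=8,
                                interval=8, minus_interval=False)
idx1 = _interval_based_sampling(vid_length=300, vid_fps=30, target_fps=30,
                                clip_idx=1, num_clips=2, num_frames=8,
                                interval=8, minus_interval=False)
# idx0 -> tensor([  0,   9,  18,  27,  36,  45,  54,  63])
# idx1 -> tensor([236, 245, 254, 263, 272, 281, 290, 299])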
+ Returns: + frames (Tensor): video tensor data + """ + vr = VideoReader(path) + + num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS + + frame_list = [] + for clip_idx in range(num_clips_per_video): + # for each clip in the video, + # a list is generated before decoding the specified frames from the video + list_ = _interval_based_sampling( + len(vr), vr.get_avg_fps(), cfg.DATA.TARGET_FPS, clip_idx, + num_clips_per_video, cfg.DATA.NUM_INPUT_FRAMES, + cfg.DATA.SAMPLING_RATE, cfg.DATA.MINUS_INTERVAL) + frames = None + if path.endswith('.avi'): + append_list = torch.arange(0, list_[0], 4) + frames = dlpack.from_dlpack( + vr.get_batch(torch.cat([append_list, + list_])).to_dlpack()).clone() + frames = frames[append_list.shape[0]:] + else: + frames = dlpack.from_dlpack( + vr.get_batch(list_).to_dlpack()).clone() + frame_list.append(frames) + frames = torch.stack(frame_list) + if num_clips_per_video == 1: + frames = frames.squeeze(0) + del vr + return frames + + +class KineticsResizedCrop(object): + """Perform resize and crop for kinetics-400 dataset + Args: + short_side_range (list): The length of short side range. In inference, this shoudle be [256, 256] + crop_size (int): The cropped size for frames. + num_spatial_crops (int): The number of the cropped spatial regions in each video. + """ + + def __init__( + self, + short_side_range, + crop_size, + num_spatial_crops=1, + ): + self.idx = -1 + self.short_side_range = short_side_range + self.crop_size = int(crop_size) + self.num_spatial_crops = num_spatial_crops + + def _get_controlled_crop(self, clip): + """Perform controlled crop for video tensor. + Args: + clip (Tensor): the video data, the shape is [T, C, H, W] + """ + _, _, clip_height, clip_width = clip.shape + + length = self.short_side_range[0] + + if clip_height < clip_width: + new_clip_height = int(length) + new_clip_width = int(clip_width / clip_height * new_clip_height) + new_clip = torch.nn.functional.interpolate( + clip, size=(new_clip_height, new_clip_width), mode='bilinear') + else: + new_clip_width = int(length) + new_clip_height = int(clip_height / clip_width * new_clip_width) + new_clip = torch.nn.functional.interpolate( + clip, size=(new_clip_height, new_clip_width), mode='bilinear') + x_max = int(new_clip_width - self.crop_size) + y_max = int(new_clip_height - self.crop_size) + if self.num_spatial_crops == 1: + x = x_max // 2 + y = y_max // 2 + elif self.num_spatial_crops == 3: + if self.idx == 0: + if new_clip_width == length: + x = x_max // 2 + y = 0 + elif new_clip_height == length: + x = 0 + y = y_max // 2 + elif self.idx == 1: + x = x_max // 2 + y = y_max // 2 + elif self.idx == 2: + if new_clip_width == length: + x = x_max // 2 + y = y_max + elif new_clip_height == length: + x = x_max + y = y_max // 2 + return new_clip[:, :, y:y + self.crop_size, x:x + self.crop_size] + + def set_spatial_index(self, idx): + """Set the spatial cropping index for controlled cropping.. + Args: + idx (int): the spatial index. The value should be in [0, 1, 2], means [left, center, right], respectively. 
+ """ + self.idx = idx + + def __call__(self, clip): + return self._get_controlled_crop(clip) diff --git a/modelscope/pydatasets/config.py b/modelscope/pydatasets/config.py new file mode 100644 index 00000000..e916b3ec --- /dev/null +++ b/modelscope/pydatasets/config.py @@ -0,0 +1,22 @@ +import os +from pathlib import Path + +# Cache location +DEFAULT_CACHE_HOME = '~/.cache' +CACHE_HOME = os.getenv('CACHE_HOME', DEFAULT_CACHE_HOME) +DEFAULT_MS_CACHE_HOME = os.path.join(CACHE_HOME, 'modelscope/hub') +MS_CACHE_HOME = os.path.expanduser( + os.getenv('MS_CACHE_HOME', DEFAULT_MS_CACHE_HOME)) + +DEFAULT_MS_DATASETS_CACHE = os.path.join(MS_CACHE_HOME, 'datasets') +MS_DATASETS_CACHE = Path( + os.getenv('MS_DATASETS_CACHE', DEFAULT_MS_DATASETS_CACHE)) + +DOWNLOADED_DATASETS_DIR = 'downloads' +DEFAULT_DOWNLOADED_DATASETS_PATH = os.path.join(MS_DATASETS_CACHE, + DOWNLOADED_DATASETS_DIR) +DOWNLOADED_DATASETS_PATH = Path( + os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH)) + +MS_HUB_ENDPOINT = os.environ.get('MS_HUB_ENDPOINT', + 'http://101.201.119.157:31752') diff --git a/modelscope/pydatasets/py_dataset.py b/modelscope/pydatasets/py_dataset.py index 78aedaa0..49137253 100644 --- a/modelscope/pydatasets/py_dataset.py +++ b/modelscope/pydatasets/py_dataset.py @@ -1,64 +1,81 @@ -from typing import (Any, Callable, Dict, List, Mapping, Optional, Sequence, - Union) +import os +from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional, + Sequence, Union) -from datasets import Dataset, load_dataset +import numpy as np +from datasets import Dataset +from datasets import load_dataset as hf_load_dataset +from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE +from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES +from datasets.utils.file_utils import (is_relative_path, + relative_to_absolute_path) +from modelscope.pydatasets.config import MS_DATASETS_CACHE +from modelscope.pydatasets.utils.ms_api import MsApi from modelscope.utils.constant import Hubs from modelscope.utils.logger import get_logger logger = get_logger() +def format_list(para) -> List: + if para is None: + para = [] + elif isinstance(para, str): + para = [para] + elif len(set(para)) < len(para): + raise ValueError(f'List columns contains duplicates: {para}') + return para + + class PyDataset: _hf_ds = None # holds the underlying HuggingFace Dataset """A PyDataset backed by hugging face Dataset.""" - def __init__(self, hf_ds: Dataset): + def __init__(self, hf_ds: Dataset, target: Optional[str] = None): self._hf_ds = hf_ds - self.target = None + self.target = target def __iter__(self): - if isinstance(self._hf_ds, Dataset): - for item in self._hf_ds: - if self.target is not None: - yield item[self.target] - else: - yield item - else: - for ds in self._hf_ds.values(): - for item in ds: - if self.target is not None: - yield item[self.target] - else: - yield item + for item in self._hf_ds: + if self.target is not None: + yield item[self.target] + else: + yield item + + def __getitem__(self, key): + return self._hf_ds[key] @classmethod def from_hf_dataset(cls, hf_ds: Dataset, - target: str = None) -> 'PyDataset': - dataset = cls(hf_ds) - dataset.target = target - return dataset + target: str = None) -> Union[dict, 'PyDataset']: + if isinstance(hf_ds, Dataset): + return cls(hf_ds, target) + if len(hf_ds.keys()) == 1: + return cls(next(iter(hf_ds.values())), target) + return {k: cls(v, target) for k, v in hf_ds.items()} @staticmethod - def load(path: Union[str, list], - target: Optional[str] = None, - 
version: Optional[str] = None, - name: Optional[str] = None, - split: Optional[str] = None, - data_dir: Optional[str] = None, - data_files: Optional[Union[str, Sequence[str], - Mapping[str, - Union[str, - Sequence[str]]]]] = None, - hub: Optional[Hubs] = None) -> 'PyDataset': + def load( + dataset_name: Union[str, list], + target: Optional[str] = None, + version: Optional[str] = None, + hub: Optional[Hubs] = Hubs.modelscope, + subset_name: Optional[str] = None, + split: Optional[str] = None, + data_dir: Optional[str] = None, + data_files: Optional[Union[str, Sequence[str], + Mapping[str, Union[str, + Sequence[str]]]]] = None + ) -> Union[dict, 'PyDataset']: """Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. Args: - path (str): Path or name of the dataset. + dataset_name (str): Path or name of the dataset. target (str, optional): Name of the column to output. version (str, optional): Version of the dataset script to load: - name (str, optional): Defining the subset_name of the dataset. + subset_name (str, optional): Defining the subset_name of the dataset. data_dir (str, optional): Defining the data_dir of the dataset configuration. I data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s). split (str, optional): Which split of the data to load. @@ -67,53 +84,302 @@ class PyDataset: Returns: PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset. """ - if Hubs.modelscope == hub: - # TODO: parse data meta information from modelscope hub - # and possibly download data files to local (and update path) - print('getting data from modelscope hub') - if isinstance(path, str): - dataset = load_dataset( - path, - name=name, + if hub == Hubs.huggingface: + dataset = hf_load_dataset( + dataset_name, + name=subset_name, revision=version, split=split, data_dir=data_dir, data_files=data_files) - elif isinstance(path, list): + return PyDataset.from_hf_dataset(dataset, target=target) + else: + return PyDataset._load_ms_dataset( + dataset_name, + target=target, + subset_name=subset_name, + version=version, + split=split, + data_dir=data_dir, + data_files=data_files) + + @staticmethod + def _load_ms_dataset( + dataset_name: Union[str, list], + target: Optional[str] = None, + version: Optional[str] = None, + subset_name: Optional[str] = None, + split: Optional[str] = None, + data_dir: Optional[str] = None, + data_files: Optional[Union[str, Sequence[str], + Mapping[str, Union[str, + Sequence[str]]]]] = None + ) -> Union[dict, 'PyDataset']: + if isinstance(dataset_name, str): + use_hf = False + if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \ + (os.path.isfile(dataset_name) and dataset_name.endswith('.py')): + use_hf = True + elif is_relative_path(dataset_name): + ms_api = MsApi() + dataset_scripts = ms_api.fetch_dataset_scripts( + dataset_name, version) + if 'py' in dataset_scripts: # dataset copied from hf datasets + dataset_name = dataset_scripts['py'][0] + use_hf = True + else: + raise FileNotFoundError( + f"Couldn't find a dataset script at {relative_to_absolute_path(dataset_name)} " + f'or any data file in the same directory.') + + if use_hf: + dataset = hf_load_dataset( + dataset_name, + name=subset_name, + revision=version, + split=split, + data_dir=data_dir, + data_files=data_files, + cache_dir=MS_DATASETS_CACHE) + else: + # TODO load from ms datahub + raise NotImplementedError( + f'Dataset {dataset_name} load from modelscope datahub to be implemented in ' + f'the future') + elif 
isinstance(dataset_name, list): if target is None: target = 'target' - dataset = Dataset.from_dict({target: [p] for p in path}) + dataset = Dataset.from_dict({target: dataset_name}) else: raise TypeError('path must be a str or a list, but got' - f' {type(path)}') + f' {type(dataset_name)}') return PyDataset.from_hf_dataset(dataset, target=target) + def to_torch_dataset_with_processors( + self, + preprocessors: Union[Callable, List[Callable]], + columns: Union[str, List[str]] = None, + ): + preprocessor_list = preprocessors if isinstance( + preprocessors, list) else [preprocessors] + + columns = format_list(columns) + + columns = [ + key for key in self._hf_ds.features.keys() if key in columns + ] + sample = next(iter(self._hf_ds)) + + sample_res = {k: np.array(sample[k]) for k in columns} + for processor in preprocessor_list: + sample_res.update( + {k: np.array(v) + for k, v in processor(sample).items()}) + + def is_numpy_number(value): + return np.issubdtype(value.dtype, np.integer) or np.issubdtype( + value.dtype, np.floating) + + retained_columns = [] + for k in sample_res.keys(): + if not is_numpy_number(sample_res[k]): + logger.warning( + f'Data of column {k} is non-numeric, will be removed') + continue + retained_columns.append(k) + + import torch + + class MsIterableDataset(torch.utils.data.IterableDataset): + + def __init__(self, dataset: Iterable): + super(MsIterableDataset).__init__() + self.dataset = dataset + + def __iter__(self): + for item_dict in self.dataset: + res = { + k: np.array(item_dict[k]) + for k in columns if k in retained_columns + } + for preprocessor in preprocessor_list: + res.update({ + k: np.array(v) + for k, v in preprocessor(item_dict).items() + if k in retained_columns + }) + yield res + + return MsIterableDataset(self._hf_ds) + def to_torch_dataset( self, columns: Union[str, List[str]] = None, - output_all_columns: bool = False, + preprocessors: Union[Callable, List[Callable]] = None, **format_kwargs, ): - self._hf_ds.reset_format() - self._hf_ds.set_format( - type='torch', - columns=columns, - output_all_columns=output_all_columns, - format_kwargs=format_kwargs) - return self._hf_ds + """Create a torch.utils.data.Dataset from the MS Dataset. The torch.utils.data.Dataset can be passed to + torch.utils.data.DataLoader. + + Args: + preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process + every sample of the dataset. The output type of processors is dict, and each numeric field of the dict + will be used as a field of torch.utils.data.Dataset. + columns (str or List[str], default None): Dataset column(s) to be loaded (numeric data only). If the + preprocessor is None, the arg columns must have at least one column. If the `preprocessors` is not None, + the output fields of processors will also be added. + format_kwargs: A `dict` of arguments to be passed to the `torch.tensor`. 
+ + Returns: + :class:`tf.data.Dataset` + + """ + if not TORCH_AVAILABLE: + raise ImportError( + 'The function to_torch_dataset requires pytorch to be installed' + ) + if preprocessors is not None: + return self.to_torch_dataset_with_processors(preprocessors) + else: + self._hf_ds.reset_format() + self._hf_ds.set_format( + type='torch', columns=columns, format_kwargs=format_kwargs) + return self._hf_ds + + def to_tf_dataset_with_processors( + self, + batch_size: int, + shuffle: bool, + preprocessors: Union[Callable, List[Callable]], + drop_remainder: bool = None, + prefetch: bool = True, + label_cols: Union[str, List[str]] = None, + columns: Union[str, List[str]] = None, + ): + preprocessor_list = preprocessors if isinstance( + preprocessors, list) else [preprocessors] + + label_cols = format_list(label_cols) + columns = format_list(columns) + cols_to_retain = list(set(label_cols + columns)) + retained_columns = [ + key for key in self._hf_ds.features.keys() if key in cols_to_retain + ] + import tensorflow as tf + tf_dataset = tf.data.Dataset.from_tensor_slices( + np.arange(len(self._hf_ds), dtype=np.int64)) + if shuffle: + tf_dataset = tf_dataset.shuffle(buffer_size=len(self._hf_ds)) + + def func(i, return_dict=False): + i = int(i) + res = {k: np.array(self._hf_ds[i][k]) for k in retained_columns} + for preprocessor in preprocessor_list: + # TODO preprocessor output may have the same key + res.update({ + k: np.array(v) + for k, v in preprocessor(self._hf_ds[i]).items() + }) + if return_dict: + return res + return tuple(list(res.values())) + + sample_res = func(0, True) + + @tf.function(input_signature=[tf.TensorSpec(None, tf.int64)]) + def fetch_function(i): + output = tf.numpy_function( + func, + inp=[i], + Tout=[ + tf.dtypes.as_dtype(val.dtype) + for val in sample_res.values() + ], + ) + return {key: output[i] for i, key in enumerate(sample_res)} + + tf_dataset = tf_dataset.map( + fetch_function, num_parallel_calls=tf.data.AUTOTUNE) + if label_cols: + + def split_features_and_labels(input_batch): + labels = { + key: tensor + for key, tensor in input_batch.items() if key in label_cols + } + if len(input_batch) == 1: + input_batch = next(iter(input_batch.values())) + if len(labels) == 1: + labels = next(iter(labels.values())) + return input_batch, labels + + tf_dataset = tf_dataset.map(split_features_and_labels) + + elif len(columns) == 1: + tf_dataset = tf_dataset.map(lambda x: next(iter(x.values()))) + if batch_size > 1: + tf_dataset = tf_dataset.batch( + batch_size, drop_remainder=drop_remainder) + + if prefetch: + tf_dataset = tf_dataset.prefetch(tf.data.experimental.AUTOTUNE) + return tf_dataset def to_tf_dataset( self, - columns: Union[str, List[str]], batch_size: int, shuffle: bool, - collate_fn: Callable, + preprocessors: Union[Callable, List[Callable]] = None, + columns: Union[str, List[str]] = None, + collate_fn: Callable = None, drop_remainder: bool = None, collate_fn_args: Dict[str, Any] = None, label_cols: Union[str, List[str]] = None, - dummy_labels: bool = False, prefetch: bool = True, ): + """Create a tf.data.Dataset from the MS Dataset. This tf.data.Dataset can be passed to tf methods like + model.fit() or model.predict(). + + Args: + batch_size (int): Number of samples in a single batch. + shuffle(bool): Shuffle the dataset order. + preprocessors (Callable or List[Callable], default None): (list of) Preprocessor object used to process + every sample of the dataset. The output type of processors is dict, and each field of the dict will be + used as a field of the tf.data. 
Dataset. If the `preprocessors` is None, the `collate_fn` + shouldn't be None. + columns (str or List[str], default None): Dataset column(s) to be loaded. If the preprocessor is None, + the arg columns must have at least one column. If the `preprocessors` is not None, the output fields of + processors will also be added. + collate_fn(Callable, default None): A callable object used to collect lists of samples into a batch. If + the `preprocessors` is None, the `collate_fn` shouldn't be None. + drop_remainder(bool, default None): Drop the last incomplete batch when loading. + collate_fn_args (Dict, optional): A `dict` of arguments to be passed to the`collate_fn`. + label_cols (str or List[str], defalut None): Dataset column(s) to load as labels. + prefetch (bool, default True): Prefetch data. + + Returns: + :class:`tf.data.Dataset` + + """ + if not TF_AVAILABLE: + raise ImportError( + 'The function to_tf_dataset requires Tensorflow to be installed.' + ) + if preprocessors is not None: + return self.to_tf_dataset_with_processors( + batch_size, + shuffle, + preprocessors, + drop_remainder=drop_remainder, + prefetch=prefetch, + label_cols=label_cols, + columns=columns) + + if collate_fn is None: + logger.error( + 'The `preprocessors` and the `collate_fn` should`t be both None.' + ) + return None self._hf_ds.reset_format() return self._hf_ds.to_tf_dataset( columns, @@ -123,7 +389,6 @@ class PyDataset: drop_remainder=drop_remainder, collate_fn_args=collate_fn_args, label_cols=label_cols, - dummy_labels=dummy_labels, prefetch=prefetch) def to_hf_dataset(self) -> Dataset: diff --git a/modelscope/pydatasets/utils/__init__.py b/modelscope/pydatasets/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/pydatasets/utils/ms_api.py b/modelscope/pydatasets/utils/ms_api.py new file mode 100644 index 00000000..04052cc4 --- /dev/null +++ b/modelscope/pydatasets/utils/ms_api.py @@ -0,0 +1,66 @@ +import os +from collections import defaultdict +from typing import Optional + +import requests + +from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH, + MS_HUB_ENDPOINT) +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class MsApi: + + def __init__(self, endpoint=MS_HUB_ENDPOINT): + self.endpoint = endpoint + + def list_datasets(self): + path = f'{self.endpoint}/api/v1/datasets' + headers = None + params = {} + r = requests.get(path, params=params, headers=headers) + r.raise_for_status() + dataset_list = r.json()['Data'] + return [x['Name'] for x in dataset_list] + + def fetch_dataset_scripts(self, + dataset_name: str, + version: Optional[str] = 'master', + force_download=False): + datahub_url = f'{self.endpoint}/api/v1/datasets?Query={dataset_name}' + r = requests.get(datahub_url) + r.raise_for_status() + dataset_list = r.json()['Data'] + if len(dataset_list) == 0: + return None + dataset_id = dataset_list[0]['Id'] + version = version or 'master' + datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}' + r = requests.get(datahub_url) + r.raise_for_status() + file_list = r.json()['Data']['Files'] + cache_dir = os.path.join(DOWNLOADED_DATASETS_PATH, dataset_name, + version) + os.makedirs(cache_dir, exist_ok=True) + local_paths = defaultdict(list) + for file_info in file_list: + file_path = file_info['Path'] + if file_path.endswith('.py'): + datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/files?' 
\ + f'Revision={version}&Path={file_path}' + r = requests.get(datahub_url) + r.raise_for_status() + content = r.json()['Data']['Content'] + local_path = os.path.join(cache_dir, file_path) + if os.path.exists(local_path) and not force_download: + logger.warning( + f"Reusing dataset {dataset_name}'s python file ({local_path})" + ) + local_paths['py'].append(local_path) + continue + with open(local_path, 'w') as f: + f.writelines(content) + local_paths['py'].append(local_path) + return local_paths diff --git a/modelscope/trainers/nlp/space/trainers/gen_trainer.py b/modelscope/trainers/nlp/space/trainers/gen_trainer.py index a0cda25c..e09e2100 100644 --- a/modelscope/trainers/nlp/space/trainers/gen_trainer.py +++ b/modelscope/trainers/nlp/space/trainers/gen_trainer.py @@ -13,7 +13,7 @@ import torch from tqdm import tqdm from transformers.optimization import AdamW, get_linear_schedule_with_warmup -import modelscope.utils.nlp.space.ontology as ontology +from .....utils.nlp.space import ontology from ..metrics.metrics_tracker import MetricsTracker diff --git a/modelscope/trainers/nlp/space/trainers/intent_trainer.py b/modelscope/trainers/nlp/space/trainers/intent_trainer.py index bd43e9a5..2c5081d7 100644 --- a/modelscope/trainers/nlp/space/trainers/intent_trainer.py +++ b/modelscope/trainers/nlp/space/trainers/intent_trainer.py @@ -14,9 +14,7 @@ import torch from tqdm import tqdm from transformers.optimization import AdamW, get_linear_schedule_with_warmup -from modelscope.trainers.nlp.space.metrics.metrics_tracker import \ - MetricsTracker -from modelscope.utils.nlp.space.args import str2bool +from ..metrics.metrics_tracker import MetricsTracker def get_logger(log_path, name='default'): diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index d89f0496..cd232c6a 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -28,9 +28,13 @@ class Tasks(object): image_editing = 'image-editing' image_generation = 'image-generation' image_matting = 'image-matting' + ocr_detection = 'ocr-detection' + action_recognition = 'action-recognition' # nlp tasks word_segmentation = 'word-segmentation' + nli = 'nli' + sentiment_classification = 'sentiment-classification' sentiment_analysis = 'sentiment-analysis' sentence_similarity = 'sentence-similarity' text_classification = 'text-classification' @@ -45,8 +49,7 @@ class Tasks(object): dialog_state_tracking = 'dialog-state-tracking' table_question_answering = 'table-question-answering' feature_extraction = 'feature-extraction' - sentence_similarity = 'sentence-similarity' - fill_mask = 'fill-mask ' + fill_mask = 'fill-mask' summarization = 'summarization' question_answering = 'question-answering' diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py index 2f61b148..868e751b 100644 --- a/modelscope/utils/hub.py +++ b/modelscope/utils/hub.py @@ -1,14 +1,67 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os +import os.path as osp +from typing import List, Optional, Union -from maas_hub.constants import MODEL_ID_SEPARATOR +from requests import HTTPError +from modelscope.hub.file_download import model_file_download +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile -# temp solution before the hub-cache is in place -def get_model_cache_dir(model_id: str, branch: str = 'master'): - model_id_expanded = model_id.replace('/', - MODEL_ID_SEPARATOR) + '.' 
+ branch - default_cache_dir = os.path.expanduser(os.path.join('~/.cache', 'maas')) - return os.getenv('MAAS_CACHE', - os.path.join(default_cache_dir, 'hub', model_id_expanded)) + +def create_model_if_not_exist( + api, + model_id: str, + chinese_name: str, + visibility: Optional[int] = 5, # 1-private, 5-public + license: Optional[str] = 'apache-2.0', + revision: Optional[str] = 'master'): + exists = True + try: + api.get_model(model_id=model_id, revision=revision) + except HTTPError: + exists = False + if exists: + print(f'model {model_id} already exists, skip creation.') + return False + else: + api.create_model( + model_id=model_id, + chinese_name=chinese_name, + visibility=visibility, + license=license) + print(f'model {model_id} successfully created.') + return True + + +def read_config(model_id_or_path: str): + """ Read config from hub or local path + + Args: + model_id_or_path (str): Model repo name or local directory path. + + Return: + config (:obj:`Config`): config object + """ + if not os.path.exists(model_id_or_path): + local_path = model_file_download(model_id_or_path, + ModelFile.CONFIGURATION) + else: + local_path = os.path.join(model_id_or_path, ModelFile.CONFIGURATION) + + return Config.from_file(local_path) + + +def auto_load(model: Union[str, List[str]]): + if isinstance(model, str): + if not osp.exists(model): + model = snapshot_download(model) + else: + model = [ + snapshot_download(m) if not osp.exists(m) else m for m in model + ] + + return model diff --git a/modelscope/utils/registry.py b/modelscope/utils/registry.py index b26b899d..8009b084 100644 --- a/modelscope/utils/registry.py +++ b/modelscope/utils/registry.py @@ -78,7 +78,7 @@ class Registry(object): f'{self._name}[{default_group}] and will ' 'be overwritten') logger.warning(f'{self._modules[default_group][module_name]}' - 'to {module_cls}') + f'to {module_cls}') # also register module in the default group for faster access # only by module name self._modules[default_group][module_name] = module_cls diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py index c8ea0442..95e63dba 100644 --- a/modelscope/utils/test_utils.py +++ b/modelscope/utils/test_utils.py @@ -2,6 +2,9 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os +import unittest + +from datasets.config import TF_AVAILABLE, TORCH_AVAILABLE TEST_LEVEL = 2 TEST_LEVEL_STR = 'TEST_LEVEL' @@ -15,6 +18,18 @@ def test_level(): return TEST_LEVEL +def require_tf(test_case): + if not TF_AVAILABLE: + test_case = unittest.skip('test requires TensorFlow')(test_case) + return test_case + + +def require_torch(test_case): + if not TORCH_AVAILABLE: + test_case = unittest.skip('test requires PyTorch')(test_case) + return test_case + + def set_test_level(level: int): global TEST_LEVEL TEST_LEVEL = level diff --git a/requirements/audio.txt b/requirements/audio.txt index 140836a8..c7b2b239 100644 --- a/requirements/audio.txt +++ b/requirements/audio.txt @@ -1,25 +1,25 @@ #tts -h5py==2.10.0 -#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp36-cp36m-linux_x86_64.whl -https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp37-cp37m-linux_x86_64.whl -https://swap.oss-cn-hangzhou.aliyuncs.com/Jiaqi%2Fmaas%2Ftts%2Frequirements%2Fpytorch_wavelets-1.3.0-py3-none-any.whl?Expires=1685688388&OSSAccessKeyId=LTAI4Ffebq4d9jTVDwiSbY4L&Signature=jcQbg5EZ%2Bdys3%2F4BRn3srrKLdIg%3D -#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp38-cp38-linux_x86_64.whl -#https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.1-cp39-cp39-linux_x86_64.whl +h5py +https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/pytorch_wavelets-1.3.0-py3-none-any.whl +https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp36-cp36m-linux_x86_64.whl; python_version=='3.6' +https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp37-cp37m-linux_x86_64.whl; python_version=='3.7' +https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp38-cp38-linux_x86_64.whl; python_version=='3.8' +https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/requirements/ttsfrd-0.0.2-cp39-cp39-linux_x86_64.whl; python_version=='3.9' inflect -keras==2.2.4 +keras librosa lxml matplotlib nara_wpe -numpy==1.18.* -protobuf==3.20.* +numpy +protobuf>3,<=3.20 ptflops PyWavelets>=1.0.0 -scikit-learn==0.23.2 +scikit-learn sox tensorboard tensorflow==1.15.* -torch==1.10.* +torch torchaudio torchvision tqdm diff --git a/requirements/cv.txt b/requirements/cv.txt index 66799b76..513dae99 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -1 +1,3 @@ +decord>=0.6.0 easydict +tf_slim diff --git a/requirements/nlp.txt b/requirements/nlp.txt index eefb3c7d..bc0b3fcd 100644 --- a/requirements/nlp.txt +++ b/requirements/nlp.txt @@ -1,4 +1,5 @@ -https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl -https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz +# https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz +http://ait-public.oss-cn-hangzhou-zmf.aliyuncs.com/jizhu/en_core_web_sm-2.3.1.tar.gz +https://alinlp.alibaba-inc.com/pypi/sofa-1.0.3-py3-none-any.whl spacy>=2.3.5 # python -m spacy download en_core_web_sm diff --git a/requirements/runtime.txt b/requirements/runtime.txt index e97352aa..6580de53 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,13 +1,16 @@ addict datasets easydict -https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl +filelock>=3.3.0 numpy opencv-python-headless Pillow>=6.2.0 pyyaml requests 
+requests==2.27.1 scipy +setuptools==58.0.4 tokenizers<=0.10.3 +tqdm>=4.64.0 transformers<=4.16.2 yapf diff --git a/tests/hub/__init__.py b/tests/hub/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/hub/test_hub_examples.py b/tests/hub/test_hub_examples.py new file mode 100644 index 00000000..b63445af --- /dev/null +++ b/tests/hub/test_hub_examples.py @@ -0,0 +1,33 @@ +import unittest + +from maas_hub.maas_api import MaasApi + +from modelscope.utils.hub import create_model_if_not_exist + +USER_NAME = 'maasadmin' +PASSWORD = '12345678' + + +class HubExampleTest(unittest.TestCase): + + def setUp(self): + self.api = MaasApi() + # note this is temporary before official account management is ready + self.api.login(USER_NAME, PASSWORD) + + @unittest.skip('to be used for local test only') + def test_example_model_creation(self): + # ATTENTION:change to proper model names before use + model_name = 'cv_unet_person-image-cartoon_compound-models' + model_chinese_name = '达摩卡通化模型' + model_org = 'damo' + model_id = '%s/%s' % (model_org, model_name) + + created = create_model_if_not_exist(self.api, model_id, + model_chinese_name) + if not created: + print('!! NOT created since model already exists !!') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py new file mode 100644 index 00000000..d44cd7c1 --- /dev/null +++ b/tests/hub/test_hub_operation.py @@ -0,0 +1,155 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import subprocess +import tempfile +import unittest +import uuid + +from modelscope.hub.api import HubApi, ModelScopeConfig +from modelscope.hub.file_download import model_file_download +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.hub.utils.utils import get_gitlab_domain + +USER_NAME = 'maasadmin' +PASSWORD = '12345678' + +model_chinese_name = '达摩卡通化模型' +model_org = 'unittest' +DEFAULT_GIT_PATH = 'git' + + +class GitError(Exception): + pass + + +# TODO make thest git operation to git library after merge code. +def run_git_command(git_path, *args) -> subprocess.CompletedProcess: + response = subprocess.run([git_path, *args], capture_output=True) + try: + response.check_returncode() + return response.stdout.decode('utf8') + except subprocess.CalledProcessError as error: + raise GitError(error.stderr.decode('utf8')) + + +# for public project, token can None, private repo, there must token. 
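# --- Illustrative sketch (not part of the committed diff): the clone()/push()
# --- helpers defined next authenticate git by embedding an OAuth2 token into
# --- the repository URL. A minimal, self-contained version of that rewrite,
# --- using a hypothetical URL and token:
from typing import Optional


def _with_token(url: str, token: Optional[str] = None) -> str:
    # Public repos need no token; private repos embed 'oauth2:<token>@' credentials.
    if token is None:
        return url
    return url.replace('//', '//oauth2:%s@' % token)


assert _with_token('http://git.example.com/org/repo.git') == \
    'http://git.example.com/org/repo.git'
assert _with_token('http://git.example.com/org/repo.git', token='abc123') == \
    'http://oauth2:abc123@git.example.com/org/repo.git'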
+def clone(local_dir: str, token: str, url: str): + url = url.replace('//', '//oauth2:%s@' % token) + clone_args = '-C %s clone %s' % (local_dir, url) + clone_args = clone_args.split(' ') + stdout = run_git_command(DEFAULT_GIT_PATH, *clone_args) + print('stdout: %s' % stdout) + + +def push(local_dir: str, token: str, url: str): + url = url.replace('//', '//oauth2:%s@' % token) + push_args = '-C %s push %s' % (local_dir, url) + push_args = push_args.split(' ') + stdout = run_git_command(DEFAULT_GIT_PATH, *push_args) + print('stdout: %s' % stdout) + + +sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx' +download_model_file_name = 'mnist-12.onnx' + + +class HubOperationTest(unittest.TestCase): + + def setUp(self): + self.old_cwd = os.getcwd() + self.api = HubApi() + # note this is temporary before official account management is ready + self.api.login(USER_NAME, PASSWORD) + self.model_name = uuid.uuid4().hex + self.model_id = '%s/%s' % (model_org, self.model_name) + self.api.create_model( + model_id=self.model_id, + chinese_name=model_chinese_name, + visibility=5, # 1-private, 5-public + license='apache-2.0') + + def tearDown(self): + os.chdir(self.old_cwd) + self.api.delete_model(model_id=self.model_id) + + def test_model_repo_creation(self): + # change to proper model names before use + try: + info = self.api.get_model(model_id=self.model_id) + assert info['Name'] == self.model_name + except KeyError as ke: + if ke.args[0] == 'name': + print(f'model {self.model_name} already exists, ignore') + else: + raise + + # Note that this can be done via git operation once model repo + # has been created. Git-Op is the RECOMMENDED model upload approach + def test_model_upload(self): + url = f'http://{get_gitlab_domain()}/{self.model_id}' + print(url) + temporary_dir = tempfile.mkdtemp() + os.chdir(temporary_dir) + cmd_args = 'clone %s' % url + cmd_args = cmd_args.split(' ') + out = run_git_command('git', *cmd_args) + print(out) + repo_dir = os.path.join(temporary_dir, self.model_name) + os.chdir(repo_dir) + os.system('touch file1') + os.system('git add file1') + os.system("git commit -m 'Test'") + token = ModelScopeConfig.get_token() + push(repo_dir, token, url) + + def test_download_single_file(self): + url = f'http://{get_gitlab_domain()}/{self.model_id}' + print(url) + temporary_dir = tempfile.mkdtemp() + os.chdir(temporary_dir) + os.system('git clone %s' % url) + repo_dir = os.path.join(temporary_dir, self.model_name) + os.chdir(repo_dir) + os.system('wget %s' % sample_model_url) + os.system('git add .') + os.system("git commit -m 'Add file'") + token = ModelScopeConfig.get_token() + push(repo_dir, token, url) + assert os.path.exists( + os.path.join(temporary_dir, self.model_name, + download_model_file_name)) + downloaded_file = model_file_download( + model_id=self.model_id, file_path=download_model_file_name) + mdtime1 = os.path.getmtime(downloaded_file) + # download again + downloaded_file = model_file_download( + model_id=self.model_id, file_path=download_model_file_name) + mdtime2 = os.path.getmtime(downloaded_file) + assert mdtime1 == mdtime2 + + def test_snapshot_download(self): + url = f'http://{get_gitlab_domain()}/{self.model_id}' + print(url) + temporary_dir = tempfile.mkdtemp() + os.chdir(temporary_dir) + os.system('git clone %s' % url) + repo_dir = os.path.join(temporary_dir, self.model_name) + os.chdir(repo_dir) + os.system('wget %s' % sample_model_url) + os.system('git add .') + os.system("git commit -m 'Add file'") + token = 
ModelScopeConfig.get_token() + push(repo_dir, token, url) + snapshot_path = snapshot_download(model_id=self.model_id) + downloaded_file_path = os.path.join(snapshot_path, + download_model_file_name) + assert os.path.exists(downloaded_file_path) + mdtime1 = os.path.getmtime(downloaded_file_path) + # download again + snapshot_path = snapshot_download(model_id=self.model_id) + mdtime2 = os.path.getmtime(downloaded_file_path) + assert mdtime1 == mdtime2 + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/nlp/test_dialog_state_tracking.py b/tests/pipelines/nlp/test_dialog_state_tracking.py index a6c989bd..41ef7981 100644 --- a/tests/pipelines/nlp/test_dialog_state_tracking.py +++ b/tests/pipelines/nlp/test_dialog_state_tracking.py @@ -1,8 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +import os +import os.path as osp +import tempfile import unittest -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model from modelscope.models.nlp import DialogStateTrackingModel from modelscope.pipelines import DialogStateTrackingPipeline, pipeline @@ -10,50 +12,31 @@ from modelscope.preprocessors import DialogStateTrackingPreprocessor from modelscope.utils.constant import Tasks -class DialogIntentPredictionTest(unittest.TestCase): - model_id = 'damo/nlp_space_dialog-intent-prediction' - test_case = [ - 'How do I locate my card?', - 'I still have not received my new card, I ordered over a week ago.' - ] +class DialogStateTrackingTest(unittest.TestCase): + model_id = 'damo/nlp_space_dialog-state-tracking' + test_case = {} - @unittest.skip('test with snapshot_download') def test_run(self): - cache_path = snapshot_download(self.model_id) - preprocessor = DialogIntentPredictionPreprocessor(model_dir=cache_path) - model = DialogIntentModel( - model_dir=cache_path, - text_field=preprocessor.text_field, - config=preprocessor.config) - - pipelines = [ - DialogIntentPredictionPipeline( - model=model, preprocessor=preprocessor), - pipeline( - task=Tasks.dialog_intent_prediction, - model=model, - preprocessor=preprocessor) - ] - - for my_pipeline, item in list(zip(pipelines, self.test_case)): - print(my_pipeline(item)) - - def test_run_with_model_from_modelhub(self): - # model = Model.from_pretrained(self.model_id) - # preprocessor = DialogIntentPredictionPreprocessor( - # model_dir=model.model_dir) - # + # cache_path = '' + # cache_path = snapshot_download(self.model_id) + + # preprocessor = DialogStateTrackingPreprocessor(model_dir=cache_path) + # model = DialogStateTrackingModel( + # model_dir=cache_path, + # text_field=preprocessor.text_field, + # config=preprocessor.config) # pipelines = [ - # DialogIntentPredictionPipeline( - # model=model, preprocessor=preprocessor), + # DialogStateTrackingPipeline(model=model, preprocessor=preprocessor), # pipeline( - # task=Tasks.dialog_intent_prediction, + # task=Tasks.dialog_modeling, # model=model, # preprocessor=preprocessor) # ] - # - # for my_pipeline, item in list(zip(pipelines, self.test_case)): - # print(my_pipeline(item)) + + print('jizhu test') + + @unittest.skip('test with snapshot_download') + def test_run_with_model_from_modelhub(self): pass diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py new file mode 100644 index 00000000..b524ca18 --- /dev/null +++ b/tests/pipelines/test_action_recognition.py @@ -0,0 +1,58 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
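# --- Illustrative sketch (not part of the committed diff): the hub operation
# --- tests above assert cache reuse by downloading the same file twice and
# --- comparing modification times. The same check, reduced to a standalone
# --- example with a stand-in download function (fake_download is hypothetical,
# --- not a ModelScope API):
import os
import tempfile
import time


def fake_download(path: str) -> str:
    # Mimics model_file_download/snapshot_download: write only when not cached.
    if not os.path.exists(path):
        with open(path, 'w') as f:
            f.write('model-bytes')
    return path


cached_file = os.path.join(tempfile.mkdtemp(), 'model.bin')
mtime1 = os.path.getmtime(fake_download(cached_file))
time.sleep(0.01)
mtime2 = os.path.getmtime(fake_download(cached_file))
assert mtime1 == mtime2  # the second call reused the cached copy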
+# !/usr/bin/env python +import os.path as osp +import shutil +import tempfile +import unittest + +import cv2 + +from modelscope.fileio import File +from modelscope.pipelines import pipeline +from modelscope.pydatasets import PyDataset +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.test_utils import test_level + + +class ActionRecognitionTest(unittest.TestCase): + + def setUp(self) -> None: + self.model_id = 'damo/cv_TAdaConv_action-recognition' + + @unittest.skip('deprecated, download model from model hub instead') + def test_run_with_direct_file_download(self): + model_path = 'https://aquila2-online-models.oss-cn-shanghai.aliyuncs.com/maas_test/pytorch_model.pt' + config_path = 'https://aquila2-online-models.oss-cn-shanghai.aliyuncs.com/maas_test/configuration.json' + with tempfile.TemporaryDirectory() as tmp_dir: + model_file = osp.join(tmp_dir, ModelFile.TORCH_MODEL_FILE) + with open(model_file, 'wb') as ofile1: + ofile1.write(File.read(model_path)) + config_file = osp.join(tmp_dir, ModelFile.CONFIGURATION) + with open(config_file, 'wb') as ofile2: + ofile2.write(File.read(config_path)) + recognition_pipeline = pipeline( + Tasks.action_recognition, model=tmp_dir) + result = recognition_pipeline( + 'data/test/videos/action_recognition_test_video.mp4') + print(f'recognition output: {result}.') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + recognition_pipeline = pipeline( + Tasks.action_recognition, model=self.model_id) + result = recognition_pipeline( + 'data/test/videos/action_recognition_test_video.mp4') + + print(f'recognition output: {result}.') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub_default_model(self): + recognition_pipeline = pipeline(Tasks.action_recognition) + result = recognition_pipeline( + 'data/test/videos/action_recognition_test_video.mp4') + + print(f'recognition output: {result}.') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/nlp/test_dialog_intent_prediction.py b/tests/pipelines/test_dialog_intent_prediction.py similarity index 96% rename from tests/pipelines/nlp/test_dialog_intent_prediction.py rename to tests/pipelines/test_dialog_intent_prediction.py index 0ec4e1e7..97cdbb3d 100644 --- a/tests/pipelines/nlp/test_dialog_intent_prediction.py +++ b/tests/pipelines/test_dialog_intent_prediction.py @@ -1,8 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import unittest -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model from modelscope.models.nlp import DialogIntentModel from modelscope.pipelines import DialogIntentPredictionPipeline, pipeline diff --git a/tests/pipelines/nlp/test_dialog_modeling.py b/tests/pipelines/test_dialog_modeling.py similarity index 98% rename from tests/pipelines/nlp/test_dialog_modeling.py rename to tests/pipelines/test_dialog_modeling.py index 7d4da8fe..f606ba49 100644 --- a/tests/pipelines/nlp/test_dialog_modeling.py +++ b/tests/pipelines/test_dialog_modeling.py @@ -4,8 +4,7 @@ import os.path as osp import tempfile import unittest -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model from modelscope.models.nlp import DialogModelingModel from modelscope.pipelines import DialogModelingPipeline, pipeline diff --git a/tests/pipelines/test_fill_mask.py b/tests/pipelines/test_fill_mask.py new file mode 100644 index 00000000..49c5dc8a --- /dev/null +++ b/tests/pipelines/test_fill_mask.py @@ -0,0 +1,129 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models import Model +from modelscope.models.nlp import StructBertForMaskedLM, VecoForMaskedLM +from modelscope.pipelines import FillMaskPipeline, pipeline +from modelscope.preprocessors import FillMaskPreprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class FillMaskTest(unittest.TestCase): + model_id_sbert = { + 'zh': 'damo/nlp_structbert_fill-mask_chinese-large', + 'en': 'damo/nlp_structbert_fill-mask_english-large' + } + model_id_veco = 'damo/nlp_veco_fill-mask-large' + + ori_texts = { + 'zh': + '段誉轻挥折扇,摇了摇头,说道:“你师父是你的师父,你师父可不是我的师父。' + '你师父差得动你,你师父可差不动我。', + 'en': + 'Everything in what you call reality is really just a reflection of your ' + 'consciousness. Your whole universe is just a mirror reflection of your story.' + } + + test_inputs = { + 'zh': + '段誉轻[MASK]折扇,摇了摇[MASK],[MASK]道:“你师父是你的[MASK][MASK],你' + '师父可不是[MASK]的师父。你师父差得动你,你师父可[MASK]不动我。', + 'en': + 'Everything in [MASK] you call reality is really [MASK] a reflection of your ' + '[MASK]. Your [MASK] universe is just a mirror [MASK] of your story.' 
+ } + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_by_direct_model_download(self): + # sbert + for language in ['zh', 'en']: + model_dir = snapshot_download(self.model_id_sbert[language]) + preprocessor = FillMaskPreprocessor( + model_dir, first_sequence='sentence', second_sequence=None) + model = StructBertForMaskedLM(model_dir) + pipeline1 = FillMaskPipeline(model, preprocessor) + pipeline2 = pipeline( + Tasks.fill_mask, model=model, preprocessor=preprocessor) + ori_text = self.ori_texts[language] + test_input = self.test_inputs[language] + print( + f'\nori_text: {ori_text}\ninput: {test_input}\npipeline1: ' + f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n' + ) + + # veco + model_dir = snapshot_download(self.model_id_veco) + preprocessor = FillMaskPreprocessor( + model_dir, first_sequence='sentence', second_sequence=None) + model = VecoForMaskedLM(model_dir) + pipeline1 = FillMaskPipeline(model, preprocessor) + pipeline2 = pipeline( + Tasks.fill_mask, model=model, preprocessor=preprocessor) + for language in ['zh', 'en']: + ori_text = self.ori_texts[language] + test_input = self.test_inputs[language].replace('[MASK]', '') + print( + f'\nori_text: {ori_text}\ninput: {test_input}\npipeline1: ' + f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n' + ) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub(self): + # sbert + for language in ['zh', 'en']: + print(self.model_id_sbert[language]) + model = Model.from_pretrained(self.model_id_sbert[language]) + preprocessor = FillMaskPreprocessor( + model.model_dir, + first_sequence='sentence', + second_sequence=None) + pipeline_ins = pipeline( + task=Tasks.fill_mask, model=model, preprocessor=preprocessor) + print( + f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' + f'{pipeline_ins(self.test_inputs[language])}\n') + + # veco + model = Model.from_pretrained(self.model_id_veco) + preprocessor = FillMaskPreprocessor( + model.model_dir, first_sequence='sentence', second_sequence=None) + pipeline_ins = pipeline( + Tasks.fill_mask, model=model, preprocessor=preprocessor) + for language in ['zh', 'en']: + ori_text = self.ori_texts[language] + test_input = self.test_inputs[language].replace('[MASK]', '') + print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' + f'{pipeline_ins(test_input)}\n') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name(self): + # veco + pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_veco) + for language in ['zh', 'en']: + ori_text = self.ori_texts[language] + test_input = self.test_inputs[language].replace('[MASK]', '') + print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' + f'{pipeline_ins(test_input)}\n') + + # structBert + language = 'zh' + pipeline_ins = pipeline( + task=Tasks.fill_mask, model=self.model_id_sbert[language]) + print( + f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' + f'{pipeline_ins(self.test_inputs[language])}\n') + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_default_model(self): + pipeline_ins = pipeline(task=Tasks.fill_mask) + language = 'en' + ori_text = self.ori_texts[language] + test_input = self.test_inputs[language].replace('[MASK]', '') + print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' + 
f'{pipeline_ins(test_input)}\n') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_image_captioning.py b/tests/pipelines/test_image_captioning.py index 74a65806..5fa6ff49 100644 --- a/tests/pipelines/test_image_captioning.py +++ b/tests/pipelines/test_image_captioning.py @@ -1,10 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import os -import tempfile import unittest -from modelscope.fileio import File from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.test_utils import test_level @@ -12,23 +9,13 @@ from modelscope.utils.test_utils import test_level class ImageCaptionTest(unittest.TestCase): - @unittest.skip('skip before model is restored in model hub') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run(self): - model = 'https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt' - - os.system( - 'wget https://jirenmr.oss-cn-zhangjiakou.aliyuncs.com/ofa/BPE.zip' - ) - os.system('unzip BPE.zip') - bpe_dir = './BPE' - - with tempfile.NamedTemporaryFile('wb', suffix='.pb') as ofile: - ofile.write(File.read(model)) - img_captioning = pipeline( - Tasks.image_captioning, model=ofile.name, bpe_dir=bpe_dir) - - result = img_captioning('data/test/images/image_matting.png') - print(result['caption']) + img_captioning = pipeline( + Tasks.image_captioning, + model='damo/ofa_image-caption_coco_large_en') + result = img_captioning('data/test/images/image_captioning.png') + print(result['caption']) if __name__ == '__main__': diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 6e102d00..1b547e14 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -10,7 +10,6 @@ from modelscope.fileio import File from modelscope.pipelines import pipeline from modelscope.pydatasets import PyDataset from modelscope.utils.constant import ModelFile, Tasks -from modelscope.utils.hub import get_model_cache_dir from modelscope.utils.test_utils import test_level @@ -18,11 +17,6 @@ class ImageMattingTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/cv_unet_image-matting' - # switch to False if downloading everytime is not desired - purge_cache = True - if purge_cache: - shutil.rmtree( - get_model_cache_dir(self.model_id), ignore_errors=True) @unittest.skip('deprecated, download model from model hub instead') def test_run_with_direct_file_download(self): @@ -58,7 +52,7 @@ class ImageMattingTest(unittest.TestCase): cv2.imwrite('result.png', result['output_png']) print(f'Output written to {osp.abspath("result.png")}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub_default_model(self): img_matting = pipeline(Tasks.image_matting) @@ -66,6 +60,17 @@ class ImageMattingTest(unittest.TestCase): cv2.imwrite('result.png', result['output_png']) print(f'Output written to {osp.abspath("result.png")}') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_modelscope_dataset(self): + dataset = PyDataset.load('beans', split='train', target='image') + img_matting = pipeline(Tasks.image_matting, model=self.model_id) + result = img_matting(dataset) + for i in range(10): + cv2.imwrite(f'result_{i}.png', next(result)['output_png']) + print( + f'Output written to dir: 
{osp.dirname(osp.abspath("result_0.png"))}' + ) + if __name__ == '__main__': unittest.main() diff --git a/tests/pipelines/test_nli.py b/tests/pipelines/test_nli.py new file mode 100644 index 00000000..0c8da8b4 --- /dev/null +++ b/tests/pipelines/test_nli.py @@ -0,0 +1,52 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models import Model +from modelscope.models.nlp import SbertForNLI +from modelscope.pipelines import NLIPipeline, pipeline +from modelscope.preprocessors import NLIPreprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class NLITest(unittest.TestCase): + model_id = 'damo/nlp_structbert_nli_chinese-base' + sentence1 = '四川商务职业学院和四川财经职业学院哪个好?' + sentence2 = '四川商务职业学院商务管理在哪个校区?' + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_direct_file_download(self): + cache_path = snapshot_download(self.model_id) + tokenizer = NLIPreprocessor(cache_path) + model = SbertForNLI(cache_path, tokenizer=tokenizer) + pipeline1 = NLIPipeline(model, preprocessor=tokenizer) + pipeline2 = pipeline(Tasks.nli, model=model, preprocessor=tokenizer) + print(f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' + f'pipeline1:{pipeline1(input=(self.sentence1, self.sentence2))}') + print() + print( + f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' + f'pipeline1: {pipeline2(input=(self.sentence1, self.sentence2))}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub(self): + model = Model.from_pretrained(self.model_id) + tokenizer = NLIPreprocessor(model.model_dir) + pipeline_ins = pipeline( + task=Tasks.nli, model=model, preprocessor=tokenizer) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name(self): + pipeline_ins = pipeline(task=Tasks.nli, model=self.model_id) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_default_model(self): + pipeline_ins = pipeline(task=Tasks.nli) + print(pipeline_ins(input=(self.sentence1, self.sentence2))) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_ocr_detection.py b/tests/pipelines/test_ocr_detection.py new file mode 100644 index 00000000..986961b7 --- /dev/null +++ b/tests/pipelines/test_ocr_detection.py @@ -0,0 +1,37 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
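# --- Illustrative sketch (not part of the committed diff): the image-matting
# --- test above streams PyDataset.load(..., target='image') through a pipeline,
# --- relying on iteration yielding only the target column. A minimal stand-in
# --- for that iteration contract (TargetIterable is hypothetical, not a
# --- ModelScope class):
class TargetIterable:

    def __init__(self, rows, target=None):
        self.rows = rows
        self.target = target

    def __iter__(self):
        for item in self.rows:
            # With a target column set, yield just that field; otherwise the whole row.
            yield item[self.target] if self.target is not None else item


rows = [{'image': 'img0.png', 'label': 0}, {'image': 'img1.png', 'label': 1}]
assert list(TargetIterable(rows, target='image')) == ['img0.png', 'img1.png']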
+import os.path as osp +import shutil +import sys +import tempfile +import unittest +from typing import Any, Dict, List, Tuple, Union + +import cv2 +import numpy as np +import PIL + +from modelscope.pipelines import pipeline +from modelscope.pipelines.base import Pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class OCRDetectionTest(unittest.TestCase): + + def setUp(self) -> None: + self.model_id = 'damo/cv_resnet18_ocr-detection-line-level_damo' + self.test_image = 'data/test/images/ocr_detection.jpg' + + def pipeline_inference(self, pipeline: Pipeline, input_location: str): + result = pipeline(input_location) + print('ocr detection results: ') + print(result) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_modelhub_default_model(self): + ocr_detection = pipeline(Tasks.ocr_detection) + self.pipeline_inference(ocr_detection, self.test_image) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py index ed912b1c..f47ca008 100644 --- a/tests/pipelines/test_person_image_cartoon.py +++ b/tests/pipelines/test_person_image_cartoon.py @@ -42,7 +42,7 @@ class ImageCartoonTest(unittest.TestCase): img_cartoon = pipeline(Tasks.image_generation, model=self.model_id) self.pipeline_inference(img_cartoon, self.test_image) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub_default_model(self): img_cartoon = pipeline(Tasks.image_generation) self.pipeline_inference(img_cartoon, self.test_image) diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py index ac2ff4fb..df38593f 100644 --- a/tests/pipelines/test_sentence_similarity.py +++ b/tests/pipelines/test_sentence_similarity.py @@ -2,14 +2,12 @@ import shutil import unittest -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model from modelscope.models.nlp import SbertForSentenceSimilarity from modelscope.pipelines import SentenceSimilarityPipeline, pipeline from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.hub import get_model_cache_dir from modelscope.utils.test_utils import test_level @@ -18,14 +16,7 @@ class SentenceSimilarityTest(unittest.TestCase): sentence1 = '今天气温比昨天高么?' sentence2 = '今天湿度比昨天高么?' 
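# --- Illustrative sketch (not part of the committed diff): several pipeline
# --- tests in this change are re-gated with @unittest.skipUnless(test_level() >= N, ...).
# --- A condensed, self-contained example of how that level gating behaves
# --- (the fixed TEST_LEVEL value and test names here are hypothetical; the real
# --- helper reads the TEST_LEVEL environment variable):
import unittest

TEST_LEVEL = 2


class GatedTests(unittest.TestCase):

    @unittest.skipUnless(TEST_LEVEL >= 0, 'skip test in current test level')
    def test_cheap_case(self):
        self.assertTrue(True)

    @unittest.skipUnless(TEST_LEVEL >= 2, 'skip test in current test level')
    def test_expensive_download_case(self):
        self.assertTrue(True)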
- def setUp(self) -> None: - # switch to False if downloading everytime is not desired - purge_cache = True - if purge_cache: - shutil.rmtree( - get_model_cache_dir(self.model_id), ignore_errors=True) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run(self): cache_path = snapshot_download(self.model_id) tokenizer = SequenceClassificationPreprocessor(cache_path) @@ -41,7 +32,7 @@ class SentenceSimilarityTest(unittest.TestCase): f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' f'pipeline1: {pipeline2(input=(self.sentence1, self.sentence2))}') - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) tokenizer = SequenceClassificationPreprocessor(model.model_dir) @@ -57,7 +48,7 @@ class SentenceSimilarityTest(unittest.TestCase): task=Tasks.sentence_similarity, model=self.model_id) print(pipeline_ins(input=(self.sentence1, self.sentence2))) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): pipeline_ins = pipeline(task=Tasks.sentence_similarity) print(pipeline_ins(input=(self.sentence1, self.sentence2))) diff --git a/tests/pipelines/test_sentiment_classification.py b/tests/pipelines/test_sentiment_classification.py new file mode 100644 index 00000000..0ba22d5c --- /dev/null +++ b/tests/pipelines/test_sentiment_classification.py @@ -0,0 +1,58 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models import Model +from modelscope.models.nlp import SbertForSentimentClassification +from modelscope.pipelines import SentimentClassificationPipeline, pipeline +from modelscope.preprocessors import SentimentClassificationPreprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class SentimentClassificationTest(unittest.TestCase): + model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base' + sentence1 = '启动的时候很大声音,然后就会听到1.2秒的卡察的声音,类似齿轮摩擦的声音' + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_direct_file_download(self): + cache_path = snapshot_download(self.model_id) + tokenizer = SentimentClassificationPreprocessor(cache_path) + model = SbertForSentimentClassification( + cache_path, tokenizer=tokenizer) + pipeline1 = SentimentClassificationPipeline( + model, preprocessor=tokenizer) + pipeline2 = pipeline( + Tasks.sentiment_classification, + model=model, + preprocessor=tokenizer) + print(f'sentence1: {self.sentence1}\n' + f'pipeline1:{pipeline1(input=self.sentence1)}') + print() + print(f'sentence1: {self.sentence1}\n' + f'pipeline1: {pipeline2(input=self.sentence1)}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub(self): + model = Model.from_pretrained(self.model_id) + tokenizer = SentimentClassificationPreprocessor(model.model_dir) + pipeline_ins = pipeline( + task=Tasks.sentiment_classification, + model=model, + preprocessor=tokenizer) + print(pipeline_ins(input=self.sentence1)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def 
test_run_with_model_name(self): + pipeline_ins = pipeline( + task=Tasks.sentiment_classification, model=self.model_id) + print(pipeline_ins(input=self.sentence1)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_default_model(self): + pipeline_ins = pipeline(task=Tasks.sentiment_classification) + print(pipeline_ins(input=self.sentence1)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py index 8b5c9468..1b070fda 100644 --- a/tests/pipelines/test_speech_signal_process.py +++ b/tests/pipelines/test_speech_signal_process.py @@ -3,9 +3,10 @@ import shutil import unittest from modelscope.fileio import File +from modelscope.metainfo import Pipelines from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.hub import get_model_cache_dir +from modelscope.utils.test_utils import test_level NEAREND_MIC_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/nearend_mic.wav' FAREND_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/farend_speech.wav' @@ -30,14 +31,10 @@ class SpeechSignalProcessTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/speech_dfsmn_aec_psm_16k' - # switch to False if downloading everytime is not desired - purge_cache = True - if purge_cache: - shutil.rmtree( - get_model_cache_dir(self.model_id), ignore_errors=True) # A temporary hack to provide c++ lib. Download it first. download(AEC_LIB_URL, AEC_LIB_FILE) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run(self): download(NEAREND_MIC_URL, NEAREND_MIC_FILE) download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE) @@ -48,7 +45,7 @@ class SpeechSignalProcessTest(unittest.TestCase): aec = pipeline( Tasks.speech_signal_process, model=self.model_id, - pipeline_name=r'speech_dfsmn_aec_psm_16k') + pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k) aec(input, output_path='output.wav') diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index 01fdd29b..9e5f15b9 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -1,17 +1,12 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import shutil import unittest -import zipfile -from pathlib import Path -from modelscope.fileio import File from modelscope.models import Model -from modelscope.models.nlp import BertForSequenceClassification from modelscope.pipelines import SequenceClassificationPipeline, pipeline from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.pydatasets import PyDataset from modelscope.utils.constant import Hubs, Tasks -from modelscope.utils.hub import get_model_cache_dir from modelscope.utils.test_utils import test_level @@ -19,11 +14,6 @@ class SequenceClassificationTest(unittest.TestCase): def setUp(self) -> None: self.model_id = 'damo/bert-base-sst2' - # switch to False if downloading everytime is not desired - purge_cache = True - if purge_cache: - shutil.rmtree( - get_model_cache_dir(self.model_id), ignore_errors=True) def predict(self, pipeline_ins: SequenceClassificationPipeline): from easynlp.appzoo import load_dataset @@ -44,31 +34,6 @@ class SequenceClassificationTest(unittest.TestCase): break print(r) - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') - def test_run(self): - model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \ - '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip' - cache_path_str = r'.cache/easynlp/bert-base-sst2.zip' - cache_path = Path(cache_path_str) - - if not cache_path.exists(): - cache_path.parent.mkdir(parents=True, exist_ok=True) - cache_path.touch(exist_ok=True) - with cache_path.open('wb') as ofile: - ofile.write(File.read(model_url)) - - with zipfile.ZipFile(cache_path_str, 'r') as zipf: - zipf.extractall(cache_path.parent) - path = r'.cache/easynlp/' - model = BertForSequenceClassification(path) - preprocessor = SequenceClassificationPreprocessor( - path, first_sequence='sentence', second_sequence=None) - pipeline1 = SequenceClassificationPipeline(model, preprocessor) - self.predict(pipeline1) - pipeline2 = pipeline( - Tasks.text_classification, model=model, preprocessor=preprocessor) - print(pipeline2('Hello world!')) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) @@ -86,18 +51,26 @@ class SequenceClassificationTest(unittest.TestCase): task=Tasks.text_classification, model=self.model_id) result = text_classification( PyDataset.load( - 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) + 'glue', + subset_name='sst2', + split='train', + target='sentence', + hub=Hubs.huggingface)) self.printDataset(result) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): text_classification = pipeline(task=Tasks.text_classification) result = text_classification( PyDataset.load( - 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) + 'glue', + subset_name='sst2', + split='train', + target='sentence', + hub=Hubs.huggingface)) self.printDataset(result) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_dataset(self): model = Model.from_pretrained(self.model_id) preprocessor = SequenceClassificationPreprocessor( @@ -105,9 +78,21 @@ class SequenceClassificationTest(unittest.TestCase): text_classification = pipeline( Tasks.text_classification, model=model, preprocessor=preprocessor) # loaded from 
huggingface dataset - # TODO: rename parameter as dataset_name and subset_name dataset = PyDataset.load( - 'glue', name='sst2', target='sentence', hub=Hubs.huggingface) + 'glue', + subset_name='sst2', + split='train', + target='sentence', + hub=Hubs.huggingface) + result = text_classification(dataset) + self.printDataset(result) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_modelscope_dataset(self): + text_classification = pipeline(task=Tasks.text_classification) + # loaded from modelscope dataset + dataset = PyDataset.load( + 'squad', split='train', target='context', hub=Hubs.modelscope) result = text_classification(dataset) self.printDataset(result) diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py index fbdd165f..9df3b8bb 100644 --- a/tests/pipelines/test_text_generation.py +++ b/tests/pipelines/test_text_generation.py @@ -1,8 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import unittest -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model from modelscope.models.nlp import PalmForTextGeneration from modelscope.pipelines import TextGenerationPipeline, pipeline @@ -69,7 +68,7 @@ class TextGenerationTest(unittest.TestCase): pipeline_ins = pipeline(task=Tasks.text_generation, model=model_id) print(pipeline_ins(input)) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): pipeline_ins = pipeline(task=Tasks.text_generation) print(pipeline_ins(self.input_zh)) diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py index c9b988a1..e92047d6 100644 --- a/tests/pipelines/test_text_to_speech.py +++ b/tests/pipelines/test_text_to_speech.py @@ -1,7 +1,5 @@ -import time import unittest -import json import tensorflow as tf # NOTICE: Tensorflow 1.15 seems not so compatible with pytorch. 
# A segmentation fault may be raise by pytorch cpp library @@ -10,20 +8,20 @@ import tensorflow as tf import torch from scipy.io.wavfile import write -from modelscope.fileio import File -from modelscope.models import Model, build_model -from modelscope.models.audio.tts.am import SambertNetHifi16k -from modelscope.models.audio.tts.vocoder import AttrDict, Hifigan16k +from modelscope.metainfo import Pipelines, Preprocessors +from modelscope.models import Model from modelscope.pipelines import pipeline from modelscope.preprocessors import build_preprocessor -from modelscope.utils.constant import Fields, InputFields, Tasks +from modelscope.utils.constant import Fields from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level logger = get_logger() class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase): + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_pipeline(self): lang_type = 'pinyin' text = '明天天气怎么样' @@ -32,7 +30,7 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase): voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo' cfg_preprocessor = dict( - type='text_to_tacotron_symbols', + type=Preprocessors.text_to_tacotron_symbols, model_name=preprocessor_model_id, lang_type=lang_type) preprocessor = build_preprocessor(cfg_preprocessor, Fields.audio) @@ -45,7 +43,7 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase): self.assertTrue(voc is not None) sambert_tts = pipeline( - pipeline_name='tts-sambert-hifigan-16k', + pipeline_name=Pipelines.sambert_hifigan_16k_tts, config_file='', model=[am, voc], preprocessor=preprocessor) diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py index 4ec2bf29..d33e4bdb 100644 --- a/tests/pipelines/test_word_segmentation.py +++ b/tests/pipelines/test_word_segmentation.py @@ -2,14 +2,12 @@ import shutil import unittest -from maas_hub.snapshot_download import snapshot_download - +from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model -from modelscope.models.nlp import StructBertForTokenClassification +from modelscope.models.nlp import SbertForTokenClassification from modelscope.pipelines import WordSegmentationPipeline, pipeline from modelscope.preprocessors import TokenClassifcationPreprocessor from modelscope.utils.constant import Tasks -from modelscope.utils.hub import get_model_cache_dir from modelscope.utils.test_utils import test_level @@ -17,19 +15,11 @@ class WordSegmentationTest(unittest.TestCase): model_id = 'damo/nlp_structbert_word-segmentation_chinese-base' sentence = '今天天气不错,适合出去游玩' - def setUp(self) -> None: - # switch to False if downloading everytime is not desired - purge_cache = True - if purge_cache: - shutil.rmtree( - get_model_cache_dir(self.model_id), ignore_errors=True) - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_by_direct_model_download(self): cache_path = snapshot_download(self.model_id) tokenizer = TokenClassifcationPreprocessor(cache_path) - model = StructBertForTokenClassification( - cache_path, tokenizer=tokenizer) + model = SbertForTokenClassification(cache_path, tokenizer=tokenizer) pipeline1 = WordSegmentationPipeline(model, preprocessor=tokenizer) pipeline2 = pipeline( Tasks.word_segmentation, model=model, preprocessor=tokenizer) @@ -46,13 +36,13 @@ class WordSegmentationTest(unittest.TestCase): task=Tasks.word_segmentation, model=model, preprocessor=tokenizer) 
print(pipeline_ins(input=self.sentence)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): pipeline_ins = pipeline( task=Tasks.word_segmentation, model=self.model_id) print(pipeline_ins(input=self.sentence)) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): pipeline_ins = pipeline(task=Tasks.word_segmentation) print(pipeline_ins(input=self.sentence)) diff --git a/tests/preprocessors/test_image.py b/tests/preprocessors/test_image.py index 21ae780e..4d66c171 100644 --- a/tests/preprocessors/test_image.py +++ b/tests/preprocessors/test_image.py @@ -5,7 +5,6 @@ import unittest import PIL from modelscope.preprocessors import load_image -from modelscope.utils.logger import get_logger class ImagePreprocessorTest(unittest.TestCase): diff --git a/tests/preprocessors/test_text_to_speech.py b/tests/preprocessors/test_text_to_speech.py index 18b66987..fd2473fd 100644 --- a/tests/preprocessors/test_text_to_speech.py +++ b/tests/preprocessors/test_text_to_speech.py @@ -1,6 +1,7 @@ import shutil import unittest +from modelscope.metainfo import Preprocessors from modelscope.preprocessors import build_preprocessor from modelscope.utils.constant import Fields, InputFields from modelscope.utils.logger import get_logger @@ -14,7 +15,7 @@ class TtsPreprocessorTest(unittest.TestCase): lang_type = 'pinyin' text = '今天天气不错,我们去散步吧。' cfg = dict( - type='text_to_tacotron_symbols', + type=Preprocessors.text_to_tacotron_symbols, model_name='damo/speech_binary_tts_frontend_resource', lang_type=lang_type) preprocessor = build_preprocessor(cfg, Fields.audio) diff --git a/tests/pydatasets/test_py_dataset.py b/tests/pydatasets/test_py_dataset.py index 7accd814..e84f240a 100644 --- a/tests/pydatasets/test_py_dataset.py +++ b/tests/pydatasets/test_py_dataset.py @@ -2,42 +2,112 @@ import unittest import datasets as hfdata +from modelscope.models import Model +from modelscope.preprocessors import SequenceClassificationPreprocessor +from modelscope.preprocessors.base import Preprocessor from modelscope.pydatasets import PyDataset +from modelscope.utils.constant import Hubs +from modelscope.utils.test_utils import require_tf, require_torch, test_level -class PyDatasetTest(unittest.TestCase): +class ImgPreprocessor(Preprocessor): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.path_field = kwargs.pop('image_path', 'image_path') + self.width = kwargs.pop('width', 'width') + self.height = kwargs.pop('height', 'width') - def setUp(self): - # ds1 initialized from in memory json - self.json_data = { - 'dummy': [{ - 'a': i, - 'x': i * 10, - 'c': i * 100 - } for i in range(1, 11)] + def __call__(self, data): + import cv2 + image_path = data.get(self.path_field) + if not image_path: + return None + img = cv2.imread(image_path) + return { + 'image': + cv2.resize(img, + (data.get(self.height, 128), data.get(self.width, 128))) } - hfds1 = hfdata.Dataset.from_dict(self.json_data) - self.ds1 = PyDataset.from_hf_dataset(hfds1) - # ds2 initialized from hg hub - hfds2 = hfdata.load_dataset( - 'glue', 'mrpc', revision='2.0.0', split='train') - self.ds2 = PyDataset.from_hf_dataset(hfds2) - def tearDown(self): - pass +class PyDatasetTest(unittest.TestCase): + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + 
def test_ds_basic(self): + ms_ds_full = PyDataset.load('squad') + ms_ds_full_hf = hfdata.load_dataset('squad') + ms_ds_train = PyDataset.load('squad', split='train') + ms_ds_train_hf = hfdata.load_dataset('squad', split='train') + ms_image_train = PyDataset.from_hf_dataset( + hfdata.load_dataset('beans', split='train')) + self.assertEqual(ms_ds_full['train'][0], ms_ds_full_hf['train'][0]) + self.assertEqual(ms_ds_full['validation'][0], + ms_ds_full_hf['validation'][0]) + self.assertEqual(ms_ds_train[0], ms_ds_train_hf[0]) + print(next(iter(ms_ds_full['train']))) + print(next(iter(ms_ds_train))) + print(next(iter(ms_image_train))) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @require_torch + def test_to_torch_dataset_text(self): + model_id = 'damo/bert-base-sst2' + nlp_model = Model.from_pretrained(model_id) + preprocessor = SequenceClassificationPreprocessor( + nlp_model.model_dir, + first_sequence='context', + second_sequence=None) + ms_ds_train = PyDataset.load('squad', split='train') + pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor) + import torch + dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) + print(next(iter(dataloader))) - def test_to_hf_dataset(self): - hfds = self.ds1.to_hf_dataset() - hfds1 = hfdata.Dataset.from_dict(self.json_data) - self.assertEqual(hfds.data, hfds1.data) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @require_tf + def test_to_tf_dataset_text(self): + import tensorflow as tf + tf.compat.v1.enable_eager_execution() + model_id = 'damo/bert-base-sst2' + nlp_model = Model.from_pretrained(model_id) + preprocessor = SequenceClassificationPreprocessor( + nlp_model.model_dir, + first_sequence='context', + second_sequence=None) + ms_ds_train = PyDataset.load('squad', split='train') + tf_dataset = ms_ds_train.to_tf_dataset( + batch_size=5, + shuffle=True, + preprocessors=preprocessor, + drop_remainder=True) + print(next(iter(tf_dataset))) - # simple map function - hfds = hfds.map(lambda e: {'new_feature': e['dummy']['a']}) - self.assertEqual(len(hfds['new_feature']), 10) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @require_torch + def test_to_torch_dataset_img(self): + ms_image_train = PyDataset.from_hf_dataset( + hfdata.load_dataset('beans', split='train')) + pt_dataset = ms_image_train.to_torch_dataset( + preprocessors=ImgPreprocessor( + image_path='image_file_path', label='labels')) + import torch + dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) + print(next(iter(dataloader))) - hfds2 = self.ds2.to_hf_dataset() - self.assertTrue(hfds2[0]['sentence1'].startswith('Amrozi')) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @require_tf + def test_to_tf_dataset_img(self): + import tensorflow as tf + tf.compat.v1.enable_eager_execution() + ms_image_train = PyDataset.load('beans', split='train') + tf_dataset = ms_image_train.to_tf_dataset( + batch_size=5, + shuffle=True, + preprocessors=ImgPreprocessor(image_path='image_file_path'), + drop_remainder=True, + label_cols='labels') + print(next(iter(tf_dataset))) if __name__ == '__main__': diff --git a/tests/run.py b/tests/run.py index a904ba8e..38c5a897 100644 --- a/tests/run.py +++ b/tests/run.py @@ -61,7 +61,7 @@ if __name__ == '__main__': parser.add_argument( '--test_dir', default='tests', help='directory to be tested') parser.add_argument( - '--level', default=0, help='2 -- all, 1 -- p1, 0 -- p0') + '--level', default=0, type=int, 
help='2 -- all, 1 -- p1, 0 -- p0') args = parser.parse_args() set_test_level(args.level) logger.info(f'TEST LEVEL: {test_level()}') diff --git a/tests/utils/test_hub_operation.py b/tests/utils/test_hub_operation.py deleted file mode 100644 index f432a60c..00000000 --- a/tests/utils/test_hub_operation.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os.path as osp -import unittest - -from maas_hub.maas_api import MaasApi -from maas_hub.repository import Repository - -USER_NAME = 'maasadmin' -PASSWORD = '12345678' - - -class HubOperationTest(unittest.TestCase): - - def setUp(self): - self.api = MaasApi() - # note this is temporary before official account management is ready - self.api.login(USER_NAME, PASSWORD) - - @unittest.skip('to be used for local test only') - def test_model_repo_creation(self): - # change to proper model names before use - model_name = 'cv_unet_person-image-cartoon_compound-models' - model_chinese_name = '达摩卡通化模型' - model_org = 'damo' - try: - self.api.create_model( - owner=model_org, - name=model_name, - chinese_name=model_chinese_name, - visibility=5, # 1-private, 5-public - license='apache-2.0') - # TODO: support proper name duplication checking - except KeyError as ke: - if ke.args[0] == 'name': - print(f'model {self.model_name} already exists, ignore') - else: - raise - - # Note that this can be done via git operation once model repo - # has been created. Git-Op is the RECOMMENDED model upload approach - @unittest.skip('to be used for local test only') - def test_model_upload(self): - local_path = '/path/to/local/model/directory' - assert osp.exists(local_path), 'Local model directory not exist.' - repo = Repository(local_dir=local_path) - repo.push_to_hub(commit_message='Upload model files') - - -if __name__ == '__main__': - unittest.main()
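For reference, two patterns recur throughout the test changes above: gating individual cases with test_level(), and the reworked PyDataset.load keyword arguments (subset_name/split/target/hub). The sketch below is illustrative only and is not part of the committed change: the ExampleGatedTest class name is invented, while the imports, the skipUnless idiom, the PyDataset.load keywords, and the integer --level flag in tests/run.py are taken directly from the hunks above.

import unittest

from modelscope.pipelines import pipeline
from modelscope.pydatasets import PyDataset
from modelscope.utils.constant import Hubs, Tasks
from modelscope.utils.test_utils import test_level


class ExampleGatedTest(unittest.TestCase):

    # Runs only when the suite is invoked with a high enough level,
    # e.g. `python tests/run.py --test_dir tests --level 2`
    # (per the updated help text: 2 -- all, 1 -- p1, 0 -- p0).
    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_classification_on_hf_dataset(self):
        # PyDataset.load now takes subset_name/split explicitly
        # instead of the old `name` parameter.
        dataset = PyDataset.load(
            'glue',
            subset_name='sst2',
            split='train',
            target='sentence',
            hub=Hubs.huggingface)
        text_classification = pipeline(task=Tasks.text_classification)
        print(text_classification(dataset))


if __name__ == '__main__':
    unittest.main()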