From 6991620f59c20ac386a815eb6d842adde3cedd07 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Fri, 24 Jun 2022 16:43:32 +0800 Subject: [PATCH 1/5] [to #42698276]fix: git repo operations supports, gitlab token certification support. --- modelscope/hub/api.py | 38 ++-- modelscope/hub/errors.py | 4 + modelscope/hub/git.py | 225 +++++++++++++++-------- modelscope/hub/repository.py | 216 +++++++--------------- modelscope/hub/utils/_subprocess.py | 40 ---- tests/hub/test_hub_operation.py | 94 ++-------- tests/hub/test_hub_private_repository.py | 76 ++++++++ tests/hub/test_hub_repository.py | 107 +++++++++++ 8 files changed, 444 insertions(+), 356 deletions(-) delete mode 100644 modelscope/hub/utils/_subprocess.py create mode 100644 tests/hub/test_hub_private_repository.py create mode 100644 tests/hub/test_hub_repository.py diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 104eafbd..f4f31280 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -9,9 +9,10 @@ from typing import List, Optional, Tuple, Union import requests from modelscope.utils.logger import get_logger -from .constants import LOGGER_NAME +from .constants import MODELSCOPE_URL_SCHEME from .errors import NotExistError, is_ok, raise_on_error -from .utils.utils import get_endpoint, model_id_to_group_owner_name +from .utils.utils import (get_endpoint, get_gitlab_domain, + model_id_to_group_owner_name) logger = get_logger() @@ -40,9 +41,6 @@ class HubApi: You only have to login once within 30 days. - - TODO: handle cookies expire - """ path = f'{self.endpoint}/api/v1/login' r = requests.post( @@ -94,14 +92,14 @@ class HubApi: 'Path': owner_or_group, 'Name': name, 'ChineseName': chinese_name, - 'Visibility': visibility, + 'Visibility': visibility, # server check 'License': license }, cookies=cookies) r.raise_for_status() raise_on_error(r.json()) - d = r.json() - return d['Data']['Name'] + model_repo_url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{model_id}' + return model_repo_url def delete_model(self, model_id): """_summary_ @@ -209,25 +207,37 @@ class HubApi: class ModelScopeConfig: path_credential = expanduser('~/.modelscope/credentials') - os.makedirs(path_credential, exist_ok=True) + + @classmethod + def make_sure_credential_path_exist(cls): + os.makedirs(cls.path_credential, exist_ok=True) @classmethod def save_cookies(cls, cookies: CookieJar): + cls.make_sure_credential_path_exist() with open(os.path.join(cls.path_credential, 'cookies'), 'wb+') as f: pickle.dump(cookies, f) @classmethod def get_cookies(cls): try: - with open(os.path.join(cls.path_credential, 'cookies'), 'rb') as f: - return pickle.load(f) + cookies_path = os.path.join(cls.path_credential, 'cookies') + with open(cookies_path, 'rb') as f: + cookies = pickle.load(f) + for cookie in cookies: + if cookie.is_expired(): + logger.warn('Auth is expored, please re-login') + return None + return cookies except FileNotFoundError: - logger.warn("Auth token does not exist, you'll get authentication \ - error when downloading private model files. Please login first" - ) + logger.warn( + "Auth token does not exist, you'll get authentication error when downloading \ + private model files. Please login first") + return None @classmethod def save_token(cls, token: str): + cls.make_sure_credential_path_exist() with open(os.path.join(cls.path_credential, 'token'), 'w+') as f: f.write(token) diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py index 13ea709f..4b39d6e3 100644 --- a/modelscope/hub/errors.py +++ b/modelscope/hub/errors.py @@ -6,6 +6,10 @@ class RequestError(Exception): pass +class GitError(Exception): + pass + + def is_ok(rsp): """ Check the request is ok diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py index 5f079105..37f61814 100644 --- a/modelscope/hub/git.py +++ b/modelscope/hub/git.py @@ -1,82 +1,161 @@ -from threading import local -from tkinter.messagebox import NO -from typing import Union +import subprocess +from typing import List +from xmlrpc.client import Boolean from modelscope.utils.logger import get_logger -from .constants import LOGGER_NAME -from .utils._subprocess import run_subprocess +from .errors import GitError -logger = get_logger +logger = get_logger() -def git_clone( - local_dir: str, - repo_url: str, -): - # TODO: use "git clone" or "git lfs clone" according to git version - # TODO: print stderr when subprocess fails - run_subprocess( - f'git clone {repo_url}'.split(), - local_dir, - True, - ) +class Singleton(type): + _instances = {} + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(Singleton, + cls).__call__(*args, **kwargs) + return cls._instances[cls] -def git_checkout( - local_dir: str, - revsion: str, -): - run_subprocess(f'git checkout {revsion}'.split(), local_dir) - -def git_add(local_dir: str, ): - run_subprocess( - 'git add .'.split(), - local_dir, - True, - ) - - -def git_commit(local_dir: str, commit_message: str): - run_subprocess( - 'git commit -v -m'.split() + [commit_message], - local_dir, - True, - ) - - -def git_push(local_dir: str, branch: str): - # check current branch - cur_branch = git_current_branch(local_dir) - if cur_branch != branch: - logger.error( - "You're trying to push to a different branch, please double check") - return - - run_subprocess( - f'git push origin {branch}'.split(), - local_dir, - True, - ) - - -def git_current_branch(local_dir: str) -> Union[str, None]: - """ - Get current branch name - - Args: - local_dir(`str`): local model repo directory - - Returns - branch name you're currently on +class GitCommandWrapper(metaclass=Singleton): + """Some git operation wrapper """ - try: - process = run_subprocess( - 'git rev-parse --abbrev-ref HEAD'.split(), - local_dir, - True, - ) - - return str(process.stdout).strip() - except Exception as e: - raise e + default_git_path = 'git' # The default git command line + + def __init__(self, path: str = None): + self.git_path = path or self.default_git_path + + def _run_git_command(self, *args) -> subprocess.CompletedProcess: + """Run git command, if command return 0, return subprocess.response + otherwise raise GitError, message is stdout and stderr. + + Raises: + GitError: Exception with stdout and stderr. + + Returns: + subprocess.CompletedProcess: the command response + """ + logger.info(' '.join(args)) + response = subprocess.run( + [self.git_path, *args], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) # compatible for python3.6 + try: + response.check_returncode() + return response + except subprocess.CalledProcessError as error: + raise GitError( + 'stdout: %s, stderr: %s' % + (response.stdout.decode('utf8'), error.stderr.decode('utf8'))) + + def _add_token(self, token: str, url: str): + if token: + if '//oauth2' not in url: + url = url.replace('//', '//oauth2:%s@' % token) + return url + + def remove_token_from_url(self, url: str): + if url and '//oauth2' in url: + start_index = url.find('oauth2') + end_index = url.find('@') + url = url[:start_index] + url[end_index + 1:] + return url + + def is_lfs_installed(self): + cmd = ['lfs', 'env'] + try: + self._run_git_command(*cmd) + return True + except GitError: + return False + + def clone(self, + repo_base_dir: str, + token: str, + url: str, + repo_name: str, + branch: str = None): + """ git clone command wrapper. + For public project, token can None, private repo, there must token. + + Args: + repo_base_dir (str): The local base dir, the repository will be clone to local_dir/repo_name + token (str): The git token, must be provided for private project. + url (str): The remote url + repo_name (str): The local repository path name. + branch (str, optional): _description_. Defaults to None. + """ + url = self._add_token(token, url) + if branch: + clone_args = '-C %s clone %s %s --branch %s' % (repo_base_dir, url, + repo_name, branch) + else: + clone_args = '-C %s clone %s' % (repo_base_dir, url) + logger.debug(clone_args) + clone_args = clone_args.split(' ') + response = self._run_git_command(*clone_args) + logger.info(response.stdout.decode('utf8')) + return response + + def add(self, + repo_dir: str, + files: List[str] = list(), + all_files: bool = False): + if all_files: + add_args = '-C %s add -A' % repo_dir + elif len(files) > 0: + files_str = ' '.join(files) + add_args = '-C %s add %s' % (repo_dir, files_str) + add_args = add_args.split(' ') + rsp = self._run_git_command(*add_args) + logger.info(rsp.stdout.decode('utf8')) + return rsp + + def commit(self, repo_dir: str, message: str): + """Run git commit command + + Args: + message (str): commit message. + """ + commit_args = ['-C', '%s' % repo_dir, 'commit', '-m', "'%s'" % message] + rsp = self._run_git_command(*commit_args) + logger.info(rsp.stdout.decode('utf8')) + return rsp + + def checkout(self, repo_dir: str, revision: str): + cmds = ['-C', '%s' % repo_dir, 'checkout', '%s' % revision] + return self._run_git_command(*cmds) + + def new_branch(self, repo_dir: str, revision: str): + cmds = ['-C', '%s' % repo_dir, 'checkout', '-b', revision] + return self._run_git_command(*cmds) + + def pull(self, repo_dir: str): + cmds = ['-C', repo_dir, 'pull'] + return self._run_git_command(*cmds) + + def push(self, + repo_dir: str, + token: str, + url: str, + local_branch: str, + remote_branch: str, + force: bool = False): + url = self._add_token(token, url) + + push_args = '-C %s push %s %s:%s' % (repo_dir, url, local_branch, + remote_branch) + if force: + push_args += ' -f' + push_args = push_args.split(' ') + rsp = self._run_git_command(*push_args) + logger.info(rsp.stdout.decode('utf8')) + return rsp + + def get_repo_remote_url(self, repo_dir: str): + cmd_args = '-C %s config --get remote.origin.url' % repo_dir + cmd_args = cmd_args.split(' ') + rsp = self._run_git_command(*cmd_args) + url = rsp.stdout.decode('utf8') + return url.strip() diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py index 6367f903..d9322144 100644 --- a/modelscope/hub/repository.py +++ b/modelscope/hub/repository.py @@ -1,173 +1,97 @@ import os -import subprocess -from pathlib import Path -from typing import Optional, Union +from typing import List, Optional +from modelscope.hub.errors import GitError from modelscope.utils.logger import get_logger from .api import ModelScopeConfig from .constants import MODELSCOPE_URL_SCHEME -from .git import git_add, git_checkout, git_clone, git_commit, git_push -from .utils._subprocess import run_subprocess +from .git import GitCommandWrapper from .utils.utils import get_gitlab_domain logger = get_logger() class Repository: + """Representation local model git repository. + """ def __init__( self, - local_dir: str, - clone_from: Optional[str] = None, - auth_token: Optional[str] = None, - private: Optional[bool] = False, + model_dir: str, + clone_from: str, revision: Optional[str] = 'master', + auth_token: Optional[str] = None, + git_path: Optional[str] = None, ): """ Instantiate a Repository object by cloning the remote ModelScopeHub repo Args: - local_dir(`str`): - local directory to store the model files - clone_from(`Optional[str] = None`): + model_dir(`str`): + The model root directory. + clone_from: model id in ModelScope-hub from which git clone - You should ignore this parameter when `local_dir` is already a git repo - auth_token(`Optional[str]`): - token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter - as the token is already saved when you login the first time - private(`Optional[bool]`): - whether the model is private, default to False revision(`Optional[str]`): revision of the model you want to clone from. Can be any of a branch, tag or commit hash + auth_token(`Optional[str]`): + token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter + as the token is already saved when you login the first time, if None, we will use saved token. + git_path:(`Optional[str]`): + The git command line path, if None, we use 'git' """ - logger.info('Instantiating Repository object...') - - # Create local directory if not exist - os.makedirs(local_dir, exist_ok=True) - self.local_dir = os.path.join(os.getcwd(), local_dir) - - self.private = private - - # Check git and git-lfs installation - self.check_git_versions() - - # Retrieve auth token - if not private and isinstance(auth_token, str): - logger.warning( - 'cloning a public repo with a token, which will be ignored') - self.token = None + self.model_dir = model_dir + self.model_base_dir = os.path.dirname(model_dir) + self.model_repo_name = os.path.basename(model_dir) + if auth_token: + self.auth_token = auth_token else: - if isinstance(auth_token, str): - self.token = auth_token - else: - self.token = ModelScopeConfig.get_token() - - if self.token is None: - raise EnvironmentError( - 'Token does not exist, the clone will fail for private repo.' - 'Please login first.') - - # git clone - if clone_from is not None: - self.model_id = clone_from - logger.info('cloning model repo to %s ...', self.local_dir) - git_clone(self.local_dir, self.get_repo_url()) - else: - if is_git_repo(self.local_dir): - logger.debug('[Repository] is a valid git repo') - else: - raise ValueError( - 'If not specifying `clone_from`, you need to pass Repository a' - ' valid git clone.') - - # git checkout - if isinstance(revision, str) and revision != 'master': - git_checkout(revision) - - def push_to_hub(self, - commit_message: str, - revision: Optional[str] = 'master'): - """ - Push changes changes to hub - - Args: - commit_message(`str`): - commit message describing the changes, it's mandatory - revision(`Optional[str]`): - remote branch you want to push to, default to `master` - - - The function complains when local and remote branch are different, please be careful - - - """ - git_add(self.local_dir) - git_commit(self.local_dir, commit_message) - - logger.info('Pushing changes to repo...') - git_push(self.local_dir, revision) - - # TODO: if git push fails, how to retry? - - def check_git_versions(self): - """ - Checks that `git` and `git-lfs` can be run. - - Raises: - `EnvironmentError`: if `git` or `git-lfs` are not installed. - """ - try: - git_version = run_subprocess('git --version'.split(), - self.local_dir).stdout.strip() - except FileNotFoundError: - raise EnvironmentError( - 'Looks like you do not have git installed, please install.') + self.auth_token = ModelScopeConfig.get_token() + + git_wrapper = GitCommandWrapper() + if not git_wrapper.is_lfs_installed(): + logger.error('git lfs is not installed, please install.') + + self.git_wrapper = GitCommandWrapper(git_path) + os.makedirs(self.model_dir, exist_ok=True) + url = self._get_model_id_url(clone_from) + if os.listdir(self.model_dir): # directory not empty. + remote_url = self._get_remote_url() + remote_url = self.git_wrapper.remove_token_from_url(remote_url) + if remote_url and remote_url == url: # need not clone again + return + self.git_wrapper.clone(self.model_base_dir, self.auth_token, url, + self.model_repo_name, revision) + + def _get_model_id_url(self, model_id): + url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{model_id}' + return url + def _get_remote_url(self): try: - lfs_version = run_subprocess('git-lfs --version'.split(), - self.local_dir).stdout.strip() - except FileNotFoundError: - raise EnvironmentError( - 'Looks like you do not have git-lfs installed, please install.' - ' You can install from https://git-lfs.github.com/.' - ' Then run `git lfs install` (you only have to do this once).') - logger.info(git_version + '\n' + lfs_version) - - def get_repo_url(self) -> str: - """ - Get repo url to clone, according whether the repo is private or not + remote = self.git_wrapper.get_repo_remote_url(self.model_dir) + except GitError: + remote = None + return remote + + def push(self, + commit_message: str, + files: List[str] = list(), + all_files: bool = False, + branch: Optional[str] = 'master', + force: bool = False): + """Push local to remote, this method will do. + git add + git commit + git push + Args: + commit_message (str): commit message + revision (Optional[str], optional): which branch to push. Defaults to 'master'. """ - url = None - - if self.private: - url = f'{MODELSCOPE_URL_SCHEME}oauth2:{self.token}@{get_gitlab_domain()}/{self.model_id}' - else: - url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{self.model_id}' - - if not url: - raise ValueError( - 'Empty repo url, please check clone_from parameter') - - logger.debug('url to clone: %s', str(url)) - - return url - - -def is_git_repo(folder: Union[str, Path]) -> bool: - """ - Check if the folder is the root or part of a git repository - - Args: - folder (`str`): - The folder in which to run the command. - - Returns: - `bool`: `True` if the repository is part of a repository, `False` - otherwise. - """ - folder_exists = os.path.exists(os.path.join(folder, '.git')) - git_branch = subprocess.run( - 'git branch'.split(), - cwd=folder, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - return folder_exists and git_branch.returncode == 0 + url = self.git_wrapper.get_repo_remote_url(self.model_dir) + self.git_wrapper.add(self.model_dir, files, all_files) + self.git_wrapper.commit(self.model_dir, commit_message) + self.git_wrapper.push( + repo_dir=self.model_dir, + token=self.auth_token, + url=url, + local_branch=branch, + remote_branch=branch) diff --git a/modelscope/hub/utils/_subprocess.py b/modelscope/hub/utils/_subprocess.py deleted file mode 100644 index 77e9fc48..00000000 --- a/modelscope/hub/utils/_subprocess.py +++ /dev/null @@ -1,40 +0,0 @@ -import subprocess -from typing import List - - -def run_subprocess(command: List[str], - folder: str, - check=True, - **kwargs) -> subprocess.CompletedProcess: - """ - Method to run subprocesses. Calling this will capture the `stderr` and `stdout`, - please call `subprocess.run` manually in case you would like for them not to - be captured. - - Args: - command (`List[str]`): - The command to execute as a list of strings. - folder (`str`): - The folder in which to run the command. - check (`bool`, *optional*, defaults to `True`): - Setting `check` to `True` will raise a `subprocess.CalledProcessError` - when the subprocess has a non-zero exit code. - kwargs (`Dict[str]`): - Keyword arguments to be passed to the `subprocess.run` underlying command. - - Returns: - `subprocess.CompletedProcess`: The completed process. - """ - if isinstance(command, str): - raise ValueError( - '`run_subprocess` should be called with a list of strings.') - - return subprocess.run( - command, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - check=check, - encoding='utf-8', - cwd=folder, - **kwargs, - ) diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py index d44cd7c1..e0adc013 100644 --- a/tests/hub/test_hub_operation.py +++ b/tests/hub/test_hub_operation.py @@ -1,14 +1,13 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os -import subprocess import tempfile import unittest import uuid -from modelscope.hub.api import HubApi, ModelScopeConfig +from modelscope.hub.api import HubApi from modelscope.hub.file_download import model_file_download +from modelscope.hub.repository import Repository from modelscope.hub.snapshot_download import snapshot_download -from modelscope.hub.utils.utils import get_gitlab_domain USER_NAME = 'maasadmin' PASSWORD = '12345678' @@ -17,40 +16,7 @@ model_chinese_name = '达摩卡通化模型' model_org = 'unittest' DEFAULT_GIT_PATH = 'git' - -class GitError(Exception): - pass - - -# TODO make thest git operation to git library after merge code. -def run_git_command(git_path, *args) -> subprocess.CompletedProcess: - response = subprocess.run([git_path, *args], capture_output=True) - try: - response.check_returncode() - return response.stdout.decode('utf8') - except subprocess.CalledProcessError as error: - raise GitError(error.stderr.decode('utf8')) - - -# for public project, token can None, private repo, there must token. -def clone(local_dir: str, token: str, url: str): - url = url.replace('//', '//oauth2:%s@' % token) - clone_args = '-C %s clone %s' % (local_dir, url) - clone_args = clone_args.split(' ') - stdout = run_git_command(DEFAULT_GIT_PATH, *clone_args) - print('stdout: %s' % stdout) - - -def push(local_dir: str, token: str, url: str): - url = url.replace('//', '//oauth2:%s@' % token) - push_args = '-C %s push %s' % (local_dir, url) - push_args = push_args.split(' ') - stdout = run_git_command(DEFAULT_GIT_PATH, *push_args) - print('stdout: %s' % stdout) - - -sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx' -download_model_file_name = 'mnist-12.onnx' +download_model_file_name = 'test.bin' class HubOperationTest(unittest.TestCase): @@ -67,6 +33,13 @@ class HubOperationTest(unittest.TestCase): chinese_name=model_chinese_name, visibility=5, # 1-private, 5-public license='apache-2.0') + temporary_dir = tempfile.mkdtemp() + self.model_dir = os.path.join(temporary_dir, self.model_name) + repo = Repository(self.model_dir, clone_from=self.model_id) + os.chdir(self.model_dir) + os.system("echo 'testtest'>%s" + % os.path.join(self.model_dir, 'test.bin')) + repo.push('add model', all_files=True) def tearDown(self): os.chdir(self.old_cwd) @@ -83,43 +56,10 @@ class HubOperationTest(unittest.TestCase): else: raise - # Note that this can be done via git operation once model repo - # has been created. Git-Op is the RECOMMENDED model upload approach - def test_model_upload(self): - url = f'http://{get_gitlab_domain()}/{self.model_id}' - print(url) - temporary_dir = tempfile.mkdtemp() - os.chdir(temporary_dir) - cmd_args = 'clone %s' % url - cmd_args = cmd_args.split(' ') - out = run_git_command('git', *cmd_args) - print(out) - repo_dir = os.path.join(temporary_dir, self.model_name) - os.chdir(repo_dir) - os.system('touch file1') - os.system('git add file1') - os.system("git commit -m 'Test'") - token = ModelScopeConfig.get_token() - push(repo_dir, token, url) - def test_download_single_file(self): - url = f'http://{get_gitlab_domain()}/{self.model_id}' - print(url) - temporary_dir = tempfile.mkdtemp() - os.chdir(temporary_dir) - os.system('git clone %s' % url) - repo_dir = os.path.join(temporary_dir, self.model_name) - os.chdir(repo_dir) - os.system('wget %s' % sample_model_url) - os.system('git add .') - os.system("git commit -m 'Add file'") - token = ModelScopeConfig.get_token() - push(repo_dir, token, url) - assert os.path.exists( - os.path.join(temporary_dir, self.model_name, - download_model_file_name)) downloaded_file = model_file_download( model_id=self.model_id, file_path=download_model_file_name) + assert os.path.exists(downloaded_file) mdtime1 = os.path.getmtime(downloaded_file) # download again downloaded_file = model_file_download( @@ -128,18 +68,6 @@ class HubOperationTest(unittest.TestCase): assert mdtime1 == mdtime2 def test_snapshot_download(self): - url = f'http://{get_gitlab_domain()}/{self.model_id}' - print(url) - temporary_dir = tempfile.mkdtemp() - os.chdir(temporary_dir) - os.system('git clone %s' % url) - repo_dir = os.path.join(temporary_dir, self.model_name) - os.chdir(repo_dir) - os.system('wget %s' % sample_model_url) - os.system('git add .') - os.system("git commit -m 'Add file'") - token = ModelScopeConfig.get_token() - push(repo_dir, token, url) snapshot_path = snapshot_download(model_id=self.model_id) downloaded_file_path = os.path.join(snapshot_path, download_model_file_name) diff --git a/tests/hub/test_hub_private_repository.py b/tests/hub/test_hub_private_repository.py new file mode 100644 index 00000000..b6e3536c --- /dev/null +++ b/tests/hub/test_hub_private_repository.py @@ -0,0 +1,76 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import tempfile +import unittest +import uuid + +from modelscope.hub.api import HubApi +from modelscope.hub.errors import GitError +from modelscope.hub.repository import Repository + +USER_NAME = 'maasadmin' +PASSWORD = '12345678' + +USER_NAME2 = 'sdkdev' +model_chinese_name = '达摩卡通化模型' +model_org = 'unittest' +DEFAULT_GIT_PATH = 'git' + +sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx' +download_model_file_name = 'mnist-12.onnx' + + +class HubPrivateRepositoryTest(unittest.TestCase): + + def setUp(self): + self.old_cwd = os.getcwd() + self.api = HubApi() + # note this is temporary before official account management is ready + self.token, _ = self.api.login(USER_NAME, PASSWORD) + self.model_name = uuid.uuid4().hex + self.model_id = '%s/%s' % (model_org, self.model_name) + self.api.create_model( + model_id=self.model_id, + chinese_name=model_chinese_name, + visibility=1, # 1-private, 5-public + license='apache-2.0') + + def tearDown(self): + self.api.login(USER_NAME, PASSWORD) + os.chdir(self.old_cwd) + self.api.delete_model(model_id=self.model_id) + + def test_clone_private_repo_no_permission(self): + token, _ = self.api.login(USER_NAME2, PASSWORD) + temporary_dir = tempfile.mkdtemp() + local_dir = os.path.join(temporary_dir, self.model_name) + with self.assertRaises(GitError) as cm: + Repository(local_dir, clone_from=self.model_id, auth_token=token) + + print(cm.exception) + assert not os.path.exists(os.path.join(local_dir, 'README.md')) + + def test_clone_private_repo_has_permission(self): + temporary_dir = tempfile.mkdtemp() + local_dir = os.path.join(temporary_dir, self.model_name) + repo1 = Repository( + local_dir, clone_from=self.model_id, auth_token=self.token) + print(repo1.model_dir) + assert os.path.exists(os.path.join(local_dir, 'README.md')) + + def test_initlize_repo_multiple_times(self): + temporary_dir = tempfile.mkdtemp() + local_dir = os.path.join(temporary_dir, self.model_name) + repo1 = Repository( + local_dir, clone_from=self.model_id, auth_token=self.token) + print(repo1.model_dir) + assert os.path.exists(os.path.join(local_dir, 'README.md')) + repo2 = Repository( + local_dir, clone_from=self.model_id, + auth_token=self.token) # skip clone + print(repo2.model_dir) + assert repo1.model_dir == repo2.model_dir + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/hub/test_hub_repository.py b/tests/hub/test_hub_repository.py new file mode 100644 index 00000000..7b1cc751 --- /dev/null +++ b/tests/hub/test_hub_repository.py @@ -0,0 +1,107 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import tempfile +import time +import unittest +import uuid +from os.path import expanduser + +from requests import delete + +from modelscope.hub.api import HubApi +from modelscope.hub.errors import NotExistError +from modelscope.hub.file_download import model_file_download +from modelscope.hub.repository import Repository +from modelscope.utils.logger import get_logger + +logger = get_logger() +logger.setLevel('DEBUG') +USER_NAME = 'maasadmin' +PASSWORD = '12345678' + +model_chinese_name = '达摩卡通化模型' +model_org = 'unittest' +DEFAULT_GIT_PATH = 'git' + +download_model_file_name = 'mnist-12.onnx' + + +def delete_credential(): + path_credential = expanduser('~/.modelscope/credentials') + shutil.rmtree(path_credential) + + +def delete_stored_git_credential(user): + credential_path = expanduser('~/.git-credentials') + if os.path.exists(credential_path): + with open(credential_path, 'r+') as f: + lines = f.readlines() + for line in lines: + if user in line: + lines.remove(line) + f.seek(0) + f.write(''.join(lines)) + f.truncate() + + +class HubRepositoryTest(unittest.TestCase): + + def setUp(self): + self.api = HubApi() + # note this is temporary before official account management is ready + self.api.login(USER_NAME, PASSWORD) + self.model_name = uuid.uuid4().hex + self.model_id = '%s/%s' % (model_org, self.model_name) + self.api.create_model( + model_id=self.model_id, + chinese_name=model_chinese_name, + visibility=5, # 1-private, 5-public + license='apache-2.0') + temporary_dir = tempfile.mkdtemp() + self.model_dir = os.path.join(temporary_dir, self.model_name) + + def tearDown(self): + self.api.delete_model(model_id=self.model_id) + + def test_clone_repo(self): + Repository(self.model_dir, clone_from=self.model_id) + assert os.path.exists(os.path.join(self.model_dir, 'README.md')) + + def test_clone_public_model_without_token(self): + delete_credential() + delete_stored_git_credential(USER_NAME) + Repository(self.model_dir, clone_from=self.model_id) + assert os.path.exists(os.path.join(self.model_dir, 'README.md')) + self.api.login(USER_NAME, PASSWORD) # re-login for delete + + def test_push_all(self): + repo = Repository(self.model_dir, clone_from=self.model_id) + assert os.path.exists(os.path.join(self.model_dir, 'README.md')) + os.chdir(self.model_dir) + os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py')) + os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py')) + repo.push('test', all_files=True) + add1 = model_file_download(self.model_id, 'add1.py') + assert os.path.exists(add1) + add2 = model_file_download(self.model_id, 'add2.py') + assert os.path.exists(add2) + + def test_push_files(self): + repo = Repository(self.model_dir, clone_from=self.model_id) + assert os.path.exists(os.path.join(self.model_dir, 'README.md')) + os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py')) + os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py')) + os.system("echo '333'>%s" % os.path.join(self.model_dir, 'add3.py')) + repo.push('test', files=['add1.py', 'add2.py'], all_files=False) + add1 = model_file_download(self.model_id, 'add1.py') + assert os.path.exists(add1) + add2 = model_file_download(self.model_id, 'add2.py') + assert os.path.exists(add2) + with self.assertRaises(NotExistError) as cm: + model_file_download(self.model_id, 'add3.py') + print(cm.exception) + + +if __name__ == '__main__': + unittest.main() From 0acbfe166314749e34f84402184d1827880ae008 Mon Sep 17 00:00:00 2001 From: "yingda.chen" Date: Fri, 24 Jun 2022 23:54:10 +0800 Subject: [PATCH 2/5] [to #42322933] interface refine with doc Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9159678 --- modelscope/hub/api.py | 1 - modelscope/hub/constants.py | 13 +++++++++++++ modelscope/models/base.py | 13 +++++++++---- modelscope/utils/hub.py | 5 +++-- tests/hub/test_hub_examples.py | 8 +++----- tests/hub/test_hub_operation.py | 7 ++++--- 6 files changed, 32 insertions(+), 15 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index f4f31280..d102219b 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -1,4 +1,3 @@ -import imp import os import pickle import subprocess diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py index a38f9afb..08f7c31d 100644 --- a/modelscope/hub/constants.py +++ b/modelscope/hub/constants.py @@ -6,3 +6,16 @@ DEFAULT_MODELSCOPE_GROUP = 'damo' MODEL_ID_SEPARATOR = '/' LOGGER_NAME = 'ModelScopeHub' + + +class Licenses(object): + APACHE_V2 = 'Apache License 2.0' + GPL = 'GPL' + LGPL = 'LGPL' + MIT = 'MIT' + + +class ModelVisibility(object): + PRIVATE = 1 + INTERNAL = 3 + PUBLIC = 5 diff --git a/modelscope/models/base.py b/modelscope/models/base.py index cb6d2b0e..40929a21 100644 --- a/modelscope/models/base.py +++ b/modelscope/models/base.py @@ -2,7 +2,7 @@ import os.path as osp from abc import ABC, abstractmethod -from typing import Dict, Union +from typing import Dict, Optional, Union from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.builder import build_model @@ -42,13 +42,18 @@ class Model(ABC): return input @classmethod - def from_pretrained(cls, model_name_or_path: str, *model_args, **kwargs): - """ Instantiate a model from local directory or remote model repo + def from_pretrained(cls, + model_name_or_path: str, + revision: Optional[str] = 'master', + *model_args, + **kwargs): + """ Instantiate a model from local directory or remote model repo. Note + that when loading from remote, the model revision can be specified. """ if osp.exists(model_name_or_path): local_model_dir = model_name_or_path else: - local_model_dir = snapshot_download(model_name_or_path) + local_model_dir = snapshot_download(model_name_or_path, revision) logger.info(f'initialize model from {local_model_dir}') cfg = Config.from_file( osp.join(local_model_dir, ModelFile.CONFIGURATION)) diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py index 868e751b..c427b7a3 100644 --- a/modelscope/utils/hub.py +++ b/modelscope/utils/hub.py @@ -6,6 +6,7 @@ from typing import List, Optional, Union from requests import HTTPError +from modelscope.hub.constants import Licenses, ModelVisibility from modelscope.hub.file_download import model_file_download from modelscope.hub.snapshot_download import snapshot_download from modelscope.utils.config import Config @@ -16,8 +17,8 @@ def create_model_if_not_exist( api, model_id: str, chinese_name: str, - visibility: Optional[int] = 5, # 1-private, 5-public - license: Optional[str] = 'apache-2.0', + visibility: Optional[int] = ModelVisibility.PUBLIC, + license: Optional[str] = Licenses.APACHE_V2, revision: Optional[str] = 'master'): exists = True try: diff --git a/tests/hub/test_hub_examples.py b/tests/hub/test_hub_examples.py index b63445af..b21cae51 100644 --- a/tests/hub/test_hub_examples.py +++ b/tests/hub/test_hub_examples.py @@ -1,9 +1,9 @@ import unittest -from maas_hub.maas_api import MaasApi - +from modelscope.hub.api import HubApi from modelscope.utils.hub import create_model_if_not_exist +# note this is temporary before official account management is ready USER_NAME = 'maasadmin' PASSWORD = '12345678' @@ -11,8 +11,7 @@ PASSWORD = '12345678' class HubExampleTest(unittest.TestCase): def setUp(self): - self.api = MaasApi() - # note this is temporary before official account management is ready + self.api = HubApi() self.api.login(USER_NAME, PASSWORD) @unittest.skip('to be used for local test only') @@ -22,7 +21,6 @@ class HubExampleTest(unittest.TestCase): model_chinese_name = '达摩卡通化模型' model_org = 'damo' model_id = '%s/%s' % (model_org, model_name) - created = create_model_if_not_exist(self.api, model_id, model_chinese_name) if not created: diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py index e0adc013..035b183e 100644 --- a/tests/hub/test_hub_operation.py +++ b/tests/hub/test_hub_operation.py @@ -4,7 +4,8 @@ import tempfile import unittest import uuid -from modelscope.hub.api import HubApi +from modelscope.hub.api import HubApi, ModelScopeConfig +from modelscope.hub.constants import Licenses, ModelVisibility from modelscope.hub.file_download import model_file_download from modelscope.hub.repository import Repository from modelscope.hub.snapshot_download import snapshot_download @@ -31,8 +32,8 @@ class HubOperationTest(unittest.TestCase): self.api.create_model( model_id=self.model_id, chinese_name=model_chinese_name, - visibility=5, # 1-private, 5-public - license='apache-2.0') + visibility=ModelVisibility.PUBLIC, + license=Licenses.APACHE_V2) temporary_dir = tempfile.mkdtemp() self.model_dir = os.path.join(temporary_dir, self.model_name) repo = Repository(self.model_dir, clone_from=self.model_id) From c8e2e6de0ebdb75350ef55cffca58f8a94530c12 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Sat, 25 Jun 2022 08:36:48 +0800 Subject: [PATCH 3/5] [to #42794773] rename pydataset to msdataset --- docs/source/api/modelscope.pydatasets.rst | 8 +++---- docs/source/api/modelscope.rst | 2 +- docs/source/quick_start.md | 10 ++++---- modelscope/datasets/__init__.py | 1 + modelscope/{pydatasets => datasets}/config.py | 0 .../py_dataset.py => datasets/ms_dataset.py} | 24 +++++++++---------- .../utils/__init__.py | 0 .../{pydatasets => datasets}/utils/ms_api.py | 4 ++-- modelscope/hub/file_download.py | 2 +- modelscope/pipelines/base.py | 6 ++--- modelscope/pydatasets/__init__.py | 1 - tests/{pydatasets => datasets}/__init__.py | 0 .../test_ms_dataset.py} | 19 +++++++-------- tests/pipelines/test_action_recognition.py | 2 +- tests/pipelines/test_image_matting.py | 6 ++--- tests/pipelines/test_text_classification.py | 12 +++++----- 16 files changed, 48 insertions(+), 49 deletions(-) create mode 100644 modelscope/datasets/__init__.py rename modelscope/{pydatasets => datasets}/config.py (100%) rename modelscope/{pydatasets/py_dataset.py => datasets/ms_dataset.py} (96%) rename modelscope/{pydatasets => datasets}/utils/__init__.py (100%) rename modelscope/{pydatasets => datasets}/utils/ms_api.py (95%) delete mode 100644 modelscope/pydatasets/__init__.py rename tests/{pydatasets => datasets}/__init__.py (100%) rename tests/{pydatasets/test_py_dataset.py => datasets/test_ms_dataset.py} (88%) diff --git a/docs/source/api/modelscope.pydatasets.rst b/docs/source/api/modelscope.pydatasets.rst index 2508a91f..33f2fab5 100644 --- a/docs/source/api/modelscope.pydatasets.rst +++ b/docs/source/api/modelscope.pydatasets.rst @@ -1,7 +1,7 @@ -modelscope.pydatasets package +modelscope.datasets package ============================= -.. automodule:: modelscope.pydatasets +.. automodule:: modelscope.datasets :members: :undoc-members: :show-inheritance: @@ -9,10 +9,10 @@ modelscope.pydatasets package Submodules ---------- -modelscope.pydatasets.py\_dataset module +modelscope.datasets.py\_dataset module ---------------------------------------- -.. automodule:: modelscope.pydatasets.py_dataset +.. automodule:: modelscope.datasets.ms_dataset :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/modelscope.rst b/docs/source/api/modelscope.rst index efab568b..f1389717 100644 --- a/docs/source/api/modelscope.rst +++ b/docs/source/api/modelscope.rst @@ -16,7 +16,7 @@ Subpackages modelscope.models modelscope.pipelines modelscope.preprocessors - modelscope.pydatasets + modelscope.datasets modelscope.trainers modelscope.utils diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index 7148f27f..91509fa4 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -3,7 +3,7 @@ ## python环境配置 首先,参考[文档](https://docs.anaconda.com/anaconda/install/) 安装配置Anaconda环境 -安装完成后,执行如下命令为maas library创建对应的python环境。 +安装完成后,执行如下命令为modelscope library创建对应的python环境。 ```shell conda create -n modelscope python=3.6 conda activate modelscope @@ -105,15 +105,15 @@ import cv2 import os.path as osp from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.pydatasets import PyDataset +from modelscope.datasets import MsDataset -# 使用图像url构建PyDataset,此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹 +# 使用图像url构建MsDataset,此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹 input_location = [ 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' ] -dataset = PyDataset.load(input_location, target='image') +dataset = MsDataset.load(input_location, target='image') img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person') -# 输入为PyDataset时,输出的结果为迭代器 +# 输入为MsDataset时,输出的结果为迭代器 result = img_matting(dataset) cv2.imwrite('result.png', next(result)['output_png']) print(f'Output written to {osp.abspath("result.png")}') diff --git a/modelscope/datasets/__init__.py b/modelscope/datasets/__init__.py new file mode 100644 index 00000000..8e0647bb --- /dev/null +++ b/modelscope/datasets/__init__.py @@ -0,0 +1 @@ +from .ms_dataset import MsDataset diff --git a/modelscope/pydatasets/config.py b/modelscope/datasets/config.py similarity index 100% rename from modelscope/pydatasets/config.py rename to modelscope/datasets/config.py diff --git a/modelscope/pydatasets/py_dataset.py b/modelscope/datasets/ms_dataset.py similarity index 96% rename from modelscope/pydatasets/py_dataset.py rename to modelscope/datasets/ms_dataset.py index 49137253..80ffc77a 100644 --- a/modelscope/pydatasets/py_dataset.py +++ b/modelscope/datasets/ms_dataset.py @@ -10,8 +10,8 @@ from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES from datasets.utils.file_utils import (is_relative_path, relative_to_absolute_path) -from modelscope.pydatasets.config import MS_DATASETS_CACHE -from modelscope.pydatasets.utils.ms_api import MsApi +from modelscope.datasets.config import MS_DATASETS_CACHE +from modelscope.datasets.utils.ms_api import MsApi from modelscope.utils.constant import Hubs from modelscope.utils.logger import get_logger @@ -28,9 +28,9 @@ def format_list(para) -> List: return para -class PyDataset: +class MsDataset: _hf_ds = None # holds the underlying HuggingFace Dataset - """A PyDataset backed by hugging face Dataset.""" + """A MsDataset backed by hugging face Dataset.""" def __init__(self, hf_ds: Dataset, target: Optional[str] = None): self._hf_ds = hf_ds @@ -49,7 +49,7 @@ class PyDataset: @classmethod def from_hf_dataset(cls, hf_ds: Dataset, - target: str = None) -> Union[dict, 'PyDataset']: + target: str = None) -> Union[dict, 'MsDataset']: if isinstance(hf_ds, Dataset): return cls(hf_ds, target) if len(hf_ds.keys()) == 1: @@ -68,8 +68,8 @@ class PyDataset: data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None - ) -> Union[dict, 'PyDataset']: - """Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. + ) -> Union[dict, 'MsDataset']: + """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. Args: dataset_name (str): Path or name of the dataset. @@ -82,7 +82,7 @@ class PyDataset: hub (Hubs, optional): When loading from a remote hub, where it is from Returns: - PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset. + MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset. """ if hub == Hubs.huggingface: dataset = hf_load_dataset( @@ -92,9 +92,9 @@ class PyDataset: split=split, data_dir=data_dir, data_files=data_files) - return PyDataset.from_hf_dataset(dataset, target=target) + return MsDataset.from_hf_dataset(dataset, target=target) else: - return PyDataset._load_ms_dataset( + return MsDataset._load_ms_dataset( dataset_name, target=target, subset_name=subset_name, @@ -114,7 +114,7 @@ class PyDataset: data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None - ) -> Union[dict, 'PyDataset']: + ) -> Union[dict, 'MsDataset']: if isinstance(dataset_name, str): use_hf = False if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \ @@ -153,7 +153,7 @@ class PyDataset: else: raise TypeError('path must be a str or a list, but got' f' {type(dataset_name)}') - return PyDataset.from_hf_dataset(dataset, target=target) + return MsDataset.from_hf_dataset(dataset, target=target) def to_torch_dataset_with_processors( self, diff --git a/modelscope/pydatasets/utils/__init__.py b/modelscope/datasets/utils/__init__.py similarity index 100% rename from modelscope/pydatasets/utils/__init__.py rename to modelscope/datasets/utils/__init__.py diff --git a/modelscope/pydatasets/utils/ms_api.py b/modelscope/datasets/utils/ms_api.py similarity index 95% rename from modelscope/pydatasets/utils/ms_api.py rename to modelscope/datasets/utils/ms_api.py index 04052cc4..a478766f 100644 --- a/modelscope/pydatasets/utils/ms_api.py +++ b/modelscope/datasets/utils/ms_api.py @@ -4,8 +4,8 @@ from typing import Optional import requests -from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH, - MS_HUB_ENDPOINT) +from modelscope.datasets.config import (DOWNLOADED_DATASETS_PATH, + MS_HUB_ENDPOINT) from modelscope.utils.logger import get_logger logger = get_logger() diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index e5c64f1c..b92bf89c 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -187,7 +187,7 @@ def get_file_download_url(model_id: str, file_path: str, revision: str): """ Format file download url according to `model_id`, `revision` and `file_path`. e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`, - the resulted download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md + the resulted download url is: https://modelscope.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md """ download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}' return download_url_template.format( diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index 7e32f543..cf4ce8fd 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -4,17 +4,17 @@ import os.path as osp from abc import ABC, abstractmethod from typing import Any, Dict, Generator, List, Union +from modelscope.datasets import MsDataset from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.base import Model from modelscope.preprocessors import Preprocessor -from modelscope.pydatasets import PyDataset from modelscope.utils.config import Config from modelscope.utils.logger import get_logger from .outputs import TASK_OUTPUTS from .util import is_model, is_official_hub_path Tensor = Union['torch.Tensor', 'tf.Tensor'] -Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray'] +Input = Union[str, tuple, MsDataset, 'PIL.Image.Image', 'numpy.ndarray'] InputModel = Union[str, Model] output_keys = [ @@ -85,7 +85,7 @@ class Pipeline(ABC): for ele in input: output.append(self._process_single(ele, *args, **post_kwargs)) - elif isinstance(input, PyDataset): + elif isinstance(input, MsDataset): return self._process_iterator(input, *args, **post_kwargs) else: diff --git a/modelscope/pydatasets/__init__.py b/modelscope/pydatasets/__init__.py deleted file mode 100644 index a1ed1d93..00000000 --- a/modelscope/pydatasets/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .py_dataset import PyDataset diff --git a/tests/pydatasets/__init__.py b/tests/datasets/__init__.py similarity index 100% rename from tests/pydatasets/__init__.py rename to tests/datasets/__init__.py diff --git a/tests/pydatasets/test_py_dataset.py b/tests/datasets/test_ms_dataset.py similarity index 88% rename from tests/pydatasets/test_py_dataset.py rename to tests/datasets/test_ms_dataset.py index e84f240a..d08258ac 100644 --- a/tests/pydatasets/test_py_dataset.py +++ b/tests/datasets/test_ms_dataset.py @@ -2,11 +2,10 @@ import unittest import datasets as hfdata +from modelscope.datasets import MsDataset from modelscope.models import Model from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.preprocessors.base import Preprocessor -from modelscope.pydatasets import PyDataset -from modelscope.utils.constant import Hubs from modelscope.utils.test_utils import require_tf, require_torch, test_level @@ -31,15 +30,15 @@ class ImgPreprocessor(Preprocessor): } -class PyDatasetTest(unittest.TestCase): +class MsDatasetTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_ds_basic(self): - ms_ds_full = PyDataset.load('squad') + ms_ds_full = MsDataset.load('squad') ms_ds_full_hf = hfdata.load_dataset('squad') - ms_ds_train = PyDataset.load('squad', split='train') + ms_ds_train = MsDataset.load('squad', split='train') ms_ds_train_hf = hfdata.load_dataset('squad', split='train') - ms_image_train = PyDataset.from_hf_dataset( + ms_image_train = MsDataset.from_hf_dataset( hfdata.load_dataset('beans', split='train')) self.assertEqual(ms_ds_full['train'][0], ms_ds_full_hf['train'][0]) self.assertEqual(ms_ds_full['validation'][0], @@ -58,7 +57,7 @@ class PyDatasetTest(unittest.TestCase): nlp_model.model_dir, first_sequence='context', second_sequence=None) - ms_ds_train = PyDataset.load('squad', split='train') + ms_ds_train = MsDataset.load('squad', split='train') pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor) import torch dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) @@ -75,7 +74,7 @@ class PyDatasetTest(unittest.TestCase): nlp_model.model_dir, first_sequence='context', second_sequence=None) - ms_ds_train = PyDataset.load('squad', split='train') + ms_ds_train = MsDataset.load('squad', split='train') tf_dataset = ms_ds_train.to_tf_dataset( batch_size=5, shuffle=True, @@ -86,7 +85,7 @@ class PyDatasetTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') @require_torch def test_to_torch_dataset_img(self): - ms_image_train = PyDataset.from_hf_dataset( + ms_image_train = MsDataset.from_hf_dataset( hfdata.load_dataset('beans', split='train')) pt_dataset = ms_image_train.to_torch_dataset( preprocessors=ImgPreprocessor( @@ -100,7 +99,7 @@ class PyDatasetTest(unittest.TestCase): def test_to_tf_dataset_img(self): import tensorflow as tf tf.compat.v1.enable_eager_execution() - ms_image_train = PyDataset.load('beans', split='train') + ms_image_train = MsDataset.load('beans', split='train') tf_dataset = ms_image_train.to_tf_dataset( batch_size=5, shuffle=True, diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py index b524ca18..7bb3bb90 100644 --- a/tests/pipelines/test_action_recognition.py +++ b/tests/pipelines/test_action_recognition.py @@ -7,9 +7,9 @@ import unittest import cv2 +from modelscope.datasets import MsDataset from modelscope.fileio import File from modelscope.pipelines import pipeline -from modelscope.pydatasets import PyDataset from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.test_utils import test_level diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 1b547e14..13576d44 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -6,9 +6,9 @@ import unittest import cv2 +from modelscope.datasets import MsDataset from modelscope.fileio import File from modelscope.pipelines import pipeline -from modelscope.pydatasets import PyDataset from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.test_utils import test_level @@ -37,7 +37,7 @@ class ImageMattingTest(unittest.TestCase): # alternatively: # input_location = '/dir/to/images' - dataset = PyDataset.load(input_location, target='image') + dataset = MsDataset.load(input_location, target='image') img_matting = pipeline(Tasks.image_matting, model=self.model_id) # note that for dataset output, the inference-output is a Generator that can be iterated. result = img_matting(dataset) @@ -62,7 +62,7 @@ class ImageMattingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_modelscope_dataset(self): - dataset = PyDataset.load('beans', split='train', target='image') + dataset = MsDataset.load('beans', split='train', target='image') img_matting = pipeline(Tasks.image_matting, model=self.model_id) result = img_matting(dataset) for i in range(10): diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index 9e5f15b9..bf6de28e 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -2,10 +2,10 @@ import shutil import unittest +from modelscope.datasets import MsDataset from modelscope.models import Model from modelscope.pipelines import SequenceClassificationPipeline, pipeline from modelscope.preprocessors import SequenceClassificationPreprocessor -from modelscope.pydatasets import PyDataset from modelscope.utils.constant import Hubs, Tasks from modelscope.utils.test_utils import test_level @@ -28,7 +28,7 @@ class SequenceClassificationTest(unittest.TestCase): print(data) - def printDataset(self, dataset: PyDataset): + def printDataset(self, dataset: MsDataset): for i, r in enumerate(dataset): if i > 10: break @@ -50,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase): text_classification = pipeline( task=Tasks.text_classification, model=self.model_id) result = text_classification( - PyDataset.load( + MsDataset.load( 'glue', subset_name='sst2', split='train', @@ -62,7 +62,7 @@ class SequenceClassificationTest(unittest.TestCase): def test_run_with_default_model(self): text_classification = pipeline(task=Tasks.text_classification) result = text_classification( - PyDataset.load( + MsDataset.load( 'glue', subset_name='sst2', split='train', @@ -78,7 +78,7 @@ class SequenceClassificationTest(unittest.TestCase): text_classification = pipeline( Tasks.text_classification, model=model, preprocessor=preprocessor) # loaded from huggingface dataset - dataset = PyDataset.load( + dataset = MsDataset.load( 'glue', subset_name='sst2', split='train', @@ -91,7 +91,7 @@ class SequenceClassificationTest(unittest.TestCase): def test_run_with_modelscope_dataset(self): text_classification = pipeline(task=Tasks.text_classification) # loaded from modelscope dataset - dataset = PyDataset.load( + dataset = MsDataset.load( 'squad', split='train', target='context', hub=Hubs.modelscope) result = text_classification(dataset) self.printDataset(result) From b6e3fd80b0299395cc595bad45b87eccb4c82b07 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Sat, 25 Jun 2022 08:50:28 +0800 Subject: [PATCH 4/5] Revert "[to #42794773] rename pydataset to msdataset" This reverts commit c8e2e6de0ebdb75350ef55cffca58f8a94530c12. --- docs/source/api/modelscope.pydatasets.rst | 8 +++---- docs/source/api/modelscope.rst | 2 +- docs/source/quick_start.md | 10 ++++---- modelscope/datasets/__init__.py | 1 - modelscope/hub/file_download.py | 2 +- modelscope/pipelines/base.py | 6 ++--- modelscope/pydatasets/__init__.py | 1 + modelscope/{datasets => pydatasets}/config.py | 0 .../py_dataset.py} | 24 +++++++++---------- .../utils/__init__.py | 0 .../{datasets => pydatasets}/utils/ms_api.py | 4 ++-- tests/pipelines/test_action_recognition.py | 2 +- tests/pipelines/test_image_matting.py | 6 ++--- tests/pipelines/test_text_classification.py | 12 +++++----- tests/{datasets => pydatasets}/__init__.py | 0 .../test_py_dataset.py} | 19 ++++++++------- 16 files changed, 49 insertions(+), 48 deletions(-) delete mode 100644 modelscope/datasets/__init__.py create mode 100644 modelscope/pydatasets/__init__.py rename modelscope/{datasets => pydatasets}/config.py (100%) rename modelscope/{datasets/ms_dataset.py => pydatasets/py_dataset.py} (96%) rename modelscope/{datasets => pydatasets}/utils/__init__.py (100%) rename modelscope/{datasets => pydatasets}/utils/ms_api.py (95%) rename tests/{datasets => pydatasets}/__init__.py (100%) rename tests/{datasets/test_ms_dataset.py => pydatasets/test_py_dataset.py} (88%) diff --git a/docs/source/api/modelscope.pydatasets.rst b/docs/source/api/modelscope.pydatasets.rst index 33f2fab5..2508a91f 100644 --- a/docs/source/api/modelscope.pydatasets.rst +++ b/docs/source/api/modelscope.pydatasets.rst @@ -1,7 +1,7 @@ -modelscope.datasets package +modelscope.pydatasets package ============================= -.. automodule:: modelscope.datasets +.. automodule:: modelscope.pydatasets :members: :undoc-members: :show-inheritance: @@ -9,10 +9,10 @@ modelscope.datasets package Submodules ---------- -modelscope.datasets.py\_dataset module +modelscope.pydatasets.py\_dataset module ---------------------------------------- -.. automodule:: modelscope.datasets.ms_dataset +.. automodule:: modelscope.pydatasets.py_dataset :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/modelscope.rst b/docs/source/api/modelscope.rst index f1389717..efab568b 100644 --- a/docs/source/api/modelscope.rst +++ b/docs/source/api/modelscope.rst @@ -16,7 +16,7 @@ Subpackages modelscope.models modelscope.pipelines modelscope.preprocessors - modelscope.datasets + modelscope.pydatasets modelscope.trainers modelscope.utils diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index 91509fa4..7148f27f 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -3,7 +3,7 @@ ## python环境配置 首先,参考[文档](https://docs.anaconda.com/anaconda/install/) 安装配置Anaconda环境 -安装完成后,执行如下命令为modelscope library创建对应的python环境。 +安装完成后,执行如下命令为maas library创建对应的python环境。 ```shell conda create -n modelscope python=3.6 conda activate modelscope @@ -105,15 +105,15 @@ import cv2 import os.path as osp from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.datasets import MsDataset +from modelscope.pydatasets import PyDataset -# 使用图像url构建MsDataset,此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹 +# 使用图像url构建PyDataset,此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹 input_location = [ 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' ] -dataset = MsDataset.load(input_location, target='image') +dataset = PyDataset.load(input_location, target='image') img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person') -# 输入为MsDataset时,输出的结果为迭代器 +# 输入为PyDataset时,输出的结果为迭代器 result = img_matting(dataset) cv2.imwrite('result.png', next(result)['output_png']) print(f'Output written to {osp.abspath("result.png")}') diff --git a/modelscope/datasets/__init__.py b/modelscope/datasets/__init__.py deleted file mode 100644 index 8e0647bb..00000000 --- a/modelscope/datasets/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ms_dataset import MsDataset diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index b92bf89c..e5c64f1c 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -187,7 +187,7 @@ def get_file_download_url(model_id: str, file_path: str, revision: str): """ Format file download url according to `model_id`, `revision` and `file_path`. e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`, - the resulted download url is: https://modelscope.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md + the resulted download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md """ download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}' return download_url_template.format( diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index cf4ce8fd..7e32f543 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -4,17 +4,17 @@ import os.path as osp from abc import ABC, abstractmethod from typing import Any, Dict, Generator, List, Union -from modelscope.datasets import MsDataset from modelscope.hub.snapshot_download import snapshot_download from modelscope.models.base import Model from modelscope.preprocessors import Preprocessor +from modelscope.pydatasets import PyDataset from modelscope.utils.config import Config from modelscope.utils.logger import get_logger from .outputs import TASK_OUTPUTS from .util import is_model, is_official_hub_path Tensor = Union['torch.Tensor', 'tf.Tensor'] -Input = Union[str, tuple, MsDataset, 'PIL.Image.Image', 'numpy.ndarray'] +Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray'] InputModel = Union[str, Model] output_keys = [ @@ -85,7 +85,7 @@ class Pipeline(ABC): for ele in input: output.append(self._process_single(ele, *args, **post_kwargs)) - elif isinstance(input, MsDataset): + elif isinstance(input, PyDataset): return self._process_iterator(input, *args, **post_kwargs) else: diff --git a/modelscope/pydatasets/__init__.py b/modelscope/pydatasets/__init__.py new file mode 100644 index 00000000..a1ed1d93 --- /dev/null +++ b/modelscope/pydatasets/__init__.py @@ -0,0 +1 @@ +from .py_dataset import PyDataset diff --git a/modelscope/datasets/config.py b/modelscope/pydatasets/config.py similarity index 100% rename from modelscope/datasets/config.py rename to modelscope/pydatasets/config.py diff --git a/modelscope/datasets/ms_dataset.py b/modelscope/pydatasets/py_dataset.py similarity index 96% rename from modelscope/datasets/ms_dataset.py rename to modelscope/pydatasets/py_dataset.py index 80ffc77a..49137253 100644 --- a/modelscope/datasets/ms_dataset.py +++ b/modelscope/pydatasets/py_dataset.py @@ -10,8 +10,8 @@ from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES from datasets.utils.file_utils import (is_relative_path, relative_to_absolute_path) -from modelscope.datasets.config import MS_DATASETS_CACHE -from modelscope.datasets.utils.ms_api import MsApi +from modelscope.pydatasets.config import MS_DATASETS_CACHE +from modelscope.pydatasets.utils.ms_api import MsApi from modelscope.utils.constant import Hubs from modelscope.utils.logger import get_logger @@ -28,9 +28,9 @@ def format_list(para) -> List: return para -class MsDataset: +class PyDataset: _hf_ds = None # holds the underlying HuggingFace Dataset - """A MsDataset backed by hugging face Dataset.""" + """A PyDataset backed by hugging face Dataset.""" def __init__(self, hf_ds: Dataset, target: Optional[str] = None): self._hf_ds = hf_ds @@ -49,7 +49,7 @@ class MsDataset: @classmethod def from_hf_dataset(cls, hf_ds: Dataset, - target: str = None) -> Union[dict, 'MsDataset']: + target: str = None) -> Union[dict, 'PyDataset']: if isinstance(hf_ds, Dataset): return cls(hf_ds, target) if len(hf_ds.keys()) == 1: @@ -68,8 +68,8 @@ class MsDataset: data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None - ) -> Union[dict, 'MsDataset']: - """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. + ) -> Union[dict, 'PyDataset']: + """Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. Args: dataset_name (str): Path or name of the dataset. @@ -82,7 +82,7 @@ class MsDataset: hub (Hubs, optional): When loading from a remote hub, where it is from Returns: - MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset. + PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset. """ if hub == Hubs.huggingface: dataset = hf_load_dataset( @@ -92,9 +92,9 @@ class MsDataset: split=split, data_dir=data_dir, data_files=data_files) - return MsDataset.from_hf_dataset(dataset, target=target) + return PyDataset.from_hf_dataset(dataset, target=target) else: - return MsDataset._load_ms_dataset( + return PyDataset._load_ms_dataset( dataset_name, target=target, subset_name=subset_name, @@ -114,7 +114,7 @@ class MsDataset: data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None - ) -> Union[dict, 'MsDataset']: + ) -> Union[dict, 'PyDataset']: if isinstance(dataset_name, str): use_hf = False if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \ @@ -153,7 +153,7 @@ class MsDataset: else: raise TypeError('path must be a str or a list, but got' f' {type(dataset_name)}') - return MsDataset.from_hf_dataset(dataset, target=target) + return PyDataset.from_hf_dataset(dataset, target=target) def to_torch_dataset_with_processors( self, diff --git a/modelscope/datasets/utils/__init__.py b/modelscope/pydatasets/utils/__init__.py similarity index 100% rename from modelscope/datasets/utils/__init__.py rename to modelscope/pydatasets/utils/__init__.py diff --git a/modelscope/datasets/utils/ms_api.py b/modelscope/pydatasets/utils/ms_api.py similarity index 95% rename from modelscope/datasets/utils/ms_api.py rename to modelscope/pydatasets/utils/ms_api.py index a478766f..04052cc4 100644 --- a/modelscope/datasets/utils/ms_api.py +++ b/modelscope/pydatasets/utils/ms_api.py @@ -4,8 +4,8 @@ from typing import Optional import requests -from modelscope.datasets.config import (DOWNLOADED_DATASETS_PATH, - MS_HUB_ENDPOINT) +from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH, + MS_HUB_ENDPOINT) from modelscope.utils.logger import get_logger logger = get_logger() diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py index 7bb3bb90..b524ca18 100644 --- a/tests/pipelines/test_action_recognition.py +++ b/tests/pipelines/test_action_recognition.py @@ -7,9 +7,9 @@ import unittest import cv2 -from modelscope.datasets import MsDataset from modelscope.fileio import File from modelscope.pipelines import pipeline +from modelscope.pydatasets import PyDataset from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.test_utils import test_level diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 13576d44..1b547e14 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -6,9 +6,9 @@ import unittest import cv2 -from modelscope.datasets import MsDataset from modelscope.fileio import File from modelscope.pipelines import pipeline +from modelscope.pydatasets import PyDataset from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.test_utils import test_level @@ -37,7 +37,7 @@ class ImageMattingTest(unittest.TestCase): # alternatively: # input_location = '/dir/to/images' - dataset = MsDataset.load(input_location, target='image') + dataset = PyDataset.load(input_location, target='image') img_matting = pipeline(Tasks.image_matting, model=self.model_id) # note that for dataset output, the inference-output is a Generator that can be iterated. result = img_matting(dataset) @@ -62,7 +62,7 @@ class ImageMattingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_modelscope_dataset(self): - dataset = MsDataset.load('beans', split='train', target='image') + dataset = PyDataset.load('beans', split='train', target='image') img_matting = pipeline(Tasks.image_matting, model=self.model_id) result = img_matting(dataset) for i in range(10): diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index bf6de28e..9e5f15b9 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -2,10 +2,10 @@ import shutil import unittest -from modelscope.datasets import MsDataset from modelscope.models import Model from modelscope.pipelines import SequenceClassificationPipeline, pipeline from modelscope.preprocessors import SequenceClassificationPreprocessor +from modelscope.pydatasets import PyDataset from modelscope.utils.constant import Hubs, Tasks from modelscope.utils.test_utils import test_level @@ -28,7 +28,7 @@ class SequenceClassificationTest(unittest.TestCase): print(data) - def printDataset(self, dataset: MsDataset): + def printDataset(self, dataset: PyDataset): for i, r in enumerate(dataset): if i > 10: break @@ -50,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase): text_classification = pipeline( task=Tasks.text_classification, model=self.model_id) result = text_classification( - MsDataset.load( + PyDataset.load( 'glue', subset_name='sst2', split='train', @@ -62,7 +62,7 @@ class SequenceClassificationTest(unittest.TestCase): def test_run_with_default_model(self): text_classification = pipeline(task=Tasks.text_classification) result = text_classification( - MsDataset.load( + PyDataset.load( 'glue', subset_name='sst2', split='train', @@ -78,7 +78,7 @@ class SequenceClassificationTest(unittest.TestCase): text_classification = pipeline( Tasks.text_classification, model=model, preprocessor=preprocessor) # loaded from huggingface dataset - dataset = MsDataset.load( + dataset = PyDataset.load( 'glue', subset_name='sst2', split='train', @@ -91,7 +91,7 @@ class SequenceClassificationTest(unittest.TestCase): def test_run_with_modelscope_dataset(self): text_classification = pipeline(task=Tasks.text_classification) # loaded from modelscope dataset - dataset = MsDataset.load( + dataset = PyDataset.load( 'squad', split='train', target='context', hub=Hubs.modelscope) result = text_classification(dataset) self.printDataset(result) diff --git a/tests/datasets/__init__.py b/tests/pydatasets/__init__.py similarity index 100% rename from tests/datasets/__init__.py rename to tests/pydatasets/__init__.py diff --git a/tests/datasets/test_ms_dataset.py b/tests/pydatasets/test_py_dataset.py similarity index 88% rename from tests/datasets/test_ms_dataset.py rename to tests/pydatasets/test_py_dataset.py index d08258ac..e84f240a 100644 --- a/tests/datasets/test_ms_dataset.py +++ b/tests/pydatasets/test_py_dataset.py @@ -2,10 +2,11 @@ import unittest import datasets as hfdata -from modelscope.datasets import MsDataset from modelscope.models import Model from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.preprocessors.base import Preprocessor +from modelscope.pydatasets import PyDataset +from modelscope.utils.constant import Hubs from modelscope.utils.test_utils import require_tf, require_torch, test_level @@ -30,15 +31,15 @@ class ImgPreprocessor(Preprocessor): } -class MsDatasetTest(unittest.TestCase): +class PyDatasetTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_ds_basic(self): - ms_ds_full = MsDataset.load('squad') + ms_ds_full = PyDataset.load('squad') ms_ds_full_hf = hfdata.load_dataset('squad') - ms_ds_train = MsDataset.load('squad', split='train') + ms_ds_train = PyDataset.load('squad', split='train') ms_ds_train_hf = hfdata.load_dataset('squad', split='train') - ms_image_train = MsDataset.from_hf_dataset( + ms_image_train = PyDataset.from_hf_dataset( hfdata.load_dataset('beans', split='train')) self.assertEqual(ms_ds_full['train'][0], ms_ds_full_hf['train'][0]) self.assertEqual(ms_ds_full['validation'][0], @@ -57,7 +58,7 @@ class MsDatasetTest(unittest.TestCase): nlp_model.model_dir, first_sequence='context', second_sequence=None) - ms_ds_train = MsDataset.load('squad', split='train') + ms_ds_train = PyDataset.load('squad', split='train') pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor) import torch dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5) @@ -74,7 +75,7 @@ class MsDatasetTest(unittest.TestCase): nlp_model.model_dir, first_sequence='context', second_sequence=None) - ms_ds_train = MsDataset.load('squad', split='train') + ms_ds_train = PyDataset.load('squad', split='train') tf_dataset = ms_ds_train.to_tf_dataset( batch_size=5, shuffle=True, @@ -85,7 +86,7 @@ class MsDatasetTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') @require_torch def test_to_torch_dataset_img(self): - ms_image_train = MsDataset.from_hf_dataset( + ms_image_train = PyDataset.from_hf_dataset( hfdata.load_dataset('beans', split='train')) pt_dataset = ms_image_train.to_torch_dataset( preprocessors=ImgPreprocessor( @@ -99,7 +100,7 @@ class MsDatasetTest(unittest.TestCase): def test_to_tf_dataset_img(self): import tensorflow as tf tf.compat.v1.enable_eager_execution() - ms_image_train = MsDataset.load('beans', split='train') + ms_image_train = PyDataset.load('beans', split='train') tf_dataset = ms_image_train.to_tf_dataset( batch_size=5, shuffle=True, From 39172b5f662bad258b122cc11b72df490a0bf8d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=99=BA=E4=B8=9E?= Date: Mon, 27 Jun 2022 10:54:24 +0800 Subject: [PATCH 5/5] remove unformatted space trainer --- .../nlp/space => models/nlp/space/application}/__init__.py | 0 .../gen_trainer.py => models/nlp/space/application/gen_app.py} | 0 .../nlp/space/application/intent_app.py} | 0 modelscope/models/nlp/space/dialog_intent_prediction_model.py | 2 +- modelscope/models/nlp/space/dialog_modeling_model.py | 2 +- modelscope/{trainers => models}/nlp/space/metrics/__init__.py | 0 .../{trainers => models}/nlp/space/metrics/metrics_tracker.py | 0 modelscope/trainers/nlp/space/trainers/__init__.py | 0 8 files changed, 2 insertions(+), 2 deletions(-) rename modelscope/{trainers/nlp/space => models/nlp/space/application}/__init__.py (100%) rename modelscope/{trainers/nlp/space/trainers/gen_trainer.py => models/nlp/space/application/gen_app.py} (100%) rename modelscope/{trainers/nlp/space/trainers/intent_trainer.py => models/nlp/space/application/intent_app.py} (100%) rename modelscope/{trainers => models}/nlp/space/metrics/__init__.py (100%) rename modelscope/{trainers => models}/nlp/space/metrics/metrics_tracker.py (100%) delete mode 100644 modelscope/trainers/nlp/space/trainers/__init__.py diff --git a/modelscope/trainers/nlp/space/__init__.py b/modelscope/models/nlp/space/application/__init__.py similarity index 100% rename from modelscope/trainers/nlp/space/__init__.py rename to modelscope/models/nlp/space/application/__init__.py diff --git a/modelscope/trainers/nlp/space/trainers/gen_trainer.py b/modelscope/models/nlp/space/application/gen_app.py similarity index 100% rename from modelscope/trainers/nlp/space/trainers/gen_trainer.py rename to modelscope/models/nlp/space/application/gen_app.py diff --git a/modelscope/trainers/nlp/space/trainers/intent_trainer.py b/modelscope/models/nlp/space/application/intent_app.py similarity index 100% rename from modelscope/trainers/nlp/space/trainers/intent_trainer.py rename to modelscope/models/nlp/space/application/intent_app.py diff --git a/modelscope/models/nlp/space/dialog_intent_prediction_model.py b/modelscope/models/nlp/space/dialog_intent_prediction_model.py index a5d94376..a6bd1d27 100644 --- a/modelscope/models/nlp/space/dialog_intent_prediction_model.py +++ b/modelscope/models/nlp/space/dialog_intent_prediction_model.py @@ -2,11 +2,11 @@ import os from typing import Any, Dict from ....preprocessors.space.fields.intent_field import IntentBPETextField -from ....trainers.nlp.space.trainers.intent_trainer import IntentTrainer from ....utils.config import Config from ....utils.constant import Tasks from ...base import Model, Tensor from ...builder import MODELS +from .application.intent_app import IntentTrainer from .model.generator import Generator from .model.model_base import ModelBase diff --git a/modelscope/models/nlp/space/dialog_modeling_model.py b/modelscope/models/nlp/space/dialog_modeling_model.py index 4a34f132..ad8212c0 100644 --- a/modelscope/models/nlp/space/dialog_modeling_model.py +++ b/modelscope/models/nlp/space/dialog_modeling_model.py @@ -2,11 +2,11 @@ import os from typing import Any, Dict, Optional from ....preprocessors.space.fields.gen_field import MultiWOZBPETextField -from ....trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer from ....utils.config import Config from ....utils.constant import Tasks from ...base import Model, Tensor from ...builder import MODELS +from .application.gen_app import MultiWOZTrainer from .model.generator import Generator from .model.model_base import ModelBase diff --git a/modelscope/trainers/nlp/space/metrics/__init__.py b/modelscope/models/nlp/space/metrics/__init__.py similarity index 100% rename from modelscope/trainers/nlp/space/metrics/__init__.py rename to modelscope/models/nlp/space/metrics/__init__.py diff --git a/modelscope/trainers/nlp/space/metrics/metrics_tracker.py b/modelscope/models/nlp/space/metrics/metrics_tracker.py similarity index 100% rename from modelscope/trainers/nlp/space/metrics/metrics_tracker.py rename to modelscope/models/nlp/space/metrics/metrics_tracker.py diff --git a/modelscope/trainers/nlp/space/trainers/__init__.py b/modelscope/trainers/nlp/space/trainers/__init__.py deleted file mode 100644 index e69de29b..00000000