From 6991620f59c20ac386a815eb6d842adde3cedd07 Mon Sep 17 00:00:00 2001
From: "mulin.lyh" <mulin.lyh@taobao.com>
Date: Fri, 24 Jun 2022 16:43:32 +0800
Subject: [PATCH 1/5] [to #42698276]fix: git repo operations supports, gitlab
 token certification support.

---
 modelscope/hub/api.py                    |  38 ++--
 modelscope/hub/errors.py                 |   4 +
 modelscope/hub/git.py                    | 225 +++++++++++++++--------
 modelscope/hub/repository.py             | 216 +++++++---------------
 modelscope/hub/utils/_subprocess.py      |  40 ----
 tests/hub/test_hub_operation.py          |  94 ++--------
 tests/hub/test_hub_private_repository.py |  76 ++++++++
 tests/hub/test_hub_repository.py         | 107 +++++++++++
 8 files changed, 444 insertions(+), 356 deletions(-)
 delete mode 100644 modelscope/hub/utils/_subprocess.py
 create mode 100644 tests/hub/test_hub_private_repository.py
 create mode 100644 tests/hub/test_hub_repository.py
diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 104eafbd..f4f31280 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -9,9 +9,10 @@ from typing import List, Optional, Tuple, Union
 import requests
 
 from modelscope.utils.logger import get_logger
-from .constants import LOGGER_NAME
+from .constants import MODELSCOPE_URL_SCHEME
 from .errors import NotExistError, is_ok, raise_on_error
-from .utils.utils import get_endpoint, model_id_to_group_owner_name
+from .utils.utils import (get_endpoint, get_gitlab_domain,
+                          model_id_to_group_owner_name)
 
 logger = get_logger()
 
@@ -40,9 +41,6 @@ class HubApi:
         <Tip>
             You only have to login once within 30 days.
         </Tip>
-
-        TODO: handle cookies expire
-
         """
         path = f'{self.endpoint}/api/v1/login'
         r = requests.post(
@@ -94,14 +92,14 @@ class HubApi:
                 'Path': owner_or_group,
                 'Name': name,
                 'ChineseName': chinese_name,
-                'Visibility': visibility,
+                'Visibility': visibility,  # server check
                 'License': license
             },
             cookies=cookies)
         r.raise_for_status()
         raise_on_error(r.json())
-        d = r.json()
-        return d['Data']['Name']
+        model_repo_url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{model_id}'
+        return model_repo_url
 
     def delete_model(self, model_id):
         """_summary_
@@ -209,25 +207,37 @@ class HubApi:
 
 class ModelScopeConfig:
     path_credential = expanduser('~/.modelscope/credentials')
-    os.makedirs(path_credential, exist_ok=True)
+
+    @classmethod
+    def make_sure_credential_path_exist(cls):
+        os.makedirs(cls.path_credential, exist_ok=True)
 
     @classmethod
     def save_cookies(cls, cookies: CookieJar):
+        cls.make_sure_credential_path_exist()
         with open(os.path.join(cls.path_credential, 'cookies'), 'wb+') as f:
             pickle.dump(cookies, f)
 
     @classmethod
     def get_cookies(cls):
         try:
-            with open(os.path.join(cls.path_credential, 'cookies'), 'rb') as f:
-                return pickle.load(f)
+            cookies_path = os.path.join(cls.path_credential, 'cookies')
+            with open(cookies_path, 'rb') as f:
+                cookies = pickle.load(f)
+                for cookie in cookies:
+                    if cookie.is_expired():
+                        logger.warning('Auth is expired, please re-login')
+                        return None
+                return cookies
         except FileNotFoundError:
-            logger.warn("Auth token does not exist, you'll get authentication \
-                error when downloading private model files. Please login first"
-                        )
+            logger.warning(
+                "Auth token does not exist, you'll get authentication error "
+                'when downloading private model files. Please login first')
+        return None
 
     @classmethod
     def save_token(cls, token: str):
+        cls.make_sure_credential_path_exist()
         with open(os.path.join(cls.path_credential, 'token'), 'w+') as f:
             f.write(token)
 
diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py
index 13ea709f..4b39d6e3 100644
--- a/modelscope/hub/errors.py
+++ b/modelscope/hub/errors.py
@@ -6,6 +6,10 @@ class RequestError(Exception):
     pass
 
 
+class GitError(Exception):
+    pass
+
+
 def is_ok(rsp):
     """ Check the request is ok
 
diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py
index 5f079105..37f61814 100644
--- a/modelscope/hub/git.py
+++ b/modelscope/hub/git.py
@@ -1,82 +1,161 @@
-from threading import local
-from tkinter.messagebox import NO
-from typing import Union
+import subprocess
+from typing import List
+from xmlrpc.client import Boolean
 
 from modelscope.utils.logger import get_logger
-from .constants import LOGGER_NAME
-from .utils._subprocess import run_subprocess
+from .errors import GitError
 
-logger = get_logger
+logger = get_logger()
 
 
-def git_clone(
-    local_dir: str,
-    repo_url: str,
-):
-    # TODO: use "git clone" or "git lfs clone" according to git version
-    # TODO: print stderr when subprocess fails
-    run_subprocess(
-        f'git clone {repo_url}'.split(),
-        local_dir,
-        True,
-    )
+class Singleton(type):
+    _instances = {}
 
+    def __call__(cls, *args, **kwargs):
+        if cls not in cls._instances:
+            cls._instances[cls] = super(Singleton,
+                                        cls).__call__(*args, **kwargs)
+        return cls._instances[cls]
 
-def git_checkout(
-    local_dir: str,
-    revsion: str,
-):
-    run_subprocess(f'git checkout {revsion}'.split(), local_dir)
 
-
-def git_add(local_dir: str, ):
-    run_subprocess(
-        'git add .'.split(),
-        local_dir,
-        True,
-    )
-
-
-def git_commit(local_dir: str, commit_message: str):
-    run_subprocess(
-        'git commit -v -m'.split() + [commit_message],
-        local_dir,
-        True,
-    )
-
-
-def git_push(local_dir: str, branch: str):
-    # check current branch
-    cur_branch = git_current_branch(local_dir)
-    if cur_branch != branch:
-        logger.error(
-            "You're trying to push to a different branch, please double check")
-        return
-
-    run_subprocess(
-        f'git push origin {branch}'.split(),
-        local_dir,
-        True,
-    )
-
-
-def git_current_branch(local_dir: str) -> Union[str, None]:
-    """
-    Get current branch name
-
-    Args:
-        local_dir(`str`): local model repo directory
-
-    Returns
-        branch name you're currently on
+class GitCommandWrapper(metaclass=Singleton):
+    """Some git operation wrapper
     """
-    try:
-        process = run_subprocess(
-            'git rev-parse --abbrev-ref HEAD'.split(),
-            local_dir,
-            True,
-        )
-
-        return str(process.stdout).strip()
-    except Exception as e:
-        raise e
+    default_git_path = 'git'  # The default git command line
+
+    def __init__(self, path: str = None):
+        self.git_path = path or self.default_git_path
+
+    def _run_git_command(self, *args) -> subprocess.CompletedProcess:
+        """Run git with `args`; return the completed process on exit code 0.
+
+        Raises:
+            GitError: on non-zero exit; message holds stdout and stderr.
+
+        Returns:
+            subprocess.CompletedProcess: the command response
+        """
+        # Never write the oauth2 token embedded in a url to the log.
+        logger.info(' '.join(self.remove_token_from_url(a) for a in args))
+        response = subprocess.run(
+            [self.git_path, *args],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE)  # compatible for python3.6
+        try:
+            response.check_returncode()
+            return response
+        except subprocess.CalledProcessError as error:
+            raise GitError('stdout: %s, stderr: %s' % (
+                response.stdout.decode('utf8'),
+                error.stderr.decode('utf8'))) from error
+
+    def _add_token(self, token: str, url: str):
+        if token:
+            if '//oauth2' not in url:
+                url = url.replace('//', '//oauth2:%s@' % token)
+        return url
+
+    def remove_token_from_url(self, url: str):
+        if url and '//oauth2' in url:
+            start_index = url.find('oauth2')
+            end_index = url.find('@')
+            url = url[:start_index] + url[end_index + 1:]
+        return url
+
+    def is_lfs_installed(self):
+        cmd = ['lfs', 'env']
+        try:
+            self._run_git_command(*cmd)
+            return True
+        except GitError:
+            return False
+
+    def clone(self,
+              repo_base_dir: str,
+              token: str,
+              url: str,
+              repo_name: str,
+              branch: str = None):
+        """Clone `url` into `repo_base_dir/repo_name`.
+
+        Arguments go to git as a list instead of a space-split string, so
+        a base dir containing spaces works; the debug log omits the token.
+
+        Args:
+            repo_base_dir (str): Local base dir; git is run with `-C` on it.
+            token (str): Git token; may be None for a public repository.
+            url (str): The remote url.
+            repo_name (str): Directory name of the clone under the base dir.
+            branch (str, optional): Branch passed as `--branch`. Defaults to
+                None, in which case no `--branch` option is given.
+        """
+        auth_url = self._add_token(token, url)
+        clone_args = ['-C', repo_base_dir, 'clone', auth_url, repo_name]
+        if branch:
+            clone_args += ['--branch', branch]
+        logger.debug('clone %s', self.remove_token_from_url(url))
+        response = self._run_git_command(*clone_args)
+        logger.info(response.stdout.decode('utf8'))
+        return response
+
+    def add(self,
+            repo_dir: str,
+            files: List[str] = None,
+            all_files: bool = False):
+        """Stage everything (`all_files`) or just `files` in `repo_dir`;
+        return None without running git when there is nothing to stage.
+        """
+        if not all_files and not files:
+            return None  # this case used to hit an UnboundLocalError
+        paths = ['-A'] if all_files else ['--', *files]
+        rsp = self._run_git_command('-C', repo_dir, 'add', *paths)
+        logger.info(rsp.stdout.decode('utf8'))
+        return rsp
+
+    def commit(self, repo_dir: str, message: str):
+        """Run git commit command in `repo_dir`.
+
+        Args:
+            message (str): commit message, passed verbatim (no shell quoting).
+        """
+        commit_args = ['-C', repo_dir, 'commit', '-m', message]
+        rsp = self._run_git_command(*commit_args)
+        logger.info(rsp.stdout.decode('utf8'))
+        return rsp
+
+    def checkout(self, repo_dir: str, revision: str):
+        cmds = ['-C', '%s' % repo_dir, 'checkout', '%s' % revision]
+        return self._run_git_command(*cmds)
+
+    def new_branch(self, repo_dir: str, revision: str):
+        cmds = ['-C', '%s' % repo_dir, 'checkout', '-b', revision]
+        return self._run_git_command(*cmds)
+
+    def pull(self, repo_dir: str):
+        cmds = ['-C', repo_dir, 'pull']
+        return self._run_git_command(*cmds)
+
+    def push(self,
+             repo_dir: str,
+             token: str,
+             url: str,
+             local_branch: str,
+             remote_branch: str,
+             force: bool = False):
+        """Push `local_branch` of `repo_dir` to `remote_branch` at `url`."""
+        url = self._add_token(token, url)
+        # List args (not str.split) keep a `repo_dir` with spaces intact.
+        refspec = '%s:%s' % (local_branch, remote_branch)
+        push_args = ['-C', repo_dir, 'push', url, refspec]
+        if force:
+            push_args.append('-f')
+        rsp = self._run_git_command(*push_args)
+        logger.info(rsp.stdout.decode('utf8'))
+        return rsp
+
+    def get_repo_remote_url(self, repo_dir: str):
+        """Return the `remote.origin.url` config value of `repo_dir`."""
+        # Pass `repo_dir` as one argument so a path with spaces survives.
+        cmd_args = ['-C', repo_dir, 'config', '--get', 'remote.origin.url']
+        rsp = self._run_git_command(*cmd_args)
+        return rsp.stdout.decode('utf8').strip()
diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py
index 6367f903..d9322144 100644
--- a/modelscope/hub/repository.py
+++ b/modelscope/hub/repository.py
@@ -1,173 +1,97 @@
 import os
-import subprocess
-from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional
 
+from modelscope.hub.errors import GitError
 from modelscope.utils.logger import get_logger
 from .api import ModelScopeConfig
 from .constants import MODELSCOPE_URL_SCHEME
-from .git import git_add, git_checkout, git_clone, git_commit, git_push
-from .utils._subprocess import run_subprocess
+from .git import GitCommandWrapper
 from .utils.utils import get_gitlab_domain
 
 logger = get_logger()
 
 
 class Repository:
+    """Representation local model git repository.
+    """
 
     def __init__(
         self,
-        local_dir: str,
-        clone_from: Optional[str] = None,
-        auth_token: Optional[str] = None,
-        private: Optional[bool] = False,
+        model_dir: str,
+        clone_from: str,
         revision: Optional[str] = 'master',
+        auth_token: Optional[str] = None,
+        git_path: Optional[str] = None,
     ):
         """
         Instantiate a Repository object by cloning the remote ModelScopeHub repo
         Args:
-            local_dir(`str`):
-                local directory to store the model files
-            clone_from(`Optional[str] = None`):
+            model_dir(`str`):
+                The model root directory.
+            clone_from:
                 model id in ModelScope-hub from which git clone
-                You should ignore this parameter when `local_dir` is already a git repo
-            auth_token(`Optional[str]`):
-                token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter
-                as the token is already saved when you login the first time
-            private(`Optional[bool]`):
-                whether the model is private, default to False
             revision(`Optional[str]`):
                 revision of the model you want to clone from. Can be any of a branch, tag or commit hash
+            auth_token(`Optional[str]`):
+                token obtained when calling `HubApi.login()`. Usually you can safely ignore the parameter
+                as the token is already saved when you login the first time, if None, we will use saved token.
+            git_path:(`Optional[str]`):
+                The git command line path, if None, we use 'git'
         """
-        logger.info('Instantiating Repository object...')
-
-        # Create local directory if not exist
-        os.makedirs(local_dir, exist_ok=True)
-        self.local_dir = os.path.join(os.getcwd(), local_dir)
-
-        self.private = private
-
-        # Check git and git-lfs installation
-        self.check_git_versions()
-
-        # Retrieve auth token
-        if not private and isinstance(auth_token, str):
-            logger.warning(
-                'cloning a public repo with a token, which will be ignored')
-            self.token = None
+        self.model_dir = model_dir
+        self.model_base_dir = os.path.dirname(model_dir)
+        self.model_repo_name = os.path.basename(model_dir)
+        if auth_token:
+            self.auth_token = auth_token
         else:
-            if isinstance(auth_token, str):
-                self.token = auth_token
-            else:
-                self.token = ModelScopeConfig.get_token()
-
-            if self.token is None:
-                raise EnvironmentError(
-                    'Token does not exist, the clone will fail for private repo.'
-                    'Please login first.')
-
-        # git clone
-        if clone_from is not None:
-            self.model_id = clone_from
-            logger.info('cloning model repo to %s ...', self.local_dir)
-            git_clone(self.local_dir, self.get_repo_url())
-        else:
-            if is_git_repo(self.local_dir):
-                logger.debug('[Repository] is a valid git repo')
-            else:
-                raise ValueError(
-                    'If not specifying `clone_from`, you need to pass Repository a'
-                    ' valid git clone.')
-
-        # git checkout
-        if isinstance(revision, str) and revision != 'master':
-            git_checkout(revision)
-
-    def push_to_hub(self,
-                    commit_message: str,
-                    revision: Optional[str] = 'master'):
-        """
-        Push changes changes to hub
-
-        Args:
-            commit_message(`str`):
-                commit message describing the changes, it's mandatory
-            revision(`Optional[str]`):
-                remote branch you want to push to, default to `master`
-
-        <Tip>
-            The function complains when local and remote branch are different, please be careful
-        </Tip>
-
-        """
-        git_add(self.local_dir)
-        git_commit(self.local_dir, commit_message)
-
-        logger.info('Pushing changes to repo...')
-        git_push(self.local_dir, revision)
-
-        # TODO: if git push fails, how to retry?
-
-    def check_git_versions(self):
-        """
-        Checks that `git` and `git-lfs` can be run.
-
-        Raises:
-            `EnvironmentError`: if `git` or `git-lfs` are not installed.
-        """
-        try:
-            git_version = run_subprocess('git --version'.split(),
-                                         self.local_dir).stdout.strip()
-        except FileNotFoundError:
-            raise EnvironmentError(
-                'Looks like you do not have git installed, please install.')
+            self.auth_token = ModelScopeConfig.get_token()
+
+        # Build the wrapper with `git_path` first: GitCommandWrapper is a
+        # singleton, so a prior no-arg call would pin the default 'git'.
+        self.git_wrapper = GitCommandWrapper(git_path)
+        if not self.git_wrapper.is_lfs_installed():
+            logger.error('git lfs is not installed, please install.')
+        os.makedirs(self.model_dir, exist_ok=True)
+        url = self._get_model_id_url(clone_from)
+        if os.listdir(self.model_dir):  # directory not empty.
+            remote_url = self._get_remote_url()
+            remote_url = self.git_wrapper.remove_token_from_url(remote_url)
+            if remote_url and remote_url == url:  # need not clone again
+                return
+        self.git_wrapper.clone(self.model_base_dir, self.auth_token, url,
+                               self.model_repo_name, revision)
+
+    def _get_model_id_url(self, model_id):
+        url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{model_id}'
+        return url
 
+    def _get_remote_url(self):
         try:
-            lfs_version = run_subprocess('git-lfs --version'.split(),
-                                         self.local_dir).stdout.strip()
-        except FileNotFoundError:
-            raise EnvironmentError(
-                'Looks like you do not have git-lfs installed, please install.'
-                ' You can install from https://git-lfs.github.com/.'
-                ' Then run `git lfs install` (you only have to do this once).')
-        logger.info(git_version + '\n' + lfs_version)
-
-    def get_repo_url(self) -> str:
-        """
-        Get repo url to clone, according whether the repo is private or not
+            remote = self.git_wrapper.get_repo_remote_url(self.model_dir)
+        except GitError:
+            remote = None
+        return remote
+
+    def push(self,
+             commit_message: str,
+             files: List[str] = list(),
+             all_files: bool = False,
+             branch: Optional[str] = 'master',
+             force: bool = False):
+        """Run git add, git commit and git push for the local repository.
+
+        Args:
+            commit_message (str): commit message
+            files (List[str]): files to stage when `all_files` is False.
+            all_files (bool): stage every change (`git add -A`).
+            branch (Optional[str]): local and remote branch. Defaults to 'master'.
+        """
-        url = None
-
-        if self.private:
-            url = f'{MODELSCOPE_URL_SCHEME}oauth2:{self.token}@{get_gitlab_domain()}/{self.model_id}'
-        else:
-            url = f'{MODELSCOPE_URL_SCHEME}{get_gitlab_domain()}/{self.model_id}'
-
-        if not url:
-            raise ValueError(
-                'Empty repo url, please check clone_from parameter')
-
-        logger.debug('url to clone: %s', str(url))
-
-        return url
-
-
-def is_git_repo(folder: Union[str, Path]) -> bool:
-    """
-    Check if the folder is the root or part of a git repository
-
-    Args:
-        folder (`str`):
-            The folder in which to run the command.
-
-    Returns:
-        `bool`: `True` if the repository is part of a repository, `False`
-        otherwise.
-    """
-    folder_exists = os.path.exists(os.path.join(folder, '.git'))
-    git_branch = subprocess.run(
-        'git branch'.split(),
-        cwd=folder,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
-    return folder_exists and git_branch.returncode == 0
+        url = self.git_wrapper.get_repo_remote_url(self.model_dir)
+        self.git_wrapper.add(self.model_dir, files, all_files)
+        self.git_wrapper.commit(self.model_dir, commit_message)
+        # `force` used to be accepted here but never reached git; forward it.
+        self.git_wrapper.push(
+            repo_dir=self.model_dir,
+            token=self.auth_token,
+            url=url,
+            local_branch=branch, remote_branch=branch, force=force)
diff --git a/modelscope/hub/utils/_subprocess.py b/modelscope/hub/utils/_subprocess.py
deleted file mode 100644
index 77e9fc48..00000000
--- a/modelscope/hub/utils/_subprocess.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import subprocess
-from typing import List
-
-
-def run_subprocess(command: List[str],
-                   folder: str,
-                   check=True,
-                   **kwargs) -> subprocess.CompletedProcess:
-    """
-    Method to run subprocesses. Calling this will capture the `stderr` and `stdout`,
-    please call `subprocess.run` manually in case you would like for them not to
-    be captured.
-
-    Args:
-        command (`List[str]`):
-            The command to execute as a list of strings.
-        folder (`str`):
-            The folder in which to run the command.
-        check (`bool`, *optional*, defaults to `True`):
-            Setting `check` to `True` will raise a `subprocess.CalledProcessError`
-            when the subprocess has a non-zero exit code.
-        kwargs (`Dict[str]`):
-            Keyword arguments to be passed to the `subprocess.run` underlying command.
-
-    Returns:
-        `subprocess.CompletedProcess`: The completed process.
-    """
-    if isinstance(command, str):
-        raise ValueError(
-            '`run_subprocess` should be called with a list of strings.')
-
-    return subprocess.run(
-        command,
-        stderr=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        check=check,
-        encoding='utf-8',
-        cwd=folder,
-        **kwargs,
-    )
diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py
index d44cd7c1..e0adc013 100644
--- a/tests/hub/test_hub_operation.py
+++ b/tests/hub/test_hub_operation.py
@@ -1,14 +1,13 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import os
-import subprocess
 import tempfile
 import unittest
 import uuid
 
-from modelscope.hub.api import HubApi, ModelScopeConfig
+from modelscope.hub.api import HubApi
 from modelscope.hub.file_download import model_file_download
+from modelscope.hub.repository import Repository
 from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.hub.utils.utils import get_gitlab_domain
 
 USER_NAME = 'maasadmin'
 PASSWORD = '12345678'
@@ -17,40 +16,7 @@ model_chinese_name = '达摩卡通化模型'
 model_org = 'unittest'
 DEFAULT_GIT_PATH = 'git'
 
-
-class GitError(Exception):
-    pass
-
-
-# TODO make thest git operation to git library after merge code.
-def run_git_command(git_path, *args) -> subprocess.CompletedProcess:
-    response = subprocess.run([git_path, *args], capture_output=True)
-    try:
-        response.check_returncode()
-        return response.stdout.decode('utf8')
-    except subprocess.CalledProcessError as error:
-        raise GitError(error.stderr.decode('utf8'))
-
-
-# for public project, token can None, private repo, there must token.
-def clone(local_dir: str, token: str, url: str):
-    url = url.replace('//', '//oauth2:%s@' % token)
-    clone_args = '-C %s clone %s' % (local_dir, url)
-    clone_args = clone_args.split(' ')
-    stdout = run_git_command(DEFAULT_GIT_PATH, *clone_args)
-    print('stdout: %s' % stdout)
-
-
-def push(local_dir: str, token: str, url: str):
-    url = url.replace('//', '//oauth2:%s@' % token)
-    push_args = '-C %s push %s' % (local_dir, url)
-    push_args = push_args.split(' ')
-    stdout = run_git_command(DEFAULT_GIT_PATH, *push_args)
-    print('stdout: %s' % stdout)
-
-
-sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx'
-download_model_file_name = 'mnist-12.onnx'
+download_model_file_name = 'test.bin'
 
 
 class HubOperationTest(unittest.TestCase):
@@ -67,6 +33,13 @@ class HubOperationTest(unittest.TestCase):
             chinese_name=model_chinese_name,
             visibility=5,  # 1-private, 5-public
             license='apache-2.0')
+        temporary_dir = tempfile.mkdtemp()
+        self.model_dir = os.path.join(temporary_dir, self.model_name)
+        repo = Repository(self.model_dir, clone_from=self.model_id)
+        os.chdir(self.model_dir)
+        os.system("echo 'testtest'>%s"
+                  % os.path.join(self.model_dir, 'test.bin'))
+        repo.push('add model', all_files=True)
 
     def tearDown(self):
         os.chdir(self.old_cwd)
@@ -83,43 +56,10 @@ class HubOperationTest(unittest.TestCase):
             else:
                 raise
 
-    # Note that this can be done via git operation once model repo
-    # has been created. Git-Op is the RECOMMENDED model upload approach
-    def test_model_upload(self):
-        url = f'http://{get_gitlab_domain()}/{self.model_id}'
-        print(url)
-        temporary_dir = tempfile.mkdtemp()
-        os.chdir(temporary_dir)
-        cmd_args = 'clone %s' % url
-        cmd_args = cmd_args.split(' ')
-        out = run_git_command('git', *cmd_args)
-        print(out)
-        repo_dir = os.path.join(temporary_dir, self.model_name)
-        os.chdir(repo_dir)
-        os.system('touch file1')
-        os.system('git add file1')
-        os.system("git commit -m 'Test'")
-        token = ModelScopeConfig.get_token()
-        push(repo_dir, token, url)
-
     def test_download_single_file(self):
-        url = f'http://{get_gitlab_domain()}/{self.model_id}'
-        print(url)
-        temporary_dir = tempfile.mkdtemp()
-        os.chdir(temporary_dir)
-        os.system('git clone %s' % url)
-        repo_dir = os.path.join(temporary_dir, self.model_name)
-        os.chdir(repo_dir)
-        os.system('wget %s' % sample_model_url)
-        os.system('git add .')
-        os.system("git commit -m 'Add file'")
-        token = ModelScopeConfig.get_token()
-        push(repo_dir, token, url)
-        assert os.path.exists(
-            os.path.join(temporary_dir, self.model_name,
-                         download_model_file_name))
         downloaded_file = model_file_download(
             model_id=self.model_id, file_path=download_model_file_name)
+        assert os.path.exists(downloaded_file)
         mdtime1 = os.path.getmtime(downloaded_file)
         # download again
         downloaded_file = model_file_download(
@@ -128,18 +68,6 @@ class HubOperationTest(unittest.TestCase):
         assert mdtime1 == mdtime2
 
     def test_snapshot_download(self):
-        url = f'http://{get_gitlab_domain()}/{self.model_id}'
-        print(url)
-        temporary_dir = tempfile.mkdtemp()
-        os.chdir(temporary_dir)
-        os.system('git clone %s' % url)
-        repo_dir = os.path.join(temporary_dir, self.model_name)
-        os.chdir(repo_dir)
-        os.system('wget %s' % sample_model_url)
-        os.system('git add .')
-        os.system("git commit -m 'Add file'")
-        token = ModelScopeConfig.get_token()
-        push(repo_dir, token, url)
         snapshot_path = snapshot_download(model_id=self.model_id)
         downloaded_file_path = os.path.join(snapshot_path,
                                             download_model_file_name)
diff --git a/tests/hub/test_hub_private_repository.py b/tests/hub/test_hub_private_repository.py
new file mode 100644
index 00000000..b6e3536c
--- /dev/null
+++ b/tests/hub/test_hub_private_repository.py
@@ -0,0 +1,76 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import tempfile
+import unittest
+import uuid
+
+from modelscope.hub.api import HubApi
+from modelscope.hub.errors import GitError
+from modelscope.hub.repository import Repository
+
+USER_NAME = 'maasadmin'
+PASSWORD = '12345678'
+
+USER_NAME2 = 'sdkdev'
+model_chinese_name = '达摩卡通化模型'
+model_org = 'unittest'
+DEFAULT_GIT_PATH = 'git'
+
+sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx'
+download_model_file_name = 'mnist-12.onnx'
+
+
+class HubPrivateRepositoryTest(unittest.TestCase):
+
+    def setUp(self):
+        self.old_cwd = os.getcwd()
+        self.api = HubApi()
+        # note this is temporary before official account management is ready
+        self.token, _ = self.api.login(USER_NAME, PASSWORD)
+        self.model_name = uuid.uuid4().hex
+        self.model_id = '%s/%s' % (model_org, self.model_name)
+        self.api.create_model(
+            model_id=self.model_id,
+            chinese_name=model_chinese_name,
+            visibility=1,  # 1-private, 5-public
+            license='apache-2.0')
+
+    def tearDown(self):
+        self.api.login(USER_NAME, PASSWORD)
+        os.chdir(self.old_cwd)
+        self.api.delete_model(model_id=self.model_id)
+
+    def test_clone_private_repo_no_permission(self):
+        token, _ = self.api.login(USER_NAME2, PASSWORD)
+        temporary_dir = tempfile.mkdtemp()
+        local_dir = os.path.join(temporary_dir, self.model_name)
+        with self.assertRaises(GitError) as cm:
+            Repository(local_dir, clone_from=self.model_id, auth_token=token)
+
+        print(cm.exception)
+        assert not os.path.exists(os.path.join(local_dir, 'README.md'))
+
+    def test_clone_private_repo_has_permission(self):
+        temporary_dir = tempfile.mkdtemp()
+        local_dir = os.path.join(temporary_dir, self.model_name)
+        repo1 = Repository(
+            local_dir, clone_from=self.model_id, auth_token=self.token)
+        print(repo1.model_dir)
+        assert os.path.exists(os.path.join(local_dir, 'README.md'))
+
+    def test_initlize_repo_multiple_times(self):
+        temporary_dir = tempfile.mkdtemp()
+        local_dir = os.path.join(temporary_dir, self.model_name)
+        repo1 = Repository(
+            local_dir, clone_from=self.model_id, auth_token=self.token)
+        print(repo1.model_dir)
+        assert os.path.exists(os.path.join(local_dir, 'README.md'))
+        repo2 = Repository(
+            local_dir, clone_from=self.model_id,
+            auth_token=self.token)  # skip clone
+        print(repo2.model_dir)
+        assert repo1.model_dir == repo2.model_dir
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/hub/test_hub_repository.py b/tests/hub/test_hub_repository.py
new file mode 100644
index 00000000..7b1cc751
--- /dev/null
+++ b/tests/hub/test_hub_repository.py
@@ -0,0 +1,107 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import shutil
+import tempfile
+import time
+import unittest
+import uuid
+from os.path import expanduser
+
+from requests import delete
+
+from modelscope.hub.api import HubApi
+from modelscope.hub.errors import NotExistError
+from modelscope.hub.file_download import model_file_download
+from modelscope.hub.repository import Repository
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+logger.setLevel('DEBUG')
+USER_NAME = 'maasadmin'
+PASSWORD = '12345678'
+
+model_chinese_name = '达摩卡通化模型'
+model_org = 'unittest'
+DEFAULT_GIT_PATH = 'git'
+
+download_model_file_name = 'mnist-12.onnx'
+
+
+def delete_credential():
+    # ignore_errors: the credential dir may already be absent (not logged in).
+    shutil.rmtree(expanduser('~/.modelscope/credentials'), ignore_errors=True)
+
+
+def delete_stored_git_credential(user):
+    """Drop every line mentioning `user` from ~/.git-credentials, if any."""
+    credential_path = expanduser('~/.git-credentials')
+    if os.path.exists(credential_path):
+        with open(credential_path, 'r+') as f:
+            # Build a filtered list instead of calling lines.remove() while
+            # iterating, which skipped the line after each removed one.
+            lines = [line for line in f.readlines() if user not in line]
+            f.seek(0)
+            f.write(''.join(lines))
+            f.truncate()
+
+
+class HubRepositoryTest(unittest.TestCase):
+    """Clone/push tests run against a throwaway public model repository."""
+
+    def setUp(self):
+        self.api = HubApi()
+        # note this is temporary before official account management is ready
+        self.api.login(USER_NAME, PASSWORD)
+        self.model_name = uuid.uuid4().hex
+        self.model_id = '%s/%s' % (model_org, self.model_name)
+        self.api.create_model(
+            model_id=self.model_id,
+            chinese_name=model_chinese_name,
+            visibility=5,  # 1-private, 5-public
+            license='apache-2.0')
+        temporary_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, temporary_dir, ignore_errors=True)
+        self.model_dir = os.path.join(temporary_dir, self.model_name)
+
+    def tearDown(self):
+        # Re-login first: a test may have wiped the stored credentials.
+        self.api.login(USER_NAME, PASSWORD)
+        self.api.delete_model(model_id=self.model_id)
+
+    def test_clone_repo(self):
+        Repository(self.model_dir, clone_from=self.model_id)
+        assert os.path.exists(os.path.join(self.model_dir, 'README.md'))
+
+    def test_clone_public_model_without_token(self):
+        delete_credential()
+        delete_stored_git_credential(USER_NAME)
+        Repository(self.model_dir, clone_from=self.model_id)
+        assert os.path.exists(os.path.join(self.model_dir, 'README.md'))
+
+    def test_push_all(self):
+        repo = Repository(self.model_dir, clone_from=self.model_id)
+        assert os.path.exists(os.path.join(self.model_dir, 'README.md'))
+        self.addCleanup(os.chdir, os.getcwd())  # restore cwd afterwards
+        os.chdir(self.model_dir)
+        os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py'))
+        os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py'))
+        repo.push('test', all_files=True)
+        assert os.path.exists(model_file_download(self.model_id, 'add1.py'))
+        assert os.path.exists(model_file_download(self.model_id, 'add2.py'))
+
+    def test_push_files(self):
+        repo = Repository(self.model_dir, clone_from=self.model_id)
+        assert os.path.exists(os.path.join(self.model_dir, 'README.md'))
+        os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py'))
+        os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py'))
+        os.system("echo '333'>%s" % os.path.join(self.model_dir, 'add3.py'))
+        repo.push('test', files=['add1.py', 'add2.py'], all_files=False)
+        assert os.path.exists(model_file_download(self.model_id, 'add1.py'))
+        assert os.path.exists(model_file_download(self.model_id, 'add2.py'))
+        with self.assertRaises(NotExistError) as cm:
+            model_file_download(self.model_id, 'add3.py')
+        print(cm.exception)
+
+
+if __name__ == '__main__':
+    unittest.main()

From 0acbfe166314749e34f84402184d1827880ae008 Mon Sep 17 00:00:00 2001
From: "yingda.chen" <yingda.chen@alibaba-inc.com>
Date: Fri, 24 Jun 2022 23:54:10 +0800
Subject: [PATCH 2/5] [to #42322933] interface refine with doc         Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9159678

---
 modelscope/hub/api.py           |  1 -
 modelscope/hub/constants.py     | 13 +++++++++++++
 modelscope/models/base.py       | 13 +++++++++----
 modelscope/utils/hub.py         |  5 +++--
 tests/hub/test_hub_examples.py  |  8 +++-----
 tests/hub/test_hub_operation.py |  7 ++++---
 6 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index f4f31280..d102219b 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -1,4 +1,3 @@
-import imp
 import os
 import pickle
 import subprocess
diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py
index a38f9afb..08f7c31d 100644
--- a/modelscope/hub/constants.py
+++ b/modelscope/hub/constants.py
@@ -6,3 +6,16 @@ DEFAULT_MODELSCOPE_GROUP = 'damo'
 MODEL_ID_SEPARATOR = '/'
 
 LOGGER_NAME = 'ModelScopeHub'
+
+
+class Licenses(object):
+    APACHE_V2 = 'Apache License 2.0'
+    GPL = 'GPL'
+    LGPL = 'LGPL'
+    MIT = 'MIT'
+
+
+class ModelVisibility(object):
+    PRIVATE = 1
+    INTERNAL = 3
+    PUBLIC = 5
diff --git a/modelscope/models/base.py b/modelscope/models/base.py
index cb6d2b0e..40929a21 100644
--- a/modelscope/models/base.py
+++ b/modelscope/models/base.py
@@ -2,7 +2,7 @@
 
 import os.path as osp
 from abc import ABC, abstractmethod
-from typing import Dict, Union
+from typing import Dict, Optional, Union
 
 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.builder import build_model
@@ -42,13 +42,18 @@ class Model(ABC):
         return input
 
     @classmethod
-    def from_pretrained(cls, model_name_or_path: str, *model_args, **kwargs):
-        """ Instantiate a model from local directory or remote model repo
+    def from_pretrained(cls,
+                        model_name_or_path: str,
+                        revision: Optional[str] = 'master',
+                        *model_args,
+                        **kwargs):
+        """ Instantiate a model from local directory or remote model repo. Note
+        that when loading from remote, the model revision can be specified.
         """
         if osp.exists(model_name_or_path):
             local_model_dir = model_name_or_path
         else:
-            local_model_dir = snapshot_download(model_name_or_path)
+            local_model_dir = snapshot_download(model_name_or_path, revision)
         logger.info(f'initialize model from {local_model_dir}')
         cfg = Config.from_file(
             osp.join(local_model_dir, ModelFile.CONFIGURATION))
diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py
index 868e751b..c427b7a3 100644
--- a/modelscope/utils/hub.py
+++ b/modelscope/utils/hub.py
@@ -6,6 +6,7 @@ from typing import List, Optional, Union
 
 from requests import HTTPError
 
+from modelscope.hub.constants import Licenses, ModelVisibility
 from modelscope.hub.file_download import model_file_download
 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.utils.config import Config
@@ -16,8 +17,8 @@ def create_model_if_not_exist(
         api,
         model_id: str,
         chinese_name: str,
-        visibility: Optional[int] = 5,  # 1-private, 5-public
-        license: Optional[str] = 'apache-2.0',
+        visibility: Optional[int] = ModelVisibility.PUBLIC,
+        license: Optional[str] = Licenses.APACHE_V2,
         revision: Optional[str] = 'master'):
     exists = True
     try:
diff --git a/tests/hub/test_hub_examples.py b/tests/hub/test_hub_examples.py
index b63445af..b21cae51 100644
--- a/tests/hub/test_hub_examples.py
+++ b/tests/hub/test_hub_examples.py
@@ -1,9 +1,9 @@
 import unittest
 
-from maas_hub.maas_api import MaasApi
-
+from modelscope.hub.api import HubApi
 from modelscope.utils.hub import create_model_if_not_exist
 
+# note this is temporary before official account management is ready
 USER_NAME = 'maasadmin'
 PASSWORD = '12345678'
 
@@ -11,8 +11,7 @@ PASSWORD = '12345678'
 class HubExampleTest(unittest.TestCase):
 
     def setUp(self):
-        self.api = MaasApi()
-        # note this is temporary before official account management is ready
+        self.api = HubApi()
         self.api.login(USER_NAME, PASSWORD)
 
     @unittest.skip('to be used for local test only')
@@ -22,7 +21,6 @@ class HubExampleTest(unittest.TestCase):
         model_chinese_name = '达摩卡通化模型'
         model_org = 'damo'
         model_id = '%s/%s' % (model_org, model_name)
-
         created = create_model_if_not_exist(self.api, model_id,
                                             model_chinese_name)
         if not created:
diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py
index e0adc013..035b183e 100644
--- a/tests/hub/test_hub_operation.py
+++ b/tests/hub/test_hub_operation.py
@@ -4,7 +4,8 @@ import tempfile
 import unittest
 import uuid
 
-from modelscope.hub.api import HubApi
+from modelscope.hub.api import HubApi, ModelScopeConfig
+from modelscope.hub.constants import Licenses, ModelVisibility
 from modelscope.hub.file_download import model_file_download
 from modelscope.hub.repository import Repository
 from modelscope.hub.snapshot_download import snapshot_download
@@ -31,8 +32,8 @@ class HubOperationTest(unittest.TestCase):
         self.api.create_model(
             model_id=self.model_id,
             chinese_name=model_chinese_name,
-            visibility=5,  # 1-private, 5-public
-            license='apache-2.0')
+            visibility=ModelVisibility.PUBLIC,
+            license=Licenses.APACHE_V2)
         temporary_dir = tempfile.mkdtemp()
         self.model_dir = os.path.join(temporary_dir, self.model_name)
         repo = Repository(self.model_dir, clone_from=self.model_id)

From c8e2e6de0ebdb75350ef55cffca58f8a94530c12 Mon Sep 17 00:00:00 2001
From: Yingda Chen <yingda.chen@alibaba-inc.com>
Date: Sat, 25 Jun 2022 08:36:48 +0800
Subject: [PATCH 3/5] [to #42794773] rename pydataset to msdataset

---
 docs/source/api/modelscope.pydatasets.rst     |  8 +++----
 docs/source/api/modelscope.rst                |  2 +-
 docs/source/quick_start.md                    | 10 ++++----
 modelscope/datasets/__init__.py               |  1 +
 modelscope/{pydatasets => datasets}/config.py |  0
 .../py_dataset.py => datasets/ms_dataset.py}  | 24 +++++++++----------
 .../utils/__init__.py                         |  0
 .../{pydatasets => datasets}/utils/ms_api.py  |  4 ++--
 modelscope/hub/file_download.py               |  2 +-
 modelscope/pipelines/base.py                  |  6 ++---
 modelscope/pydatasets/__init__.py             |  1 -
 tests/{pydatasets => datasets}/__init__.py    |  0
 .../test_ms_dataset.py}                       | 19 +++++++--------
 tests/pipelines/test_action_recognition.py    |  2 +-
 tests/pipelines/test_image_matting.py         |  6 ++---
 tests/pipelines/test_text_classification.py   | 12 +++++-----
 16 files changed, 48 insertions(+), 49 deletions(-)
 create mode 100644 modelscope/datasets/__init__.py
 rename modelscope/{pydatasets => datasets}/config.py (100%)
 rename modelscope/{pydatasets/py_dataset.py => datasets/ms_dataset.py} (96%)
 rename modelscope/{pydatasets => datasets}/utils/__init__.py (100%)
 rename modelscope/{pydatasets => datasets}/utils/ms_api.py (95%)
 delete mode 100644 modelscope/pydatasets/__init__.py
 rename tests/{pydatasets => datasets}/__init__.py (100%)
 rename tests/{pydatasets/test_py_dataset.py => datasets/test_ms_dataset.py} (88%)

diff --git a/docs/source/api/modelscope.pydatasets.rst b/docs/source/api/modelscope.pydatasets.rst
index 2508a91f..33f2fab5 100644
--- a/docs/source/api/modelscope.pydatasets.rst
+++ b/docs/source/api/modelscope.pydatasets.rst
@@ -1,7 +1,7 @@
-modelscope.pydatasets package
+modelscope.datasets package
 =============================
 
-.. automodule:: modelscope.pydatasets
+.. automodule:: modelscope.datasets
    :members:
    :undoc-members:
    :show-inheritance:
@@ -9,10 +9,10 @@ modelscope.pydatasets package
 Submodules
 ----------
 
-modelscope.pydatasets.py\_dataset module
+modelscope.datasets.py\_dataset module
 ----------------------------------------
 
-.. automodule:: modelscope.pydatasets.py_dataset
+.. automodule:: modelscope.datasets.ms_dataset
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/source/api/modelscope.rst b/docs/source/api/modelscope.rst
index efab568b..f1389717 100644
--- a/docs/source/api/modelscope.rst
+++ b/docs/source/api/modelscope.rst
@@ -16,7 +16,7 @@ Subpackages
    modelscope.models
    modelscope.pipelines
    modelscope.preprocessors
-   modelscope.pydatasets
+   modelscope.datasets
    modelscope.trainers
    modelscope.utils
 
diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md
index 7148f27f..91509fa4 100644
--- a/docs/source/quick_start.md
+++ b/docs/source/quick_start.md
@@ -3,7 +3,7 @@
 ## python环境配置
 首先，参考[文档](https://docs.anaconda.com/anaconda/install/) 安装配置Anaconda环境
 
-安装完成后，执行如下命令为maas library创建对应的python环境。
+安装完成后，执行如下命令为modelscope library创建对应的python环境。
 ```shell
 conda create -n modelscope python=3.6
 conda activate modelscope
@@ -105,15 +105,15 @@ import cv2
 import os.path as osp
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
-from modelscope.pydatasets import PyDataset
+from modelscope.datasets import MsDataset
 
-# 使用图像url构建PyDataset，此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹
+# 使用图像url构建MsDataset，此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹
 input_location = [
     'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
 ]
-dataset = PyDataset.load(input_location, target='image')
+dataset = MsDataset.load(input_location, target='image')
 img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')
-# 输入为PyDataset时，输出的结果为迭代器
+# 输入为MsDataset时，输出的结果为迭代器
 result = img_matting(dataset)
 cv2.imwrite('result.png', next(result)['output_png'])
 print(f'Output written to {osp.abspath("result.png")}')
diff --git a/modelscope/datasets/__init__.py b/modelscope/datasets/__init__.py
new file mode 100644
index 00000000..8e0647bb
--- /dev/null
+++ b/modelscope/datasets/__init__.py
@@ -0,0 +1 @@
+from .ms_dataset import MsDataset
diff --git a/modelscope/pydatasets/config.py b/modelscope/datasets/config.py
similarity index 100%
rename from modelscope/pydatasets/config.py
rename to modelscope/datasets/config.py
diff --git a/modelscope/pydatasets/py_dataset.py b/modelscope/datasets/ms_dataset.py
similarity index 96%
rename from modelscope/pydatasets/py_dataset.py
rename to modelscope/datasets/ms_dataset.py
index 49137253..80ffc77a 100644
--- a/modelscope/pydatasets/py_dataset.py
+++ b/modelscope/datasets/ms_dataset.py
@@ -10,8 +10,8 @@ from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES
 from datasets.utils.file_utils import (is_relative_path,
                                        relative_to_absolute_path)
 
-from modelscope.pydatasets.config import MS_DATASETS_CACHE
-from modelscope.pydatasets.utils.ms_api import MsApi
+from modelscope.datasets.config import MS_DATASETS_CACHE
+from modelscope.datasets.utils.ms_api import MsApi
 from modelscope.utils.constant import Hubs
 from modelscope.utils.logger import get_logger
 
@@ -28,9 +28,9 @@ def format_list(para) -> List:
     return para
 
 
-class PyDataset:
+class MsDataset:
     _hf_ds = None  # holds the underlying HuggingFace Dataset
-    """A PyDataset backed by hugging face Dataset."""
+    """A MsDataset backed by hugging face Dataset."""
 
     def __init__(self, hf_ds: Dataset, target: Optional[str] = None):
         self._hf_ds = hf_ds
@@ -49,7 +49,7 @@ class PyDataset:
     @classmethod
     def from_hf_dataset(cls,
                         hf_ds: Dataset,
-                        target: str = None) -> Union[dict, 'PyDataset']:
+                        target: str = None) -> Union[dict, 'MsDataset']:
         if isinstance(hf_ds, Dataset):
             return cls(hf_ds, target)
         if len(hf_ds.keys()) == 1:
@@ -68,8 +68,8 @@ class PyDataset:
         data_files: Optional[Union[str, Sequence[str],
                                    Mapping[str, Union[str,
                                                       Sequence[str]]]]] = None
-    ) -> Union[dict, 'PyDataset']:
-        """Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
+    ) -> Union[dict, 'MsDataset']:
+        """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
             Args:
 
                 dataset_name (str): Path or name of the dataset.
@@ -82,7 +82,7 @@ class PyDataset:
                 hub (Hubs, optional): When loading from a remote hub, where it is from
 
             Returns:
-                PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset.
+                MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset.
             """
         if hub == Hubs.huggingface:
             dataset = hf_load_dataset(
@@ -92,9 +92,9 @@ class PyDataset:
                 split=split,
                 data_dir=data_dir,
                 data_files=data_files)
-            return PyDataset.from_hf_dataset(dataset, target=target)
+            return MsDataset.from_hf_dataset(dataset, target=target)
         else:
-            return PyDataset._load_ms_dataset(
+            return MsDataset._load_ms_dataset(
                 dataset_name,
                 target=target,
                 subset_name=subset_name,
@@ -114,7 +114,7 @@ class PyDataset:
         data_files: Optional[Union[str, Sequence[str],
                                    Mapping[str, Union[str,
                                                       Sequence[str]]]]] = None
-    ) -> Union[dict, 'PyDataset']:
+    ) -> Union[dict, 'MsDataset']:
         if isinstance(dataset_name, str):
             use_hf = False
             if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \
@@ -153,7 +153,7 @@ class PyDataset:
         else:
             raise TypeError('path must be a str or a list, but got'
                             f' {type(dataset_name)}')
-        return PyDataset.from_hf_dataset(dataset, target=target)
+        return MsDataset.from_hf_dataset(dataset, target=target)
 
     def to_torch_dataset_with_processors(
         self,
diff --git a/modelscope/pydatasets/utils/__init__.py b/modelscope/datasets/utils/__init__.py
similarity index 100%
rename from modelscope/pydatasets/utils/__init__.py
rename to modelscope/datasets/utils/__init__.py
diff --git a/modelscope/pydatasets/utils/ms_api.py b/modelscope/datasets/utils/ms_api.py
similarity index 95%
rename from modelscope/pydatasets/utils/ms_api.py
rename to modelscope/datasets/utils/ms_api.py
index 04052cc4..a478766f 100644
--- a/modelscope/pydatasets/utils/ms_api.py
+++ b/modelscope/datasets/utils/ms_api.py
@@ -4,8 +4,8 @@ from typing import Optional
 
 import requests
 
-from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH,
-                                          MS_HUB_ENDPOINT)
+from modelscope.datasets.config import (DOWNLOADED_DATASETS_PATH,
+                                        MS_HUB_ENDPOINT)
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
index e5c64f1c..b92bf89c 100644
--- a/modelscope/hub/file_download.py
+++ b/modelscope/hub/file_download.py
@@ -187,7 +187,7 @@ def get_file_download_url(model_id: str, file_path: str, revision: str):
     """
     Format file download url according to `model_id`, `revision` and `file_path`.
     e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`,
-    the resulted download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md
+    the resulted download url is: https://modelscope.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md
     """
     download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}'
     return download_url_template.format(
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index 7e32f543..cf4ce8fd 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -4,17 +4,17 @@ import os.path as osp
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Generator, List, Union
 
+from modelscope.datasets import MsDataset
 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.base import Model
 from modelscope.preprocessors import Preprocessor
-from modelscope.pydatasets import PyDataset
 from modelscope.utils.config import Config
 from modelscope.utils.logger import get_logger
 from .outputs import TASK_OUTPUTS
 from .util import is_model, is_official_hub_path
 
 Tensor = Union['torch.Tensor', 'tf.Tensor']
-Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
+Input = Union[str, tuple, MsDataset, 'PIL.Image.Image', 'numpy.ndarray']
 InputModel = Union[str, Model]
 
 output_keys = [
@@ -85,7 +85,7 @@ class Pipeline(ABC):
             for ele in input:
                 output.append(self._process_single(ele, *args, **post_kwargs))
 
-        elif isinstance(input, PyDataset):
+        elif isinstance(input, MsDataset):
             return self._process_iterator(input, *args, **post_kwargs)
 
         else:
diff --git a/modelscope/pydatasets/__init__.py b/modelscope/pydatasets/__init__.py
deleted file mode 100644
index a1ed1d93..00000000
--- a/modelscope/pydatasets/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .py_dataset import PyDataset
diff --git a/tests/pydatasets/__init__.py b/tests/datasets/__init__.py
similarity index 100%
rename from tests/pydatasets/__init__.py
rename to tests/datasets/__init__.py
diff --git a/tests/pydatasets/test_py_dataset.py b/tests/datasets/test_ms_dataset.py
similarity index 88%
rename from tests/pydatasets/test_py_dataset.py
rename to tests/datasets/test_ms_dataset.py
index e84f240a..d08258ac 100644
--- a/tests/pydatasets/test_py_dataset.py
+++ b/tests/datasets/test_ms_dataset.py
@@ -2,11 +2,10 @@ import unittest
 
 import datasets as hfdata
 
+from modelscope.datasets import MsDataset
 from modelscope.models import Model
 from modelscope.preprocessors import SequenceClassificationPreprocessor
 from modelscope.preprocessors.base import Preprocessor
-from modelscope.pydatasets import PyDataset
-from modelscope.utils.constant import Hubs
 from modelscope.utils.test_utils import require_tf, require_torch, test_level
 
 
@@ -31,15 +30,15 @@ class ImgPreprocessor(Preprocessor):
         }
 
 
-class PyDatasetTest(unittest.TestCase):
+class MsDatasetTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_ds_basic(self):
-        ms_ds_full = PyDataset.load('squad')
+        ms_ds_full = MsDataset.load('squad')
         ms_ds_full_hf = hfdata.load_dataset('squad')
-        ms_ds_train = PyDataset.load('squad', split='train')
+        ms_ds_train = MsDataset.load('squad', split='train')
         ms_ds_train_hf = hfdata.load_dataset('squad', split='train')
-        ms_image_train = PyDataset.from_hf_dataset(
+        ms_image_train = MsDataset.from_hf_dataset(
             hfdata.load_dataset('beans', split='train'))
         self.assertEqual(ms_ds_full['train'][0], ms_ds_full_hf['train'][0])
         self.assertEqual(ms_ds_full['validation'][0],
@@ -58,7 +57,7 @@ class PyDatasetTest(unittest.TestCase):
             nlp_model.model_dir,
             first_sequence='context',
             second_sequence=None)
-        ms_ds_train = PyDataset.load('squad', split='train')
+        ms_ds_train = MsDataset.load('squad', split='train')
         pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
         import torch
         dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
@@ -75,7 +74,7 @@ class PyDatasetTest(unittest.TestCase):
             nlp_model.model_dir,
             first_sequence='context',
             second_sequence=None)
-        ms_ds_train = PyDataset.load('squad', split='train')
+        ms_ds_train = MsDataset.load('squad', split='train')
         tf_dataset = ms_ds_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,
@@ -86,7 +85,7 @@ class PyDatasetTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     @require_torch
     def test_to_torch_dataset_img(self):
-        ms_image_train = PyDataset.from_hf_dataset(
+        ms_image_train = MsDataset.from_hf_dataset(
             hfdata.load_dataset('beans', split='train'))
         pt_dataset = ms_image_train.to_torch_dataset(
             preprocessors=ImgPreprocessor(
@@ -100,7 +99,7 @@ class PyDatasetTest(unittest.TestCase):
     def test_to_tf_dataset_img(self):
         import tensorflow as tf
         tf.compat.v1.enable_eager_execution()
-        ms_image_train = PyDataset.load('beans', split='train')
+        ms_image_train = MsDataset.load('beans', split='train')
         tf_dataset = ms_image_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,
diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py
index b524ca18..7bb3bb90 100644
--- a/tests/pipelines/test_action_recognition.py
+++ b/tests/pipelines/test_action_recognition.py
@@ -7,9 +7,9 @@ import unittest
 
 import cv2
 
+from modelscope.datasets import MsDataset
 from modelscope.fileio import File
 from modelscope.pipelines import pipeline
-from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.test_utils import test_level
 
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index 1b547e14..13576d44 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -6,9 +6,9 @@ import unittest
 
 import cv2
 
+from modelscope.datasets import MsDataset
 from modelscope.fileio import File
 from modelscope.pipelines import pipeline
-from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.test_utils import test_level
 
@@ -37,7 +37,7 @@ class ImageMattingTest(unittest.TestCase):
         # alternatively:
         # input_location = '/dir/to/images'
 
-        dataset = PyDataset.load(input_location, target='image')
+        dataset = MsDataset.load(input_location, target='image')
         img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         # note that for dataset output, the inference-output is a Generator that can be iterated.
         result = img_matting(dataset)
@@ -62,7 +62,7 @@ class ImageMattingTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_modelscope_dataset(self):
-        dataset = PyDataset.load('beans', split='train', target='image')
+        dataset = MsDataset.load('beans', split='train', target='image')
         img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         result = img_matting(dataset)
         for i in range(10):
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index 9e5f15b9..bf6de28e 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -2,10 +2,10 @@
 import shutil
 import unittest
 
+from modelscope.datasets import MsDataset
 from modelscope.models import Model
 from modelscope.pipelines import SequenceClassificationPipeline, pipeline
 from modelscope.preprocessors import SequenceClassificationPreprocessor
-from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import Hubs, Tasks
 from modelscope.utils.test_utils import test_level
 
@@ -28,7 +28,7 @@ class SequenceClassificationTest(unittest.TestCase):
 
         print(data)
 
-    def printDataset(self, dataset: PyDataset):
+    def printDataset(self, dataset: MsDataset):
         for i, r in enumerate(dataset):
             if i > 10:
                 break
@@ -50,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase):
         text_classification = pipeline(
             task=Tasks.text_classification, model=self.model_id)
         result = text_classification(
-            PyDataset.load(
+            MsDataset.load(
                 'glue',
                 subset_name='sst2',
                 split='train',
@@ -62,7 +62,7 @@ class SequenceClassificationTest(unittest.TestCase):
     def test_run_with_default_model(self):
         text_classification = pipeline(task=Tasks.text_classification)
         result = text_classification(
-            PyDataset.load(
+            MsDataset.load(
                 'glue',
                 subset_name='sst2',
                 split='train',
@@ -78,7 +78,7 @@ class SequenceClassificationTest(unittest.TestCase):
         text_classification = pipeline(
             Tasks.text_classification, model=model, preprocessor=preprocessor)
         # loaded from huggingface dataset
-        dataset = PyDataset.load(
+        dataset = MsDataset.load(
             'glue',
             subset_name='sst2',
             split='train',
@@ -91,7 +91,7 @@ class SequenceClassificationTest(unittest.TestCase):
     def test_run_with_modelscope_dataset(self):
         text_classification = pipeline(task=Tasks.text_classification)
         # loaded from modelscope dataset
-        dataset = PyDataset.load(
+        dataset = MsDataset.load(
             'squad', split='train', target='context', hub=Hubs.modelscope)
         result = text_classification(dataset)
         self.printDataset(result)

From b6e3fd80b0299395cc595bad45b87eccb4c82b07 Mon Sep 17 00:00:00 2001
From: Yingda Chen <yingda.chen@alibaba-inc.com>
Date: Sat, 25 Jun 2022 08:50:28 +0800
Subject: [PATCH 4/5] Revert "[to #42794773] rename pydataset to msdataset"

This reverts commit c8e2e6de0ebdb75350ef55cffca58f8a94530c12.
---
 docs/source/api/modelscope.pydatasets.rst     |  8 +++----
 docs/source/api/modelscope.rst                |  2 +-
 docs/source/quick_start.md                    | 10 ++++----
 modelscope/datasets/__init__.py               |  1 -
 modelscope/hub/file_download.py               |  2 +-
 modelscope/pipelines/base.py                  |  6 ++---
 modelscope/pydatasets/__init__.py             |  1 +
 modelscope/{datasets => pydatasets}/config.py |  0
 .../py_dataset.py}                            | 24 +++++++++----------
 .../utils/__init__.py                         |  0
 .../{datasets => pydatasets}/utils/ms_api.py  |  4 ++--
 tests/pipelines/test_action_recognition.py    |  2 +-
 tests/pipelines/test_image_matting.py         |  6 ++---
 tests/pipelines/test_text_classification.py   | 12 +++++-----
 tests/{datasets => pydatasets}/__init__.py    |  0
 .../test_py_dataset.py}                       | 19 ++++++++-------
 16 files changed, 49 insertions(+), 48 deletions(-)
 delete mode 100644 modelscope/datasets/__init__.py
 create mode 100644 modelscope/pydatasets/__init__.py
 rename modelscope/{datasets => pydatasets}/config.py (100%)
 rename modelscope/{datasets/ms_dataset.py => pydatasets/py_dataset.py} (96%)
 rename modelscope/{datasets => pydatasets}/utils/__init__.py (100%)
 rename modelscope/{datasets => pydatasets}/utils/ms_api.py (95%)
 rename tests/{datasets => pydatasets}/__init__.py (100%)
 rename tests/{datasets/test_ms_dataset.py => pydatasets/test_py_dataset.py} (88%)

diff --git a/docs/source/api/modelscope.pydatasets.rst b/docs/source/api/modelscope.pydatasets.rst
index 33f2fab5..2508a91f 100644
--- a/docs/source/api/modelscope.pydatasets.rst
+++ b/docs/source/api/modelscope.pydatasets.rst
@@ -1,7 +1,7 @@
-modelscope.datasets package
+modelscope.pydatasets package
 =============================
 
-.. automodule:: modelscope.datasets
+.. automodule:: modelscope.pydatasets
    :members:
    :undoc-members:
    :show-inheritance:
@@ -9,10 +9,10 @@ modelscope.datasets package
 Submodules
 ----------
 
-modelscope.datasets.py\_dataset module
+modelscope.pydatasets.py\_dataset module
 ----------------------------------------
 
-.. automodule:: modelscope.datasets.ms_dataset
+.. automodule:: modelscope.pydatasets.py_dataset
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/source/api/modelscope.rst b/docs/source/api/modelscope.rst
index f1389717..efab568b 100644
--- a/docs/source/api/modelscope.rst
+++ b/docs/source/api/modelscope.rst
@@ -16,7 +16,7 @@ Subpackages
    modelscope.models
    modelscope.pipelines
    modelscope.preprocessors
-   modelscope.datasets
+   modelscope.pydatasets
    modelscope.trainers
    modelscope.utils
 
diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md
index 91509fa4..7148f27f 100644
--- a/docs/source/quick_start.md
+++ b/docs/source/quick_start.md
@@ -3,7 +3,7 @@
 ## python环境配置
 首先，参考[文档](https://docs.anaconda.com/anaconda/install/) 安装配置Anaconda环境
 
-安装完成后，执行如下命令为modelscope library创建对应的python环境。
+安装完成后，执行如下命令为maas library创建对应的python环境。
 ```shell
 conda create -n modelscope python=3.6
 conda activate modelscope
@@ -105,15 +105,15 @@ import cv2
 import os.path as osp
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
-from modelscope.datasets import MsDataset
+from modelscope.pydatasets import PyDataset
 
-# 使用图像url构建MsDataset，此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹
+# 使用图像url构建PyDataset，此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹
 input_location = [
     'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
 ]
-dataset = MsDataset.load(input_location, target='image')
+dataset = PyDataset.load(input_location, target='image')
 img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')
-# 输入为MsDataset时，输出的结果为迭代器
+# 输入为PyDataset时，输出的结果为迭代器
 result = img_matting(dataset)
 cv2.imwrite('result.png', next(result)['output_png'])
 print(f'Output written to {osp.abspath("result.png")}')
diff --git a/modelscope/datasets/__init__.py b/modelscope/datasets/__init__.py
deleted file mode 100644
index 8e0647bb..00000000
--- a/modelscope/datasets/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .ms_dataset import MsDataset
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
index b92bf89c..e5c64f1c 100644
--- a/modelscope/hub/file_download.py
+++ b/modelscope/hub/file_download.py
@@ -187,7 +187,7 @@ def get_file_download_url(model_id: str, file_path: str, revision: str):
     """
     Format file download url according to `model_id`, `revision` and `file_path`.
     e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`,
-    the resulted download url is: https://modelscope.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md
+    the resulted download url is: https://maas.co/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md
     """
     download_url_template = '{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path}'
     return download_url_template.format(
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index cf4ce8fd..7e32f543 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -4,17 +4,17 @@ import os.path as osp
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Generator, List, Union
 
-from modelscope.datasets import MsDataset
 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.base import Model
 from modelscope.preprocessors import Preprocessor
+from modelscope.pydatasets import PyDataset
 from modelscope.utils.config import Config
 from modelscope.utils.logger import get_logger
 from .outputs import TASK_OUTPUTS
 from .util import is_model, is_official_hub_path
 
 Tensor = Union['torch.Tensor', 'tf.Tensor']
-Input = Union[str, tuple, MsDataset, 'PIL.Image.Image', 'numpy.ndarray']
+Input = Union[str, tuple, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
 InputModel = Union[str, Model]
 
 output_keys = [
@@ -85,7 +85,7 @@ class Pipeline(ABC):
             for ele in input:
                 output.append(self._process_single(ele, *args, **post_kwargs))
 
-        elif isinstance(input, MsDataset):
+        elif isinstance(input, PyDataset):
             return self._process_iterator(input, *args, **post_kwargs)
 
         else:
diff --git a/modelscope/pydatasets/__init__.py b/modelscope/pydatasets/__init__.py
new file mode 100644
index 00000000..a1ed1d93
--- /dev/null
+++ b/modelscope/pydatasets/__init__.py
@@ -0,0 +1 @@
+from .py_dataset import PyDataset
diff --git a/modelscope/datasets/config.py b/modelscope/pydatasets/config.py
similarity index 100%
rename from modelscope/datasets/config.py
rename to modelscope/pydatasets/config.py
diff --git a/modelscope/datasets/ms_dataset.py b/modelscope/pydatasets/py_dataset.py
similarity index 96%
rename from modelscope/datasets/ms_dataset.py
rename to modelscope/pydatasets/py_dataset.py
index 80ffc77a..49137253 100644
--- a/modelscope/datasets/ms_dataset.py
+++ b/modelscope/pydatasets/py_dataset.py
@@ -10,8 +10,8 @@ from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES
 from datasets.utils.file_utils import (is_relative_path,
                                        relative_to_absolute_path)
 
-from modelscope.datasets.config import MS_DATASETS_CACHE
-from modelscope.datasets.utils.ms_api import MsApi
+from modelscope.pydatasets.config import MS_DATASETS_CACHE
+from modelscope.pydatasets.utils.ms_api import MsApi
 from modelscope.utils.constant import Hubs
 from modelscope.utils.logger import get_logger
 
@@ -28,9 +28,9 @@ def format_list(para) -> List:
     return para
 
 
-class MsDataset:
+class PyDataset:
     _hf_ds = None  # holds the underlying HuggingFace Dataset
-    """A MsDataset backed by hugging face Dataset."""
+    """A PyDataset backed by hugging face Dataset."""
 
     def __init__(self, hf_ds: Dataset, target: Optional[str] = None):
         self._hf_ds = hf_ds
@@ -49,7 +49,7 @@ class MsDataset:
     @classmethod
     def from_hf_dataset(cls,
                         hf_ds: Dataset,
-                        target: str = None) -> Union[dict, 'MsDataset']:
+                        target: str = None) -> Union[dict, 'PyDataset']:
         if isinstance(hf_ds, Dataset):
             return cls(hf_ds, target)
         if len(hf_ds.keys()) == 1:
@@ -68,8 +68,8 @@ class MsDataset:
         data_files: Optional[Union[str, Sequence[str],
                                    Mapping[str, Union[str,
                                                       Sequence[str]]]]] = None
-    ) -> Union[dict, 'MsDataset']:
-        """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
+    ) -> Union[dict, 'PyDataset']:
+        """Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
             Args:
 
                 dataset_name (str): Path or name of the dataset.
@@ -82,7 +82,7 @@ class MsDataset:
                 hub (Hubs, optional): When loading from a remote hub, where it is from
 
             Returns:
-                MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset.
+                PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset.
             """
         if hub == Hubs.huggingface:
             dataset = hf_load_dataset(
@@ -92,9 +92,9 @@ class MsDataset:
                 split=split,
                 data_dir=data_dir,
                 data_files=data_files)
-            return MsDataset.from_hf_dataset(dataset, target=target)
+            return PyDataset.from_hf_dataset(dataset, target=target)
         else:
-            return MsDataset._load_ms_dataset(
+            return PyDataset._load_ms_dataset(
                 dataset_name,
                 target=target,
                 subset_name=subset_name,
@@ -114,7 +114,7 @@ class MsDataset:
         data_files: Optional[Union[str, Sequence[str],
                                    Mapping[str, Union[str,
                                                       Sequence[str]]]]] = None
-    ) -> Union[dict, 'MsDataset']:
+    ) -> Union[dict, 'PyDataset']:
         if isinstance(dataset_name, str):
             use_hf = False
             if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \
@@ -153,7 +153,7 @@ class MsDataset:
         else:
             raise TypeError('path must be a str or a list, but got'
                             f' {type(dataset_name)}')
-        return MsDataset.from_hf_dataset(dataset, target=target)
+        return PyDataset.from_hf_dataset(dataset, target=target)
 
     def to_torch_dataset_with_processors(
         self,
diff --git a/modelscope/datasets/utils/__init__.py b/modelscope/pydatasets/utils/__init__.py
similarity index 100%
rename from modelscope/datasets/utils/__init__.py
rename to modelscope/pydatasets/utils/__init__.py
diff --git a/modelscope/datasets/utils/ms_api.py b/modelscope/pydatasets/utils/ms_api.py
similarity index 95%
rename from modelscope/datasets/utils/ms_api.py
rename to modelscope/pydatasets/utils/ms_api.py
index a478766f..04052cc4 100644
--- a/modelscope/datasets/utils/ms_api.py
+++ b/modelscope/pydatasets/utils/ms_api.py
@@ -4,8 +4,8 @@ from typing import Optional
 
 import requests
 
-from modelscope.datasets.config import (DOWNLOADED_DATASETS_PATH,
-                                        MS_HUB_ENDPOINT)
+from modelscope.pydatasets.config import (DOWNLOADED_DATASETS_PATH,
+                                          MS_HUB_ENDPOINT)
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
diff --git a/tests/pipelines/test_action_recognition.py b/tests/pipelines/test_action_recognition.py
index 7bb3bb90..b524ca18 100644
--- a/tests/pipelines/test_action_recognition.py
+++ b/tests/pipelines/test_action_recognition.py
@@ -7,9 +7,9 @@ import unittest
 
 import cv2
 
-from modelscope.datasets import MsDataset
 from modelscope.fileio import File
 from modelscope.pipelines import pipeline
+from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.test_utils import test_level
 
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index 13576d44..1b547e14 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -6,9 +6,9 @@ import unittest
 
 import cv2
 
-from modelscope.datasets import MsDataset
 from modelscope.fileio import File
 from modelscope.pipelines import pipeline
+from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.test_utils import test_level
 
@@ -37,7 +37,7 @@ class ImageMattingTest(unittest.TestCase):
         # alternatively:
         # input_location = '/dir/to/images'
 
-        dataset = MsDataset.load(input_location, target='image')
+        dataset = PyDataset.load(input_location, target='image')
         img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         # note that for dataset output, the inference-output is a Generator that can be iterated.
         result = img_matting(dataset)
@@ -62,7 +62,7 @@ class ImageMattingTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_modelscope_dataset(self):
-        dataset = MsDataset.load('beans', split='train', target='image')
+        dataset = PyDataset.load('beans', split='train', target='image')
         img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         result = img_matting(dataset)
         for i in range(10):
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index bf6de28e..9e5f15b9 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -2,10 +2,10 @@
 import shutil
 import unittest
 
-from modelscope.datasets import MsDataset
 from modelscope.models import Model
 from modelscope.pipelines import SequenceClassificationPipeline, pipeline
 from modelscope.preprocessors import SequenceClassificationPreprocessor
+from modelscope.pydatasets import PyDataset
 from modelscope.utils.constant import Hubs, Tasks
 from modelscope.utils.test_utils import test_level
 
@@ -28,7 +28,7 @@ class SequenceClassificationTest(unittest.TestCase):
 
         print(data)
 
-    def printDataset(self, dataset: MsDataset):
+    def printDataset(self, dataset: PyDataset):
         for i, r in enumerate(dataset):
             if i > 10:
                 break
@@ -50,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase):
         text_classification = pipeline(
             task=Tasks.text_classification, model=self.model_id)
         result = text_classification(
-            MsDataset.load(
+            PyDataset.load(
                 'glue',
                 subset_name='sst2',
                 split='train',
@@ -62,7 +62,7 @@ class SequenceClassificationTest(unittest.TestCase):
     def test_run_with_default_model(self):
         text_classification = pipeline(task=Tasks.text_classification)
         result = text_classification(
-            MsDataset.load(
+            PyDataset.load(
                 'glue',
                 subset_name='sst2',
                 split='train',
@@ -78,7 +78,7 @@ class SequenceClassificationTest(unittest.TestCase):
         text_classification = pipeline(
             Tasks.text_classification, model=model, preprocessor=preprocessor)
         # loaded from huggingface dataset
-        dataset = MsDataset.load(
+        dataset = PyDataset.load(
             'glue',
             subset_name='sst2',
             split='train',
@@ -91,7 +91,7 @@ class SequenceClassificationTest(unittest.TestCase):
     def test_run_with_modelscope_dataset(self):
         text_classification = pipeline(task=Tasks.text_classification)
         # loaded from modelscope dataset
-        dataset = MsDataset.load(
+        dataset = PyDataset.load(
             'squad', split='train', target='context', hub=Hubs.modelscope)
         result = text_classification(dataset)
         self.printDataset(result)
diff --git a/tests/datasets/__init__.py b/tests/pydatasets/__init__.py
similarity index 100%
rename from tests/datasets/__init__.py
rename to tests/pydatasets/__init__.py
diff --git a/tests/datasets/test_ms_dataset.py b/tests/pydatasets/test_py_dataset.py
similarity index 88%
rename from tests/datasets/test_ms_dataset.py
rename to tests/pydatasets/test_py_dataset.py
index d08258ac..e84f240a 100644
--- a/tests/datasets/test_ms_dataset.py
+++ b/tests/pydatasets/test_py_dataset.py
@@ -2,10 +2,11 @@ import unittest
 
 import datasets as hfdata
 
-from modelscope.datasets import MsDataset
 from modelscope.models import Model
 from modelscope.preprocessors import SequenceClassificationPreprocessor
 from modelscope.preprocessors.base import Preprocessor
+from modelscope.pydatasets import PyDataset
+from modelscope.utils.constant import Hubs
 from modelscope.utils.test_utils import require_tf, require_torch, test_level
 
 
@@ -30,15 +31,15 @@ class ImgPreprocessor(Preprocessor):
         }
 
 
-class MsDatasetTest(unittest.TestCase):
+class PyDatasetTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_ds_basic(self):
-        ms_ds_full = MsDataset.load('squad')
+        ms_ds_full = PyDataset.load('squad')
         ms_ds_full_hf = hfdata.load_dataset('squad')
-        ms_ds_train = MsDataset.load('squad', split='train')
+        ms_ds_train = PyDataset.load('squad', split='train')
         ms_ds_train_hf = hfdata.load_dataset('squad', split='train')
-        ms_image_train = MsDataset.from_hf_dataset(
+        ms_image_train = PyDataset.from_hf_dataset(
             hfdata.load_dataset('beans', split='train'))
         self.assertEqual(ms_ds_full['train'][0], ms_ds_full_hf['train'][0])
         self.assertEqual(ms_ds_full['validation'][0],
@@ -57,7 +58,7 @@ class MsDatasetTest(unittest.TestCase):
             nlp_model.model_dir,
             first_sequence='context',
             second_sequence=None)
-        ms_ds_train = MsDataset.load('squad', split='train')
+        ms_ds_train = PyDataset.load('squad', split='train')
         pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
         import torch
         dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
@@ -74,7 +75,7 @@ class MsDatasetTest(unittest.TestCase):
             nlp_model.model_dir,
             first_sequence='context',
             second_sequence=None)
-        ms_ds_train = MsDataset.load('squad', split='train')
+        ms_ds_train = PyDataset.load('squad', split='train')
         tf_dataset = ms_ds_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,
@@ -85,7 +86,7 @@ class MsDatasetTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     @require_torch
     def test_to_torch_dataset_img(self):
-        ms_image_train = MsDataset.from_hf_dataset(
+        ms_image_train = PyDataset.from_hf_dataset(
             hfdata.load_dataset('beans', split='train'))
         pt_dataset = ms_image_train.to_torch_dataset(
             preprocessors=ImgPreprocessor(
@@ -99,7 +100,7 @@ class MsDatasetTest(unittest.TestCase):
     def test_to_tf_dataset_img(self):
         import tensorflow as tf
         tf.compat.v1.enable_eager_execution()
-        ms_image_train = MsDataset.load('beans', split='train')
+        ms_image_train = PyDataset.load('beans', split='train')
         tf_dataset = ms_image_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,

From 39172b5f662bad258b122cc11b72df490a0bf8d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=99=BA=E4=B8=9E?= <zhangzhicheng.zzc@alibaba-inc.com>
Date: Mon, 27 Jun 2022 10:54:24 +0800
Subject: [PATCH 5/5] remove unformatted space trainer

---
 .../nlp/space => models/nlp/space/application}/__init__.py      | 0
 .../gen_trainer.py => models/nlp/space/application/gen_app.py}  | 0
 .../nlp/space/application/intent_app.py}                        | 0
 modelscope/models/nlp/space/dialog_intent_prediction_model.py   | 2 +-
 modelscope/models/nlp/space/dialog_modeling_model.py            | 2 +-
 modelscope/{trainers => models}/nlp/space/metrics/__init__.py   | 0
 .../{trainers => models}/nlp/space/metrics/metrics_tracker.py   | 0
 modelscope/trainers/nlp/space/trainers/__init__.py              | 0
 8 files changed, 2 insertions(+), 2 deletions(-)
 rename modelscope/{trainers/nlp/space => models/nlp/space/application}/__init__.py (100%)
 rename modelscope/{trainers/nlp/space/trainers/gen_trainer.py => models/nlp/space/application/gen_app.py} (100%)
 rename modelscope/{trainers/nlp/space/trainers/intent_trainer.py => models/nlp/space/application/intent_app.py} (100%)
 rename modelscope/{trainers => models}/nlp/space/metrics/__init__.py (100%)
 rename modelscope/{trainers => models}/nlp/space/metrics/metrics_tracker.py (100%)
 delete mode 100644 modelscope/trainers/nlp/space/trainers/__init__.py

diff --git a/modelscope/trainers/nlp/space/__init__.py b/modelscope/models/nlp/space/application/__init__.py
similarity index 100%
rename from modelscope/trainers/nlp/space/__init__.py
rename to modelscope/models/nlp/space/application/__init__.py
diff --git a/modelscope/trainers/nlp/space/trainers/gen_trainer.py b/modelscope/models/nlp/space/application/gen_app.py
similarity index 100%
rename from modelscope/trainers/nlp/space/trainers/gen_trainer.py
rename to modelscope/models/nlp/space/application/gen_app.py
diff --git a/modelscope/trainers/nlp/space/trainers/intent_trainer.py b/modelscope/models/nlp/space/application/intent_app.py
similarity index 100%
rename from modelscope/trainers/nlp/space/trainers/intent_trainer.py
rename to modelscope/models/nlp/space/application/intent_app.py
diff --git a/modelscope/models/nlp/space/dialog_intent_prediction_model.py b/modelscope/models/nlp/space/dialog_intent_prediction_model.py
index a5d94376..a6bd1d27 100644
--- a/modelscope/models/nlp/space/dialog_intent_prediction_model.py
+++ b/modelscope/models/nlp/space/dialog_intent_prediction_model.py
@@ -2,11 +2,11 @@ import os
 from typing import Any, Dict
 
 from ....preprocessors.space.fields.intent_field import IntentBPETextField
-from ....trainers.nlp.space.trainers.intent_trainer import IntentTrainer
 from ....utils.config import Config
 from ....utils.constant import Tasks
 from ...base import Model, Tensor
 from ...builder import MODELS
+from .application.intent_app import IntentTrainer
 from .model.generator import Generator
 from .model.model_base import ModelBase
 
diff --git a/modelscope/models/nlp/space/dialog_modeling_model.py b/modelscope/models/nlp/space/dialog_modeling_model.py
index 4a34f132..ad8212c0 100644
--- a/modelscope/models/nlp/space/dialog_modeling_model.py
+++ b/modelscope/models/nlp/space/dialog_modeling_model.py
@@ -2,11 +2,11 @@ import os
 from typing import Any, Dict, Optional
 
 from ....preprocessors.space.fields.gen_field import MultiWOZBPETextField
-from ....trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer
 from ....utils.config import Config
 from ....utils.constant import Tasks
 from ...base import Model, Tensor
 from ...builder import MODELS
+from .application.gen_app import MultiWOZTrainer
 from .model.generator import Generator
 from .model.model_base import ModelBase
 
diff --git a/modelscope/trainers/nlp/space/metrics/__init__.py b/modelscope/models/nlp/space/metrics/__init__.py
similarity index 100%
rename from modelscope/trainers/nlp/space/metrics/__init__.py
rename to modelscope/models/nlp/space/metrics/__init__.py
diff --git a/modelscope/trainers/nlp/space/metrics/metrics_tracker.py b/modelscope/models/nlp/space/metrics/metrics_tracker.py
similarity index 100%
rename from modelscope/trainers/nlp/space/metrics/metrics_tracker.py
rename to modelscope/models/nlp/space/metrics/metrics_tracker.py
diff --git a/modelscope/trainers/nlp/space/trainers/__init__.py b/modelscope/trainers/nlp/space/trainers/__init__.py
deleted file mode 100644
index e69de29b..00000000