Title: [to #42322933] add event tracking 1. add event tracking for dataset downloading pv/uv 2. change datasets version: <=2.5.2 Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10593016master
| @@ -39,8 +39,8 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, | |||||
| DEFAULT_MODEL_REVISION, | DEFAULT_MODEL_REVISION, | ||||
| DEFAULT_REPOSITORY_REVISION, | DEFAULT_REPOSITORY_REVISION, | ||||
| MASTER_MODEL_BRANCH, DatasetFormations, | MASTER_MODEL_BRANCH, DatasetFormations, | ||||
| DatasetMetaFormats, DownloadMode, | |||||
| ModelFile) | |||||
| DatasetMetaFormats, DownloadChannel, | |||||
| DownloadMode, ModelFile) | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from .utils.utils import (get_endpoint, get_release_datetime, | from .utils.utils import (get_endpoint, get_release_datetime, | ||||
| model_id_to_group_owner_name) | model_id_to_group_owner_name) | ||||
| @@ -646,6 +646,25 @@ class HubApi: | |||||
def check_local_cookies(self, use_cookies) -> CookieJar:
    """Public entry point that delegates to ``self._check_cookie``.

    Args:
        use_cookies: Forwarded unchanged as the ``use_cookies`` keyword
            argument of ``self._check_cookie``.

    Returns:
        Whatever ``self._check_cookie`` returns (annotated as ``CookieJar``).
    """
    cookie_jar = self._check_cookie(use_cookies=use_cookies)
    return cookie_jar
def dataset_download_uv(self, dataset_name: str, namespace: str):
    """Report a dataset download to the hub's uv counting endpoint.

    The download channel is read from the environment variable named by
    ``MODELSCOPE_ENVIRONMENT`` (falling back to ``DownloadChannel.LOCAL``)
    and the user name from the variable named by ``MODELSCOPE_USERNAME``
    (falling back to an empty string).

    Args:
        dataset_name: Name of the dataset being downloaded; must be non-empty.
        namespace: Namespace that owns the dataset; must be non-empty.

    Returns:
        The ``'Message'`` field of the JSON response.

    Raises:
        ValueError: If ``dataset_name`` or ``namespace`` is empty.
        Exception: Whatever ``raise_on_error`` raises for an error response,
            plus any error raised by the HTTP call or JSON decoding.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import quote

    if not dataset_name or not namespace:
        raise ValueError('dataset_name or namespace cannot be empty!')

    # get channel and user_name
    channel = os.environ.get(MODELSCOPE_ENVIRONMENT,
                             DownloadChannel.LOCAL.value)
    user_name = os.environ.get(MODELSCOPE_USERNAME, '')

    # Both values come from the environment, so they are percent-encoded
    # instead of being spliced raw into the URL: a '/', '&', '#' or space
    # would otherwise change the path or query the server receives.
    channel_segment = quote(channel, safe='')
    url = (f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}'
           f'/download/uv/{channel_segment}')

    cookies = ModelScopeConfig.get_cookies()
    r = requests.post(
        url,
        params={'user': user_name},  # requests encodes the query value
        cookies=cookies,
        headers=self.headers)
    resp = r.json()
    raise_on_error(resp)
    return resp['Message']
| class ModelScopeConfig: | class ModelScopeConfig: | ||||
| path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) | path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) | ||||
| @@ -274,6 +274,8 @@ class MsDataset: | |||||
| try: | try: | ||||
| api.on_dataset_download( | api.on_dataset_download( | ||||
| dataset_name=download_dataset, namespace=namespace) | dataset_name=download_dataset, namespace=namespace) | ||||
| api.dataset_download_uv( | |||||
| dataset_name=download_dataset, namespace=namespace) | |||||
| except Exception as e: | except Exception as e: | ||||
| logger.error(e) | logger.error(e) | ||||
| @@ -238,6 +238,14 @@ class DownloadMode(enum.Enum): | |||||
| FORCE_REDOWNLOAD = 'force_redownload' | FORCE_REDOWNLOAD = 'force_redownload' | ||||
class DownloadChannel(enum.Enum):
    """Channel through which a dataset download is made.

    The value is used when reporting dataset downloads for uv/pv counting.
    """

    # Channel reported when no environment override is present.
    LOCAL = 'local'

    # Other channel identifiers reported for uv/pv counting.
    DSW = 'dsw'
    EAIS = 'eais'
| class UploadMode(enum.Enum): | class UploadMode(enum.Enum): | ||||
| """ How to upload object to remote. | """ How to upload object to remote. | ||||
| """ | """ | ||||
| @@ -1,7 +1,7 @@ | |||||
| addict | addict | ||||
| attrs | attrs | ||||
| # versions beyond 2.6.0 introduce a compatibility issue that is being resolved | |||||
| datasets<=2.6.0 | |||||
| # versions beyond 2.5.2 introduce a compatibility issue that is being resolved | |||||
| datasets<=2.5.2 | |||||
| easydict | easydict | ||||
| einops | einops | ||||
| filelock>=3.3.0 | filelock>=3.3.0 | ||||
| @@ -8,7 +8,8 @@ import zipfile | |||||
| from modelscope.msdatasets import MsDataset | from modelscope.msdatasets import MsDataset | ||||
| from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects | from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects | ||||
| from modelscope.utils import logger as logging | from modelscope.utils import logger as logging | ||||
| from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile | |||||
| from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, | |||||
| ModelFile) | |||||
| from modelscope.utils.test_utils import test_level | from modelscope.utils.test_utils import test_level | ||||
| logger = logging.get_logger(__name__) | logger = logging.get_logger(__name__) | ||||
| @@ -104,7 +105,10 @@ class DatasetUploadTest(unittest.TestCase): | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
| def test_ds_download_dir(self): | def test_ds_download_dir(self): | ||||
| test_ds = MsDataset.load(self.dataset_name, self.namespace) | |||||
| test_ds = MsDataset.load( | |||||
| self.dataset_name, | |||||
| namespace=self.namespace, | |||||
| download_mode=DownloadMode.FORCE_REDOWNLOAD) | |||||
| assert test_ds.config_kwargs['split_config'].values() | assert test_ds.config_kwargs['split_config'].values() | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||