Browse Source

[to #42322933] Fix issue -- it's not necessary to login for loading public datasets.

修复”公开数据集需要登录才能load“的问题
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10448936
master
xingjun.wxj yingda.chen 3 years ago
parent
commit
9809d960b0
3 changed files with 8 additions and 15 deletions
  1. +5
    -3
      modelscope/hub/api.py
  2. +3
    -5
      modelscope/msdatasets/utils/dataset_utils.py
  3. +0
    -7
      modelscope/utils/constant.py

+ 5
- 3
modelscope/hub/api.py View File

@@ -390,11 +390,13 @@ class HubApi:
return resp['Data']

def list_oss_dataset_objects(self, dataset_name, namespace, max_limit,
is_recursive, is_filter_dir, revision,
cookies):
is_recursive, is_filter_dir, revision):
url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/tree/?' \
f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}'
cookies = requests.utils.dict_from_cookiejar(cookies)

cookies = ModelScopeConfig.get_cookies()
if cookies:
cookies = requests.utils.dict_from_cookiejar(cookies)

resp = requests.get(url=url, cookies=cookies)
resp = resp.json()


+ 3
- 5
modelscope/msdatasets/utils/dataset_utils.py View File

@@ -7,7 +7,7 @@ from typing import Any, Mapping, Optional, Sequence, Union
from datasets.builder import DatasetBuilder

from modelscope.hub.api import HubApi
from modelscope.utils.constant import DEFAULT_DATASET_REVISION, DownloadParams
from modelscope.utils.constant import DEFAULT_DATASET_REVISION
from modelscope.utils.logger import get_logger
from .dataset_builder import MsCsvDatasetBuilder, TaskSpecificDatasetBuilder

@@ -95,15 +95,13 @@ def list_dataset_objects(hub_api: HubApi, max_limit: int, is_recursive: bool,
res (list): List of objects, i.e., ['train/images/001.png', 'train/images/002.png', 'val/images/001.png', ...]
"""
res = []
cookies = hub_api.check_cookies_upload_data(use_cookies=True)
objects = hub_api.list_oss_dataset_objects(
dataset_name=dataset_name,
namespace=namespace,
max_limit=max_limit,
is_recursive=is_recursive,
is_filter_dir=True,
revision=version,
cookies=cookies)
revision=version)

for item in objects:
object_key = item.get('Key')
@@ -174,7 +172,7 @@ def get_dataset_files(subset_split_into: dict,
modelscope_api = HubApi()
objects = list_dataset_objects(
hub_api=modelscope_api,
max_limit=DownloadParams.MAX_LIST_OBJECTS_NUM.value,
max_limit=-1,
is_recursive=True,
dataset_name=dataset_name,
namespace=namespace,


+ 0
- 7
modelscope/utils/constant.py View File

@@ -231,13 +231,6 @@ class DownloadMode(enum.Enum):
FORCE_REDOWNLOAD = 'force_redownload'


class DownloadParams(enum.Enum):
"""
Parameters for downloading dataset.
"""
MAX_LIST_OBJECTS_NUM = 50000


class DatasetFormations(enum.Enum):
""" How a dataset is organized and interpreted
"""


Loading…
Cancel
Save