You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_dataset_upload.py 3.6 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import os
  3. import shutil
  4. import tempfile
  5. import unittest
  6. import zipfile
  7. from modelscope.msdatasets import MsDataset
  8. from modelscope.utils.constant import ModelFile
  9. from modelscope.utils.test_utils import test_level
  10. KEY_EXTRACTED = 'extracted'
  11. class DatasetUploadTest(unittest.TestCase):
  12. def setUp(self):
  13. self.old_dir = os.getcwd()
  14. self.dataset_name = 'small_coco_for_test'
  15. self.dataset_file_name = self.dataset_name
  16. self.prepared_dataset_name = 'pets_small'
  17. self.token = os.getenv('TEST_UPLOAD_MS_TOKEN')
  18. error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN'
  19. self.assertIsNotNone(self.token, msg=error_msg)
  20. from modelscope.hub.api import HubApi
  21. from modelscope.hub.api import ModelScopeConfig
  22. self.api = HubApi()
  23. self.api.login(self.token)
  24. # get user info
  25. self.namespace, _ = ModelScopeConfig.get_user_info()
  26. self.temp_dir = tempfile.mkdtemp()
  27. self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name)
  28. self.test_meta_dir = os.path.join(self.test_work_dir, 'meta')
  29. if not os.path.exists(self.test_work_dir):
  30. os.makedirs(self.test_work_dir)
  31. def tearDown(self):
  32. os.chdir(self.old_dir)
  33. shutil.rmtree(self.temp_dir, ignore_errors=True)
  34. print('The test dir successfully removed!')
  35. @staticmethod
  36. def get_raw_downloaded_file_path(extracted_path):
  37. raw_downloaded_file_path = ''
  38. raw_data_dir = os.path.abspath(
  39. os.path.join(extracted_path, '../../..'))
  40. for root, dirs, files in os.walk(raw_data_dir):
  41. if KEY_EXTRACTED in dirs:
  42. for file in files:
  43. curr_file_path = os.path.join(root, file)
  44. if zipfile.is_zipfile(curr_file_path):
  45. raw_downloaded_file_path = curr_file_path
  46. return raw_downloaded_file_path
  47. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  48. def test_ds_upload(self):
  49. # Get the prepared data from hub, using default modelscope namespace
  50. ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train')
  51. config_res = ms_ds_train._hf_ds.config_kwargs
  52. extracted_path = config_res.get('split_config').get('train')
  53. raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path)
  54. MsDataset.upload(
  55. object_name=self.dataset_file_name + '.zip',
  56. local_file_path=raw_zipfile_path,
  57. dataset_name=self.dataset_name,
  58. namespace=self.namespace)
  59. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  60. def test_ds_clone_meta(self):
  61. MsDataset.clone_meta(
  62. dataset_work_dir=self.test_meta_dir,
  63. dataset_id=os.path.join(self.namespace, self.dataset_name))
  64. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  65. def test_ds_upload_meta(self):
  66. # Clone dataset meta repo first.
  67. MsDataset.clone_meta(
  68. dataset_work_dir=self.test_meta_dir,
  69. dataset_id=os.path.join(self.namespace, self.dataset_name))
  70. with open(os.path.join(self.test_meta_dir, ModelFile.README),
  71. 'a') as f:
  72. f.write('\nThis is a line for unit test.')
  73. MsDataset.upload_meta(
  74. dataset_work_dir=self.test_meta_dir,
  75. commit_message='Update for unit test.')
  76. if __name__ == '__main__':
  77. unittest.main()