You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_datasets_imagenet_distribution.py 3.7 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import pytest
  16. import mindspore.dataset as ds
  17. from mindspore import log as logger
  18. DATA_DIR = ["../data/dataset/test_tf_file_3_images2/train-0000-of-0001.data",
  19. "../data/dataset/test_tf_file_3_images2/train-0000-of-0002.data",
  20. "../data/dataset/test_tf_file_3_images2/train-0000-of-0003.data",
  21. "../data/dataset/test_tf_file_3_images2/train-0000-of-0004.data"]
  22. SCHEMA_DIR = "../data/dataset/test_tf_file_3_images2/datasetSchema.json"
  23. DISTRIBUTION_ALL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionAll.json"
  24. DISTRIBUTION_UNIQUE_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionUnique.json"
  25. DISTRIBUTION_RANDOM_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionRandom.json"
  26. DISTRIBUTION_EQUAL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionEqualRows.json"
  27. def test_tf_file_normal():
  28. # apply dataset operations
  29. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  30. data1 = data1.repeat(1)
  31. num_iter = 0
  32. for item in data1.create_dict_iterator(): # each data is a dictionary
  33. num_iter += 1
  34. logger.info("Number of data in data1: {}".format(num_iter))
  35. assert num_iter == 12
  36. def test_tf_file_distribution_all():
  37. # apply dataset operations
  38. data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_ALL_DIR)
  39. data1 = data1.repeat(2)
  40. num_iter = 0
  41. for item in data1.create_dict_iterator(): # each data is a dictionary
  42. num_iter += 1
  43. logger.info("Number of data in data1: {}".format(num_iter))
  44. assert num_iter == 24
  45. def test_tf_file_distribution_unique():
  46. data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_UNIQUE_DIR)
  47. data1 = data1.repeat(1)
  48. num_iter = 0
  49. for item in data1.create_dict_iterator(): # each data is a dictionary
  50. num_iter += 1
  51. logger.info("Number of data in data1: {}".format(num_iter))
  52. assert num_iter == 4
  53. def test_tf_file_distribution_random():
  54. data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_RANDOM_DIR)
  55. data1 = data1.repeat(1)
  56. num_iter = 0
  57. for item in data1.create_dict_iterator(): # each data is a dictionary
  58. num_iter += 1
  59. logger.info("Number of data in data1: {}".format(num_iter))
  60. assert num_iter == 4
  61. def test_tf_file_distribution_equal_rows():
  62. data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_EQUAL_DIR)
  63. data1 = data1.repeat(2)
  64. num_iter = 0
  65. for item in data1.create_dict_iterator(): # each data is a dictionary
  66. num_iter += 1
  67. assert num_iter == 4
  68. if __name__ == '__main__':
  69. logger.info('=======test normal=======')
  70. test_tf_file_normal()
  71. logger.info('=======test all=======')
  72. test_tf_file_distribution_all()
  73. logger.info('=======test unique=======')
  74. test_tf_file_distribution_unique()
  75. logger.info('=======test random=======')
  76. test_tf_file_distribution_random()
  77. logger.info('=======test equal rows=======')
  78. test_tf_file_distribution_equal_rows()