
test_config.py

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
  15. """
  16. Testing configuration manager
  17. """
  18. import filecmp
  19. import glob
  20. import numpy as np
  21. import os
  22. from mindspore import log as logger
  23. import mindspore.dataset as ds
  24. import mindspore.dataset.transforms.vision.c_transforms as vision
  25. import mindspore.dataset.transforms.vision.py_transforms as py_vision
  26. DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
  27. SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"


def test_basic():
    ds.config.load('../data/dataset/declient.cfg')

    # assert ds.config.get_rows_per_buffer() == 32
    assert ds.config.get_num_parallel_workers() == 4
    # assert ds.config.get_worker_connector_size() == 16
    assert ds.config.get_prefetch_size() == 16
    assert ds.config.get_seed() == 5489

    # ds.config.set_rows_per_buffer(1)
    ds.config.set_num_parallel_workers(2)
    # ds.config.set_worker_connector_size(3)
    ds.config.set_prefetch_size(4)
    ds.config.set_seed(5)

    # assert ds.config.get_rows_per_buffer() == 1
    assert ds.config.get_num_parallel_workers() == 2
    # assert ds.config.get_worker_connector_size() == 3
    assert ds.config.get_prefetch_size() == 4
    assert ds.config.get_seed() == 5
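
# Note (an illustration, not part of the original tests): these config setters
# mutate process-global state, so values set in one test leak into the tests that
# follow. A minimal save/restore sketch around a test body could look like:
#
#     original_seed = ds.config.get_seed()
#     original_workers = ds.config.get_num_parallel_workers()
#     try:
#         ds.config.set_seed(5)
#         ...  # test body
#     finally:
#         ds.config.set_seed(original_seed)
#         ds.config.set_num_parallel_workers(original_workers)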


def test_get_seed():
    """
    This gets the seed value without explicitly setting a default; expect an int.
    """
    assert isinstance(ds.config.get_seed(), int)


def test_pipeline():
    """
    Test that our configuration pipeline works when we set parameters at
    different locations in dataset code.
    """
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds.config.set_num_parallel_workers(2)
    data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)])
    ds.serialize(data1, "testpipeline.json")

    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds.config.set_num_parallel_workers(4)
    data2 = data2.map(input_columns=["image"], operations=[vision.Decode(True)])
    ds.serialize(data2, "testpipeline2.json")

    # The two serialized pipelines should differ (different num_parallel_workers),
    # but they currently compare equal because num_parallel_workers does not get
    # updated in the serialized output, so this assertion passes.
    assert filecmp.cmp('testpipeline.json', 'testpipeline2.json')

    # Remove the generated json files.
    file_list = glob.glob('*.json')
    for f in file_list:
        try:
            os.remove(f)
        except IOError:
            logger.info("Error while deleting: {}".format(f))


def test_deterministic_run_fail():
    """
    Test RandomCrop with a fixed seed; this run is expected to fail.
    """
    logger.info("test_deterministic_run_fail")

    # When we set the seed, all operations within our dataset should be deterministic.
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # The op reads the seed when its constructor is called, so if the same op
    # instance is re-used, the results of the two datasets will not match.
    # For example, RandomCrop constructed with seed 0 outputs one deterministic
    # series of numbers, e.g. "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset re-uses the same RandomCrop instance
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)
    data2 = data2.map(input_columns=["image"], operations=random_crop_op)

    try:
        for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
            np.testing.assert_equal(item1["image"], item2["image"])
    except BaseException as e:
        # The two datasets split the numbers of the sequence "a" between them.
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Array" in str(e)
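
# Illustration of the failure mode above (plain Python, not MindSpore API): one
# seeded generator shared by two consumers splits its sequence between them, so
# neither pipeline sees the full deterministic series:
#
#     import random
#     rng = random.Random(0)
#     seq = [rng.random() for _ in range(6)]  # the deterministic series "a"
#     pipeline1_draws = seq[0::2]             # gets a[0], a[2], a[4]
#     pipeline2_draws = seq[1::2]             # gets a[1], a[3], a[5]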


def test_deterministic_run_pass():
    """
    Test a deterministic run by setting the seed.
    """
    logger.info("test_deterministic_run_pass")
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # The seed is read when the constructor is called.
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)
    # Since the seed is picked up in the constructor, the two separate op
    # instances output the same deterministic sequence.
    # Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
    random_crop_op2 = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data2 = data2.map(input_columns=["image"], operations=random_crop_op2)

    try:
        for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
            np.testing.assert_equal(item1["image"], item2["image"])
    except BaseException as e:
        # Both datasets consume numbers from their own copy of the sequence "a".
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Array" in str(e)


def test_seed_undeterministic():
    """
    Test the seed with parallel workers in C; this test is expected to fail some of the time.
    """
    logger.info("test_seed_undeterministic")
    ds.config.set_seed(0)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # The seed will be read in during the constructor call.
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)
    # Since the seed is picked up in the constructor, the two op instances should
    # output the same deterministic sequence.
    random_crop_op2 = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data2 = data2.map(input_columns=["image"], operations=random_crop_op2)

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        np.testing.assert_equal(item1["image"], item2["image"])
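
# Note: unlike test_deterministic_run_fail above, there is no try/except here, so
# when parallel workers interleave the seeded sequence the assert itself raises;
# "expected to fail some of the time" refers to that intermittent assertion error.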


def test_deterministic_run_distribution():
    """
    Test a deterministic run of a random distribution op (RandomHorizontalFlip) with the seed set.
    """
    logger.info("test_deterministic_run_distribution")

    # When we set the seed, all operations within our dataset should be deterministic.
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    random_horizontal_flip_op = vision.RandomHorizontalFlip(0.1)
    decode_op = vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)
    # Since the seed is picked up in the constructor, the two op instances output
    # the same deterministic sequence.
    random_horizontal_flip_op2 = vision.RandomHorizontalFlip(0.1)
    data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2)

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        np.testing.assert_equal(item1["image"], item2["image"])


def test_deterministic_python_seed():
    """
    Test deterministic execution with the seed set in Python.
    """
    logger.info("test_deterministic_python_seed")
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
        py_vision.ToTensor(),
    ]
    transform = py_vision.ComposeOp(transforms)
    data1 = data1.map(input_columns=["image"], operations=transform())

    # config.set_seed() calls random.seed()
    data1_output = []
    for data_one in data1.create_dict_iterator():
        data1_output.append(data_one["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=transform())
    # config.set_seed() calls random.seed(), which resets the seed for the next dataset iterator.
    ds.config.set_seed(0)

    data2_output = []
    for data_two in data2.create_dict_iterator():
        data2_output.append(data_two["image"])

    np.testing.assert_equal(data1_output, data2_output)
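
# Why the reset works (a standalone illustration, assuming ds.config.set_seed()
# seeds Python's random module as the comments above state):
#
#     import random
#     random.seed(0)
#     first_draw = random.random()
#     random.seed(0)                    # reset, as done before iterating data2
#     assert random.random() == first_draw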


def test_deterministic_python_seed_multi_thread():
    """
    Test deterministic execution with the seed set in Python; this fails when
    pyfuncs run in parallel workers (python_multiprocessing=True).
    """
    logger.info("test_deterministic_python_seed_multi_thread")
    # When we set the seed, all operations within our dataset should be deterministic.
    ds.config.set_seed(0)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
        py_vision.ToTensor(),
    ]
    transform = py_vision.ComposeOp(transforms)
    data1 = data1.map(input_columns=["image"], operations=transform(), python_multiprocessing=True)

    # config.set_seed() calls random.seed()
    data1_output = []
    for data_one in data1.create_dict_iterator():
        data1_output.append(data_one["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=transform(), python_multiprocessing=True)
    # config.set_seed() calls random.seed()
    ds.config.set_seed(0)

    data2_output = []
    for data_two in data2.create_dict_iterator():
        data2_output.append(data_two["image"])

    try:
        np.testing.assert_equal(data1_output, data2_output)
    except BaseException as e:
        # We expect the outputs not to match during multi-worker execution.
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Array" in str(e)
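
# With python_multiprocessing=True the pyfuncs run in separate worker processes,
# so a single random.seed() call in the parent process no longer pins every
# worker's generator; the resulting divergence is exactly what this test expects.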


if __name__ == '__main__':
    test_basic()
    test_pipeline()
    test_deterministic_run_pass()
    test_deterministic_run_distribution()
    test_deterministic_run_fail()
    test_deterministic_python_seed()
    test_seed_undeterministic()
    test_get_seed()
    test_deterministic_python_seed_multi_thread()