You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_pyfunc.py 15 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import numpy as np
  16. import pytest
  17. import mindspore.dataset as ds
  18. import mindspore.dataset.engine.iterators as it
  19. from mindspore import log as logger
  20. DATA_DIR = ["../data/dataset/testPyfuncMap/data.data"]
  21. SCHEMA_DIR = "../data/dataset/testPyfuncMap/schema.json"
  22. COLUMNS = ["col0", "col1", "col2"]
  23. GENERATE_GOLDEN = False
  24. def test_case_0():
  25. """
  26. Test PyFunc
  27. """
  28. logger.info("Test 1-1 PyFunc : lambda x : x + x")
  29. # apply dataset operations
  30. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  31. data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out")
  32. i = 0
  33. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  34. # In this test, the dataset is 2x2 sequential tensors
  35. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  36. np.testing.assert_array_equal(item["out"], golden)
  37. i = i + 4
  38. def test_case_1():
  39. """
  40. Test PyFunc
  41. """
  42. logger.info("Test 1-n PyFunc : lambda x : (x , x + x) ")
  43. col = "col0"
  44. # apply dataset operations
  45. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  46. data1 = data1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"],
  47. column_order=["out0", "out1"])
  48. i = 0
  49. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  50. # In this test, the dataset is 2x2 sequential tensors
  51. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  52. np.testing.assert_array_equal(item["out0"], golden)
  53. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  54. np.testing.assert_array_equal(item["out1"], golden)
  55. i = i + 4
  56. def test_case_2():
  57. """
  58. Test PyFunc
  59. """
  60. logger.info("Test n-1 PyFunc : lambda x, y : x + y ")
  61. col = ["col0", "col1"]
  62. # apply dataset operations
  63. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  64. data1 = data1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out",
  65. column_order=["out"])
  66. i = 0
  67. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  68. # In this test, the dataset is 2x2 sequential tensors
  69. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  70. np.testing.assert_array_equal(item["out"], golden)
  71. i = i + 4
  72. def test_case_3():
  73. """
  74. Test PyFunc
  75. """
  76. logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  77. col = ["col0", "col1"]
  78. # apply dataset operations
  79. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  80. data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
  81. output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
  82. i = 0
  83. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  84. # In this test, the dataset is 2x2 sequential tensors
  85. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  86. np.testing.assert_array_equal(item["out0"], golden)
  87. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  88. np.testing.assert_array_equal(item["out1"], golden)
  89. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  90. np.testing.assert_array_equal(item["out2"], golden)
  91. i = i + 4
  92. def test_case_4():
  93. """
  94. Test PyFunc
  95. """
  96. logger.info("Test Parallel n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  97. col = ["col0", "col1"]
  98. # apply dataset operations
  99. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  100. data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
  101. output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
  102. column_order=["out0", "out1", "out2"])
  103. i = 0
  104. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  105. # In this test, the dataset is 2x2 sequential tensors
  106. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  107. np.testing.assert_array_equal(item["out0"], golden)
  108. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  109. np.testing.assert_array_equal(item["out1"], golden)
  110. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  111. np.testing.assert_array_equal(item["out2"], golden)
  112. i = i + 4
  113. # The execution of this function will acquire GIL
  114. def func_5(x):
  115. return np.ones(x.shape, dtype=x.dtype)
  116. def test_case_5():
  117. """
  118. Test PyFunc
  119. """
  120. logger.info("Test 1-1 PyFunc : lambda x: np.ones(x.shape)")
  121. # apply dataset operations
  122. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  123. data1 = data1.map(operations=func_5, input_columns="col0", output_columns="out")
  124. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  125. # In this test, the dataset is 2x2 sequential tensors
  126. golden = np.array([[1, 1], [1, 1]])
  127. np.testing.assert_array_equal(item["out"], golden)
  128. def test_case_6():
  129. """
  130. Test PyFunc
  131. """
  132. logger.info("Test PyFunc Compose : (lambda x : x + x), (lambda x : x + x)")
  133. # apply dataset operations
  134. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  135. data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + x)], input_columns="col0", output_columns="out")
  136. i = 0
  137. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  138. # In this test, the dataset is 2x2 sequential tensors
  139. golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]])
  140. np.testing.assert_array_equal(item["out"], golden)
  141. i = i + 4
  142. def test_case_7():
  143. """
  144. Test PyFunc
  145. """
  146. logger.info("Test 1-1 PyFunc Multiprocess: lambda x : x + x")
  147. # Reduce memory required by disabling the shared memory optimization
  148. mem_original = ds.config.get_enable_shared_mem()
  149. ds.config.set_enable_shared_mem(False)
  150. # apply dataset operations
  151. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  152. data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out",
  153. num_parallel_workers=4, python_multiprocessing=True)
  154. i = 0
  155. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  156. # In this test, the dataset is 2x2 sequential tensors
  157. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  158. np.testing.assert_array_equal(item["out"], golden)
  159. i = i + 4
  160. ds.config.set_enable_shared_mem(mem_original)
  161. def test_case_8():
  162. """
  163. Test PyFunc
  164. """
  165. logger.info("Test Multiprocess n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  166. # Reduce memory required by disabling the shared memory optimization
  167. mem_original = ds.config.get_enable_shared_mem()
  168. ds.config.set_enable_shared_mem(False)
  169. col = ["col0", "col1"]
  170. # apply dataset operations
  171. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  172. data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
  173. output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
  174. column_order=["out0", "out1", "out2"],
  175. python_multiprocessing=True)
  176. i = 0
  177. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  178. # In this test, the dataset is 2x2 sequential tensors
  179. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  180. np.testing.assert_array_equal(item["out0"], golden)
  181. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  182. np.testing.assert_array_equal(item["out1"], golden)
  183. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  184. np.testing.assert_array_equal(item["out2"], golden)
  185. i = i + 4
  186. ds.config.set_enable_shared_mem(mem_original)
  187. def test_case_9():
  188. """
  189. Test PyFunc
  190. """
  191. logger.info("Test multiple 1-1 PyFunc Multiprocess: lambda x : x + x")
  192. # Reduce memory required by disabling the shared memory optimization
  193. mem_original = ds.config.get_enable_shared_mem()
  194. ds.config.set_enable_shared_mem(False)
  195. # apply dataset operations
  196. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  197. data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + 1), (lambda x: x + 2)], input_columns="col0",
  198. output_columns="out", num_parallel_workers=4, python_multiprocessing=True)
  199. i = 0
  200. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  201. # In this test, the dataset is 2x2 sequential tensors
  202. golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]])
  203. np.testing.assert_array_equal(item["out"], golden)
  204. i = i + 4
  205. ds.config.set_enable_shared_mem(mem_original)
  206. def test_case_10():
  207. """
  208. Test PyFunc
  209. """
  210. logger.info("Test multiple map with multiprocess: lambda x : x + x")
  211. # Reduce memory required by disabling the shared memory optimization
  212. mem_original = ds.config.get_enable_shared_mem()
  213. ds.config.set_enable_shared_mem(False)
  214. # apply dataset operations
  215. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  216. data1 = data1.map(operations=[(lambda x: x * 10)], input_columns="col0",
  217. output_columns="out", num_parallel_workers=4)
  218. data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + 1), (lambda x: x + 2)], input_columns="out",
  219. output_columns="out", num_parallel_workers=4, python_multiprocessing=True)
  220. i = 0
  221. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  222. # In this test, the dataset is 2x2 sequential tensors
  223. golden = np.array([[i * 20 + 3, (i + 1) * 20 + 3], [(i + 2) * 20 + 3, (i + 3) * 20 + 3]])
  224. np.testing.assert_array_equal(item["out"], golden)
  225. i = i + 4
  226. ds.config.set_enable_shared_mem(mem_original)
  227. def test_pyfunc_implicit_compose():
  228. """
  229. Test Implicit Compose with pyfunc
  230. """
  231. logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  232. col = ["col0", "col1"]
  233. # apply dataset operations
  234. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  235. data1 = data1.map(operations=[(lambda x, y: (x, x + y, x + y + 1)), (lambda x, y, z: (x, y, z))], input_columns=col,
  236. output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
  237. i = 0
  238. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  239. # In this test, the dataset is 2x2 sequential tensors
  240. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  241. np.testing.assert_array_equal(item["out0"], golden)
  242. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  243. np.testing.assert_array_equal(item["out1"], golden)
  244. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  245. np.testing.assert_array_equal(item["out2"], golden)
  246. i = i + 4
  247. def test_pyfunc_exception():
  248. logger.info("Test PyFunc Exception Throw: lambda x : raise Exception()")
  249. # Sometimes there are some ITERATORS left in ITERATORS_LIST when run all UTs together,
  250. # and cause core dump and blocking in this UT. Add cleanup() here to fix it.
  251. it._cleanup() # pylint: disable=W0212
  252. def pyfunc(x):
  253. raise Exception("Pyfunc Throw")
  254. with pytest.raises(RuntimeError) as info:
  255. # apply dataset operations
  256. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  257. data1 = data1.map(operations=pyfunc, input_columns="col0", output_columns="out",
  258. num_parallel_workers=4)
  259. for _ in data1:
  260. pass
  261. assert "Pyfunc Throw" in str(info.value)
  262. def skip_test_pyfunc_Exception_multiprocess():
  263. logger.info("Test Multiprocess PyFunc Exception Throw: lambda x : raise Exception()")
  264. def pyfunc(x):
  265. raise Exception("MP Pyfunc Throw")
  266. with pytest.raises(RuntimeError) as info:
  267. # apply dataset operations
  268. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  269. data1 = data1.map(operations=pyfunc, input_columns="col0", output_columns="out",
  270. num_parallel_workers=4, python_multiprocessing=True)
  271. for _ in data1:
  272. pass
  273. assert "MP Pyfunc Throw" in str(info.value)
  274. def test_func_with_yield_manifest_dataset_01():
  275. def pass_func(_):
  276. for i in range(10):
  277. yield (np.array([i]),)
  278. # Sometimes there are some ITERATORS left in ITERATORS_LIST when run all UTs together,
  279. # and cause core dump and blocking in this UT. Add cleanup() here to fix it.
  280. it._cleanup() # pylint: disable=W0212
  281. DATA_FILE = "../data/dataset/testManifestData/test.manifest"
  282. data = ds.ManifestDataset(DATA_FILE)
  283. data = data.map(operations=pass_func, input_columns=["image"], num_parallel_workers=1, python_multiprocessing=True,
  284. max_rowsize=1)
  285. num_iter = 0
  286. try:
  287. for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
  288. num_iter += 1
  289. except RuntimeError as e:
  290. assert "Can not pickle <class 'generator'> object, " in str(e)
  291. if __name__ == "__main__":
  292. test_case_0()
  293. test_case_1()
  294. test_case_2()
  295. test_case_3()
  296. test_case_4()
  297. test_case_5()
  298. test_case_6()
  299. test_case_7()
  300. test_case_8()
  301. test_case_9()
  302. test_case_10()
  303. test_pyfunc_implicit_compose()
  304. test_pyfunc_exception()
  305. skip_test_pyfunc_exception_multiprocess()
  306. test_func_with_yield_manifest_dataset_01()