You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

test_pyfunc.py 14 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import numpy as np
  16. import pytest
  17. import mindspore.dataset as ds
  18. from mindspore import log as logger
# TFRecord data file shared by all tests below (2x2 tensors of sequential ints).
DATA_DIR = ["../data/dataset/testPyfuncMap/data.data"]
# JSON schema describing the columns of the TFRecord data.
SCHEMA_DIR = "../data/dataset/testPyfuncMap/schema.json"
# Column names defined by the schema (not referenced directly in the visible tests).
COLUMNS = ["col0", "col1", "col2"]
# Golden-regeneration switch; not used in the visible tests — TODO confirm it is still needed.
GENERATE_GOLDEN = False
  23. def test_case_0():
  24. """
  25. Test PyFunc
  26. """
  27. logger.info("Test 1-1 PyFunc : lambda x : x + x")
  28. # apply dataset operations
  29. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  30. data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out")
  31. i = 0
  32. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  33. # In this test, the dataset is 2x2 sequential tensors
  34. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  35. np.testing.assert_array_equal(item["out"], golden)
  36. i = i + 4
  37. def test_case_1():
  38. """
  39. Test PyFunc
  40. """
  41. logger.info("Test 1-n PyFunc : lambda x : (x , x + x) ")
  42. col = "col0"
  43. # apply dataset operations
  44. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  45. data1 = data1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"],
  46. column_order=["out0", "out1"])
  47. i = 0
  48. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  49. # In this test, the dataset is 2x2 sequential tensors
  50. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  51. np.testing.assert_array_equal(item["out0"], golden)
  52. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  53. np.testing.assert_array_equal(item["out1"], golden)
  54. i = i + 4
  55. def test_case_2():
  56. """
  57. Test PyFunc
  58. """
  59. logger.info("Test n-1 PyFunc : lambda x, y : x + y ")
  60. col = ["col0", "col1"]
  61. # apply dataset operations
  62. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  63. data1 = data1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out",
  64. column_order=["out"])
  65. i = 0
  66. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  67. # In this test, the dataset is 2x2 sequential tensors
  68. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  69. np.testing.assert_array_equal(item["out"], golden)
  70. i = i + 4
  71. def test_case_3():
  72. """
  73. Test PyFunc
  74. """
  75. logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  76. col = ["col0", "col1"]
  77. # apply dataset operations
  78. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  79. data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
  80. output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
  81. i = 0
  82. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  83. # In this test, the dataset is 2x2 sequential tensors
  84. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  85. np.testing.assert_array_equal(item["out0"], golden)
  86. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  87. np.testing.assert_array_equal(item["out1"], golden)
  88. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  89. np.testing.assert_array_equal(item["out2"], golden)
  90. i = i + 4
  91. def test_case_4():
  92. """
  93. Test PyFunc
  94. """
  95. logger.info("Test Parallel n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  96. col = ["col0", "col1"]
  97. # apply dataset operations
  98. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  99. data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
  100. output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
  101. column_order=["out0", "out1", "out2"])
  102. i = 0
  103. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  104. # In this test, the dataset is 2x2 sequential tensors
  105. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  106. np.testing.assert_array_equal(item["out0"], golden)
  107. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  108. np.testing.assert_array_equal(item["out1"], golden)
  109. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  110. np.testing.assert_array_equal(item["out2"], golden)
  111. i = i + 4
  112. # The execution of this function will acquire GIL
  113. def func_5(x):
  114. return np.ones(x.shape, dtype=x.dtype)
  115. def test_case_5():
  116. """
  117. Test PyFunc
  118. """
  119. logger.info("Test 1-1 PyFunc : lambda x: np.ones(x.shape)")
  120. # apply dataset operations
  121. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  122. data1 = data1.map(operations=func_5, input_columns="col0", output_columns="out")
  123. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  124. # In this test, the dataset is 2x2 sequential tensors
  125. golden = np.array([[1, 1], [1, 1]])
  126. np.testing.assert_array_equal(item["out"], golden)
  127. def test_case_6():
  128. """
  129. Test PyFunc
  130. """
  131. logger.info("Test PyFunc Compose : (lambda x : x + x), (lambda x : x + x)")
  132. # apply dataset operations
  133. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  134. data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + x)], input_columns="col0", output_columns="out")
  135. i = 0
  136. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  137. # In this test, the dataset is 2x2 sequential tensors
  138. golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]])
  139. np.testing.assert_array_equal(item["out"], golden)
  140. i = i + 4
  141. def test_case_7():
  142. """
  143. Test PyFunc
  144. """
  145. logger.info("Test 1-1 PyFunc Multiprocess: lambda x : x + x")
  146. # Reduce memory required by disabling the shared memory optimization
  147. mem_original = ds.config.get_enable_shared_mem()
  148. ds.config.set_enable_shared_mem(False)
  149. # apply dataset operations
  150. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  151. data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out",
  152. num_parallel_workers=4, python_multiprocessing=True)
  153. i = 0
  154. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  155. # In this test, the dataset is 2x2 sequential tensors
  156. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  157. np.testing.assert_array_equal(item["out"], golden)
  158. i = i + 4
  159. ds.config.set_enable_shared_mem(mem_original)
  160. def test_case_8():
  161. """
  162. Test PyFunc
  163. """
  164. logger.info("Test Multiprocess n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  165. # Reduce memory required by disabling the shared memory optimization
  166. mem_original = ds.config.get_enable_shared_mem()
  167. ds.config.set_enable_shared_mem(False)
  168. col = ["col0", "col1"]
  169. # apply dataset operations
  170. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  171. data1 = data1.map(operations=(lambda x, y: (x, x + y, x + y + 1)), input_columns=col,
  172. output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
  173. column_order=["out0", "out1", "out2"],
  174. python_multiprocessing=True)
  175. i = 0
  176. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  177. # In this test, the dataset is 2x2 sequential tensors
  178. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  179. np.testing.assert_array_equal(item["out0"], golden)
  180. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  181. np.testing.assert_array_equal(item["out1"], golden)
  182. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  183. np.testing.assert_array_equal(item["out2"], golden)
  184. i = i + 4
  185. ds.config.set_enable_shared_mem(mem_original)
  186. def test_case_9():
  187. """
  188. Test PyFunc
  189. """
  190. logger.info("Test multiple 1-1 PyFunc Multiprocess: lambda x : x + x")
  191. # Reduce memory required by disabling the shared memory optimization
  192. mem_original = ds.config.get_enable_shared_mem()
  193. ds.config.set_enable_shared_mem(False)
  194. # apply dataset operations
  195. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  196. data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + 1), (lambda x: x + 2)], input_columns="col0",
  197. output_columns="out", num_parallel_workers=4, python_multiprocessing=True)
  198. i = 0
  199. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  200. # In this test, the dataset is 2x2 sequential tensors
  201. golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]])
  202. np.testing.assert_array_equal(item["out"], golden)
  203. i = i + 4
  204. ds.config.set_enable_shared_mem(mem_original)
  205. def test_case_10():
  206. """
  207. Test PyFunc
  208. """
  209. logger.info("Test multiple map with multiprocess: lambda x : x + x")
  210. # Reduce memory required by disabling the shared memory optimization
  211. mem_original = ds.config.get_enable_shared_mem()
  212. ds.config.set_enable_shared_mem(False)
  213. # apply dataset operations
  214. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  215. data1 = data1.map(operations=[(lambda x: x * 10)], input_columns="col0",
  216. output_columns="out", num_parallel_workers=4)
  217. data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + 1), (lambda x: x + 2)], input_columns="out",
  218. output_columns="out", num_parallel_workers=4, python_multiprocessing=True)
  219. i = 0
  220. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  221. # In this test, the dataset is 2x2 sequential tensors
  222. golden = np.array([[i * 20 + 3, (i + 1) * 20 + 3], [(i + 2) * 20 + 3, (i + 3) * 20 + 3]])
  223. np.testing.assert_array_equal(item["out"], golden)
  224. i = i + 4
  225. ds.config.set_enable_shared_mem(mem_original)
  226. def test_pyfunc_implicit_compose():
  227. """
  228. Test Implicit Compose with pyfunc
  229. """
  230. logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
  231. col = ["col0", "col1"]
  232. # apply dataset operations
  233. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  234. data1 = data1.map(operations=[(lambda x, y: (x, x + y, x + y + 1)), (lambda x, y, z: (x, y, z))], input_columns=col,
  235. output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])
  236. i = 0
  237. for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
  238. # In this test, the dataset is 2x2 sequential tensors
  239. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  240. np.testing.assert_array_equal(item["out0"], golden)
  241. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  242. np.testing.assert_array_equal(item["out1"], golden)
  243. golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
  244. np.testing.assert_array_equal(item["out2"], golden)
  245. i = i + 4
  246. def test_pyfunc_exception():
  247. logger.info("Test PyFunc Exception Throw: lambda x : raise Exception()")
  248. def pyfunc(x):
  249. raise Exception("Pyfunc Throw")
  250. with pytest.raises(RuntimeError) as info:
  251. # apply dataset operations
  252. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  253. data1 = data1.map(operations=pyfunc, input_columns="col0", output_columns="out",
  254. num_parallel_workers=4)
  255. for _ in data1:
  256. pass
  257. assert "Pyfunc Throw" in str(info.value)
  258. def skip_test_pyfunc_Exception_multiprocess():
  259. logger.info("Test Multiprocess PyFunc Exception Throw: lambda x : raise Exception()")
  260. def pyfunc(x):
  261. raise Exception("MP Pyfunc Throw")
  262. with pytest.raises(RuntimeError) as info:
  263. # apply dataset operations
  264. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  265. data1 = data1.map(operations=pyfunc, input_columns="col0", output_columns="out",
  266. num_parallel_workers=4, python_multiprocessing=True)
  267. for _ in data1:
  268. pass
  269. assert "MP Pyfunc Throw" in str(info.value)
  270. def test_func_with_yield_manifest_dataset_01():
  271. def pass_func(_):
  272. for i in range(10):
  273. yield (np.array([i]),)
  274. DATA_FILE = "../data/dataset/testManifestData/test.manifest"
  275. data = ds.ManifestDataset(DATA_FILE)
  276. data = data.map(operations=pass_func, input_columns=["image"], num_parallel_workers=1, python_multiprocessing=True,
  277. max_rowsize=1)
  278. num_iter = 0
  279. try:
  280. for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
  281. num_iter += 1
  282. except RuntimeError as e:
  283. assert "Can not pickle <class 'generator'> object, " in str(e)
  284. if __name__ == "__main__":
  285. test_case_0()
  286. test_case_1()
  287. test_case_2()
  288. test_case_3()
  289. test_case_4()
  290. test_case_5()
  291. test_case_6()
  292. test_case_7()
  293. test_case_8()
  294. test_case_9()
  295. test_case_10()
  296. test_pyfunc_implicit_compose()
  297. test_pyfunc_exception()
  298. skip_test_pyfunc_exception_multiprocess()
  299. test_func_with_yield_manifest_dataset_01()