
test_pyfunc.py

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
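"""
Testing the PyFunc op: applying Python functions through Dataset.map() for 1-1, 1-n,
n-1 and n-m column mappings, parallel workers, and composed PyFuncs.
"""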
import numpy as np

import mindspore.dataset as ds
from mindspore import log as logger

DATA_DIR = ["../data/dataset/testPyfuncMap/data.data"]
SCHEMA_DIR = "../data/dataset/testPyfuncMap/schema.json"
COLUMNS = ["col0", "col1", "col2"]
GENERATE_GOLDEN = False


def test_case_0():
    """
    Test 1-1 PyFunc: lambda x: x + x
    """
    logger.info("Test 1-1 PyFunc : lambda x : x + x")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns="col0", output_columns="out", operations=(lambda x: x + x))

    i = 0
    for item in data1.create_dict_iterator():  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        assert np.array_equal(item["out"], golden)
        i = i + 4


  36. def test_case_1():
  37. """
  38. Test PyFunc
  39. """
  40. logger.info("Test 1-n PyFunc : lambda x : (x , x + x) ")
  41. col = "col0"
  42. # apply dataset operations
  43. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  44. data1 = data1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x)),
  45. columns_order=["out0", "out1"])
  46. i = 0
  47. for item in data1.create_dict_iterator(): # each data is a dictionary
  48. # In this test, the dataset is 2x2 sequential tensors
  49. golden = np.array([[i, i + 1], [i + 2, i + 3]])
  50. assert np.array_equal(item["out0"], golden)
  51. golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
  52. assert np.array_equal(item["out1"], golden)
  53. i = i + 4
def test_case_2():
    """
    Test n-1 PyFunc: lambda x, y: x + y
    """
    logger.info("Test n-1 PyFunc : lambda x, y : x + y")

    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y),
                      columns_order=["out"])

    i = 0
    for item in data1.create_dict_iterator():  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        assert np.array_equal(item["out"], golden)
        i = i + 4


def test_case_3():
    """
    Test n-m PyFunc: lambda x, y: (x, x + y, x + y + 1)
    """
    logger.info("Test n-m PyFunc : lambda x, y : (x, x + y, x + y + 1)")

    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"],
                      operations=(lambda x, y: (x, x + y, x + y + 1)), columns_order=["out0", "out1", "out2"])

    i = 0
    for item in data1.create_dict_iterator():  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
        assert np.array_equal(item["out0"], golden)
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        assert np.array_equal(item["out1"], golden)
        golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
        assert np.array_equal(item["out2"], golden)
        i = i + 4


def test_case_4():
    """
    Test parallel n-m PyFunc: lambda x, y: (x, x + y, x + y + 1) with 4 workers
    """
    logger.info("Test Parallel n-m PyFunc : lambda x, y : (x, x + y, x + y + 1)")

    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4,
                      operations=(lambda x, y: (x, x + y, x + y + 1)), columns_order=["out0", "out1", "out2"])

    i = 0
    for item in data1.create_dict_iterator():  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
        assert np.array_equal(item["out0"], golden)
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        assert np.array_equal(item["out1"], golden)
        golden = np.array([[i * 2 + 1, (i + 1) * 2 + 1], [(i + 2) * 2 + 1, (i + 3) * 2 + 1]])
        assert np.array_equal(item["out2"], golden)
        i = i + 4


# The execution of this function will acquire GIL
def func_5(x):
    return np.ones(x.shape, dtype=x.dtype)


def test_case_5():
    """
    Test 1-1 PyFunc with a named function: np.ones(x.shape)
    """
    logger.info("Test 1-1 PyFunc : lambda x: np.ones(x.shape)")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns="col0", output_columns="out", operations=func_5)

    for item in data1.create_dict_iterator():  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[1, 1], [1, 1]])
        assert np.array_equal(item["out"], golden)


def test_case_6():
    """
    Test composed PyFuncs: (lambda x: x + x), (lambda x: x + x)
    """
    logger.info("Test PyFunc ComposeOp : (lambda x : x + x), (lambda x : x + x)")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns="col0", output_columns="out",
                      operations=[(lambda x: x + x), (lambda x: x + x)])

    i = 0
    for item in data1.create_dict_iterator():  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]])
        assert np.array_equal(item["out"], golden)
        i = i + 4


if __name__ == "__main__":
    test_case_0()
    test_case_1()
    test_case_2()
    test_case_3()
    test_case_4()
    test_case_5()
    test_case_6()