You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_map_offload.py 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import numpy as np
  16. import pytest
  17. import mindspore.dataset as ds
  18. import mindspore.common.dtype as mstype
  19. import mindspore.dataset.vision.c_transforms as C
  20. import mindspore.dataset.transforms.c_transforms as C2
  21. DATA_DIR = "../data/dataset/testPK/data"
  22. def test_offload():
  23. """
  24. Feature: test map offload flag.
  25. Description: Input is image dataset.
  26. Expectation: Output should be same with activated or deactivated offload.
  27. """
  28. # Dataset with offload activated.
  29. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  30. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  31. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  32. dataset_0 = dataset_0.batch(8, drop_remainder=True)
  33. # Dataset with offload not activated.
  34. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  35. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  36. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  37. dataset_1 = dataset_1.batch(8, drop_remainder=True)
  38. for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
  39. dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  40. np.testing.assert_array_equal(img_0, img_1)
  41. def test_auto_offload():
  42. """
  43. Feature: Test auto_offload config option.
  44. Description: Input is image dataset.
  45. Expectation: Output should same with auto_offload activated and deactivated.
  46. """
  47. trans = [C.Decode(), C.HWC2CHW()]
  48. # Enable automatic offload
  49. ds.config.set_auto_offload(True)
  50. # Dataset with offload deactivated
  51. dataset_auto_disabled = ds.ImageFolderDataset(DATA_DIR)
  52. dataset_auto_disabled = dataset_auto_disabled.map(operations=trans, input_columns="image", offload=False)
  53. dataset_auto_disabled = dataset_auto_disabled.batch(8, drop_remainder=True)
  54. # Dataset with config.auto_offload activated
  55. dataset_auto_enabled = ds.ImageFolderDataset(DATA_DIR)
  56. dataset_auto_enabled = dataset_auto_enabled.map(operations=trans, input_columns="image")
  57. dataset_auto_enabled = dataset_auto_enabled.batch(8, drop_remainder=True)
  58. for (img_0, _), (img_1, _) in zip(dataset_auto_disabled.create_tuple_iterator(num_epochs=1, output_numpy=True),
  59. dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  60. np.testing.assert_array_equal(img_0, img_1)
  61. # Need to turn off here or subsequent test cases will fail.
  62. ds.config.set_auto_offload(False)
  63. def test_offload_column_validation():
  64. """
  65. Feature: Test the column validation for offloaded map operations
  66. Description: Input is an image dataset, but the input column is incorrect for the offloaded map operation.
  67. Expectation: Should raise RuntimeError.
  68. """
  69. dataset = ds.ImageFolderDataset(DATA_DIR)
  70. dataset = dataset.map(operations=[C.Decode()], input_columns="image")
  71. # Use invalid input column name
  72. dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="fake_column", offload=True)
  73. dataset = dataset.batch(8, drop_remainder=True)
  74. error_msg = "The following input column(s) for an offloaded map operation do not exist: [\'fake_column\']"
  75. with pytest.raises(RuntimeError) as excinfo:
  76. for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
  77. continue
  78. assert str(excinfo.value) == error_msg
  79. def test_offload_concat_dataset_1():
  80. """
  81. Feature: test map offload flag for concatenated dataset.
  82. Description: Input is image dataset.
  83. Expectation: Should raise RuntimeError.
  84. """
  85. # Dataset with offload activated.
  86. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  87. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  88. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  89. dataset_0 = dataset_0.batch(8, drop_remainder=True)
  90. # Dataset with offload not activated.
  91. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  92. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  93. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  94. dataset_1 = dataset_1.batch(8, drop_remainder=True)
  95. dataset_concat = dataset_0 + dataset_1
  96. error_msg = "Offload module currently does not support concatenated or zipped datasets."
  97. with pytest.raises(RuntimeError, match=error_msg):
  98. for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
  99. continue
  100. def test_offload_concat_dataset_2():
  101. """
  102. Feature: test map offload flag for concatenated dataset.
  103. Description: Input is image dataset.
  104. Expectation: Should raise RuntimeError.
  105. """
  106. # Dataset with offload activated.
  107. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  108. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  109. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  110. # Dataset with offload not activated.
  111. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  112. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  113. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  114. dataset_concat = dataset_0 + dataset_1
  115. dataset_concat = dataset_concat.batch(8, drop_remainder=True)
  116. error_msg = "Offload module currently does not support concatenated or zipped datasets."
  117. with pytest.raises(RuntimeError, match=error_msg):
  118. for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
  119. continue
  120. def test_offload_normalize_op():
  121. """
  122. Feature: test map offload Normalize op.
  123. Description: Input is image dataset.
  124. Expectation: Output should be same with activated or deactivated offload for Normalize op.
  125. """
  126. mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
  127. std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
  128. # Dataset with offload activated.
  129. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  130. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  131. dataset_0 = dataset_0.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image", offload=True)
  132. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  133. dataset_0 = dataset_0.batch(8, drop_remainder=True)
  134. # Dataset with offload not activated.
  135. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  136. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  137. dataset_1 = dataset_1.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image")
  138. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  139. dataset_1 = dataset_1.batch(8, drop_remainder=True)
  140. for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
  141. dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  142. np.testing.assert_almost_equal(img_0, img_1, decimal=6)
  143. def test_offload_rescale_op():
  144. """
  145. Feature: test map offload Rescale op.
  146. Description: Input is image dataset.
  147. Expectation: Output should be same with activated or deactivated offload for Rescale op.
  148. """
  149. rescale = 1.0 / 255.0
  150. shift = 0.0
  151. # Dataset with offload activated.
  152. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  153. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  154. dataset_0 = dataset_0.map(operations=[C.Rescale(rescale, shift)], input_columns="image", offload=True)
  155. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  156. dataset_0 = dataset_0.batch(8, drop_remainder=True)
  157. # Dataset with offload not activated.
  158. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  159. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  160. dataset_1 = dataset_1.map(operations=[C.Rescale(rescale, shift)], input_columns="image")
  161. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  162. dataset_1 = dataset_1.batch(8, drop_remainder=True)
  163. for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
  164. dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  165. np.testing.assert_almost_equal(img_0, img_1, decimal=6)
  166. def test_offload_different_column_end_of_pipeline():
  167. """
  168. Feature: Test offload end_of_pipeline check.
  169. Description: Input is image dataset.
  170. Expectation: The image map op gets offloaded even though it comes before the not-offloaded label map op, since
  171. the end_of_pipeline check looks at columns separately.
  172. """
  173. image_trans = [C.Decode(), C.HWC2CHW()]
  174. ds.config.set_auto_offload(True)
  175. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  176. dataset_0 = dataset_0.map(operations=image_trans, input_columns="image")
  177. dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False)
  178. data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True)
  179. # Assert at least one operation has been offloaded
  180. np.testing.assert_(len(data_iterator.offload_model.transform_list[0].me_ops) > 0)
  181. ds.config.set_auto_offload(False)
  182. def test_offload_not_end_of_pipeline():
  183. """
  184. Feature: Test offload end_of_pipeline check.
  185. Description: Input is image dataset.
  186. Expectation: No operations are offloaded, since the image map op at the end of the pipeline has the
  187. offload flag set to False.
  188. """
  189. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  190. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image", offload=True)
  191. dataset_0 = dataset_0.map(operations=[C.RandomHorizontalFlip(prob=0.5)], input_columns="image", offload=True)
  192. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=False)
  193. dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False)
  194. data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True)
  195. # Assert no operations are set to be offloaded
  196. np.testing.assert_(data_iterator.offload_model is None)
  197. def test_offload_dim_check():
  198. """
  199. Feature: test input has the required number of dimensions for offload operation.
  200. Description: Input is image dataset.
  201. Expectation: Should raise ValueError.
  202. """
  203. # Dataset with offload activated.
  204. dataset = ds.ImageFolderDataset(DATA_DIR)
  205. dataset = dataset.map(operations=[C.Decode()], input_columns="image")
  206. dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  207. error_msg = "For HwcToChw offload operation, the dimension of input should be 4, but got 3."
  208. with pytest.raises(ValueError, match=error_msg):
  209. for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
  210. continue
  211. if __name__ == "__main__":
  212. test_offload()
  213. test_auto_offload()
  214. test_offload_column_validation()
  215. test_offload_concat_dataset_1()
  216. test_offload_concat_dataset_2()
  217. test_offload_normalize_op()
  218. test_offload_rescale_op()
  219. test_offload_different_column_end_of_pipeline()
  220. test_offload_not_end_of_pipeline()
  221. test_offload_dim_check()