You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_map_offload.py 18 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. # Copyright 2021-2022 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import numpy as np
  16. import pytest
  17. import mindspore.dataset as ds
  18. import mindspore.common.dtype as mstype
  19. import mindspore.dataset.vision.c_transforms as C
  20. import mindspore.dataset.transforms.c_transforms as C2
# Path to the ImageFolder-layout test dataset used by every test below.
DATA_DIR = "../data/dataset/testPK/data"
# Batch size shared by all batched pipelines in this file.
BATCH_SIZE = 2
  23. def test_offload():
  24. """
  25. Feature: test map offload flag.
  26. Description: Input is image dataset.
  27. Expectation: Output should be same with activated or deactivated offload.
  28. """
  29. # Dataset with offload activated.
  30. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  31. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  32. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  33. dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)
  34. # Dataset with offload not activated.
  35. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  36. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  37. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  38. dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)
  39. for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
  40. dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  41. np.testing.assert_array_equal(img_0, img_1)
  42. break
  43. def test_auto_offload():
  44. """
  45. Feature: Test auto_offload config option.
  46. Description: Input is image dataset.
  47. Expectation: Output should same with auto_offload activated and deactivated.
  48. """
  49. trans = [C.Decode(), C.HWC2CHW()]
  50. # Enable automatic offload
  51. ds.config.set_auto_offload(True)
  52. # Dataset with offload deactivated
  53. dataset_auto_disabled = ds.ImageFolderDataset(DATA_DIR)
  54. dataset_auto_disabled = dataset_auto_disabled.map(operations=trans, input_columns="image", offload=False)
  55. dataset_auto_disabled = dataset_auto_disabled.batch(BATCH_SIZE, drop_remainder=True)
  56. # Dataset with config.auto_offload activated
  57. dataset_auto_enabled = ds.ImageFolderDataset(DATA_DIR)
  58. dataset_auto_enabled = dataset_auto_enabled.map(operations=trans, input_columns="image")
  59. dataset_auto_enabled = dataset_auto_enabled.batch(BATCH_SIZE, drop_remainder=True)
  60. for (img_0, _), (img_1, _) in zip(dataset_auto_disabled.create_tuple_iterator(num_epochs=1, output_numpy=True),
  61. dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  62. np.testing.assert_array_equal(img_0, img_1)
  63. break
  64. # Need to turn off here or subsequent test cases will fail.
  65. ds.config.set_auto_offload(False)
  66. def test_offload_column_validation():
  67. """
  68. Feature: Test the column validation for offloaded map operations
  69. Description: Input is an image dataset, but the input column is incorrect for the offloaded map operation.
  70. Expectation: Should raise RuntimeError.
  71. """
  72. dataset = ds.ImageFolderDataset(DATA_DIR)
  73. dataset = dataset.map(operations=[C.Decode()], input_columns="image")
  74. # Use invalid input column name
  75. dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="fake_column", offload=True)
  76. dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
  77. error_msg = "The following input column(s) for an offloaded map operation do not exist: [\'fake_column\']"
  78. with pytest.raises(RuntimeError) as excinfo:
  79. for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
  80. continue
  81. assert str(excinfo.value) == error_msg
  82. def test_offload_multi_column():
  83. """
  84. Feature: Test the offload functionality with datasets with more than 2 columns.
  85. Description: Input is an image dataset, copy the image column and apply map operations to both images.
  86. Expectation: Output should be same with both offload activated and deactivated.
  87. """
  88. def copy_column(x, y):
  89. return x, x, y
  90. dataset = ds.ImageFolderDataset(DATA_DIR)
  91. dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
  92. output_columns=["image1", "image2", "label"],
  93. column_order=["image1", "image2", "label"])
  94. dataset = dataset.map(operations=[C.Decode()], input_columns="image1")
  95. dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image1")
  96. dataset = dataset.map(operations=[C.Decode()], input_columns="image2")
  97. dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image2")
  98. dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
  99. dataset_offload = ds.ImageFolderDataset(DATA_DIR)
  100. dataset_offload = dataset_offload.map(operations=copy_column, input_columns=["image", "label"],
  101. output_columns=["image1", "image2", "label"],
  102. column_order=["image1", "image2", "label"])
  103. dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image1")
  104. dataset_offload = dataset_offload.map(operations=[C.HWC2CHW()], input_columns="image1", offload=True)
  105. dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image2")
  106. dataset_offload = dataset_offload.map(operations=[C.HWC2CHW()], input_columns="image2", offload=True)
  107. dataset_offload = dataset_offload.batch(BATCH_SIZE, drop_remainder=True)
  108. for (img1, img2, _), (img1_offload, img2_offload, _) in \
  109. zip(dataset.create_tuple_iterator(num_epochs=1, output_numpy=True),
  110. dataset_offload.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  111. np.testing.assert_array_equal(img1, img1_offload)
  112. np.testing.assert_array_equal(img2, img2_offload)
  113. break
  114. def test_offload_column_mapping():
  115. """
  116. Feature: Test the dataset column mapping for offloaded operations
  117. Description: Input is an image dataset, copy the image column, then apply offload to only copied column.
  118. Expectation: The offload model dataset column index value is 1 (second column).
  119. """
  120. def copy_column(x, y):
  121. return x, x, y
  122. dataset = ds.ImageFolderDataset(DATA_DIR)
  123. dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
  124. output_columns=["image1", "image2", "label"], column_order=["image1", "image2", "label"])
  125. dataset = dataset.map(operations=[C.Decode()], input_columns="image2")
  126. dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image2", offload=True)
  127. dataset_iterator = dataset.create_tuple_iterator(num_epochs=1, output_numpy=True)
  128. offload_col_idxs = dataset_iterator.offload_model.transform_list[0].col_idxs
  129. # assert there is only one column index in the offload model, and that it is 1 (second column)
  130. np.testing.assert_((len(offload_col_idxs) == 1) and (offload_col_idxs[0] == 1))
  131. def test_offload_concat_dataset_1():
  132. """
  133. Feature: test map offload flag for concatenated dataset.
  134. Description: Input is image dataset.
  135. Expectation: Should raise RuntimeError.
  136. """
  137. # Dataset with offload activated.
  138. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  139. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  140. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  141. dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)
  142. # Dataset with offload not activated.
  143. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  144. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  145. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  146. dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)
  147. dataset_concat = dataset_0 + dataset_1
  148. error_msg = "Offload module currently does not support concatenated or zipped datasets."
  149. with pytest.raises(RuntimeError, match=error_msg):
  150. for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
  151. continue
  152. def test_offload_concat_dataset_2():
  153. """
  154. Feature: test map offload flag for concatenated dataset.
  155. Description: Input is image dataset.
  156. Expectation: Should raise RuntimeError.
  157. """
  158. # Dataset with offload activated.
  159. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  160. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  161. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  162. # Dataset with offload not activated.
  163. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  164. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  165. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  166. dataset_concat = dataset_0 + dataset_1
  167. dataset_concat = dataset_concat.batch(BATCH_SIZE, drop_remainder=True)
  168. error_msg = "Offload module currently does not support concatenated or zipped datasets."
  169. with pytest.raises(RuntimeError, match=error_msg):
  170. for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
  171. continue
  172. def test_offload_normalize_op():
  173. """
  174. Feature: test map offload Normalize op.
  175. Description: Input is image dataset.
  176. Expectation: Output should be same with activated or deactivated offload for Normalize op.
  177. """
  178. mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
  179. std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
  180. # Dataset with offload activated.
  181. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  182. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  183. dataset_0 = dataset_0.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image", offload=True)
  184. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  185. dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)
  186. # Dataset with offload not activated.
  187. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  188. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  189. dataset_1 = dataset_1.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image")
  190. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  191. dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)
  192. for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
  193. dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  194. np.testing.assert_almost_equal(img_0, img_1, decimal=6)
  195. break
  196. def test_offload_rescale_op():
  197. """
  198. Feature: test map offload Rescale op.
  199. Description: Input is image dataset.
  200. Expectation: Output should be same with activated or deactivated offload for Rescale op.
  201. """
  202. rescale = 1.0 / 255.0
  203. shift = 0.0
  204. # Dataset with offload activated.
  205. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  206. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  207. dataset_0 = dataset_0.map(operations=[C.Rescale(rescale, shift)], input_columns="image", offload=True)
  208. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  209. dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)
  210. # Dataset with offload not activated.
  211. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  212. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  213. dataset_1 = dataset_1.map(operations=[C.Rescale(rescale, shift)], input_columns="image")
  214. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  215. dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)
  216. for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
  217. dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  218. np.testing.assert_almost_equal(img_0, img_1, decimal=6)
  219. break
  220. def test_offload_typecast_op():
  221. """
  222. Feature: test map offload TypeCast op.
  223. Description: Input is image dataset.
  224. Expectation: Output should be the same with activated or deactivated offload for TypeCast op.
  225. """
  226. # Dataset without offload activated.
  227. ds_baseline = ds.ImageFolderDataset(DATA_DIR)
  228. ds_baseline = ds_baseline.map(operations=[C.Decode(), C2.TypeCast(mstype.float32)], input_columns="image")
  229. ds_baseline = ds_baseline.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label")
  230. # Dataset with offload activated.
  231. ds_offload = ds.ImageFolderDataset(DATA_DIR)
  232. ds_offload = ds_offload.map(operations=[C.Decode(), C2.TypeCast(mstype.float32)],
  233. input_columns="image", offload=True)
  234. ds_offload = ds_offload.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=True)
  235. for (img_0, _), (img_1, _) in zip(ds_baseline.create_tuple_iterator(num_epochs=1, output_numpy=True),
  236. ds_offload.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  237. np.testing.assert_almost_equal(img_0, img_1, decimal=6)
  238. def test_offload_different_column_end_of_pipeline():
  239. """
  240. Feature: Test offload end_of_pipeline check.
  241. Description: Input is image dataset.
  242. Expectation: The image map op gets offloaded even though it comes before the not-offloaded label map op, since
  243. the end_of_pipeline check looks at columns separately.
  244. """
  245. image_trans = [C.Decode(), C.HWC2CHW()]
  246. ds.config.set_auto_offload(True)
  247. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  248. dataset_0 = dataset_0.map(operations=image_trans, input_columns="image")
  249. dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False)
  250. data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True)
  251. # Assert at least one operation has been offloaded
  252. np.testing.assert_(len(data_iterator.offload_model.transform_list[0].me_ops) > 0)
  253. ds.config.set_auto_offload(False)
  254. def test_offload_not_end_of_pipeline():
  255. """
  256. Feature: Test offload end_of_pipeline check.
  257. Description: Input is image dataset.
  258. Expectation: No operations are offloaded, since the image map op at the end of the pipeline has the
  259. offload flag set to False.
  260. """
  261. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  262. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image", offload=True)
  263. dataset_0 = dataset_0.map(operations=[C.RandomHorizontalFlip(prob=0.5)], input_columns="image", offload=True)
  264. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=False)
  265. dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False)
  266. data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True)
  267. # Assert no operations are set to be offloaded
  268. np.testing.assert_(data_iterator.offload_model is None)
  269. def test_offload_dim_check():
  270. """
  271. Feature: test input has the required number of dimensions for offload operation.
  272. Description: Input is image dataset.
  273. Expectation: Should raise ValueError.
  274. """
  275. # Dataset with offload activated.
  276. dataset = ds.ImageFolderDataset(DATA_DIR)
  277. dataset = dataset.map(operations=[C.Decode()], input_columns="image")
  278. dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  279. error_msg = "For HwcToChw offload operation, the dimension of input should be 4, but got 3."
  280. with pytest.raises(ValueError, match=error_msg):
  281. for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
  282. continue
  283. def test_offload_random_sharpness_op():
  284. """
  285. Feature: test map offload RandomSharpness op.
  286. Description: Input is image dataset.
  287. Expectation: Output should be same with activated or deactivated offload for RandomSharpness op.
  288. """
  289. # Dataset with offload activated.
  290. dataset_0 = ds.ImageFolderDataset(DATA_DIR)
  291. dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
  292. dataset_0 = dataset_0.map(operations=[C.RandomSharpness(degrees=[1.0, 1.0])], input_columns="image", offload=True)
  293. dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
  294. dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)
  295. # Dataset with offload not activated.
  296. dataset_1 = ds.ImageFolderDataset(DATA_DIR)
  297. dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
  298. dataset_1 = dataset_1.map(operations=[C.RandomSharpness(degrees=[1.0, 1.0])], input_columns="image")
  299. dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
  300. dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)
  301. for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
  302. dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
  303. np.testing.assert_almost_equal(img_0, img_1, decimal=6)
  304. break
  305. if __name__ == "__main__":
  306. test_offload()
  307. test_auto_offload()
  308. test_offload_column_validation()
  309. test_offload_column_mapping()
  310. test_offload_multi_column()
  311. test_offload_concat_dataset_1()
  312. test_offload_concat_dataset_2()
  313. test_offload_normalize_op()
  314. test_offload_rescale_op()
  315. test_offload_typecast_op()
  316. test_offload_different_column_end_of_pipeline()
  317. test_offload_not_end_of_pipeline()
  318. test_offload_dim_check()
  319. test_offload_random_sharpness_op()