# Copyright 2021-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the dataset map-operation offload feature (per-map `offload` flag
and the global `auto_offload` config option)."""
import numpy as np
import pytest

import mindspore.dataset as ds
import mindspore.common.dtype as mstype
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2

DATA_DIR = "../data/dataset/testPK/data"
BATCH_SIZE = 2


def test_offload():
    """
    Feature: test map offload flag.
    Description: Input is image dataset.
    Expectation: Output should be same with activated or deactivated offload.
    """
    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
    dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
    dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)

    for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
                                      dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_array_equal(img_0, img_1)
        # One batch is enough to compare the two pipelines.
        break


def test_auto_offload():
    """
    Feature: Test auto_offload config option.
    Description: Input is image dataset.
    Expectation: Output should be same with auto_offload activated and deactivated.
    """
    trans = [C.Decode(), C.HWC2CHW()]

    # Enable automatic offload
    ds.config.set_auto_offload(True)

    # Guard with try/finally so the global config is always restored, even if
    # the comparison below fails — otherwise subsequent test cases would run
    # with auto_offload unexpectedly enabled.
    try:
        # Dataset with offload deactivated
        dataset_auto_disabled = ds.ImageFolderDataset(DATA_DIR)
        dataset_auto_disabled = dataset_auto_disabled.map(operations=trans, input_columns="image", offload=False)
        dataset_auto_disabled = dataset_auto_disabled.batch(BATCH_SIZE, drop_remainder=True)

        # Dataset with config.auto_offload activated
        dataset_auto_enabled = ds.ImageFolderDataset(DATA_DIR)
        dataset_auto_enabled = dataset_auto_enabled.map(operations=trans, input_columns="image")
        dataset_auto_enabled = dataset_auto_enabled.batch(BATCH_SIZE, drop_remainder=True)

        for (img_0, _), (img_1, _) in zip(
                dataset_auto_disabled.create_tuple_iterator(num_epochs=1, output_numpy=True),
                dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)):
            np.testing.assert_array_equal(img_0, img_1)
            break
    finally:
        # Need to turn off here or subsequent test cases will fail.
        ds.config.set_auto_offload(False)


def test_offload_column_validation():
    """
    Feature: Test the column validation for offloaded map operations
    Description: Input is an image dataset, but the input column is incorrect for the offloaded map operation.
    Expectation: Should raise RuntimeError.
    """
    dataset = ds.ImageFolderDataset(DATA_DIR)
    dataset = dataset.map(operations=[C.Decode()], input_columns="image")
    # Use invalid input column name
    dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="fake_column", offload=True)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

    error_msg = "The following input column(s) for an offloaded map operation do not exist: [\'fake_column\']"
    with pytest.raises(RuntimeError) as excinfo:
        for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
            continue
    assert str(excinfo.value) == error_msg


def test_offload_multi_column():
    """
    Feature: Test the offload functionality with datasets with more than 2 columns.
    Description: Input is an image dataset, copy the image column and apply map operations to both images.
    Expectation: Output should be same with both offload activated and deactivated.
    """
    def copy_column(x, y):
        # Duplicate the image column so the pipeline has two image columns.
        return x, x, y

    dataset = ds.ImageFolderDataset(DATA_DIR)
    dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
                          output_columns=["image1", "image2", "label"],
                          column_order=["image1", "image2", "label"])
    dataset = dataset.map(operations=[C.Decode()], input_columns="image1")
    dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image1")
    dataset = dataset.map(operations=[C.Decode()], input_columns="image2")
    dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image2")
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

    dataset_offload = ds.ImageFolderDataset(DATA_DIR)
    dataset_offload = dataset_offload.map(operations=copy_column, input_columns=["image", "label"],
                                          output_columns=["image1", "image2", "label"],
                                          column_order=["image1", "image2", "label"])
    dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image1")
    dataset_offload = dataset_offload.map(operations=[C.HWC2CHW()], input_columns="image1", offload=True)
    dataset_offload = dataset_offload.map(operations=[C.Decode()], input_columns="image2")
    dataset_offload = dataset_offload.map(operations=[C.HWC2CHW()], input_columns="image2", offload=True)
    dataset_offload = dataset_offload.batch(BATCH_SIZE, drop_remainder=True)

    for (img1, img2, _), (img1_offload, img2_offload, _) in \
            zip(dataset.create_tuple_iterator(num_epochs=1, output_numpy=True),
                dataset_offload.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_array_equal(img1, img1_offload)
        np.testing.assert_array_equal(img2, img2_offload)
        break


def test_offload_column_mapping():
    """
    Feature: Test the dataset column mapping for offloaded operations
    Description: Input is an image dataset, copy the image column, then apply offload to only copied column.
    Expectation: The offload model dataset column index value is 1 (second column).
    """
    def copy_column(x, y):
        # Duplicate the image column so offload targets only the copy.
        return x, x, y

    dataset = ds.ImageFolderDataset(DATA_DIR)
    dataset = dataset.map(operations=copy_column, input_columns=["image", "label"],
                          output_columns=["image1", "image2", "label"],
                          column_order=["image1", "image2", "label"])
    dataset = dataset.map(operations=[C.Decode()], input_columns="image2")
    dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image2", offload=True)

    dataset_iterator = dataset.create_tuple_iterator(num_epochs=1, output_numpy=True)

    offload_col_idxs = dataset_iterator.offload_model.transform_list[0].col_idxs
    # assert there is only one column index in the offload model, and that it is 1 (second column)
    np.testing.assert_((len(offload_col_idxs) == 1) and (offload_col_idxs[0] == 1))


def test_offload_concat_dataset_1():
    """
    Feature: test map offload flag for concatenated dataset.
    Description: Input is image dataset.
    Expectation: Should raise RuntimeError.
    """
    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
    dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
    dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)

    # Concatenation happens after batching here (contrast with test 2).
    dataset_concat = dataset_0 + dataset_1

    error_msg = "Offload module currently does not support concatenated or zipped datasets."
    with pytest.raises(RuntimeError, match=error_msg):
        for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
            continue


def test_offload_concat_dataset_2():
    """
    Feature: test map offload flag for concatenated dataset.
    Description: Input is image dataset.
    Expectation: Should raise RuntimeError.
    """
    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")

    # Concatenation happens before batching here (contrast with test 1).
    dataset_concat = dataset_0 + dataset_1
    dataset_concat = dataset_concat.batch(BATCH_SIZE, drop_remainder=True)

    error_msg = "Offload module currently does not support concatenated or zipped datasets."
    with pytest.raises(RuntimeError, match=error_msg):
        for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
            continue


def test_offload_normalize_op():
    """
    Feature: test map offload Normalize op.
    Description: Input is image dataset.
    Expectation: Output should be same with activated or deactivated offload for Normalize op.
    """
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    dataset_0 = dataset_0.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image", offload=True)
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
    dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
    dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)

    for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
                                      dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        # Float math on different backends: compare approximately, not exactly.
        np.testing.assert_almost_equal(img_0, img_1, decimal=6)
        break


def test_offload_rescale_op():
    """
    Feature: test map offload Rescale op.
    Description: Input is image dataset.
    Expectation: Output should be same with activated or deactivated offload for Rescale op.
    """
    rescale = 1.0 / 255.0
    shift = 0.0

    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    dataset_0 = dataset_0.map(operations=[C.Rescale(rescale, shift)], input_columns="image", offload=True)
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
    dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.Rescale(rescale, shift)], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
    dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)

    for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
                                      dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        # Float math on different backends: compare approximately, not exactly.
        np.testing.assert_almost_equal(img_0, img_1, decimal=6)
        break


def test_offload_typecast_op():
    """
    Feature: test map offload TypeCast op.
    Description: Input is image dataset.
    Expectation: Output should be the same with activated or deactivated offload for TypeCast op.
    """
    # Dataset without offload activated.
    ds_baseline = ds.ImageFolderDataset(DATA_DIR)
    ds_baseline = ds_baseline.map(operations=[C.Decode(), C2.TypeCast(mstype.float32)],
                                  input_columns="image")
    ds_baseline = ds_baseline.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label")

    # Dataset with offload activated.
    ds_offload = ds.ImageFolderDataset(DATA_DIR)
    ds_offload = ds_offload.map(operations=[C.Decode(), C2.TypeCast(mstype.float32)],
                                input_columns="image", offload=True)
    ds_offload = ds_offload.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=True)

    for (img_0, _), (img_1, _) in zip(ds_baseline.create_tuple_iterator(num_epochs=1, output_numpy=True),
                                      ds_offload.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_almost_equal(img_0, img_1, decimal=6)


def test_offload_different_column_end_of_pipeline():
    """
    Feature: Test offload end_of_pipeline check.
    Description: Input is image dataset.
    Expectation: The image map op gets offloaded even though it comes before the not-offloaded label map op,
        since the end_of_pipeline check looks at columns separately.
    """
    image_trans = [C.Decode(), C.HWC2CHW()]
    ds.config.set_auto_offload(True)

    # Guard with try/finally so the global config is always restored, even if
    # the assertion below fails — otherwise subsequent test cases would run
    # with auto_offload unexpectedly enabled.
    try:
        dataset_0 = ds.ImageFolderDataset(DATA_DIR)
        dataset_0 = dataset_0.map(operations=image_trans, input_columns="image")
        dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False)

        data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True)
        # Assert at least one operation has been offloaded
        np.testing.assert_(len(data_iterator.offload_model.transform_list[0].me_ops) > 0)
    finally:
        ds.config.set_auto_offload(False)


def test_offload_not_end_of_pipeline():
    """
    Feature: Test offload end_of_pipeline check.
    Description: Input is image dataset.
    Expectation: No operations are offloaded, since the image map op at the end of the pipeline
        has the offload flag set to False.
    """
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image", offload=True)
    dataset_0 = dataset_0.map(operations=[C.RandomHorizontalFlip(prob=0.5)], input_columns="image", offload=True)
    # The final image map op disables offload, which invalidates offload for
    # all preceding image ops as well.
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=False)
    dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False)

    data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True)
    # Assert no operations are set to be offloaded
    np.testing.assert_(data_iterator.offload_model is None)


def test_offload_dim_check():
    """
    Feature: test input has the required number of dimensions for offload operation.
    Description: Input is image dataset.
    Expectation: Should raise ValueError.
    """
    # Dataset with offload activated, but no batch op: HwcToChw offload
    # requires a 4-dimensional (batched) input.
    dataset = ds.ImageFolderDataset(DATA_DIR)
    dataset = dataset.map(operations=[C.Decode()], input_columns="image")
    dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)

    error_msg = "For HwcToChw offload operation, the dimension of input should be 4, but got 3."
    with pytest.raises(ValueError, match=error_msg):
        for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
            continue


def test_offload_random_sharpness_op():
    """
    Feature: test map offload RandomSharpness op.
    Description: Input is image dataset.
    Expectation: Output should be same with activated or deactivated offload for RandomSharpness op.
    """
    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    # degrees=[1.0, 1.0] makes the op deterministic, so outputs are comparable.
    dataset_0 = dataset_0.map(operations=[C.RandomSharpness(degrees=[1.0, 1.0])], input_columns="image",
                              offload=True)
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
    dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.RandomSharpness(degrees=[1.0, 1.0])], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
    dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)

    for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True),
                                      dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_almost_equal(img_0, img_1, decimal=6)
        break


if __name__ == "__main__":
    test_offload()
    test_auto_offload()
    test_offload_column_validation()
    test_offload_column_mapping()
    test_offload_multi_column()
    test_offload_concat_dataset_1()
    test_offload_concat_dataset_2()
    test_offload_normalize_op()
    test_offload_rescale_op()
    test_offload_typecast_op()
    test_offload_different_column_end_of_pipeline()
    test_offload_not_end_of_pipeline()
    test_offload_dim_check()
    test_offload_random_sharpness_op()