# Copyright 2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import numpy as np import pytest import mindspore.dataset as ds import mindspore.common.dtype as mstype import mindspore.dataset.vision.c_transforms as C import mindspore.dataset.transforms.c_transforms as C2 DATA_DIR = "../data/dataset/testPK/data" def test_offload(): """ Feature: test map offload flag. Description: Input is image dataset. Expectation: Output should be same with activated or deactivated offload. """ # Dataset with offload activated. dataset_0 = ds.ImageFolderDataset(DATA_DIR) dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image") dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True) dataset_0 = dataset_0.batch(8, drop_remainder=True) # Dataset with offload not activated. dataset_1 = ds.ImageFolderDataset(DATA_DIR) dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image") dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image") dataset_1 = dataset_1.batch(8, drop_remainder=True) for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True), dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(img_0, img_1) def test_auto_offload(): """ Feature: Test auto_offload config option. Description: Input is image dataset. Expectation: Output should same with auto_offload activated and deactivated. """ trans = [C.Decode(), C.HWC2CHW()] # Enable automatic offload ds.config.set_auto_offload(True) # Dataset with offload deactivated dataset_auto_disabled = ds.ImageFolderDataset(DATA_DIR) dataset_auto_disabled = dataset_auto_disabled.map(operations=trans, input_columns="image", offload=False) dataset_auto_disabled = dataset_auto_disabled.batch(8, drop_remainder=True) # Dataset with config.auto_offload activated dataset_auto_enabled = ds.ImageFolderDataset(DATA_DIR) dataset_auto_enabled = dataset_auto_enabled.map(operations=trans, input_columns="image") dataset_auto_enabled = dataset_auto_enabled.batch(8, drop_remainder=True) for (img_0, _), (img_1, _) in zip(dataset_auto_disabled.create_tuple_iterator(num_epochs=1, output_numpy=True), dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(img_0, img_1) # Need to turn off here or subsequent test cases will fail. ds.config.set_auto_offload(False) def test_offload_column_validation(): """ Feature: Test the column validation for offloaded map operations Description: Input is an image dataset, but the input column is incorrect for the offloaded map operation. Expectation: Should raise RuntimeError. """ dataset = ds.ImageFolderDataset(DATA_DIR) dataset = dataset.map(operations=[C.Decode()], input_columns="image") # Use invalid input column name dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="fake_column", offload=True) dataset = dataset.batch(8, drop_remainder=True) error_msg = "The following input column(s) for an offloaded map operation do not exist: [\'fake_column\']" with pytest.raises(RuntimeError) as excinfo: for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True): continue assert str(excinfo.value) == error_msg def test_offload_concat_dataset_1(): """ Feature: test map offload flag for concatenated dataset. Description: Input is image dataset. Expectation: Should raise RuntimeError. """ # Dataset with offload activated. dataset_0 = ds.ImageFolderDataset(DATA_DIR) dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image") dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True) dataset_0 = dataset_0.batch(8, drop_remainder=True) # Dataset with offload not activated. dataset_1 = ds.ImageFolderDataset(DATA_DIR) dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image") dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image") dataset_1 = dataset_1.batch(8, drop_remainder=True) dataset_concat = dataset_0 + dataset_1 error_msg = "Offload module currently does not support concatenated or zipped datasets." with pytest.raises(RuntimeError, match=error_msg): for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True): continue def test_offload_concat_dataset_2(): """ Feature: test map offload flag for concatenated dataset. Description: Input is image dataset. Expectation: Should raise RuntimeError. """ # Dataset with offload activated. dataset_0 = ds.ImageFolderDataset(DATA_DIR) dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image") dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True) # Dataset with offload not activated. dataset_1 = ds.ImageFolderDataset(DATA_DIR) dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image") dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image") dataset_concat = dataset_0 + dataset_1 dataset_concat = dataset_concat.batch(8, drop_remainder=True) error_msg = "Offload module currently does not support concatenated or zipped datasets." with pytest.raises(RuntimeError, match=error_msg): for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True): continue def test_offload_normalize_op(): """ Feature: test map offload Normalize op. Description: Input is image dataset. Expectation: Output should be same with activated or deactivated offload for Normalize op. """ mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] std = [0.229 * 255, 0.224 * 255, 0.225 * 255] # Dataset with offload activated. dataset_0 = ds.ImageFolderDataset(DATA_DIR) dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image") dataset_0 = dataset_0.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image", offload=True) dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True) dataset_0 = dataset_0.batch(8, drop_remainder=True) # Dataset with offload not activated. dataset_1 = ds.ImageFolderDataset(DATA_DIR) dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image") dataset_1 = dataset_1.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image") dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image") dataset_1 = dataset_1.batch(8, drop_remainder=True) for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True), dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_almost_equal(img_0, img_1, decimal=6) def test_offload_rescale_op(): """ Feature: test map offload Rescale op. Description: Input is image dataset. Expectation: Output should be same with activated or deactivated offload for Rescale op. """ rescale = 1.0 / 255.0 shift = 0.0 # Dataset with offload activated. dataset_0 = ds.ImageFolderDataset(DATA_DIR) dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image") dataset_0 = dataset_0.map(operations=[C.Rescale(rescale, shift)], input_columns="image", offload=True) dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True) dataset_0 = dataset_0.batch(8, drop_remainder=True) # Dataset with offload not activated. dataset_1 = ds.ImageFolderDataset(DATA_DIR) dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image") dataset_1 = dataset_1.map(operations=[C.Rescale(rescale, shift)], input_columns="image") dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image") dataset_1 = dataset_1.batch(8, drop_remainder=True) for (img_0, _), (img_1, _) in zip(dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True), dataset_1.create_tuple_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_almost_equal(img_0, img_1, decimal=6) def test_offload_different_column_end_of_pipeline(): """ Feature: Test offload end_of_pipeline check. Description: Input is image dataset. Expectation: The image map op gets offloaded even though it comes before the not-offloaded label map op, since the end_of_pipeline check looks at columns separately. """ image_trans = [C.Decode(), C.HWC2CHW()] ds.config.set_auto_offload(True) dataset_0 = ds.ImageFolderDataset(DATA_DIR) dataset_0 = dataset_0.map(operations=image_trans, input_columns="image") dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False) data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True) # Assert at least one operation has been offloaded np.testing.assert_(len(data_iterator.offload_model.transform_list[0].me_ops) > 0) ds.config.set_auto_offload(False) def test_offload_not_end_of_pipeline(): """ Feature: Test offload end_of_pipeline check. Description: Input is image dataset. Expectation: No operations are offloaded, since the image map op at the end of the pipeline has the offload flag set to False. """ dataset_0 = ds.ImageFolderDataset(DATA_DIR) dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image", offload=True) dataset_0 = dataset_0.map(operations=[C.RandomHorizontalFlip(prob=0.5)], input_columns="image", offload=True) dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=False) dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False) data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True) # Assert no operations are set to be offloaded np.testing.assert_(data_iterator.offload_model is None) def test_offload_dim_check(): """ Feature: test input has the required number of dimensions for offload operation. Description: Input is image dataset. Expectation: Should raise ValueError. """ # Dataset with offload activated. dataset = ds.ImageFolderDataset(DATA_DIR) dataset = dataset.map(operations=[C.Decode()], input_columns="image") dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image", offload=True) error_msg = "For HwcToChw offload operation, the dimension of input should be 4, but got 3." with pytest.raises(ValueError, match=error_msg): for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True): continue if __name__ == "__main__": test_offload() test_auto_offload() test_offload_column_validation() test_offload_concat_dataset_1() test_offload_concat_dataset_2() test_offload_normalize_op() test_offload_rescale_op() test_offload_different_column_end_of_pipeline() test_offload_not_end_of_pipeline() test_offload_dim_check()