|
- # Copyright 2021-2022 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- import numpy as np
- import pytest
-
- import mindspore.dataset as ds
- import mindspore.common.dtype as mstype
- import mindspore.dataset.vision.c_transforms as C
- import mindspore.dataset.transforms.c_transforms as C2
-
-
- DATA_DIR = "../data/dataset/testPK/data"
- BATCH_SIZE = 2
-
-
def test_offload():
    """
    Feature: test map offload flag.
    Description: Input is image dataset. Two Decode -> HWC2CHW -> batch pipelines are built; only one of
        them passes offload=True to the HWC2CHW map.
    Expectation: The first batch of images should be the same with activated or deactivated offload.
    """
    def build_pipeline(**hwc2chw_map_kwargs):
        # Extra keyword arguments are forwarded to the HWC2CHW map only.
        pipeline = ds.ImageFolderDataset(DATA_DIR)
        pipeline = pipeline.map(operations=[C.Decode()], input_columns="image")
        pipeline = pipeline.map(operations=[C.HWC2CHW()], input_columns="image", **hwc2chw_map_kwargs)
        return pipeline.batch(BATCH_SIZE, drop_remainder=True)

    # The offloaded pipeline is built first, then the one that leaves the offload argument unset.
    offloaded = build_pipeline(offload=True)
    baseline = build_pipeline()

    offloaded_rows = offloaded.create_tuple_iterator(num_epochs=1, output_numpy=True)
    baseline_rows = baseline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for (offloaded_img, _), (baseline_img, _) in zip(offloaded_rows, baseline_rows):
        np.testing.assert_array_equal(offloaded_img, baseline_img)
        # Only the first batch is compared.
        break
-
-
def test_auto_offload():
    """
    Feature: Test auto_offload config option.
    Description: Input is image dataset. One pipeline explicitly passes offload=False to its map, the other
        leaves the offload argument unset while the auto_offload config option is enabled.
    Expectation: Output should be the same with auto_offload activated and deactivated.
    """
    trans = [C.Decode(), C.HWC2CHW()]

    # Enable automatic offload
    ds.config.set_auto_offload(True)
    try:
        # Dataset with offload deactivated
        dataset_auto_disabled = ds.ImageFolderDataset(DATA_DIR)
        dataset_auto_disabled = dataset_auto_disabled.map(operations=trans, input_columns="image", offload=False)
        dataset_auto_disabled = dataset_auto_disabled.batch(BATCH_SIZE, drop_remainder=True)

        # Dataset with config.auto_offload activated
        dataset_auto_enabled = ds.ImageFolderDataset(DATA_DIR)
        dataset_auto_enabled = dataset_auto_enabled.map(operations=trans, input_columns="image")
        dataset_auto_enabled = dataset_auto_enabled.batch(BATCH_SIZE, drop_remainder=True)

        for (img_0, _), (img_1, _) in zip(dataset_auto_disabled.create_tuple_iterator(num_epochs=1, output_numpy=True),
                                          dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)):
            np.testing.assert_array_equal(img_0, img_1)
            # Only the first batch is compared.
            break
    finally:
        # Need to turn off here or subsequent test cases will fail. Doing it in a finally clause
        # guarantees the global setting is reset even when the comparison above raises.
        ds.config.set_auto_offload(False)
-
-
def test_offload_column_validation():
    """
    Feature: Test the column validation for offloaded map operations
    Description: Input is an image dataset; the offloaded HWC2CHW map names an input column ("fake_column")
        that is not produced by the preceding pipeline stages.
    Expectation: Should raise RuntimeError whose message equals the expected text exactly.
    """
    expected_message = "The following input column(s) for an offloaded map operation do not exist: [\'fake_column\']"

    pipeline = ds.ImageFolderDataset(DATA_DIR)
    pipeline = pipeline.map(operations=[C.Decode()], input_columns="image")
    # The offloaded map deliberately refers to a column name that does not exist.
    pipeline = pipeline.map(operations=[C.HWC2CHW()], input_columns="fake_column", offload=True)
    pipeline = pipeline.batch(BATCH_SIZE, drop_remainder=True)

    with pytest.raises(RuntimeError) as raised:
        for (_, _) in pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True):
            pass

    # Checked after the context manager exits, once the exception has been captured.
    assert str(raised.value) == expected_message
-
-
def test_offload_multi_column():
    """
    Feature: Test the offload functionality with datasets with more than 2 columns.
    Description: Input is an image dataset; the image column is duplicated into "image1" and "image2" and
        Decode followed by HWC2CHW is mapped onto each copy.
    Expectation: The first batch of both image columns should be the same with offload activated and deactivated.
    """
    def copy_column(x, y):
        return x, x, y

    def build_pipeline(**hwc2chw_map_kwargs):
        # Extra keyword arguments are forwarded to the HWC2CHW maps only.
        pipeline = ds.ImageFolderDataset(DATA_DIR)
        pipeline = pipeline.map(operations=copy_column, input_columns=["image", "label"],
                                output_columns=["image1", "image2", "label"],
                                column_order=["image1", "image2", "label"])
        for image_column in ("image1", "image2"):
            pipeline = pipeline.map(operations=[C.Decode()], input_columns=image_column)
            pipeline = pipeline.map(operations=[C.HWC2CHW()], input_columns=image_column, **hwc2chw_map_kwargs)
        return pipeline.batch(BATCH_SIZE, drop_remainder=True)

    # Baseline pipeline first (offload argument left unset), then the offloaded one.
    baseline = build_pipeline()
    offloaded = build_pipeline(offload=True)

    baseline_rows = baseline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    offloaded_rows = offloaded.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for (first_img, second_img, _), (first_img_offload, second_img_offload, _) in zip(baseline_rows, offloaded_rows):
        np.testing.assert_array_equal(first_img, first_img_offload)
        np.testing.assert_array_equal(second_img, second_img_offload)
        # Only the first batch is compared.
        break
-
-
def test_offload_column_mapping():
    """
    Feature: Test the dataset column mapping for offloaded operations
    Description: Input is an image dataset; the image column is copied and offload is requested only for
        the HWC2CHW map on the copied column ("image2").
    Expectation: The first transform of the iterator's offload model holds exactly one column index,
        and that index is 1 (second column).
    """
    def copy_column(x, y):
        return x, x, y

    column_names = ["image1", "image2", "label"]

    pipeline = ds.ImageFolderDataset(DATA_DIR)
    pipeline = pipeline.map(operations=copy_column, input_columns=["image", "label"],
                            output_columns=list(column_names), column_order=list(column_names))
    pipeline = pipeline.map(operations=[C.Decode()], input_columns="image2")
    pipeline = pipeline.map(operations=[C.HWC2CHW()], input_columns="image2", offload=True)

    iterator = pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    col_idxs = iterator.offload_model.transform_list[0].col_idxs

    # Exactly one column index is expected in the offload model ...
    np.testing.assert_(len(col_idxs) == 1)
    # ... and it has to be 1 (second column).
    np.testing.assert_(col_idxs[0] == 1)
-
-
def test_offload_concat_dataset_1():
    """
    Feature: test map offload flag for concatenated dataset.
    Description: Input is image dataset. Two already-batched pipelines, one of which contains an offloaded
        map, are concatenated with the + operator.
    Expectation: Should raise RuntimeError containing the expected message.
    """
    import re

    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)
    dataset_0 = dataset_0.batch(BATCH_SIZE, drop_remainder=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")
    dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)

    dataset_concat = dataset_0 + dataset_1

    error_msg = "Offload module currently does not support concatenated or zipped datasets."
    # pytest.raises(match=...) performs a regex search, so the message is escaped to make the
    # trailing "." match literally instead of matching any character.
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
            continue
-
-
def test_offload_concat_dataset_2():
    """
    Feature: test map offload flag for concatenated dataset.
    Description: Input is image dataset. Two unbatched pipelines, one of which contains an offloaded map,
        are concatenated with the + operator and the result is batched afterwards.
    Expectation: Should raise RuntimeError containing the expected message.
    """
    import re

    # Dataset with offload activated.
    dataset_0 = ds.ImageFolderDataset(DATA_DIR)
    dataset_0 = dataset_0.map(operations=[C.Decode()], input_columns="image")
    dataset_0 = dataset_0.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)

    # Dataset with offload not activated.
    dataset_1 = ds.ImageFolderDataset(DATA_DIR)
    dataset_1 = dataset_1.map(operations=[C.Decode()], input_columns="image")
    dataset_1 = dataset_1.map(operations=[C.HWC2CHW()], input_columns="image")

    # Unlike test_offload_concat_dataset_1, batching happens after the concatenation.
    dataset_concat = dataset_0 + dataset_1
    dataset_concat = dataset_concat.batch(BATCH_SIZE, drop_remainder=True)

    error_msg = "Offload module currently does not support concatenated or zipped datasets."
    # pytest.raises(match=...) performs a regex search, so the message is escaped to make the
    # trailing "." match literally instead of matching any character.
    with pytest.raises(RuntimeError, match=re.escape(error_msg)):
        for (_, _) in dataset_concat.create_tuple_iterator(num_epochs=1, output_numpy=True):
            continue
-
-
def test_offload_normalize_op():
    """
    Feature: test map offload Normalize op.
    Description: Input is image dataset. Two Decode -> Normalize -> HWC2CHW -> batch pipelines are built;
        only one of them passes offload=True to the Normalize and HWC2CHW maps.
    Expectation: The first batch should be equal to 6 decimals with activated or deactivated offload
        for Normalize op.
    """
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    def build_pipeline(**offload_map_kwargs):
        # Extra keyword arguments go to the Normalize and HWC2CHW maps; Decode never receives them.
        pipeline = ds.ImageFolderDataset(DATA_DIR)
        pipeline = pipeline.map(operations=[C.Decode()], input_columns="image")
        pipeline = pipeline.map(operations=[C.Normalize(mean=mean, std=std)], input_columns="image",
                                **offload_map_kwargs)
        pipeline = pipeline.map(operations=[C.HWC2CHW()], input_columns="image", **offload_map_kwargs)
        return pipeline.batch(BATCH_SIZE, drop_remainder=True)

    # Offloaded pipeline first, then the one that leaves the offload argument unset.
    offloaded = build_pipeline(offload=True)
    baseline = build_pipeline()

    offloaded_rows = offloaded.create_tuple_iterator(num_epochs=1, output_numpy=True)
    baseline_rows = baseline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for (offloaded_img, _), (baseline_img, _) in zip(offloaded_rows, baseline_rows):
        np.testing.assert_almost_equal(offloaded_img, baseline_img, decimal=6)
        # Only the first batch is compared.
        break
-
-
def test_offload_rescale_op():
    """
    Feature: test map offload Rescale op.
    Description: Input is image dataset. Two Decode -> Rescale -> HWC2CHW -> batch pipelines are built;
        only one of them passes offload=True to the Rescale and HWC2CHW maps.
    Expectation: The first batch should be equal to 6 decimals with activated or deactivated offload
        for Rescale op.
    """
    rescale = 1.0 / 255.0
    shift = 0.0

    def build_pipeline(**offload_map_kwargs):
        # Extra keyword arguments go to the Rescale and HWC2CHW maps; Decode never receives them.
        pipeline = ds.ImageFolderDataset(DATA_DIR)
        pipeline = pipeline.map(operations=[C.Decode()], input_columns="image")
        pipeline = pipeline.map(operations=[C.Rescale(rescale, shift)], input_columns="image",
                                **offload_map_kwargs)
        pipeline = pipeline.map(operations=[C.HWC2CHW()], input_columns="image", **offload_map_kwargs)
        return pipeline.batch(BATCH_SIZE, drop_remainder=True)

    # Offloaded pipeline first, then the one that leaves the offload argument unset.
    offloaded = build_pipeline(offload=True)
    baseline = build_pipeline()

    offloaded_rows = offloaded.create_tuple_iterator(num_epochs=1, output_numpy=True)
    baseline_rows = baseline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for (offloaded_img, _), (baseline_img, _) in zip(offloaded_rows, baseline_rows):
        np.testing.assert_almost_equal(offloaded_img, baseline_img, decimal=6)
        # Only the first batch is compared.
        break
-
-
def test_offload_typecast_op():
    """
    Feature: test map offload TypeCast op.
    Description: Input is image dataset. The image column is decoded and cast to float32 and the label
        column is cast to int32; one pipeline passes offload=True to both maps, the other does not.
    Expectation: Image output should be equal to 6 decimals with activated or deactivated offload
        for TypeCast op, for every row.
    """
    def build_pipeline(**map_kwargs):
        # Extra keyword arguments are forwarded to both the image map and the label map.
        pipeline = ds.ImageFolderDataset(DATA_DIR)
        pipeline = pipeline.map(operations=[C.Decode(), C2.TypeCast(mstype.float32)],
                                input_columns="image", **map_kwargs)
        return pipeline.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", **map_kwargs)

    # Baseline pipeline first (offload argument left unset), then the offloaded one.
    baseline = build_pipeline()
    offloaded = build_pipeline(offload=True)

    baseline_rows = baseline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    offloaded_rows = offloaded.create_tuple_iterator(num_epochs=1, output_numpy=True)
    # No early exit here: every pair of rows is compared (labels are not compared).
    for (baseline_img, _), (offloaded_img, _) in zip(baseline_rows, offloaded_rows):
        np.testing.assert_almost_equal(baseline_img, offloaded_img, decimal=6)
-
-
def test_offload_different_column_end_of_pipeline():
    """
    Feature: Test offload end_of_pipeline check.
    Description: Input is image dataset.
    Expectation: The image map op gets offloaded even though it comes before the not-offloaded label map op, since
        the end_of_pipeline check looks at columns separately.
    """
    image_trans = [C.Decode(), C.HWC2CHW()]
    ds.config.set_auto_offload(True)
    try:
        dataset_0 = ds.ImageFolderDataset(DATA_DIR)
        dataset_0 = dataset_0.map(operations=image_trans, input_columns="image")
        dataset_0 = dataset_0.map(operations=[C2.TypeCast(mstype.int32)], input_columns="label", offload=False)

        data_iterator = dataset_0.create_tuple_iterator(num_epochs=1, output_numpy=True)
        # Assert at least one operation has been offloaded
        np.testing.assert_(len(data_iterator.offload_model.transform_list[0].me_ops) > 0)
    finally:
        # Reset the global auto_offload setting even if the assertion (or pipeline construction)
        # fails, so the enabled flag cannot leak into subsequent test cases.
        ds.config.set_auto_offload(False)
-
-
def test_offload_not_end_of_pipeline():
    """
    Feature: Test offload end_of_pipeline check.
    Description: Input is image dataset. The image column gets Decode and RandomHorizontalFlip maps with
        offload=True followed by an HWC2CHW map with offload=False; the label map also has offload=False.
    Expectation: No operations are offloaded, since the image map op at the end of the pipeline has the
        offload flag set to False.
    """
    pipeline = ds.ImageFolderDataset(DATA_DIR)

    # (operations, input column, offload flag) for each map stage, in pipeline order.
    map_stages = (
        ([C.Decode()], "image", True),
        ([C.RandomHorizontalFlip(prob=0.5)], "image", True),
        ([C.HWC2CHW()], "image", False),
        ([C2.TypeCast(mstype.int32)], "label", False),
    )
    for stage_ops, stage_column, stage_offload in map_stages:
        pipeline = pipeline.map(operations=stage_ops, input_columns=stage_column, offload=stage_offload)

    iterator = pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    # The iterator is expected to carry no offload model at all.
    np.testing.assert_(iterator.offload_model is None)
-
-
def test_offload_dim_check():
    """
    Feature: test input has the required number of dimensions for offload operation.
    Description: Input is image dataset. The HWC2CHW map is offloaded but the pipeline has no batch
        operation.
    Expectation: Should raise ValueError containing the expected message.
    """
    import re

    # Dataset with offload activated.
    dataset = ds.ImageFolderDataset(DATA_DIR)
    dataset = dataset.map(operations=[C.Decode()], input_columns="image")
    dataset = dataset.map(operations=[C.HWC2CHW()], input_columns="image", offload=True)

    error_msg = "For HwcToChw offload operation, the dimension of input should be 4, but got 3."
    # pytest.raises(match=...) performs a regex search, so the message is escaped to make the
    # trailing "." match literally instead of matching any character.
    with pytest.raises(ValueError, match=re.escape(error_msg)):
        for (_, _) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
            continue
-
-
def test_offload_random_sharpness_op():
    """
    Feature: test map offload RandomSharpness op.
    Description: Input is image dataset. Two Decode -> RandomSharpness(degrees=[1.0, 1.0]) -> HWC2CHW -> batch
        pipelines are built; only one of them passes offload=True to the RandomSharpness and HWC2CHW maps.
    Expectation: The first batch should be equal to 6 decimals with activated or deactivated offload
        for RandomSharpness op.
    """
    def build_pipeline(**offload_map_kwargs):
        # Extra keyword arguments go to the RandomSharpness and HWC2CHW maps; Decode never receives them.
        pipeline = ds.ImageFolderDataset(DATA_DIR)
        pipeline = pipeline.map(operations=[C.Decode()], input_columns="image")
        pipeline = pipeline.map(operations=[C.RandomSharpness(degrees=[1.0, 1.0])], input_columns="image",
                                **offload_map_kwargs)
        pipeline = pipeline.map(operations=[C.HWC2CHW()], input_columns="image", **offload_map_kwargs)
        return pipeline.batch(BATCH_SIZE, drop_remainder=True)

    # Offloaded pipeline first, then the one that leaves the offload argument unset.
    offloaded = build_pipeline(offload=True)
    baseline = build_pipeline()

    offloaded_rows = offloaded.create_tuple_iterator(num_epochs=1, output_numpy=True)
    baseline_rows = baseline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for (offloaded_img, _), (baseline_img, _) in zip(offloaded_rows, baseline_rows):
        np.testing.assert_almost_equal(offloaded_img, baseline_img, decimal=6)
        # Only the first batch is compared.
        break
-
-
if __name__ == "__main__":
    # Run every test case once, in a fixed order, when the module is executed as a script.
    for run_test_case in (
            test_offload,
            test_auto_offload,
            test_offload_column_validation,
            test_offload_column_mapping,
            test_offload_multi_column,
            test_offload_concat_dataset_1,
            test_offload_concat_dataset_2,
            test_offload_normalize_op,
            test_offload_rescale_op,
            test_offload_typecast_op,
            test_offload_different_column_end_of_pipeline,
            test_offload_not_end_of_pipeline,
            test_offload_dim_check,
            test_offload_random_sharpness_op,
    ):
        run_test_case()
|