# Copyright 2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ Test Dataset AutoTune's Save and Load Configuration support """ import os import json import random import numpy as np import pytest import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as c_transforms import mindspore.dataset.vision.c_transforms as c_vision MNIST_DATA_DIR = "../data/dataset/testMnistData" DATA_DIR = "../data/dataset/testPK/data" def data_pipeline_same(file1, file2): assert file1.exists() assert file2.exists() with file1.open() as f1, file2.open() as f2: pipeline1 = json.load(f1) pipeline1 = pipeline1["tree"] if "tree" in pipeline1 else pipeline1 pipeline2 = json.load(f2) pipeline2 = pipeline2["tree"] if "tree" in pipeline2 else pipeline2 return pipeline1 == pipeline2 @pytest.mark.forked class TestAutotuneSaveLoad: # Note: Use pytest fixture tmp_path to create files within this temporary directory, # which is automatically created for each test and deleted at the end of the test. @staticmethod def setup_method(): os.environ['RANK_ID'] = str(random.randint(0, 9)) @staticmethod def teardown_method(): del os.environ['RANK_ID'] @staticmethod def test_autotune_generator_pipeline(tmp_path): """ Feature: Autotuning Description: Test save final config with GeneratorDataset pipeline: Generator -> Shuffle -> Batch Expectation: pipeline runs successfully """ original_autotune = ds.config.get_enable_autotune() ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal")) source = [(np.array([x]),) for x in range(1024)] data1 = ds.GeneratorDataset(source, ["data"]) data1 = data1.shuffle(64) data1 = data1.batch(32) ds.serialize(data1, str(tmp_path / "test_autotune_generator_serialized.json")) itr = data1.create_dict_iterator(num_epochs=5) for _ in range(5): for _ in itr: pass del itr ds.config.set_enable_autotune(original_autotune) file = tmp_path / ("test_autotune_generator_atfinal_" + os.environ['RANK_ID'] + ".json") assert file.exists() @staticmethod def test_autotune_save_overwrite_generator(tmp_path): """ Feature: Autotuning Description: Test set_enable_autotune and existing json_filepath is overwritten Expectation: set_enable_autotune() executes successfully with file-exist warning produced. Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline. """ source = [(np.array([x]),) for x in range(1024)] at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal.json" original_autotune = ds.config.get_enable_autotune() ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename)) data1 = ds.GeneratorDataset(source, ["data"]) for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): pass ds.config.set_enable_autotune(False) ds.config.set_enable_autotune(True, str(tmp_path) + at_final_json_filename) data2 = ds.GeneratorDataset(source, ["data"]) data2 = data2.shuffle(64) for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True): pass ds.config.set_enable_autotune(original_autotune) @staticmethod def test_autotune_mnist_pipeline(tmp_path): """ Feature: Autotuning Description: Test save final config with Mnist pipeline: Mnist -> Batch -> Map Expectation: pipeline runs successfully """ original_autotune = ds.config.get_enable_autotune() ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal")) original_seed = ds.config.get_seed() ds.config.set_seed(1) data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=100) one_hot_encode = c_transforms.OneHot(10) # num_classes is input argument data1 = data1.map(operations=one_hot_encode, input_columns="label") data1 = data1.batch(batch_size=10, drop_remainder=True) ds.serialize(data1, str(tmp_path / "test_autotune_mnist_pipeline_serialized.json")) for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): pass ds.config.set_enable_autotune(original_autotune) # Confirm final AutoTune config file pipeline is identical to the serialized file pipeline. file1 = tmp_path / ("test_autotune_mnist_pipeline_atfinal_" + os.environ['RANK_ID'] + ".json") file2 = tmp_path / "test_autotune_mnist_pipeline_serialized.json" assert data_pipeline_same(file1, file2) desdata1 = ds.deserialize(json_filepath=str(file1)) desdata2 = ds.deserialize(json_filepath=str(file2)) num = 0 for newdata1, newdata2 in zip(desdata1.create_dict_iterator(num_epochs=1, output_numpy=True), desdata2.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(newdata1['image'], newdata2['image']) np.testing.assert_array_equal(newdata1['label'], newdata2['label']) num += 1 assert num == 10 ds.config.set_seed(original_seed) @staticmethod def test_autotune_warning_with_offload(tmp_path, capfd): """ Feature: Autotuning Description: Test autotune config saving with offload=True Expectation: Autotune should not write the config file and print a log message """ original_seed = ds.config.get_seed() ds.config.set_seed(1) at_final_json_filename = "test_autotune_warning_with_offload_config.json" config_path = tmp_path / at_final_json_filename original_autotune = ds.config.get_enable_autotune() ds.config.set_enable_autotune(True, str(config_path)) # Dataset with offload activated. dataset = ds.ImageFolderDataset(DATA_DIR, num_samples=8) dataset = dataset.map(operations=[c_vision.Decode()], input_columns="image") dataset = dataset.map(operations=[c_vision.HWC2CHW()], input_columns="image", offload=True) dataset = dataset.batch(8, drop_remainder=True) for _ in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True): pass _, err = capfd.readouterr() assert "Some nodes have been offloaded. AutoTune is unable to write the autotune configuration to disk. " \ "Disable offload to prevent this from happening." in err with pytest.raises(FileNotFoundError): with open(config_path) as _: pass ds.config.set_enable_autotune(original_autotune) ds.config.set_seed(original_seed) @staticmethod def test_autotune_save_overwrite_mnist(tmp_path): """ Feature: Autotuning Description: Test set_enable_autotune and existing json_filepath is overwritten Expectation: set_enable_autotune() executes successfully with file-exist warning produced. Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline. """ original_seed = ds.config.get_seed() ds.config.set_seed(1) at_final_json_filename = "test_autotune_save_overwrite_mnist_atfinal" # Pipeline#1 original_autotune = ds.config.get_enable_autotune() ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename)) data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=100) one_hot_encode = c_transforms.OneHot(10) # num_classes is input argument data1 = data1.map(operations=one_hot_encode, input_columns="label") data1 = data1.batch(batch_size=10, drop_remainder=True) ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json")) for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): pass ds.config.set_enable_autotune(False) # Pipeline#2 ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename)) data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=200) data1 = data1.map(operations=one_hot_encode, input_columns="label") data1 = data1.shuffle(40) data1 = data1.batch(batch_size=20, drop_remainder=False) ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json")) for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): pass ds.config.set_enable_autotune(False) # Confirm 2nd serialized file is identical to final AutoTune config file. file1 = tmp_path / ("test_autotune_save_overwrite_mnist_atfinal_" + os.environ['RANK_ID'] + ".json") file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json" assert data_pipeline_same(file1, file2) # Confirm the serialized files for the 2 different pipelines are different file1 = tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json" file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json" assert not data_pipeline_same(file1, file2) ds.config.set_seed(original_seed) ds.config.set_enable_autotune(original_autotune)