|
|
|
@@ -15,8 +15,9 @@ |
|
|
|
""" |
|
|
|
Test Dataset AutoTune's Save and Load Configuration support |
|
|
|
""" |
|
|
|
import os |
|
|
|
import json |
|
|
|
|
|
|
|
import random |
|
|
|
import numpy as np |
|
|
|
import pytest |
|
|
|
import mindspore.dataset as ds |
|
|
|
@@ -43,6 +44,14 @@ class TestAutotuneSaveLoad: |
|
|
|
# Note: Use pytest fixture tmp_path to create files within this temporary directory, |
|
|
|
# which is automatically created for each test and deleted at the end of the test. |
|
|
|
|
|
|
|
@staticmethod
def setup_method():
    """
    Export a random single-digit RANK_ID into the process environment.

    Tests in this class read os.environ['RANK_ID'] to build the name of the
    AutoTune configuration file they expect to find on disk.
    """
    rank_id = random.randint(0, 9)
    os.environ['RANK_ID'] = str(rank_id)
|
|
|
|
|
|
|
@staticmethod
def teardown_method():
    """
    Remove the RANK_ID environment variable from the process environment.

    Uses pop() with a default instead of `del` so that teardown does not raise
    KeyError (and hide the real test outcome) when RANK_ID is already absent,
    e.g. because a test removed it itself.
    """
    os.environ.pop('RANK_ID', None)
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def test_autotune_generator_pipeline(tmp_path): |
|
|
|
""" |
|
|
|
@@ -51,7 +60,7 @@ class TestAutotuneSaveLoad: |
|
|
|
Expectation: pipeline runs successfully |
|
|
|
""" |
|
|
|
original_autotune = ds.config.get_enable_autotune() |
|
|
|
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal.json")) |
|
|
|
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal")) |
|
|
|
|
|
|
|
source = [(np.array([x]),) for x in range(1024)] |
|
|
|
data1 = ds.GeneratorDataset(source, ["data"]) |
|
|
|
@@ -64,6 +73,40 @@ class TestAutotuneSaveLoad: |
|
|
|
for _ in range(5): |
|
|
|
for _ in itr: |
|
|
|
pass |
|
|
|
del itr |
|
|
|
ds.config.set_enable_autotune(original_autotune) |
|
|
|
|
|
|
|
file = tmp_path / ("test_autotune_generator_atfinal_" + os.environ['RANK_ID'] + ".json") |
|
|
|
assert file.exists() |
|
|
|
|
|
|
|
@staticmethod
def test_autotune_save_overwrite_generator(tmp_path):
    """
    Feature: Autotuning
    Description: Test set_enable_autotune and existing json_filepath is overwritten
    Expectation: set_enable_autotune() executes successfully with file-exist warning produced.
        Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline.
    """
    source = [(np.array([x]),) for x in range(1024)]

    # Build the target path once with pathlib so that both pipelines use the
    # identical file inside tmp_path. Plain string concatenation of
    # str(tmp_path) and the filename would omit the path separator and point
    # at a sibling of tmp_path instead of the file written by the 1st pipeline.
    at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal.json"
    at_final_json_filepath = str(tmp_path / at_final_json_filename)

    original_autotune = ds.config.get_enable_autotune()
    try:
        # Pipeline#1: produces the initial AutoTune configuration file.
        ds.config.set_enable_autotune(True, at_final_json_filepath)

        data1 = ds.GeneratorDataset(source, ["data"])

        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass

        ds.config.set_enable_autotune(False)

        # Pipeline#2: re-enable AutoTune with the same file path as Pipeline#1.
        ds.config.set_enable_autotune(True, at_final_json_filepath)

        data2 = ds.GeneratorDataset(source, ["data"])
        data2 = data2.shuffle(64)

        for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
    finally:
        # Restore the global setting even if a pipeline fails, so later tests
        # are not affected by this one.
        ds.config.set_enable_autotune(original_autotune)
|
|
|
|
|
|
|
@@ -75,7 +118,7 @@ class TestAutotuneSaveLoad: |
|
|
|
Expectation: pipeline runs successfully |
|
|
|
""" |
|
|
|
original_autotune = ds.config.get_enable_autotune() |
|
|
|
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json")) |
|
|
|
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal")) |
|
|
|
original_seed = ds.config.get_seed() |
|
|
|
ds.config.set_seed(1) |
|
|
|
|
|
|
|
@@ -93,12 +136,12 @@ class TestAutotuneSaveLoad: |
|
|
|
ds.config.set_enable_autotune(original_autotune) |
|
|
|
|
|
|
|
# Confirm final AutoTune config file pipeline is identical to the serialized file pipeline. |
|
|
|
file1 = tmp_path / "test_autotune_mnist_pipeline_atfinal.json" |
|
|
|
file1 = tmp_path / ("test_autotune_mnist_pipeline_atfinal_" + os.environ['RANK_ID'] + ".json") |
|
|
|
file2 = tmp_path / "test_autotune_mnist_pipeline_serialized.json" |
|
|
|
assert data_pipeline_same(file1, file2) |
|
|
|
|
|
|
|
desdata1 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json")) |
|
|
|
desdata2 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_serialized.json")) |
|
|
|
desdata1 = ds.deserialize(json_filepath=str(file1)) |
|
|
|
desdata2 = ds.deserialize(json_filepath=str(file2)) |
|
|
|
|
|
|
|
num = 0 |
|
|
|
for newdata1, newdata2 in zip(desdata1.create_dict_iterator(num_epochs=1, output_numpy=True), |
|
|
|
@@ -111,35 +154,39 @@ class TestAutotuneSaveLoad: |
|
|
|
ds.config.set_seed(original_seed) |
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def test_autotune_save_overwrite_generator(tmp_path): |
|
|
|
def test_autotune_warning_with_offload(tmp_path, capfd): |
|
|
|
""" |
|
|
|
Feature: Autotuning |
|
|
|
Description: Test set_enable_autotune and existing json_filepath is overwritten |
|
|
|
Expectation: set_enable_autotune() executes successfully with file-exist warning produced. |
|
|
|
Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline. |
|
|
|
Description: Test autotune config saving with offload=True |
|
|
|
Expectation: Autotune should not write the config file and print a log message |
|
|
|
""" |
|
|
|
source = [(np.array([x]),) for x in range(1024)] |
|
|
|
|
|
|
|
at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal.json" |
|
|
|
original_seed = ds.config.get_seed() |
|
|
|
ds.config.set_seed(1) |
|
|
|
at_final_json_filename = "test_autotune_warning_with_offload_config.json" |
|
|
|
config_path = tmp_path / at_final_json_filename |
|
|
|
original_autotune = ds.config.get_enable_autotune() |
|
|
|
ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename)) |
|
|
|
ds.config.set_enable_autotune(True, str(config_path)) |
|
|
|
|
|
|
|
data1 = ds.GeneratorDataset(source, ["data"]) |
|
|
|
# Dataset with offload activated. |
|
|
|
dataset = ds.ImageFolderDataset(DATA_DIR, num_samples=8) |
|
|
|
dataset = dataset.map(operations=[c_vision.Decode()], input_columns="image") |
|
|
|
dataset = dataset.map(operations=[c_vision.HWC2CHW()], input_columns="image", offload=True) |
|
|
|
dataset = dataset.batch(8, drop_remainder=True) |
|
|
|
|
|
|
|
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): |
|
|
|
for _ in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True): |
|
|
|
pass |
|
|
|
|
|
|
|
ds.config.set_enable_autotune(False) |
|
|
|
|
|
|
|
ds.config.set_enable_autotune(True, str(tmp_path) + at_final_json_filename) |
|
|
|
_, err = capfd.readouterr() |
|
|
|
|
|
|
|
data2 = ds.GeneratorDataset(source, ["data"]) |
|
|
|
data2 = data2.shuffle(64) |
|
|
|
assert "Some nodes have been offloaded. AutoTune is unable to write the autotune configuration to disk. " \ |
|
|
|
"Disable offload to prevent this from happening." in err |
|
|
|
|
|
|
|
for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True): |
|
|
|
pass |
|
|
|
with pytest.raises(FileNotFoundError): |
|
|
|
with open(config_path) as _: |
|
|
|
pass |
|
|
|
|
|
|
|
ds.config.set_enable_autotune(original_autotune) |
|
|
|
ds.config.set_seed(original_seed) |
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def test_autotune_save_overwrite_mnist(tmp_path): |
|
|
|
@@ -151,7 +198,7 @@ class TestAutotuneSaveLoad: |
|
|
|
""" |
|
|
|
original_seed = ds.config.get_seed() |
|
|
|
ds.config.set_seed(1) |
|
|
|
at_final_json_filename = "test_autotune_save_overwrite_mnist_atfinal.json" |
|
|
|
at_final_json_filename = "test_autotune_save_overwrite_mnist_atfinal" |
|
|
|
|
|
|
|
# Pipeline#1 |
|
|
|
original_autotune = ds.config.get_enable_autotune() |
|
|
|
@@ -185,7 +232,7 @@ class TestAutotuneSaveLoad: |
|
|
|
ds.config.set_enable_autotune(False) |
|
|
|
|
|
|
|
# Confirm 2nd serialized file is identical to final AutoTune config file. |
|
|
|
file1 = tmp_path / "test_autotune_save_overwrite_mnist_atfinal.json" |
|
|
|
file1 = tmp_path / ("test_autotune_save_overwrite_mnist_atfinal_" + os.environ['RANK_ID'] + ".json") |
|
|
|
file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json" |
|
|
|
assert data_pipeline_same(file1, file2) |
|
|
|
|
|
|
|
@@ -196,38 +243,3 @@ class TestAutotuneSaveLoad: |
|
|
|
|
|
|
|
ds.config.set_seed(original_seed) |
|
|
|
ds.config.set_enable_autotune(original_autotune) |
|
|
|
|
|
|
|
@staticmethod
def test_autotune_warning_with_offload(tmp_path, capfd):
    """
    Feature: Autotuning
    Description: Test autotune config saving with offload=True
    Expectation: Autotune should not write the config file and print a log message
    """
    saved_seed = ds.config.get_seed()
    ds.config.set_seed(1)
    config_path = tmp_path / "test_autotune_warning_with_offload_config.json"
    saved_autotune = ds.config.get_enable_autotune()
    ds.config.set_enable_autotune(True, str(config_path))

    # Dataset with offload activated on the second map operation.
    pipeline = (
        ds.ImageFolderDataset(DATA_DIR)
        .map(operations=[c_vision.Decode()], input_columns="image")
        .map(operations=[c_vision.HWC2CHW()], input_columns="image", offload=True)
        .batch(8, drop_remainder=True)
    )

    # Run a single epoch; the rows themselves are not inspected.
    for _ in pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True):
        pass

    captured = capfd.readouterr()
    expected_message = (
        "Some nodes have been offloaded. AutoTune is unable to write the autotune configuration to disk. "
        "Disable offload to prevent this from happening."
    )
    assert expected_message in captured.err

    # Opening the config path must fail because no file is expected there.
    with pytest.raises(FileNotFoundError):
        with open(config_path) as _:
            pass

    ds.config.set_enable_autotune(saved_autotune)
    ds.config.set_seed(saved_seed)