Browse Source

!32534 [MD][Autotune] Add rank to filename

Merge pull request !32534 from harshvardhangupta/rank_id_suffix_at_config
pull/1/head
i-robot Gitee 4 years ago
parent
commit
0af661659c
No known key found for this signature in database GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 129 additions and 78 deletions
  1. +1
    -0
      mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc
  2. +1
    -1
      mindspore/ccsrc/minddata/dataset/core/config_manager.cc
  3. +42
    -10
      mindspore/python/mindspore/dataset/core/config.py
  4. +9
    -3
      tests/st/dataset/test_gpu_autotune.py
  5. +4
    -4
      tests/ut/python/dataset/test_autotune.py
  6. +72
    -60
      tests/ut/python/dataset/test_autotune_saveload.py

+ 1
- 0
mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc View File

@@ -46,6 +46,7 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
.def("get_op_connector_size", &ConfigManager::op_connector_size)
.def("get_seed", &ConfigManager::seed)
.def("set_rank_id", &ConfigManager::set_rank_id)
.def("get_rank_id", &ConfigManager::rank_id)
.def("get_worker_connector_size", &ConfigManager::worker_connector_size)
.def("set_auto_num_workers", &ConfigManager::set_auto_num_workers)
.def("set_auto_worker_config", &ConfigManager::set_auto_worker_config_)


+ 1
- 1
mindspore/ccsrc/minddata/dataset/core/config_manager.cc View File

@@ -215,7 +215,7 @@ Status ConfigManager::set_enable_autotune(bool enable, bool save_autoconfig, con
}

// Save the final AutoTune configuration JSON filepath name
autotune_json_filepath_ = std::move(json_filepath);
autotune_json_filepath_ = json_filepath;
return Status::OK();
}



+ 42
- 10
mindspore/python/mindspore/dataset/core/config.py View File

@@ -25,6 +25,7 @@ Common imported modules in corresponding API examples are as follows:
import os
import platform
import random
from pathlib import Path
import numpy
import mindspore._c_dataengine as cde
from mindspore import log as logger
@@ -436,7 +437,7 @@ def load(file):
_config.load(file)


def set_enable_autotune(enable, json_filepath=None):
def set_enable_autotune(enable, json_filepath_prefix=None):
"""
Set whether to enable AutoTune. AutoTune is disabled by default.

@@ -449,7 +450,10 @@ def set_enable_autotune(enable, json_filepath=None):

Args:
enable (bool): Whether to enable AutoTune.
json_filepath (str, optional): The filepath to save the optimized global configuration.
json_filepath_prefix (str, optional): The prefix filepath to save the optimized global configuration.
The rank id and the json extension will be appended to the json_filepath_prefix string.
For example, if json_filepath_prefix="/path/to/some/dir/prefixname" and rank_id is 1, then the path
of the generated file will be "/path/to/some/dir/prefixname_1.json".
If the file already exists, it will be automatically overwritten. Default: None,
means not to save the configuration file, but the tuned result still can be checked through INFO log.

@@ -465,7 +469,7 @@ def set_enable_autotune(enable, json_filepath=None):
- When `enable` is False, `json_filepath` will be ignored.
- The JSON file can be loaded by API `mindspore.dataset.deserialize` to build a tuned pipeline.

An example of the generated JSON file is as follow. "remark" file will conclude that if the dataset has been
An example of the generated JSON file is as follows. The "remark" field will state whether the dataset has been
tuned or not. The "summary" field will show the tuned configuration of the dataset pipeline. Users can modify
scripts based on the tuned result.

@@ -494,25 +498,53 @@ def set_enable_autotune(enable, json_filepath=None):
if not isinstance(enable, bool):
raise TypeError("enable must be of type bool.")

save_autoconfig = bool(enable and json_filepath is not None)
save_autoconfig = bool(enable and json_filepath_prefix is not None)

if json_filepath and not isinstance(json_filepath, str):
raise TypeError("json_filepath must be a str value but was: {}.".format(json_filepath))
if json_filepath_prefix and not isinstance(json_filepath_prefix, str):
raise TypeError("json_filepath must be a str value but was: {}.".format(json_filepath_prefix))

if enable and json_filepath == "":
if enable and json_filepath_prefix == "":
raise RuntimeError("The value of json_filepath cannot be the empty string.")

if not enable and json_filepath is not None:
if not enable and json_filepath_prefix is not None:
logger.warning("The value of json_filepath is ignored when enable is False.")

if enable and json_filepath is None:
if enable and json_filepath_prefix is None:
logger.warning("Dataset AutoTune is enabled but no json path is specified, check INFO log for tuned result.")

json_filepath = replace_none(json_filepath, "")
json_filepath = replace_none(json_filepath_prefix, "")

rank_id = _get_rank_id()

path = Path(json_filepath).resolve()

if not path.is_dir():
filename_prefix = path.name
# append rank_id and json extension
filename = filename_prefix + "_" + rank_id + ".json"
json_filepath = str(path.with_name(filename))

_config.set_enable_autotune(enable, save_autoconfig, json_filepath)


def _get_rank_id():
    """
    INTERNAL USE ONLY!

    Resolve the rank id of the current process as a decimal string.

    Resolution order:
        1. The native ConfigManager's rank id, when it has been set
           (a negative value is the "unset" sentinel).
        2. The ``RANK_ID`` environment variable, when present and numeric.
        3. The literal ``"0"`` as the final fallback.

    Returns:
        str, the rank id.
    """
    _init_device_info()
    configured_id = _config.get_rank_id()
    if configured_id >= 0:
        return str(configured_id)
    # ConfigManager reports a negative default when no rank id was set;
    # fall back to the environment, then to "0".
    env_id = os.getenv("RANK_ID")
    if env_id and env_id.isdigit():
        return env_id
    return "0"


def get_enable_autotune():
"""
Get whether AutoTune is currently enabled.


+ 9
- 3
tests/st/dataset/test_gpu_autotune.py View File

@@ -87,15 +87,19 @@ def test_autotune_train_simple_model(tmp_path):
Expectation: Training and data deserialization completes successfully

"""
rank_id = os.getenv("RANK_ID")
if not rank_id or not rank_id.isdigit():
rank_id = "0"

original_seed = ds.config.get_seed()
set_seed(1)
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
context.set_context(enable_graph_kernel=True)
at_config_filename = "test_autotune_train_simple_model_at_config.json"
at_config_filename = "test_autotune_train_simple_model_at_config"

# Enable Dataset AutoTune
original_autotune = ds.config.get_enable_autotune()
ds.config.set_enable_autotune(True, str(tmp_path) + at_config_filename)
ds.config.set_enable_autotune(True, str(tmp_path / at_config_filename))

ds_train = create_dataset(os.path.join("/home/workspace/mindspore_dataset/mnist", "train"), 32)
model = create_model()
@@ -108,7 +112,9 @@ def test_autotune_train_simple_model(tmp_path):

ds.config.set_enable_autotune(False)

ds_train_deserialized = ds.deserialize(json_filepath=str(tmp_path) + at_config_filename)
file = tmp_path / (at_config_filename + "_" + rank_id + ".json")
assert file.exists()
ds_train_deserialized = ds.deserialize(json_filepath=str(file))

num = 0
for data1, data2 in zip(ds_train.create_dict_iterator(num_epochs=1, output_numpy=True),


+ 4
- 4
tests/ut/python/dataset/test_autotune.py View File

@@ -118,7 +118,7 @@ class TestAutotuneWithProfiler:
ds.config.set_enable_autotune(False)

@staticmethod
def test_delayed_autotune_with_2_pipeline(capfd):
def test_delayed_autotune_with_2_pipeline(tmp_path, capfd):
"""
Feature: Autotuning
Description: Test delayed Autotune with two pipelines
@@ -130,7 +130,7 @@ class TestAutotuneWithProfiler:
data1 = data1.batch(32)
itr1 = data1.create_dict_iterator(num_epochs=5)

ds.config.set_enable_autotune(True)
ds.config.set_enable_autotune(True, str(tmp_path / "file.json"))
itr2 = data1.create_dict_iterator(num_epochs=5)
ds.config.set_enable_autotune(False)

@@ -140,7 +140,7 @@ class TestAutotuneWithProfiler:
err_out_log(out, err, False)

@staticmethod
def test_delayed_start_autotune_with_3_pipeline(capfd):
def test_delayed_start_autotune_with_3_pipeline(tmp_path, capfd):
"""
Feature: Autotuning
Description: Test delayed Autotune and early stop with three pipelines
@@ -152,7 +152,7 @@ class TestAutotuneWithProfiler:
data1 = data1.batch(32)
itr1 = data1.create_dict_iterator(num_epochs=5)

ds.config.set_enable_autotune(True)
ds.config.set_enable_autotune(True, str(tmp_path / "file.json"))
itr2 = data1.create_dict_iterator(num_epochs=5)
ds.config.set_enable_autotune(False)



+ 72
- 60
tests/ut/python/dataset/test_autotune_saveload.py View File

@@ -15,8 +15,9 @@
"""
Test Dataset AutoTune's Save and Load Configuration support
"""
import os
import json
import random
import numpy as np
import pytest
import mindspore.dataset as ds
@@ -43,6 +44,14 @@ class TestAutotuneSaveLoad:
# Note: Use pytest fixture tmp_path to create files within this temporary directory,
# which is automatically created for each test and deleted at the end of the test.

@staticmethod
def setup_method():
    # Runs before each test: set a pseudo-random single-digit rank id so the
    # rank-suffixed AutoTune config filenames are exercised with varying ids.
    os.environ['RANK_ID'] = str(random.randint(0, 9))

@staticmethod
def teardown_method():
    # Runs after each test: remove the rank id set in setup_method so it does
    # not leak into other test modules.
    del os.environ['RANK_ID']

@staticmethod
def test_autotune_generator_pipeline(tmp_path):
"""
@@ -51,7 +60,7 @@ class TestAutotuneSaveLoad:
Expectation: pipeline runs successfully
"""
original_autotune = ds.config.get_enable_autotune()
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal.json"))
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal"))

source = [(np.array([x]),) for x in range(1024)]
data1 = ds.GeneratorDataset(source, ["data"])
@@ -64,6 +73,40 @@ class TestAutotuneSaveLoad:
for _ in range(5):
for _ in itr:
pass
del itr
ds.config.set_enable_autotune(original_autotune)

file = tmp_path / ("test_autotune_generator_atfinal_" + os.environ['RANK_ID'] + ".json")
assert file.exists()

@staticmethod
def test_autotune_save_overwrite_generator(tmp_path):
    """
    Feature: Autotuning
    Description: Test set_enable_autotune and existing json_filepath is overwritten
    Expectation: set_enable_autotune() executes successfully with file-exist warning produced.
        Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline.
    """
    source = [(np.array([x]),) for x in range(1024)]

    # Pass a prefix without ".json": set_enable_autotune appends "_<rank_id>.json".
    at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal"
    original_autotune = ds.config.get_enable_autotune()
    ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))

    data1 = ds.GeneratorDataset(source, ["data"])

    for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        pass

    ds.config.set_enable_autotune(False)

    # Re-enable with the SAME target path so the 2nd pipeline overwrites the
    # config file of the 1st pipeline. Bug fix: the original built the path as
    # str(tmp_path) + filename, which concatenates with no path separator and
    # therefore pointed at a sibling of tmp_path instead of the same file.
    ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))

    data2 = ds.GeneratorDataset(source, ["data"])
    data2 = data2.shuffle(64)

    for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
        pass

    ds.config.set_enable_autotune(original_autotune)

@@ -75,7 +118,7 @@ class TestAutotuneSaveLoad:
Expectation: pipeline runs successfully
"""
original_autotune = ds.config.get_enable_autotune()
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json"))
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal"))
original_seed = ds.config.get_seed()
ds.config.set_seed(1)

@@ -93,12 +136,12 @@ class TestAutotuneSaveLoad:
ds.config.set_enable_autotune(original_autotune)

# Confirm final AutoTune config file pipeline is identical to the serialized file pipeline.
file1 = tmp_path / "test_autotune_mnist_pipeline_atfinal.json"
file1 = tmp_path / ("test_autotune_mnist_pipeline_atfinal_" + os.environ['RANK_ID'] + ".json")
file2 = tmp_path / "test_autotune_mnist_pipeline_serialized.json"
assert data_pipeline_same(file1, file2)

desdata1 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json"))
desdata2 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_serialized.json"))
desdata1 = ds.deserialize(json_filepath=str(file1))
desdata2 = ds.deserialize(json_filepath=str(file2))

num = 0
for newdata1, newdata2 in zip(desdata1.create_dict_iterator(num_epochs=1, output_numpy=True),
@@ -111,35 +154,39 @@ class TestAutotuneSaveLoad:
ds.config.set_seed(original_seed)

@staticmethod
def test_autotune_save_overwrite_generator(tmp_path):
def test_autotune_warning_with_offload(tmp_path, capfd):
"""
Feature: Autotuning
Description: Test set_enable_autotune and existing json_filepath is overwritten
Expectation: set_enable_autotune() executes successfully with file-exist warning produced.
Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline.
Description: Test autotune config saving with offload=True
Expectation: Autotune should not write the config file and print a log message
"""
source = [(np.array([x]),) for x in range(1024)]

at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal.json"
original_seed = ds.config.get_seed()
ds.config.set_seed(1)
at_final_json_filename = "test_autotune_warning_with_offload_config.json"
config_path = tmp_path / at_final_json_filename
original_autotune = ds.config.get_enable_autotune()
ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
ds.config.set_enable_autotune(True, str(config_path))

data1 = ds.GeneratorDataset(source, ["data"])
# Dataset with offload activated.
dataset = ds.ImageFolderDataset(DATA_DIR, num_samples=8)
dataset = dataset.map(operations=[c_vision.Decode()], input_columns="image")
dataset = dataset.map(operations=[c_vision.HWC2CHW()], input_columns="image", offload=True)
dataset = dataset.batch(8, drop_remainder=True)

for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
for _ in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
pass

ds.config.set_enable_autotune(False)

ds.config.set_enable_autotune(True, str(tmp_path) + at_final_json_filename)
_, err = capfd.readouterr()

data2 = ds.GeneratorDataset(source, ["data"])
data2 = data2.shuffle(64)
assert "Some nodes have been offloaded. AutoTune is unable to write the autotune configuration to disk. " \
"Disable offload to prevent this from happening." in err

for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
with pytest.raises(FileNotFoundError):
with open(config_path) as _:
pass

ds.config.set_enable_autotune(original_autotune)
ds.config.set_seed(original_seed)

@staticmethod
def test_autotune_save_overwrite_mnist(tmp_path):
@@ -151,7 +198,7 @@ class TestAutotuneSaveLoad:
"""
original_seed = ds.config.get_seed()
ds.config.set_seed(1)
at_final_json_filename = "test_autotune_save_overwrite_mnist_atfinal.json"
at_final_json_filename = "test_autotune_save_overwrite_mnist_atfinal"

# Pipeline#1
original_autotune = ds.config.get_enable_autotune()
@@ -185,7 +232,7 @@ class TestAutotuneSaveLoad:
ds.config.set_enable_autotune(False)

# Confirm 2nd serialized file is identical to final AutoTune config file.
file1 = tmp_path / "test_autotune_save_overwrite_mnist_atfinal.json"
file1 = tmp_path / ("test_autotune_save_overwrite_mnist_atfinal_" + os.environ['RANK_ID'] + ".json")
file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"
assert data_pipeline_same(file1, file2)

@@ -196,38 +243,3 @@ class TestAutotuneSaveLoad:

ds.config.set_seed(original_seed)
ds.config.set_enable_autotune(original_autotune)

@staticmethod
def test_autotune_warning_with_offload(tmp_path, capfd):
"""
Feature: Autotuning
Description: Test autotune config saving with offload=True
Expectation: Autotune should not write the config file and print a log message
"""
original_seed = ds.config.get_seed()
ds.config.set_seed(1)
at_final_json_filename = "test_autotune_warning_with_offload_config.json"
config_path = tmp_path / at_final_json_filename
original_autotune = ds.config.get_enable_autotune()
ds.config.set_enable_autotune(True, str(config_path))

# Dataset with offload activated.
dataset = ds.ImageFolderDataset(DATA_DIR)
dataset = dataset.map(operations=[c_vision.Decode()], input_columns="image")
dataset = dataset.map(operations=[c_vision.HWC2CHW()], input_columns="image", offload=True)
dataset = dataset.batch(8, drop_remainder=True)

for _ in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
pass

_, err = capfd.readouterr()

assert "Some nodes have been offloaded. AutoTune is unable to write the autotune configuration to disk. " \
"Disable offload to prevent this from happening." in err

with pytest.raises(FileNotFoundError):
with open(config_path) as _:
pass

ds.config.set_enable_autotune(original_autotune)
ds.config.set_seed(original_seed)

Loading…
Cancel
Save