|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- """
- Test MindData Profiling Analyzer Support
- """
- import csv
- import json
- import os
- import numpy as np
- import mindspore.common.dtype as mstype
- import mindspore.dataset as ds
- import mindspore.dataset.transforms.c_transforms as C
- from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
-
# MindData profiling files the test expects to find after iterating over the pipeline
PIPELINE_FILE = "./pipeline_profiling_0.json"
CPU_UTIL_FILE = "./minddata_cpu_utilization_0.json"
DATASET_ITERATOR_FILE = "./dataset_iterator_profiling_0.txt"

# Summary files the test expects to find after calling the MindData Analyzer
SUMMARY_JSON_FILE = "./minddata_pipeline_summary_0.json"
SUMMARY_CSV_FILE = "./minddata_pipeline_summary_0.csv"

# Path handed to MinddataProfilingAnalyzer
ANALYZE_FILE_PATH = "./"

# Minimum subset of keys (kept in alphabetical order) that must appear in the
# MindData Analyzer summary output
EXPECTED_SUMMARY_KEYS = [
    'avg_cpu_pct',
    'children_ids',
    'num_workers',
    'op_ids',
    'op_names',
    'parent_id',
    'per_batch_time',
    'pipeline_ops',
    'queue_average_size',
    'queue_empty_freq_pct',
    'queue_utilization_pct',
]
-
-
def get_csv_result(file_pathname):
    """
    Get result from the CSV file.

    Args:
        file_pathname (str): The CSV file pathname.

    Returns:
        list[list], the parsed CSV information: one inner list of string fields per CSV row,
        in file order. An empty file yields an empty list.
    """
    # The csv module asks for files to be opened with newline='' so that the reader, not the
    # text layer, handles line endings; otherwise newlines embedded in quoted fields are altered.
    with open(file_pathname, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile))
-
-
def delete_profiling_files():
    """
    Delete the MindData profiling files generated from the test.
    Also disable the MindData Profiling environment variables.

    Cleanup is best-effort: a file that was never created or an environment variable that
    was never set is skipped, so this function can run after a test that failed part-way
    without raising a secondary error that would hide the original failure.
    """
    generated_files = (
        # MindData profiling files
        PIPELINE_FILE,
        CPU_UTIL_FILE,
        DATASET_ITERATOR_FILE,
        # MindData profiling analyze summary files
        SUMMARY_JSON_FILE,
        SUMMARY_CSV_FILE,
    )
    for file_pathname in generated_files:
        try:
            os.remove(file_pathname)
        except FileNotFoundError:
            # Nothing to delete; keep going so the remaining files and variables are cleaned up
            pass

    # Disable MindData Profiling environment variables
    for env_name in ('PROFILING_MODE', 'MINDDATA_PROFILING_DIR', 'DEVICE_ID'):
        os.environ.pop(env_name, None)
-
-
def test_analyze_basic():
    """
    Test MindData profiling analyze summary files exist with basic pipeline.
    Also test basic content (subset of keys and values) from the returned summary result.

    The generated profiling/summary files and the profiling environment variables are
    cleaned up via delete_profiling_files() whether the test body passes or fails.
    """
    profiling_files = (PIPELINE_FILE, CPU_UTIL_FILE, DATASET_ITERATOR_FILE)
    summary_files = (SUMMARY_JSON_FILE, SUMMARY_CSV_FILE)

    # Confirm neither the MindData Profiling files nor the analyze summary files exist yet
    for file_pathname in profiling_files + summary_files:
        assert not os.path.exists(file_pathname)

    # Enable MindData Profiling environment variables
    os.environ['PROFILING_MODE'] = 'true'
    os.environ['MINDDATA_PROFILING_DIR'] = '.'
    os.environ['DEVICE_ID'] = '0'

    def source1():
        for i in range(8000):
            yield (np.array([i]),)

    try:
        # Create this basic and common linear pipeline
        # Generator -> Map -> Batch -> Repeat -> EpochCtrl
        data1 = ds.GeneratorDataset(source1, ["col1"])
        type_cast_op = C.TypeCast(mstype.int32)
        data1 = data1.map(operations=type_cast_op, input_columns="col1")
        data1 = data1.batch(16)
        data1 = data1.repeat(2)

        # Note: If create_dict_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
        num_iter = sum(1 for _ in data1.create_dict_iterator(num_epochs=2))

        # Confirm number of batches returned: 8000 rows / batch size 16 = 500 batches, repeated 2 times
        assert num_iter == 1000

        # Confirm MindData Profiling files are created
        for file_pathname in profiling_files:
            assert os.path.exists(file_pathname)

        # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result
        # Note: MindData Analyzer returns the result in 3 formats:
        # 1. returned dictionary
        # 2. JSON file
        # 3. CSV file
        md_analyzer = MinddataProfilingAnalyzer(ANALYZE_FILE_PATH, "CPU", 0, ANALYZE_FILE_PATH)
        md_summary_dict = md_analyzer.analyze()

        # Confirm MindData Profiling analyze summary files are created
        for file_pathname in summary_files:
            assert os.path.exists(file_pathname)

        # Build a list of the sorted returned keys
        summary_returned_keys = sorted(md_summary_dict)

        # 1. Confirm expected keys are in returned keys
        for k in EXPECTED_SUMMARY_KEYS:
            assert k in summary_returned_keys

        # Read summary JSON file
        with open(SUMMARY_JSON_FILE) as f:
            summary_json_data = json.load(f)
        # Build a list of the sorted JSON keys
        summary_json_keys = sorted(summary_json_data)

        # 2a. Confirm expected keys are in JSON file keys
        for k in EXPECTED_SUMMARY_KEYS:
            assert k in summary_json_keys

        # 2b. Confirm returned dictionary keys are identical to JSON file keys
        np.testing.assert_array_equal(summary_returned_keys, summary_json_keys)

        # Read summary CSV file
        summary_csv_data = get_csv_result(SUMMARY_CSV_FILE)
        # Build a list of the sorted CSV keys from the first column in the CSV file
        summary_csv_keys = sorted(row[0] for row in summary_csv_data)

        # 3a. Confirm expected keys are in the first column of the CSV file
        for k in EXPECTED_SUMMARY_KEYS:
            assert k in summary_csv_keys

        # 3b. Confirm returned dictionary keys are identical to CSV file first column keys
        np.testing.assert_array_equal(summary_returned_keys, summary_csv_keys)

        # 4. Verify non-variant values or number of values in the tested pipeline for certain keys
        #    of the returned dictionary
        # Note: Values of num_workers are not tested since default may change in the future
        # Note: Values related to queue metrics are not tested since they may vary on different execution environments
        assert md_summary_dict["pipeline_ops"] == ["EpochCtrl(id=0)", "Repeat(id=1)", "Batch(id=2)", "Map(id=3)",
                                                   "Generator(id=4)"]
        assert md_summary_dict["op_names"] == ["EpochCtrl", "Repeat", "Batch", "Map", "Generator"]
        assert md_summary_dict["op_ids"] == [0, 1, 2, 3, 4]
        assert len(md_summary_dict["num_workers"]) == 5
        assert len(md_summary_dict["queue_average_size"]) == 5
        assert len(md_summary_dict["queue_utilization_pct"]) == 5
        assert len(md_summary_dict["queue_empty_freq_pct"]) == 5
        assert md_summary_dict["children_ids"] == [[1], [2], [3], [4], []]
        assert md_summary_dict["parent_id"] == [-1, 0, 1, 2, 3]
        assert len(md_summary_dict["avg_cpu_pct"]) == 5

    finally:
        # Runs on success, on assertion failure and on interruption alike, so generated files
        # and profiling environment variables are not left behind for later runs
        delete_profiling_files()
-
# Allow this test module to be executed directly as a script
if __name__ == "__main__":
    test_analyze_basic()
|