| @@ -18,6 +18,7 @@ Testing profiling support in DE | |||
| import json | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.transforms.c_transforms as C | |||
| @@ -41,6 +42,7 @@ file_name_map_rank_id = {"test_profiling_simple_pipeline": "0", | |||
| "test_profiling_seq_pipelines_repeat": "9"} | |||
| @pytest.mark.forked | |||
| class TestMinddataProfilingManager: | |||
| """ | |||
| Test MinddataProfilingManager | |||
| @@ -53,9 +55,9 @@ class TestMinddataProfilingManager: | |||
| # Get instance pointer for MindData profiling manager | |||
| self.md_profiler = cde.GlobalContext.profiling_manager() | |||
| self._PIPELINE_FILE = "./pipeline_profiling" | |||
| self._CPU_UTIL_FILE = "./minddata_cpu_utilization" | |||
| self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling" | |||
| self._pipeline_file = "./pipeline_profiling" | |||
| self._cpu_util_file = "./minddata_cpu_utilization" | |||
| self._dataset_iterator_file = "./dataset_iterator_profiling" | |||
| def setup_method(self): | |||
| """ | |||
| @@ -64,9 +66,9 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| # Confirm MindData Profiling files do not yet exist | |||
| assert os.path.exists(pipeline_file) is False | |||
| @@ -91,9 +93,9 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| # Delete MindData profiling files generated from the test. | |||
| os.remove(pipeline_file) | |||
| @@ -132,9 +134,9 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| source = [(np.array([x]),) for x in range(1024)] | |||
| data1 = ds.GeneratorDataset(source, ["data"]) | |||
| @@ -172,8 +174,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| source = [(np.array([x]),) for x in range(1024)] | |||
| data1 = ds.GeneratorDataset(source, ["gen"]) | |||
| @@ -217,8 +219,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| # In source1 dataset: Number of rows is 3; its values are 0, 1, 2 | |||
| def source1(): | |||
| @@ -273,8 +275,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9 | |||
| def source1(): | |||
| @@ -342,8 +344,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| def source1(): | |||
| for i in range(8000): | |||
| @@ -393,8 +395,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| # Create this common pipeline | |||
| # Cifar10 -> Map -> Map -> Batch -> Repeat | |||
| @@ -446,8 +448,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| source = [(np.array([x]),) for x in range(64)] | |||
| data1 = ds.GeneratorDataset(source, ["data"]) | |||
| @@ -498,8 +500,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| source = [(np.array([x]),) for x in range(64)] | |||
| data2 = ds.GeneratorDataset(source, ["data"]) | |||
| @@ -550,8 +552,8 @@ class TestMinddataProfilingManager: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| source = [(np.array([x]),) for x in range(64)] | |||
| data2 = ds.GeneratorDataset(source, ["data"]) | |||
| @@ -37,6 +37,7 @@ file_name_map_rank_id = {"test_profiling_early_stop": "0", | |||
| "test_profiling_stop_nostart": "4"} | |||
| @pytest.mark.forked | |||
| class TestMindDataProfilingStartStop: | |||
| """ | |||
| Test MindData Profiling Manager Start-Stop Support | |||
| @@ -46,9 +47,9 @@ class TestMindDataProfilingStartStop: | |||
| """ | |||
| Run once for the class | |||
| """ | |||
| self._PIPELINE_FILE = "./pipeline_profiling" | |||
| self._CPU_UTIL_FILE = "./minddata_cpu_utilization" | |||
| self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling" | |||
| self._pipeline_file = "./pipeline_profiling" | |||
| self._cpu_util_file = "./minddata_cpu_utilization" | |||
| self._dataset_iterator_file = "./dataset_iterator_profiling" | |||
| def setup_method(self): | |||
| """ | |||
| @@ -57,9 +58,9 @@ class TestMindDataProfilingStartStop: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| self.pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| self.cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| self.dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| self.pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| self.cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| self.dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| # Confirm MindData Profiling files do not yet exist | |||
| assert os.path.exists(self.pipeline_file) is False | |||
| @@ -19,6 +19,7 @@ import csv | |||
| import json | |||
| import os | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.transforms.c_transforms as C | |||
| @@ -30,6 +31,7 @@ file_name_map_rank_id = {"test_analyze_basic": "0", | |||
| "test_analyze_sequential_pipelines_invalid": "1"} | |||
| @pytest.mark.forked | |||
| class TestMinddataProfilingAnalyzer: | |||
| """ | |||
| Test the MinddataProfilingAnalyzer class | |||
| @@ -42,15 +44,15 @@ class TestMinddataProfilingAnalyzer: | |||
| # Get instance pointer for MindData profiling manager | |||
| self.md_profiler = cde.GlobalContext.profiling_manager() | |||
| self._PIPELINE_FILE = "./pipeline_profiling" | |||
| self._CPU_UTIL_FILE = "./minddata_cpu_utilization" | |||
| self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling" | |||
| self._SUMMARY_JSON_FILE = "./minddata_pipeline_summary" | |||
| self._SUMMARY_CSV_FILE = "./minddata_pipeline_summary" | |||
| self._ANALYZE_FILE_PATH = "./" | |||
| self._pipeline_file = "./pipeline_profiling" | |||
| self._cpu_util_file = "./minddata_cpu_utilization" | |||
| self._dataset_iterator_file = "./dataset_iterator_profiling" | |||
| self._summary_json_file = "./minddata_pipeline_summary" | |||
| self._summary_csv_file = "./minddata_pipeline_summary" | |||
| self._analyze_file_path = "./" | |||
| # This is the set of keys for success case | |||
| self._EXPECTED_SUMMARY_KEYS_SUCCESS = \ | |||
| self._expected_summary_keys_success = \ | |||
| ['avg_cpu_pct', 'avg_cpu_pct_per_worker', 'children_ids', 'num_workers', 'op_ids', 'op_names', | |||
| 'parent_id', 'per_batch_time', 'per_pipeline_time', 'per_push_queue_time', 'pipeline_ops', | |||
| 'queue_average_size', 'queue_empty_freq_pct', 'queue_utilization_pct'] | |||
| @@ -62,11 +64,11 @@ class TestMinddataProfilingAnalyzer: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json" | |||
| summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| summary_json_file = self._summary_json_file + "_" + file_id + ".json" | |||
| summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv" | |||
| # Confirm MindData Profiling files do not yet exist | |||
| assert os.path.exists(pipeline_file) is False | |||
| @@ -94,11 +96,11 @@ class TestMinddataProfilingAnalyzer: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json" | |||
| summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| summary_json_file = self._summary_json_file + "_" + file_id + ".json" | |||
| summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv" | |||
| # Delete MindData profiling files generated from the test. | |||
| os.remove(pipeline_file) | |||
| @@ -130,15 +132,15 @@ class TestMinddataProfilingAnalyzer: | |||
| result.append(row) | |||
| return result | |||
| def verify_md_summary(self, md_summary_dict, EXPECTED_SUMMARY_KEYS): | |||
| def verify_md_summary(self, md_summary_dict, expected_summary_keys): | |||
| """ | |||
| Verify the content of the 3 variations of the MindData Profiling analyze summary output. | |||
| """ | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json" | |||
| summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv" | |||
| summary_json_file = self._summary_json_file + "_" + file_id + ".json" | |||
| summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv" | |||
| # Confirm MindData Profiling analyze summary files are created | |||
| assert os.path.exists(summary_json_file) is True | |||
| @@ -149,7 +151,7 @@ class TestMinddataProfilingAnalyzer: | |||
| summary_returned_keys.sort() | |||
| # 1. Confirm expected keys are in returned keys | |||
| for k in EXPECTED_SUMMARY_KEYS: | |||
| for k in expected_summary_keys: | |||
| assert k in summary_returned_keys | |||
| # Read summary JSON file | |||
| @@ -160,7 +162,7 @@ class TestMinddataProfilingAnalyzer: | |||
| summary_json_keys.sort() | |||
| # 2a. Confirm expected keys are in JSON file keys | |||
| for k in EXPECTED_SUMMARY_KEYS: | |||
| for k in expected_summary_keys: | |||
| assert k in summary_json_keys | |||
| # 2b. Confirm returned dictionary keys are identical to JSON file keys | |||
| @@ -175,7 +177,7 @@ class TestMinddataProfilingAnalyzer: | |||
| summary_csv_keys.sort() | |||
| # 3a. Confirm expected keys are in the first column of the CSV file | |||
| for k in EXPECTED_SUMMARY_KEYS: | |||
| for k in expected_summary_keys: | |||
| assert k in summary_csv_keys | |||
| # 3b. Confirm returned dictionary keys are identical to CSV file first column keys | |||
| @@ -195,9 +197,9 @@ class TestMinddataProfilingAnalyzer: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| # Create this basic and common linear pipeline | |||
| # Generator -> Map -> Batch -> Repeat -> EpochCtrl | |||
| @@ -225,7 +227,7 @@ class TestMinddataProfilingAnalyzer: | |||
| assert os.path.exists(dataset_iterator_file) is True | |||
| # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result | |||
| md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH) | |||
| md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path) | |||
| md_summary_dict = md_analyzer.analyze() | |||
| # Verify MindData Profiling Analyze Summary output | |||
| @@ -233,7 +235,7 @@ class TestMinddataProfilingAnalyzer: | |||
| # 1. returned dictionary | |||
| # 2. JSON file | |||
| # 3. CSV file | |||
| self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS) | |||
| self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success) | |||
| # 4. Verify non-variant values or number of values in the tested pipeline for certain keys | |||
| # of the returned dictionary | |||
| @@ -258,9 +260,9 @@ class TestMinddataProfilingAnalyzer: | |||
| file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] | |||
| file_id = file_name_map_rank_id[file_name] | |||
| pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" | |||
| cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" | |||
| pipeline_file = self._pipeline_file + "_" + file_id + ".json" | |||
| cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" | |||
| dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" | |||
| # Create the pipeline | |||
| # Generator -> Map -> Batch -> EpochCtrl | |||
| @@ -315,11 +317,11 @@ class TestMinddataProfilingAnalyzer: | |||
| assert os.path.exists(dataset_iterator_file) is True | |||
| # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result | |||
| md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH) | |||
| md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path) | |||
| md_summary_dict = md_analyzer.analyze() | |||
| # Verify MindData Profiling Analyze Summary output | |||
| self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS) | |||
| self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success) | |||
| # Confirm pipeline data contains info for 3 ops | |||
| assert md_summary_dict["pipeline_ops"] == ["Batch(id=0)", "Map(id=1)", "Generator(id=2)"] | |||