You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_minddata_analyzer.py 7.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. Test MindData Profiling Analyzer Support
  17. """
  18. import csv
  19. import json
  20. import os
  21. import numpy as np
  22. import mindspore.common.dtype as mstype
  23. import mindspore.dataset as ds
  24. import mindspore.dataset.transforms.c_transforms as C
  25. from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
# Raw profiling output files produced by MindData profiling for device_id 0
# in the current working directory.
PIPELINE_FILE = "./pipeline_profiling_0.json"
CPU_UTIL_FILE = "./minddata_cpu_utilization_0.json"
DATASET_ITERATOR_FILE = "./dataset_iterator_profiling_0.txt"
# Summary files written by MinddataProfilingAnalyzer.analyze()
SUMMARY_JSON_FILE = "./minddata_pipeline_summary_0.json"
SUMMARY_CSV_FILE = "./minddata_pipeline_summary_0.csv"
# Directory passed to the analyzer for both its input and output files.
ANALYZE_FILE_PATH = "./"
# This is the minimum subset of expected keys (in alphabetical order) in the MindData Analyzer summary output
EXPECTED_SUMMARY_KEYS = ['avg_cpu_pct', 'children_ids', 'num_workers', 'op_ids', 'op_names', 'parent_id',
                         'per_batch_time', 'pipeline_ops', 'queue_average_size', 'queue_empty_freq_pct',
                         'queue_utilization_pct']
  36. def get_csv_result(file_pathname):
  37. """
  38. Get result from the CSV file.
  39. Args:
  40. file_pathname (str): The CSV file pathname.
  41. Returns:
  42. list[list], the parsed CSV information.
  43. """
  44. result = []
  45. with open(file_pathname, 'r') as csvfile:
  46. csv_reader = csv.reader(csvfile)
  47. for row in csv_reader:
  48. result.append(row)
  49. return result
  50. def delete_profiling_files():
  51. """
  52. Delete the MindData profiling files generated from the test.
  53. Also disable the MindData Profiling environment variables.
  54. """
  55. # Delete MindData profiling files
  56. os.remove(PIPELINE_FILE)
  57. os.remove(CPU_UTIL_FILE)
  58. os.remove(DATASET_ITERATOR_FILE)
  59. # Delete MindData profiling analyze summary files
  60. os.remove(SUMMARY_JSON_FILE)
  61. os.remove(SUMMARY_CSV_FILE)
  62. # Disable MindData Profiling environment variables
  63. del os.environ['PROFILING_MODE']
  64. del os.environ['MINDDATA_PROFILING_DIR']
  65. del os.environ['DEVICE_ID']
def test_analyze_basic():
    """
    Test MindData profiling analyze summary files exist with basic pipeline.
    Also test basic content (subset of keys and values) from the returned summary result.
    """
    # Confirm MindData Profiling files do not yet exist
    assert os.path.exists(PIPELINE_FILE) is False
    assert os.path.exists(CPU_UTIL_FILE) is False
    assert os.path.exists(DATASET_ITERATOR_FILE) is False
    # Confirm MindData Profiling analyze summary files do not yet exist
    assert os.path.exists(SUMMARY_JSON_FILE) is False
    assert os.path.exists(SUMMARY_CSV_FILE) is False
    # Enable MindData Profiling environment variables
    os.environ['PROFILING_MODE'] = 'true'
    os.environ['MINDDATA_PROFILING_DIR'] = '.'
    os.environ['DEVICE_ID'] = '0'

    def source1():
        # Generator source: 8000 rows, each a 1-element numpy array
        for i in range(8000):
            yield (np.array([i]),)

    try:
        # Create this basic and common linear pipeline
        # Generator -> Map -> Batch -> Repeat -> EpochCtrl
        data1 = ds.GeneratorDataset(source1, ["col1"])
        type_cast_op = C.TypeCast(mstype.int32)
        data1 = data1.map(operations=type_cast_op, input_columns="col1")
        data1 = data1.batch(16)
        data1 = data1.repeat(2)
        num_iter = 0
        # Note: If create_tuple_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
        for _ in data1.create_dict_iterator(num_epochs=2):
            num_iter = num_iter + 1
        # Confirm number of rows returned
        # 8000 rows / batch(16) = 500 batches, repeat(2) -> 1000 per epoch
        assert num_iter == 1000
        # Confirm MindData Profiling files are created
        assert os.path.exists(PIPELINE_FILE) is True
        assert os.path.exists(CPU_UTIL_FILE) is True
        assert os.path.exists(DATASET_ITERATOR_FILE) is True
        # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result
        # Note: MindData Analyzer returns the result in 3 formats:
        # 1. returned dictionary
        # 2. JSON file
        # 3. CSV file
        md_analyzer = MinddataProfilingAnalyzer(ANALYZE_FILE_PATH, "CPU", 0, ANALYZE_FILE_PATH)
        md_summary_dict = md_analyzer.analyze()
        # Confirm MindData Profiling analyze summary files are created
        assert os.path.exists(SUMMARY_JSON_FILE) is True
        assert os.path.exists(SUMMARY_CSV_FILE) is True
        # Build a list of the sorted returned keys
        summary_returned_keys = list(md_summary_dict.keys())
        summary_returned_keys.sort()
        # 1. Confirm expected keys are in returned keys
        for k in EXPECTED_SUMMARY_KEYS:
            assert k in summary_returned_keys
        # Read summary JSON file
        with open(SUMMARY_JSON_FILE) as f:
            summary_json_data = json.load(f)
        # Build a list of the sorted JSON keys
        summary_json_keys = list(summary_json_data.keys())
        summary_json_keys.sort()
        # 2a. Confirm expected keys are in JSON file keys
        for k in EXPECTED_SUMMARY_KEYS:
            assert k in summary_json_keys
        # 2b. Confirm returned dictionary keys are identical to JSON file keys
        np.testing.assert_array_equal(summary_returned_keys, summary_json_keys)
        # Read summary CSV file
        summary_csv_data = get_csv_result(SUMMARY_CSV_FILE)
        # Build a list of the sorted CSV keys from the first column in the CSV file
        summary_csv_keys = []
        for x in summary_csv_data:
            summary_csv_keys.append(x[0])
        summary_csv_keys.sort()
        # 3a. Confirm expected keys are in the first column of the CSV file
        for k in EXPECTED_SUMMARY_KEYS:
            assert k in summary_csv_keys
        # 3b. Confirm returned dictionary keys are identical to CSV file first column keys
        np.testing.assert_array_equal(summary_returned_keys, summary_csv_keys)
        # 4. Verify non-variant values or number of values in the tested pipeline for certain keys
        # of the returned dictionary
        # Note: Values of num_workers are not tested since default may change in the future
        # Note: Values related to queue metrics are not tested since they may vary on different execution environments
        assert md_summary_dict["pipeline_ops"] == ["EpochCtrl(id=0)", "Repeat(id=1)", "Batch(id=2)", "Map(id=3)",
                                                   "Generator(id=4)"]
        assert md_summary_dict["op_names"] == ["EpochCtrl", "Repeat", "Batch", "Map", "Generator"]
        assert md_summary_dict["op_ids"] == [0, 1, 2, 3, 4]
        assert len(md_summary_dict["num_workers"]) == 5
        assert len(md_summary_dict["queue_average_size"]) == 5
        assert len(md_summary_dict["queue_utilization_pct"]) == 5
        assert len(md_summary_dict["queue_empty_freq_pct"]) == 5
        assert md_summary_dict["children_ids"] == [[1], [2], [3], [4], []]
        assert md_summary_dict["parent_id"] == [-1, 0, 1, 2, 3]
        assert len(md_summary_dict["avg_cpu_pct"]) == 5
    except Exception as error:
        # Clean up generated files and env vars before re-raising, so a
        # failed run does not poison the "files do not exist" preconditions
        # of a subsequent run.
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
# Allow running this test directly as a script.
if __name__ == "__main__":
    test_analyze_basic()