You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

util.py 5.8 kB

5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. Profiler util.
  17. This module provides the utils.
  18. """
  19. import os
  20. from mindinsight.datavisual.utils.tools import to_int
# One system count takes 10 ns, so 1 ms corresponds to 100000 system counts.
PER_MS_SYSCNT = 100000
  23. def analyse_device_list_from_profiler_dir(profiler_dir):
  24. """
  25. Analyse device list from profiler dir.
  26. Args:
  27. profiler_dir (str): The profiler data dir.
  28. Returns:
  29. list, the device_id list.
  30. """
  31. profiler_file_prefix = ["timeline_display", "output_op_compute_time"]
  32. gpu_profiler_file_prefix = ["gpu_op_detail_info", "gpu_activity_data", "gpu_op_type_info"]
  33. device_id_list = set()
  34. gpu_device_id_list = set()
  35. for _, _, filenames in os.walk(profiler_dir):
  36. for filename in filenames:
  37. if filename.startswith("step_trace_raw"):
  38. items = filename.split("_")
  39. device_num = ""
  40. if len(items) > 3:
  41. device_num = items[3]
  42. else:
  43. items = filename.split("_")
  44. device_num = items[-1].split(".")[0] if items[-1].split(".") else ""
  45. if device_num.isdigit() and '_'.join(items[:-1]) in profiler_file_prefix:
  46. device_id_list.add(device_num)
  47. elif device_num.isdigit() and '_'.join(items[:-1]) in gpu_profiler_file_prefix:
  48. gpu_device_id_list.add(device_num)
  49. if device_id_list:
  50. result_list = sorted(list(device_id_list))
  51. profiler_type = "ascend"
  52. elif gpu_device_id_list:
  53. result_list = sorted(list(gpu_device_id_list))
  54. profiler_type = "gpu"
  55. else:
  56. result_list = []
  57. profiler_type = ""
  58. return result_list, profiler_type
  59. def query_latest_trace_time_file(profiler_dir, device_id=0):
  60. """
  61. Query the latest trace time file.
  62. Args:
  63. profiler_dir (str): The profiler directory.
  64. device_id (int): The id of device.
  65. Returns:
  66. str, the latest trace time file path.
  67. """
  68. files = os.listdir(profiler_dir)
  69. target_file = f'step_trace_raw_{device_id}_detail_time.csv'
  70. try:
  71. latest_file = max(
  72. filter(
  73. lambda file: file == target_file,
  74. files
  75. ),
  76. key=lambda file: os.stat(os.path.join(profiler_dir, file)).st_mtime
  77. )
  78. except ValueError:
  79. return None
  80. return os.path.join(profiler_dir, latest_file)
  81. def query_step_trace_file(profiler_dir):
  82. """
  83. Query for all step trace file.
  84. Args:
  85. profiler_dir (str): The directory that contains all step trace files.
  86. Returns:
  87. str, the file path of step trace time.
  88. """
  89. files = os.listdir(profiler_dir)
  90. training_trace_file = list(
  91. filter(
  92. lambda file: file.startswith('training_trace') and not file.endswith('.done'),
  93. files
  94. )
  95. )
  96. if training_trace_file:
  97. return os.path.join(profiler_dir, training_trace_file[0])
  98. return None
  99. def get_summary_for_step_trace(average_info, header):
  100. """The property of summary info."""
  101. if not average_info or not header:
  102. return {}
  103. total_time = get_field_value(average_info, 'total', header)
  104. iteration_interval = get_field_value(average_info, 'iteration_interval',
  105. header)
  106. fp_and_bp = get_field_value(average_info, 'fp_and_bp', header)
  107. tail = get_field_value(average_info, 'tail', header)
  108. summary = {
  109. 'total_time': total_time,
  110. 'iteration_interval': iteration_interval,
  111. 'iteration_interval_percent': calculate_percent(iteration_interval, total_time),
  112. 'fp_and_bp': fp_and_bp,
  113. 'fp_and_bp_percent': calculate_percent(fp_and_bp, total_time),
  114. 'tail': tail,
  115. 'tail_percent': calculate_percent(tail, total_time)
  116. }
  117. return summary
  118. def calculate_percent(partial, total):
  119. """Calculate percent value."""
  120. if total:
  121. percent = round(partial / total * 100, 2)
  122. else:
  123. percent = 0
  124. return f'{percent}%'
  125. def to_millisecond(sys_count, limit=4):
  126. """Translate system count to millisecond."""
  127. return round(sys_count / PER_MS_SYSCNT, limit)
  128. def get_field_value(row_info, field_name, header, time_type='realtime'):
  129. """
  130. Extract basic info through row_info.
  131. Args:
  132. row_info (list): The list of data info in one row.
  133. field_name (str): The name in header.
  134. header (list[str]): The list of field names.
  135. time_type (str): The type of value, `realtime` or `systime`. Default: `realtime`.
  136. Returns:
  137. dict, step trace info in dict format.
  138. """
  139. field_index = header.index(field_name)
  140. value = row_info[field_index]
  141. value = to_int(value, field_name)
  142. if time_type == 'realtime':
  143. value = to_millisecond(value)
  144. return value
  145. def get_options(options):
  146. if options is None:
  147. options = {}
  148. return options