You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

flops_parser.py 24 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Flops parser which parsing flops from aicore file."""
import json
import os
import stat
import struct

from mindspore import log as logger
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
    ProfilerFileNotFoundException, ProfilerPathErrorException, ProfilerRawFileException
from mindspore.profiler.common.validator.validate_path import \
    validate_and_normalize_path
  25. class FlopsParser:
  26. """
  27. The parser for parsing flops from aicore file.
  28. Args:
  29. input_dir (str): Directory(JOBXXX) where the original profiling data are located.
  30. output_dir (str): Directory(profiler-{timestamp}) where the parsed profiling files are located.
  31. op_task_dict (dict): The mapping relation of task_id and op_full_name.
  32. device_id (str): The device ID.
  33. """
  34. HEX = 16
  35. PMU_COUNT = 8
  36. AICORE_LOG_SIZE = 128 # the size of each struct is 128 Byte.
  37. RUNTIME_COMMON = "BBHHHII10Q8I"
  38. # If the task id is less than the task id threshold,
  39. # the combination of task id and Stream id represents one operator,
  40. # else the task id represents one operator.
  41. _task_id_threshold = 25000
  42. def __init__(self, input_dir, output_dir, op_task_dict, device_id, rank_id, is_training_mode_flag=True):
  43. self._input_dir = input_dir
  44. self._output_dir = output_dir
  45. self._op_task_dict = op_task_dict
  46. self._device_id = device_id
  47. self._rank_id = rank_id
  48. self.is_training_mode_flag = is_training_mode_flag
  49. # output files.
  50. self._flops_filename = f'flops_{self._rank_id}.txt'
  51. self._flops_summary_filename = f'flops_summary_{self._rank_id}.json'
  52. self._flops_scope_filename = f'flops_scope_{self._rank_id}.json'
  53. self._flops_utilization_step_filename = f'flops_utilization_step_{self._rank_id}.json'
  54. # input files.
  55. self._aicore_filename_pref = f'aicore.data.{self._device_id}.slice'
  56. self._optime_filename = f'output_op_compute_time_{self._rank_id}.txt'
  57. self._info_json = f'info.json.{self._device_id}'
  58. self._step_trace_filename = f'step_trace_raw_{self._rank_id}_detail_time.csv'
  59. self._timeline_data_filename = f'output_timeline_data_{self._rank_id}.txt'
  60. self._flops_summary = {
  61. 'FLOPs': 0,
  62. 'FLOPS': 0,
  63. 'FLOPS_Utilization': 0
  64. }
  65. self._flops_each_scope = {}
  66. self._flops_sankey_diagram = {}
  67. self._max_scope_num = 0
  68. def execute(self):
  69. """Get the flops of aicore operators and write to file."""
  70. peak_flops = self._get_peak_flops()
  71. op_avg_time_dict = self._get_op_avg_time_dict()
  72. op_flops_list = []
  73. op_name_set = set()
  74. op_compute_dict = dict()
  75. # get all step time.
  76. op_all_step_time, op_all_step_comp = self._get_all_step_time()
  77. op_start_time = self._get_op_start_time()
  78. op_idx = 0
  79. step_idx = 0
  80. aicore_file_doc = os.path.join(self._input_dir, "data")
  81. source_files = self._get_aicore_files(aicore_file_doc)
  82. # parse all sliced aicore files.
  83. for source_file in source_files:
  84. source_file = validate_and_normalize_path(source_file)
  85. read_count, all_log_struct = self._load_aicore_data(source_file)
  86. for idx in range(read_count):
  87. log_struct = all_log_struct[idx * self.AICORE_LOG_SIZE:
  88. (idx + 1) * self.AICORE_LOG_SIZE]
  89. result = [hex(i) for i in struct.unpack(self.RUNTIME_COMMON, log_struct)]
  90. op_name = self._get_op_name(result)
  91. if op_name == "":
  92. # filter out the blank line in the file.
  93. continue
  94. if op_name not in op_avg_time_dict:
  95. logger.info(f"Op name {op_name} does not exist in op average time dict.")
  96. continue
  97. # Convert the unit of task_fops to MFLOPs(1e6).
  98. if op_name in op_compute_dict:
  99. task_fops = op_compute_dict[op_name]
  100. else:
  101. task_fops = self._compute_task_flops(result) * 1e-6
  102. op_compute_dict[op_name] = task_fops
  103. # add the op FLOPS in current step.
  104. if len(op_start_time) >= 1 and len(op_all_step_time) >= 1:
  105. op_idx, step_idx, op_all_step_comp = self._add_step_flops_time(
  106. op_name, task_fops, op_idx, step_idx, op_start_time, op_all_step_time, op_all_step_comp)
  107. # calculate averge op FLOPS.
  108. if op_name in op_name_set:
  109. continue
  110. op_avg_time = op_avg_time_dict[op_name]
  111. # Time unit of op_avg_time is ms.
  112. # The unit of gflop_per_second is GFLOPS(1e9).
  113. if float(op_avg_time) == 0.0:
  114. raise ValueError("All operators take 0 ms.")
  115. if peak_flops == 0:
  116. raise ValueError("The frequency of an operator is 0.")
  117. gflop_per_second = task_fops / float(op_avg_time)
  118. flops_utilization = (gflop_per_second * 1e9 / peak_flops) * 100
  119. self._flops_summary['FLOPs'] += task_fops
  120. self._flops_summary['FLOPS'] += gflop_per_second
  121. op_flops = [op_name, str(task_fops), str(gflop_per_second), str(flops_utilization)]
  122. op_flops_list.append(op_flops)
  123. op_name_set.add(op_name)
  124. self._add_flops_to_each_scope(op_name, task_fops)
  125. if not op_name_set:
  126. raise ProfilerRawFileException("No aicore operator found.")
  127. self._flops_summary['FLOPS'] /= len(op_name_set)
  128. sum_flops_utilization = 0.0
  129. # calculate the every step FLOPS utilization and the average values.
  130. utilization_save_filename = os.path.join(self._output_dir, self._flops_utilization_step_filename)
  131. with open(utilization_save_filename, 'w') as f:
  132. f.write("steps, FLOPS_Utilization %\n")
  133. for i, x in enumerate(op_all_step_comp):
  134. current_utilization = x[0] / x[1] * 1e9 / peak_flops * 100
  135. sum_flops_utilization += current_utilization
  136. f.write(str(i + 1))
  137. f.write(",")
  138. f.write(str(current_utilization))
  139. f.write("\n")
  140. if len(op_all_step_comp) >= 1:
  141. self._flops_summary['FLOPS_Utilization'] = sum_flops_utilization / len(op_all_step_comp)
  142. else:
  143. logger.warning("The number of data calculation steps is 0, please check whether the "
  144. "output timeline data is none.")
  145. self._flops_summary['FLOPS_Utilization'] = 0.0
  146. self._format_scope_flops()
  147. self._write_file(op_flops_list)
  148. def _load_aicore_data(self, aicore_file_path):
  149. """Load the original binary aicore data."""
  150. logger.info("the aicore file path is %s", aicore_file_path)
  151. if not os.path.exists(aicore_file_path):
  152. logger.critical(f'The file {aicore_file_path} does not exist.')
  153. raise ProfilerFileNotFoundException('aicore.data')
  154. file_size = os.path.getsize(aicore_file_path)
  155. read_count = file_size // self.AICORE_LOG_SIZE
  156. if not read_count:
  157. logger.critical(f'the file {aicore_file_path} '
  158. f'does not have enough content to be parsed.')
  159. raise ProfilerRawFileException(
  160. 'aicore.data file does not have enough content to be parsed'
  161. )
  162. try:
  163. with open(aicore_file_path, "rb") as aicore_file:
  164. all_log_struct = aicore_file.read(self.AICORE_LOG_SIZE * read_count)
  165. except (IOError, OSError) as err:
  166. logger.critical(f'Error occurred when read {aicore_file_path} file: {err}')
  167. raise ProfilerIOException()
  168. return read_count, all_log_struct
  169. def _get_peak_flops(self):
  170. """Get the peak FLOPS of current ascend device."""
  171. info_json_file_path = os.path.join(
  172. self._input_dir,
  173. self._info_json
  174. )
  175. if not os.path.exists(info_json_file_path):
  176. logger.critical(f'The file {info_json_file_path} does not exist.')
  177. raise ProfilerFileNotFoundException(info_json_file_path)
  178. try:
  179. with open(info_json_file_path, 'r', encoding='utf-8') as info_file:
  180. device_info = json.load(info_file)['DeviceInfo'][0]
  181. device_frequency = float(device_info["aic_frequency"])
  182. ai_core_num = float(device_info["ai_core_num"])
  183. # peak_flops formula (provided by Hisi): device_frequency * num_of_aicore * 4096 * 2.
  184. peak_flops = device_frequency * 1e6 * ai_core_num * 4096 * 2
  185. except (IOError, OSError, json.JSONDecodeError) as err:
  186. logger.critical(f'Error occurred when read {info_json_file_path} file: {err}')
  187. raise ProfilerIOException()
  188. return peak_flops
  189. def _compute_task_flops(self, log_result):
  190. """Compute the FLOPs of each task."""
  191. pmu_cnt = list(
  192. int(i.replace('0x', ''), self.HEX)
  193. for i in log_result[9:17]
  194. )
  195. cube_fp16_exec = pmu_cnt[0]
  196. cube_int8_exec = pmu_cnt[1]
  197. vec_fp32 = pmu_cnt[2]
  198. vec_fp16_128lane_exec = pmu_cnt[3]
  199. vec_fp16_64lane_exec = pmu_cnt[4]
  200. vec_int32_exec = pmu_cnt[5]
  201. vec_misc_exec = pmu_cnt[6]
  202. # These formula is provided by HISI profiling.
  203. # a cube_fp16 instruction has (16**3)*2 float point operation.
  204. # a cube_fp16 instruction has 16*16*32*2 float point operation.
  205. cube_fops = cube_fp16_exec * (16 ** 3) * 2 + cube_int8_exec * 16 * 16 * 32 * 2
  206. vec_fops = vec_fp32 * 32 + vec_fp16_128lane_exec * 128 + \
  207. vec_fp16_64lane_exec * 64 + vec_int32_exec * 64 + vec_misc_exec * 32
  208. task_fops = cube_fops + vec_fops
  209. return task_fops
  210. def _get_op_name(self, log_result):
  211. """Get the operator name for current task_id."""
  212. task_id = int(log_result[4].replace('0x', ''), self.HEX)
  213. stream_id = int(log_result[17].replace('0x', ''), self.HEX)
  214. if task_id < self._task_id_threshold:
  215. task_id = '_'.join([str(stream_id), str(task_id)])
  216. if str(task_id) not in self._op_task_dict:
  217. return ""
  218. op_name = self._op_task_dict[str(task_id)]
  219. return op_name
  220. def _get_op_avg_time_dict(self):
  221. """Get the op average execution time."""
  222. op_avg_time_dict = {}
  223. optime_file_path = os.path.join(self._output_dir, self._optime_filename)
  224. if not os.path.exists(optime_file_path):
  225. logger.critical(f'The {optime_file_path} file does not exist.')
  226. raise ProfilerFileNotFoundException(optime_file_path)
  227. try:
  228. with open(optime_file_path, 'r') as f:
  229. lines = f.readlines()
  230. op_avg_time_lines = lines[3:] # the first three lines are table header.
  231. for line in op_avg_time_lines:
  232. op_name, avg_time = line.split()[:2]
  233. op_avg_time_dict[op_name] = avg_time
  234. except (IOError, OSError) as err:
  235. logger.critical(f'Error occurred when read {optime_file_path} file: {err}')
  236. raise ProfilerIOException()
  237. return op_avg_time_dict
  238. def _add_flops_to_each_scope(self, op_name, task_fops):
  239. """
  240. Add task_fops to each scope of the op_name.
  241. The top-level scope name is "Default" or "recompute_Default" or "Gradients".
  242. To classify the same scope name under different top-level scope, a suffix name is added.
  243. For op_name like "Default/network", the "network" will be renamed as "network(Default)".
  244. For op_name like "recompute_Default/network", "network" --> "network(recompute_Default)".
  245. For op_name like "Gradients/network", "network" --> "network(Gradients)".
  246. For op_name like "Gradients/recompute_Default/network","network" --> "network(recompute_Gradients)".
  247. """
  248. # Only extracts the scope name, remove the operator name.
  249. scope_list = op_name.split('/')[:-1]
  250. self._max_scope_num = max(self._max_scope_num, len(scope_list))
  251. top_level_scope = scope_list[0]
  252. suffix_name = ""
  253. if op_name.startswith("Gradients/recompute_Default"):
  254. suffix_name = "recompute_Gradients"
  255. else:
  256. suffix_name = top_level_scope
  257. # To distinguish the same scope name at different scope level and different top level scope,
  258. # the scope level and top level scope is added.
  259. for level, scope_name in enumerate(scope_list):
  260. scope_list[level] = scope_name + f"({level}_{suffix_name})"
  261. scope_list[0] = top_level_scope
  262. # Add root node (refers to total flops).
  263. scope_list.insert(0, "Total")
  264. scope_depth = len(scope_list)
  265. for idx in range(scope_depth - 1):
  266. key_name = scope_list[idx] + " " + scope_list[idx + 1]
  267. self._flops_each_scope.setdefault(key_name, 0)
  268. self._flops_each_scope[key_name] += task_fops
  269. def _format_scope_flops(self):
  270. """
  271. Format the flops of each scope to a Sankey Diagram.
  272. The format of Sankey Diagram is:
  273. {"nodes": [
  274. {"name": "Default"},
  275. {"name": "network"}
  276. ],
  277. "links": [
  278. {"source": "Total", "target": "Default", "value": 555},
  279. {"source": "Default", "target": "network", "value": 555}
  280. ]
  281. }
  282. """
  283. nodes, links = [], []
  284. scope_name_set = set()
  285. for scope_link, task_fops in self._flops_each_scope.items():
  286. source, target = scope_link.split()
  287. scope_name_set.update({source, target})
  288. link = {
  289. "source": source,
  290. "target": target,
  291. "value": round(task_fops, 3)
  292. }
  293. links.append(link)
  294. for scope_name in scope_name_set:
  295. node = {"name": scope_name}
  296. nodes.append(node)
  297. sankey_diagram = {"nodes": nodes, "links": links}
  298. self._flops_sankey_diagram = {
  299. "data": sankey_diagram,
  300. "max_scope_num": self._max_scope_num
  301. }
  302. def _write_file(self, op_flops_list):
  303. """Write the operator's flops related information into file."""
  304. join_file_path = lambda x: os.path.join(self._output_dir, x)
  305. output_file_path = join_file_path(self._flops_filename)
  306. output_summary_file_path = join_file_path(self._flops_summary_filename)
  307. output_flops_scope_file_path = join_file_path(self._flops_scope_filename)
  308. try:
  309. with open(output_file_path, 'w') as f:
  310. header = "op_full_name, MFLOPs(10^6), GFLOPS(10^9), FLOPS utilization(%) \n"
  311. f.writelines(header)
  312. for op_flops in op_flops_list:
  313. line = ", ".join(op_flops)
  314. f.writelines(line + '\n')
  315. os.chmod(output_file_path, stat.S_IREAD | stat.S_IWRITE)
  316. except (IOError, OSError) as err:
  317. logger.critical(f'Error occurred when writing {output_file_path} file: {err}')
  318. raise ProfilerIOException()
  319. for key in self._flops_summary:
  320. self._flops_summary[key] = round(self._flops_summary[key], 3)
  321. try:
  322. with open(output_summary_file_path, 'w') as json_file:
  323. json.dump(self._flops_summary, json_file)
  324. os.chmod(output_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
  325. except (IOError, OSError) as err:
  326. logger.critical(f'Error occurred when write {output_summary_file_path} file: {err}')
  327. raise ProfilerIOException()
  328. try:
  329. with open(output_flops_scope_file_path, 'w') as json_file:
  330. json.dump(self._flops_sankey_diagram, json_file)
  331. os.chmod(output_flops_scope_file_path, stat.S_IREAD | stat.S_IWRITE)
  332. except (IOError, OSError) as err:
  333. logger.critical(f'Error occurred when write {output_flops_scope_file_path} file: {err}')
  334. raise ProfilerIOException()
  335. def _get_aicore_files(self, profiler_dir):
  336. """Get aicore files."""
  337. aicore_files = self._search_file(profiler_dir)
  338. if not aicore_files:
  339. raise ProfilerPathErrorException('The aicore file does not exist.')
  340. return aicore_files
  341. def _search_file(self, input_dir):
  342. """Search aicore file under specific input directory."""
  343. # validate input_dir
  344. if not os.path.isdir(input_dir):
  345. raise ProfilerPathErrorException(
  346. '{} does not exist or is not a dir'.format(input_dir)
  347. )
  348. # get aicore files
  349. files = os.listdir(input_dir)
  350. aicore_files = list(
  351. filter(
  352. lambda file: file.startswith(self._aicore_filename_pref) and not file.endswith('.done'),
  353. files
  354. )
  355. )
  356. # validate result
  357. if len(aicore_files) > 1:
  358. # the format of file name is like `aicore.data.$id.slice_$number`.
  359. # use the $number as the sorted key
  360. try:
  361. aicore_files.sort(key=lambda path: int(path.rsplit('_', 1)[-1]))
  362. except ValueError as err:
  363. logger.warning("Unable to parse file names: %s. %s", aicore_files, err)
  364. aicore_files = []
  365. else:
  366. aicore_files = list(
  367. filter(
  368. lambda file: file.startswith(self._aicore_filename_pref) and not file.endswith('.done'),
  369. files
  370. )
  371. )
  372. if len(aicore_files) >= 1:
  373. logger.warning("The aicore file structure is changed, please upgrade " \
  374. "mindspore and regenerate profiling data")
  375. file_paths = [os.path.join(input_dir, file) for file in aicore_files]
  376. logger.info("Find %d aicore files.", len(file_paths))
  377. return file_paths
  378. def _get_all_step_time(self):
  379. """Get the op average execution time."""
  380. op_all_step_time = []
  381. op_all_step_comp = []
  382. _step_trace_file_path = os.path.join(self._output_dir, self._step_trace_filename)
  383. if not os.path.exists(_step_trace_file_path):
  384. logger.critical(f'The {_step_trace_file_path} file does not exist.')
  385. raise ProfilerFileNotFoundException(_step_trace_file_path)
  386. try:
  387. with open(_step_trace_file_path, 'r') as f:
  388. lines = f.readlines()
  389. op_all_step_time, op_all_step_comp = \
  390. self._get_bp_fp_time_by_line(lines, op_all_step_time, op_all_step_comp)
  391. except (IOError, OSError) as err:
  392. logger.critical(f'Error occurred when read {optime_file_path} file: {err}')
  393. raise ProfilerIOException()
  394. logger.info("the train step is %d .", len(op_all_step_time))
  395. if not op_all_step_time:
  396. logger.warning(f'Empty when read {optime_file_path} file, please check the valid'
  397. 'data of this file.')
  398. return op_all_step_time, op_all_step_comp
  399. def _get_bp_fp_time_by_line(self, lines, op_all_step_time, op_all_step_comp):
  400. """Get the bp and fp time with lines."""
  401. # the last line is the average info.
  402. op_avg_time_lines = lines[1:-1]
  403. # train mode.
  404. if self.is_training_mode_flag:
  405. op_all_step_time, op_all_step_comp = \
  406. self._read_line(4, 5, op_avg_time_lines, op_all_step_time, op_all_step_comp)
  407. else:
  408. # eval mode.
  409. op_all_step_time, op_all_step_comp = \
  410. self._read_line(4, 2, op_avg_time_lines, op_all_step_time, op_all_step_comp)
  411. return op_all_step_time, op_all_step_comp
  412. def _read_line(self, start_dot, end_dot, op_avg_time_lines, op_all_step_time, op_all_step_comp):
  413. """Read the bp and fp time from line."""
  414. for op_avg_idx in op_avg_time_lines:
  415. line = op_avg_idx.split(',')
  416. fp = float(line[start_dot]) / 100000.0
  417. bp = float(line[end_dot]) / 100000.0
  418. op_all_step_time.append([fp, bp])
  419. op_all_step_comp.append([0.0, bp - fp])
  420. return op_all_step_time, op_all_step_comp
  421. def _get_op_start_time(self):
  422. """Get the op average execution time."""
  423. op_start_time = []
  424. _timeline_file_path = os.path.join(self._output_dir, self._timeline_data_filename)
  425. if not os.path.exists(_timeline_file_path):
  426. logger.critical(f'The {_timeline_file_path} file does not exist.')
  427. raise ProfilerFileNotFoundException(_timeline_file_path)
  428. try:
  429. with open(_timeline_file_path, 'r') as f:
  430. lines = f.readlines()
  431. op_avg_time_lines = lines[1:]
  432. for op_avg_idx in op_avg_time_lines:
  433. line = op_avg_idx.split(',')
  434. op_name = line[0]
  435. op_start = float(line[2])
  436. op_start_time.append([op_name, op_start])
  437. except (IOError, OSError) as err:
  438. logger.critical(f'Error occurred when read {optime_file_path} file: {err}')
  439. raise ProfilerIOException()
  440. if not op_start_time:
  441. logger.warning(f'Empty when read {optime_file_path} file, please check the valid'
  442. 'data of this file.')
  443. return op_start_time
  444. def _add_step_flops_time(self, op_name, task_fops, op_idx, step_idx, op_start_time,
  445. op_all_step_time, op_all_step_comp):
  446. """Get the start time from the current task."""
  447. while((op_idx < len(op_start_time)) and (op_name != op_start_time[op_idx][0])):
  448. op_idx += 1
  449. if op_idx >= len(op_start_time):
  450. logger.info(f"Op name {op_name} does not exist in timeline dict.")
  451. return op_idx, step_idx, op_all_step_comp
  452. # do not add the op FLOPS that not in fp_and_bp time.
  453. while((step_idx < len(op_all_step_time)) and
  454. (op_start_time[op_idx][1] >= op_all_step_time[step_idx][1])):
  455. step_idx += 1
  456. if step_idx >= len(op_all_step_time):
  457. logger.info(f"Op name {op_name} does not exist in timeline dict.")
  458. # add the op FLOPS that in fp_and_bp time.
  459. if ((step_idx < len(op_all_step_time)) and
  460. (op_start_time[op_idx][1] >= op_all_step_time[step_idx][0]) and
  461. (op_start_time[op_idx][1] <= op_all_step_time[step_idx][1])):
  462. op_all_step_comp[step_idx][0] += task_fops
  463. # next op.
  464. op_idx += 1
  465. return op_idx, step_idx, op_all_step_comp