You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

parsing_profiling_data.py 13 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. #!/usr/bin/env python3
  2. # coding: utf-8
  3. # Copyright 2019 Huawei Technologies Co., Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. """parsing_profiling_data"""
  17. import os
  18. import subprocess
  19. import struct
  20. import re
  21. from tabulate import tabulate
# Default output file that fwrite_format() appends formatted tables to.
OUTPUT_FORMAT_DATA = "./output_format_data_hwts.txt"
# NOTE(review): BLOCK_LEN is not referenced anywhere in this file — several
# record headers parsed below happen to be 32 bytes; confirm before removing.
BLOCK_LEN = 32
# Sentinel returned by parsing() when no usable task timing was measured.
max_time_consume = 9999999999
  25. def get_log_slice_id(file_name):
  26. pattern = re.compile(r'(?<=slice_)\d+')
  27. slice_ = pattern.findall(file_name)
  28. index = re.findall(r'\d+', slice_[0])
  29. return int(index[0])
  30. def get_file_join_name(input_path=None, file_name=None):
  31. """Function for getting join name from input path."""
  32. name_list = []
  33. file_join_name = ''
  34. if os.path.exists(input_path):
  35. files = os.listdir(input_path)
  36. for f in files:
  37. if file_name in f and not f.endswith('.done') and not f.endswith('.join'):
  38. name_list.append(f)
  39. # resort name_list
  40. name_list.sort(key=get_log_slice_id)
  41. if len(name_list) == 1:
  42. file_join_name = input_path + os.sep + name_list[0]
  43. elif len(name_list) > 1:
  44. file_join_name = input_path + os.sep + '%s.join' % file_name
  45. if os.path.exists(file_join_name):
  46. os.remove(file_join_name)
  47. with open(file_join_name, 'ab') as bin_data:
  48. for i in name_list:
  49. file = input_path + os.sep + i
  50. with open(file, 'rb') as txt:
  51. bin_data.write(txt.read())
  52. return file_join_name
  53. def get_first_runtime_task_trace(input_file=None):
  54. """Function for getting first task trace from runtime."""
  55. result_data = []
  56. format_ = "BBHIQHHHHII"
  57. format_last = "B"
  58. with open(input_file, 'rb') as bin_data:
  59. while True:
  60. line_ = bin_data.read(96)
  61. if line_:
  62. if not line_.strip():
  63. continue
  64. else:
  65. break
  66. if len(line_) == 96:
  67. unpack_tuple = struct.unpack(format_, line_[0:32])
  68. char_string = line_[32:95].decode().strip(b'\x00'.decode())
  69. result_last = [hex(i) for i in struct.unpack(format_last, line_[95:96])]
  70. byte01 = bin(int(result_last[0].replace('0x', ''), 16)).replace('0b', '').zfill(8)
  71. persistant_1bit = byte01[-1]
  72. reserved_7bit = byte01[0:7]
  73. kernelname = char_string
  74. result_data.append((unpack_tuple[0], unpack_tuple[1], unpack_tuple[2], unpack_tuple[3],
  75. unpack_tuple[4], unpack_tuple[5], unpack_tuple[6], unpack_tuple[7],
  76. unpack_tuple[8], unpack_tuple[9], unpack_tuple[10],
  77. kernelname, persistant_1bit, reserved_7bit))
  78. return result_data
  79. def get_44_tsch_fw_timeline(input_file=None):
  80. """Function for getting tsch_fw_timeline from input file."""
  81. result_data = []
  82. format_ = "BBHIHHHHQII"
  83. with open(input_file, 'rb') as bin_data:
  84. while True:
  85. line_ = bin_data.read(32)
  86. if line_:
  87. if not line_.strip():
  88. continue
  89. else:
  90. break
  91. if len(line_) == 32:
  92. result_ = struct.unpack(format_, line_)
  93. result_data.append((result_[0], result_[1], result_[2], result_[3], result_[4], result_[5], result_[6],
  94. result_[7], result_[8], result_[9], result_[10]))
  95. return result_data
  96. def get_43_ai_core_data(input_file=None):
  97. """Function for getting datas from aicore: ov/cnt/total_cyc/ov_cyc/pmu_cnt/stream_id."""
  98. result_data = []
  99. with open(input_file, 'rb') as ai_core_file:
  100. while True:
  101. line_ = ai_core_file.read(128)
  102. if line_:
  103. if not line_.strip():
  104. continue
  105. else:
  106. break
  107. format_ = "BBHHHIIqqqqqqqqqqIIIIIIII"
  108. result_ = [hex(i) for i in struct.unpack(format_, line_)]
  109. byte01 = bin(int(result_[0].replace('0x', ''), 16)).replace('0b', '').zfill(8)
  110. ov = byte01[-4]
  111. cnt = byte01[0:4]
  112. total_cyc = int(result_[7].replace('0x', ''), 16)
  113. ov_cyc = int(result_[8].replace('0x', ''), 16)
  114. pmu_cnt = tuple(int(i.replace('0x', ''), 16) for i in result_[9:17])
  115. stream_id = int(result_[17].replace('0x', ''), 16)
  116. result_data.append((ov, cnt, total_cyc, ov_cyc, stream_id, pmu_cnt))
  117. return result_data
  118. def get_last_tsch_training_trace(input_file=None):
  119. """Function for getting last tsch training trace from input file."""
  120. result_data = []
  121. format_ = "LLHHLL"
  122. with open(input_file, 'rb') as bin_data:
  123. while True:
  124. line_ = bin_data.read(20)
  125. if line_:
  126. if not line_.strip():
  127. continue
  128. else:
  129. break
  130. if len(line_) == 20:
  131. result_ = struct.unpack(format_, line_)
  132. result_data.append((result_[0], result_[1], result_[3], result_[2], result_[4], result_[5]))
  133. return result_data
  134. def get_45_hwts_log(input_file=None):
  135. """Function for getting hwts log from input file."""
  136. format_ = ['QIIIIIIIIIIII', 'QIIQIIIIIIII', 'IIIIQIIIIIIII']
  137. log_type = ['Start of task', 'End of task', 'Start of block', 'End of block', 'Block PMU']
  138. type1, type2, type3 = [], [], []
  139. with open(input_file, 'rb') as hwts_data:
  140. while True:
  141. line_ = hwts_data.read(64)
  142. if line_:
  143. if not line_.strip():
  144. continue
  145. else:
  146. break
  147. byte_first_four = struct.unpack('BBHHH', line_[0:8])
  148. byte_first = bin(byte_first_four[0]).replace('0b', '').zfill(8)
  149. type_ = byte_first[-3:]
  150. is_warn_res0_ov = byte_first[4]
  151. cnt = int(byte_first[0:4], 2)
  152. core_id = byte_first_four[1]
  153. blk_id, task_id = byte_first_four[3], byte_first_four[4]
  154. if type_ in ['000', '001', '010']: # log type 0,1,2
  155. result_ = struct.unpack(format_[0], line_[8:])
  156. syscnt = result_[0]
  157. stream_id = result_[1]
  158. type1.append((log_type[int(type_, 2)], cnt, core_id, blk_id, task_id, syscnt, stream_id))
  159. elif type_ == '011': # log type 3
  160. result_ = struct.unpack(format_[1], line_[8:])
  161. syscnt = result_[0]
  162. stream_id = result_[1]
  163. if is_warn_res0_ov == '1':
  164. warn_status = result_[3]
  165. else:
  166. warn_status = None
  167. type2.append(
  168. (log_type[int(type_, 2)], cnt, is_warn_res0_ov, core_id, blk_id, task_id, syscnt, stream_id,
  169. warn_status))
  170. type1.append((log_type[int(type_, 2)], cnt, core_id, blk_id, task_id, syscnt, stream_id))
  171. elif type_ == '100': # log type 4
  172. result_ = struct.unpack(format_[2], line_[8:])
  173. stream_id = result_[2]
  174. if is_warn_res0_ov == '0':
  175. total_cyc = result_[4]
  176. ov_cyc = None
  177. else:
  178. total_cyc = None
  179. ov_cyc = result_[4]
  180. pmu_events = result_[-8:]
  181. type3.append((log_type[int(type_, 2)], cnt, is_warn_res0_ov, core_id, blk_id, task_id, stream_id,
  182. total_cyc, ov_cyc, pmu_events))
  183. type1.append((log_type[int(type_, 2)], cnt, core_id, blk_id, task_id, total_cyc, stream_id))
  184. return type1, type2, type3
  185. def fwrite_format(output_data_path=OUTPUT_FORMAT_DATA, data_source=None, is_start=False):
  186. if is_start and os.path.exists(OUTPUT_FORMAT_DATA):
  187. os.remove(OUTPUT_FORMAT_DATA)
  188. with open(output_data_path, 'a+') as f:
  189. f.write(data_source)
  190. f.write("\n")
  191. def parsing(source_path):
  192. """Function for parsing aicore data/tsch fw timeline data/HWTS data/last tsch training trace data."""
  193. # subprocess.run("cp -r %s ./jobs/" % source_path, shell=True)
  194. job_name = source_path.split('/')[-1]
  195. job_path = "/var/log/npu/profiling/" + job_name
  196. fwrite_format(data_source='====================starting parse task ==================', is_start=True)
  197. result = get_file_join_name(input_path=job_path, file_name='runtime.host.runtime')
  198. if result:
  199. runtime_task_trace_data = get_first_runtime_task_trace(input_file=result)
  200. fwrite_format(data_source='====================first runtime task trace data==================')
  201. fwrite_format(data_source=tabulate(runtime_task_trace_data,
  202. ['mode', 'rpttype', 'bufsize', 'reserved', 'timestamp', 'eventname',
  203. 'tasktype', 'streamid',
  204. 'task_id', 'thread', 'device_id', 'kernelname', 'persistant_1bit',
  205. 'reserved_7bit'],
  206. tablefmt='simple'))
  207. result = get_file_join_name(input_path=job_path, file_name='aicore.data.43.dev.profiler_default_tag')
  208. if result:
  209. ai_core_data = get_43_ai_core_data(input_file=result)
  210. fwrite_format(data_source='============================43 AI core data =========================')
  211. fwrite_format(data_source=tabulate(ai_core_data,
  212. ['Overflow', 'cnt', 'Total cycles', 'overflowed cycles', 'Stream ID',
  213. 'PMU events'],
  214. tablefmt='simple'))
  215. result = get_file_join_name(input_path=job_path, file_name='ts_track.data.44.dev.profiler_default_tag')
  216. if result:
  217. tsch_fw_timeline_data = get_44_tsch_fw_timeline(input_file=result)
  218. fwrite_format(data_source='============================44 tsch fw timeline data =========================')
  219. fwrite_format(data_source=tabulate(tsch_fw_timeline_data,
  220. ['mode', 'rptType', 'bufSize', 'reserved', 'task_type', 'task_state',
  221. 'stream_id',
  222. 'task_id', 'timestamp', 'thread', 'device_id'], tablefmt='simple'))
  223. result = get_file_join_name(input_path=job_path, file_name='hwts.log.data.45.dev.profiler_default_tag')
  224. start_time = 0
  225. end_time = 0
  226. if result:
  227. data_1, data_2, data_3 = get_45_hwts_log(input_file=result)
  228. fwrite_format(data_source='============================45 HWTS data ============================')
  229. for i in data_1:
  230. if i[0] == 'Start of task' and i[4] == 60000 and start_time == 0:
  231. start_time = i[5]
  232. if i[0] == 'End of task' and i[4] == 60000 and end_time == 0:
  233. end_time = i[5]
  234. fwrite_format(data_source=tabulate(data_1,
  235. ['Type', 'cnt', 'Core ID', 'Block ID', 'Task ID', 'Cycle counter',
  236. 'Stream ID'],
  237. tablefmt='simple'))
  238. fwrite_format(data_source=tabulate(data_2,
  239. ['Type', 'cnt', 'WARN', 'Core ID', 'Block ID', 'Task ID', 'Cycle counter',
  240. 'Stream ID', 'WARN Status'],
  241. tablefmt='simple'))
  242. fwrite_format(data_source=tabulate(data_3,
  243. ['Type', 'cnt', 'Overflow', 'Core ID', 'Block ID', 'Task ID', 'Stream ID',
  244. 'Total cycles',
  245. 'Overflowed cycles',
  246. 'PMU events'], tablefmt='simple'))
  247. result = get_file_join_name(input_path=job_path, file_name='training_trace.dev.profiler_default_tag')
  248. if result:
  249. tsch_training_trace_data = get_last_tsch_training_trace(input_file=result)
  250. fwrite_format(data_source='============================last tsch training_trace data=========================')
  251. fwrite_format(data_source=tabulate(tsch_training_trace_data,
  252. ['id_lo', 'id_hi', 'stream_id', 'task_id', 'syscnt_lo', 'syscnt_hi'],
  253. tablefmt='simple'))
  254. try:
  255. time_consume = abs(int(start_time) - int(end_time))
  256. return time_consume if time_consume != 0 else max_time_consume
  257. except SyntaxError:
  258. return max_time_consume

AKG(Auto Kernel Generator)对深度神经网络中的算子进行优化,并提供特定模式下的算子自动融合功能。AKG与MindSpore的图算融合功能协同工作,可提升在不同硬件后端上运行网络的性能。