You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

convert_async.py 14 kB

5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. Module to provide conversion capabilities from .timestamp async dump files to .npy.
  17. It's an internal module for debugger backend but not exposed to users.
  18. """
  19. import os
  20. import glob
  21. import stat
  22. import sys
  23. from pathlib import Path
  24. from importlib import import_module
  25. from collections import namedtuple
  26. import numpy as np
  27. class ConvertToolLoader:
  28. """
  29. Module to load CANN conversion tool.
  30. """
  31. def __init__(self):
  32. self.utils = None
  33. self.common = None
  34. self.dump_data_parser = None
  35. self.format_conversion = None
  36. self.progress = None
  37. self.log = None
  38. self.compare_none_error = None
  39. self.compare_exception = None
  40. self.toolkit_path = self.find_toolkit_path()
  41. self.load_convert_tool()
  42. @staticmethod
  43. def find_toolkit_path():
  44. """
  45. Find the path to Ascend toolkit.
  46. """
  47. ascend_toolkit_path = os.getenv("ASCEND_TOOLKIT_PATH")
  48. if not ascend_toolkit_path:
  49. ascend_toolkit_path = "/usr/local/Ascend"
  50. if not os.path.exists(ascend_toolkit_path):
  51. raise ValueError(
  52. "Path {} does not exist. Please install Ascend run packages " \
  53. "and set the environment variable $ASCEND_TOOLKIT_PATH correctly.".format(ascend_toolkit_path))
  54. toolkit_search_path = Path(ascend_toolkit_path).resolve()
  55. msaccucmp_file_list = list(toolkit_search_path.rglob('msaccucmp.py*'))
  56. if not msaccucmp_file_list:
  57. toolkit_search_path = toolkit_search_path / 'tools'
  58. msaccucmp_file_list = list(toolkit_search_path.rglob('msaccucmp.py*'))
  59. if not msaccucmp_file_list:
  60. raise ValueError("Failed to find msaccucmp.py or msaccucmp.pyc file under {}. " \
  61. "Please install Ascend toolkit.".format(ascend_toolkit_path))
  62. return msaccucmp_file_list[0].parent
  63. def load_convert_tool(self):
  64. """
  65. Load CANN conversion tool from the toolkit path.
  66. """
  67. # add toolkit path to system searching module path
  68. if str(self.toolkit_path) not in sys.path:
  69. sys.path.insert(0, str(self.toolkit_path))
  70. try:
  71. self.utils = import_module('utils')
  72. self.common = import_module('common')
  73. self.dump_data_parser = import_module(
  74. 'dump_data_parser').DumpDataParser
  75. self.format_conversion = import_module(
  76. 'shape_conversion').FormatConversionMain
  77. except ModuleNotFoundError:
  78. self.reset_system_path()
  79. raise ModuleNotFoundError(
  80. "Failed to load CANN conversion tools under {}. Please make sure Ascend " \
  81. "toolkit has been installed properly.".format(self.toolkit_path))
  82. try:
  83. self.progress = import_module("progress").Progress
  84. except (ModuleNotFoundError, AttributeError):
  85. self.progress = self.utils.Progress
  86. try:
  87. self.log = import_module("log")
  88. if not hasattr(self.log, "print_error_log"):
  89. raise ModuleNotFoundError
  90. except ModuleNotFoundError:
  91. self.log = self.utils
  92. try:
  93. compare_error = import_module("compare_error")
  94. self.compare_none_error = compare_error.CompareError.MSACCUCMP_NONE_ERROR
  95. self.compare_exception = compare_error.CompareError
  96. except ModuleNotFoundError:
  97. self.compare_none_error = self.utils.VECTOR_COMPARISON_NONE_ERROR
  98. self.compare_exception = self.utils.CompareError
  99. def reset_system_path(self):
  100. """
  101. Restore system searching module path
  102. """
  103. if str(self.toolkit_path) in sys.path:
  104. sys.path.remove(str(self.toolkit_path))
  105. def parse_args(file_list, output_path):
  106. """
  107. Helper function to parse the input argument for the conversion configuration.
  108. """
  109. args_dict = dict()
  110. args_dict['dump_version'] = '2.0'
  111. args_dict['format'] = 'NCHW'
  112. args_dict['output_file_type'] = 'npy'
  113. args_dict['dump_path'] = output_path
  114. args_dict['output_path'] = output_path
  115. args_dict['file_list'] = file_list
  116. args_dict['input'] = None
  117. args_dict['output'] = None
  118. args_dict['shape'] = None
  119. args_dict['custom_script_path'] = None
  120. args_parser = namedtuple("args_parser", args_dict.keys())
  121. return args_parser(**args_dict)
  122. class AsyncDumpConverter:
  123. """
  124. Convert the target async dump data into npy files.
  125. """
  126. def __init__(self, file_list, output_path):
  127. # check input path
  128. file_list = [os.path.realpath(file_item) for file_item in file_list]
  129. output_path = os.path.realpath(output_path)
  130. self.convert_tool = ConvertToolLoader()
  131. self.args = parse_args(file_list, output_path)
  132. self.files_to_convert = self.args.file_list
  133. self.output_path = self.args.output_path
  134. self.failed_file_path = os.path.join(
  135. self.output_path, 'convert_failed_file_list.txt')
  136. self.clear_failed_list_file()
  137. def clear_failed_list_file(self):
  138. """
  139. Remove existing failed txt file.
  140. """
  141. if self.failed_file_path and os.path.exists(self.failed_file_path):
  142. os.remove(self.failed_file_path)
  143. def convert_files(self):
  144. """
  145. Main entry of the converter to convert async dump files into npy format.
  146. """
  147. self.convert_tool.log.print_info_log('Start to convert async dump files.')
  148. try:
  149. if self.args.format is not None:
  150. convert = self.convert_tool.format_conversion(self.args)
  151. else:
  152. convert = self.convert_tool.dump_data_parser(self.args)
  153. # 1. check if arguments are valid
  154. convert.check_arguments_valid()
  155. # 2. convert format for dump data
  156. ret_code = self.handle_multi_process(convert, self.files_to_convert)
  157. self._rename_generated_npy_files()
  158. if ret_code != self.convert_tool.compare_none_error:
  159. if os.path.exists(self.failed_file_path):
  160. self.convert_failed_tensors()
  161. finally:
  162. # clean up sys.path no matter conversion is successful or not to avoid pollution
  163. self.convert_tool.reset_system_path()
  164. self.convert_tool.log.print_info_log('Finish to convert async dump files.')
  165. def convert_failed_tensors(self):
  166. """
  167. Convert the failed tensor recorded in the failed txt file.
  168. """
  169. self.convert_tool.log.print_info_log(
  170. 'Start to convert failed tensors recorded in ' + self.failed_file_path + '.')
  171. with open(self.failed_file_path) as failed_lines:
  172. for failed_line in failed_lines:
  173. try:
  174. failed_line_list = failed_line.rstrip().split(',')
  175. self.convert_one_failed_tensor(failed_line_list)
  176. except (ValueError, OSError, AttributeError, self.convert_tool.compare_exception) as err:
  177. self.convert_tool.log.print_error_log(
  178. 'Failed to convert ' + failed_line + ' to Host format: ' + str(err))
  179. def convert_one_failed_tensor(self, failed_tensor):
  180. """
  181. Convert failed operator one by one.
  182. """
  183. if len(failed_tensor) <= 1:
  184. raise ValueError(
  185. "Invalid tensor info in convert_failed_file_list.txt")
  186. file_path = failed_tensor[0]
  187. type_index = failed_tensor[1:]
  188. op_data = self.convert_tool.utils.parse_dump_file(
  189. file_path, self.args.dump_version)
  190. for type_index_item in type_index:
  191. tensor_type, index = type_index_item.split(':')
  192. index = int(index)
  193. tensor = getattr(op_data, tensor_type)[index]
  194. dump_data_array = self.convert_tool.utils.deserialize_dump_data_to_array(tensor)
  195. array = dump_data_array.reshape(tensor.shape.dim)
  196. out_path = self._generate_path(file_path, tensor_type, index, tensor.format)
  197. self._save_tensor_to_npy_file(out_path, array)
  198. def handle_multi_process(self, convert_obj, files):
  199. """
  200. Convert async format files to npy in a multithreaded manner.
  201. """
  202. return_code = self.convert_tool.compare_none_error
  203. # try looking for function in compatibility with the toolkit package version.
  204. progress = self.convert_tool.progress(len(files))
  205. if hasattr(convert_obj, 'multi_process'):
  206. setattr(convert_obj.multi_process, '_progress', progress)
  207. else:
  208. setattr(convert_obj, 'progress', progress)
  209. multi_process_file_list, big_file_list = self._get_file_list(files, convert_obj)
  210. if multi_process_file_list:
  211. if hasattr(convert_obj, 'multi_process'):
  212. ret_mp = getattr(convert_obj.multi_process, '_do_multi_process')(multi_process_file_list)
  213. else:
  214. ret_mp = getattr(convert_obj, '_do_multi_process')(multi_process_file_list)
  215. if ret_mp != self.convert_tool.compare_none_error:
  216. return_code = ret_mp
  217. if big_file_list:
  218. ret_bf = self._process_big_file(big_file_list, convert_obj)
  219. if ret_bf != self.convert_tool.compare_none_error:
  220. return_code = ret_bf
  221. if return_code != self.convert_tool.compare_none_error:
  222. if os.path.exists(self.failed_file_path):
  223. self.convert_tool.log.print_info_log(
  224. 'The list of file that failed to convert has been written to "'
  225. + self.failed_file_path + '".')
  226. return return_code
  227. def _get_file_list(self, files, convert_obj):
  228. """
  229. Process to get file lists in multi_process.
  230. """
  231. multi_process_file_list = []
  232. big_file_list = []
  233. max_file_size = 0
  234. if hasattr(convert_obj, 'multi_process'):
  235. max_file_size = getattr(convert_obj.multi_process, 'get_max_file_size')()
  236. else:
  237. max_file_size = getattr(convert_obj, '_get_max_file_size')()
  238. for cur_file in files:
  239. cur_path = cur_file
  240. if os.path.isfile(cur_path):
  241. if os.path.getsize(cur_path) > max_file_size:
  242. big_file_list.append(cur_path)
  243. else:
  244. multi_process_file_list.append(cur_path)
  245. return multi_process_file_list, big_file_list
  246. def _process_big_file(self, big_file_list, convert_obj):
  247. """
  248. Process big file in multi_process.
  249. """
  250. return_code = self.convert_tool.compare_none_error
  251. for big_file in big_file_list:
  252. if hasattr(convert_obj, '_convert_format_for_one_file'):
  253. ret_bf, _ = getattr(convert_obj, '_convert_format_for_one_file')(big_file)
  254. else:
  255. ret_bf, _ = getattr(convert_obj, 'convert_format_for_one_file')(big_file)
  256. if hasattr(convert_obj, 'multi_process'):
  257. getattr(convert_obj.multi_process, '_handle_result_callback')([ret_bf, big_file])
  258. else:
  259. getattr(convert_obj, '_handle_result_callback')([ret_bf, big_file])
  260. if ret_bf != self.convert_tool.compare_none_error:
  261. return_code = ret_bf
  262. return return_code
  263. @staticmethod
  264. def _save_tensor_to_npy_file(out_path, dump_data_array):
  265. """
  266. Save tensor file into npy format.
  267. """
  268. np.save(out_path, dump_data_array)
  269. os.chmod(out_path, stat.S_IRUSR)
  270. def _generate_path(self, file_path, tensor_type, idx, tensor_format):
  271. """
  272. Generate path and filename to the target npy files
  273. """
  274. file_name = os.path.basename(file_path)
  275. name_splits = file_name.split('.')
  276. name_splits[1] = name_splits[1].split('_')[-1]
  277. file_name_no_scope = '.'.join(name_splits)
  278. out_file_name = "%s.%s.%d.%s.npy" % (
  279. file_name_no_scope,
  280. tensor_type,
  281. idx,
  282. self.convert_tool.common.get_format_string(tensor_format)
  283. )
  284. return os.path.join(self.output_path, out_file_name)
  285. def _rename_generated_npy_files(self):
  286. """
  287. In order to follow dump naming convention, rename npy files generated by CANN conversion tool.
  288. """
  289. target_file_list = []
  290. for in_file in self.files_to_convert:
  291. target_file_list.extend(glob.glob(in_file + "*.npy"))
  292. for target_file in target_file_list:
  293. old_filename = os.path.basename(target_file)
  294. name_splits = old_filename.split('.')
  295. name_splits[1] = name_splits[1].split('_')[-1]
  296. name_splits[-2] = self.args.format
  297. new_file_name = '.'.join(name_splits)
  298. out_path = os.path.join(self.output_path, new_file_name)
  299. os.rename(target_file, out_path)
  300. os.chmod(out_path, stat.S_IRUSR)
  301. self.convert_tool.log.print_info_log("Rename file " + target_file + " to " + out_path)