You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

convert_async.py 10 kB

5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. Module to provide conversion capabilities from .timestamp async dump files to .npy.
  17. It's an internal module for debugger backend but not exposed to users.
  18. """
  19. import os
  20. import sys
  21. from pathlib import Path
  22. from importlib import import_module
  23. from collections import namedtuple
  24. class ConvertToolLoader:
  25. """
  26. Module to load CANN conversion tool.
  27. """
  28. def __init__(self):
  29. self.utils = None
  30. self.common = None
  31. self.dump_data_parser = None
  32. self.format_conversion = None
  33. self.progress = None
  34. self.log = None
  35. self.compare_none_error = None
  36. self.compare_exception = None
  37. self.toolkit_path = self.find_toolkit_path()
  38. self.load_convert_tool()
  39. @staticmethod
  40. def find_toolkit_path():
  41. """
  42. Find the path to Ascend toolkit.
  43. """
  44. ascend_toolkit_path = os.getenv("ASCEND_TOOLKIT_PATH")
  45. if not ascend_toolkit_path:
  46. ascend_toolkit_path = "/usr/local/Ascend"
  47. if not os.path.exists(ascend_toolkit_path):
  48. raise ValueError(
  49. "Path {} does not exist. Please install Ascend run packages " \
  50. "and set the environment variable $ASCEND_TOOLKIT_PATH correctly.".format(ascend_toolkit_path))
  51. toolkit_search_path = Path(ascend_toolkit_path).resolve()
  52. msaccucmp_file_list = list(toolkit_search_path.rglob('msaccucmp.py*'))
  53. if not msaccucmp_file_list:
  54. toolkit_search_path = toolkit_search_path / 'tools'
  55. msaccucmp_file_list = list(toolkit_search_path.rglob('msaccucmp.py*'))
  56. if not msaccucmp_file_list:
  57. raise ValueError("Failed to find msaccucmp.py or msaccucmp.pyc file under {}. " \
  58. "Please install Ascend toolkit.".format(ascend_toolkit_path))
  59. return msaccucmp_file_list[0].parent
  60. def load_convert_tool(self):
  61. """
  62. Load CANN conversion tool from the toolkit path.
  63. """
  64. # add toolkit path to system searching module path
  65. if str(self.toolkit_path) not in sys.path:
  66. sys.path.insert(0, str(self.toolkit_path))
  67. try:
  68. self.utils = import_module('utils')
  69. self.common = import_module('common')
  70. self.dump_data_parser = import_module(
  71. 'dump_data_parser').DumpDataParser
  72. self.format_conversion = import_module(
  73. 'shape_conversion').FormatConversionMain
  74. except ModuleNotFoundError:
  75. self.reset_system_path()
  76. raise ModuleNotFoundError(
  77. "Failed to load CANN conversion tools under {}. Please make sure Ascend " \
  78. "toolkit has been installed properly.".format(self.toolkit_path))
  79. try:
  80. self.progress = import_module("progress").Progress
  81. except (ModuleNotFoundError, AttributeError):
  82. self.progress = self.utils.Progress
  83. try:
  84. self.log = import_module("log")
  85. if not hasattr(self.log, "print_error_log"):
  86. raise ModuleNotFoundError
  87. except ModuleNotFoundError:
  88. self.log = self.utils
  89. try:
  90. compare_error = import_module("compare_error")
  91. self.compare_none_error = compare_error.CompareError.MSACCUCMP_NONE_ERROR
  92. self.compare_exception = compare_error.CompareError
  93. except ModuleNotFoundError:
  94. self.compare_none_error = self.utils.VECTOR_COMPARISON_NONE_ERROR
  95. self.compare_exception = self.utils.CompareError
  96. def reset_system_path(self):
  97. """
  98. Restore system searching module path
  99. """
  100. if str(self.toolkit_path) in sys.path:
  101. sys.path.remove(str(self.toolkit_path))
  102. def parse_args(file_list, output_path):
  103. """
  104. Helper function to parse the input argument for the conversion configuration.
  105. """
  106. args_dict = dict()
  107. args_dict['dump_version'] = '2.0'
  108. args_dict['format'] = 'NCHW'
  109. args_dict['output_file_type'] = 'msnpy'
  110. args_dict['dump_path'] = output_path
  111. args_dict['output_path'] = output_path
  112. args_dict['file_list'] = file_list
  113. args_dict['input'] = None
  114. args_dict['output'] = None
  115. args_dict['shape'] = None
  116. args_dict['custom_script_path'] = None
  117. args_parser = namedtuple("args_parser", args_dict.keys())
  118. return args_parser(**args_dict)
  119. class AsyncDumpConverter:
  120. """
  121. Convert the target async dump data into npy files.
  122. """
  123. def __init__(self, file_list, output_path):
  124. # check input path
  125. file_list = [os.path.realpath(file_item) for file_item in file_list]
  126. output_path = os.path.realpath(output_path)
  127. self.convert_tool = ConvertToolLoader()
  128. self.args = parse_args(file_list, output_path)
  129. self.files_to_convert = self.args.file_list
  130. self.output_path = self.args.output_path
  131. self.failed_file_path = os.path.join(
  132. self.output_path, 'convert_failed_file_list.txt')
  133. self.clear_failed_list_file()
  134. def clear_failed_list_file(self):
  135. """
  136. Remove existing failed txt file.
  137. """
  138. if self.failed_file_path and os.path.exists(self.failed_file_path):
  139. os.remove(self.failed_file_path)
  140. def convert_files(self):
  141. """
  142. Main entry of the converter to convert async dump files into npy format.
  143. """
  144. self.convert_tool.log.print_info_log('Start to convert async dump files.')
  145. try:
  146. if self.args.format is not None:
  147. convert = self.convert_tool.format_conversion(self.args)
  148. else:
  149. convert = self.convert_tool.dump_data_parser(self.args)
  150. # 1. check if arguments are valid
  151. convert.check_arguments_valid()
  152. # 2. convert format for dump data
  153. ret_code = self.handle_multi_process(convert, self.files_to_convert)
  154. if ret_code != self.convert_tool.compare_none_error:
  155. self.convert_tool.log.print_info_log('An error has occurred while converting format.')
  156. finally:
  157. # clean up sys.path no matter conversion is successful or not to avoid pollution
  158. self.convert_tool.reset_system_path()
  159. self.convert_tool.log.print_info_log('Finish to convert async dump files.')
  160. def handle_multi_process(self, convert_obj, files):
  161. """
  162. Convert async format files to npy in a multithreaded manner.
  163. """
  164. return_code = self.convert_tool.compare_none_error
  165. # try looking for function in compatibility with the toolkit package version.
  166. progress = self.convert_tool.progress(len(files))
  167. if hasattr(convert_obj, 'multi_process'):
  168. setattr(convert_obj.multi_process, '_progress', progress)
  169. else:
  170. setattr(convert_obj, 'progress', progress)
  171. multi_process_file_list, big_file_list = self._get_file_list(files, convert_obj)
  172. if multi_process_file_list:
  173. if hasattr(convert_obj, 'multi_process'):
  174. ret_mp = getattr(convert_obj.multi_process, '_do_multi_process')(multi_process_file_list)
  175. else:
  176. ret_mp = getattr(convert_obj, '_do_multi_process')(multi_process_file_list)
  177. if ret_mp != self.convert_tool.compare_none_error:
  178. return_code = ret_mp
  179. if big_file_list:
  180. ret_bf = self._process_big_file(big_file_list, convert_obj)
  181. if ret_bf != self.convert_tool.compare_none_error:
  182. return_code = ret_bf
  183. if return_code != self.convert_tool.compare_none_error:
  184. if os.path.exists(self.failed_file_path):
  185. self.convert_tool.log.print_info_log(
  186. 'The list of file that failed to convert has been written to "'
  187. + self.failed_file_path + '".')
  188. return return_code
  189. def _get_file_list(self, files, convert_obj):
  190. """
  191. Process to get file lists in multi_process.
  192. """
  193. multi_process_file_list = []
  194. big_file_list = []
  195. max_file_size = 0
  196. if hasattr(convert_obj, 'multi_process'):
  197. max_file_size = getattr(convert_obj.multi_process, 'get_max_file_size')()
  198. else:
  199. max_file_size = getattr(convert_obj, '_get_max_file_size')()
  200. for cur_file in files:
  201. cur_path = cur_file
  202. if os.path.isfile(cur_path):
  203. if os.path.getsize(cur_path) > max_file_size:
  204. big_file_list.append(cur_path)
  205. else:
  206. multi_process_file_list.append(cur_path)
  207. return multi_process_file_list, big_file_list
  208. def _process_big_file(self, big_file_list, convert_obj):
  209. """
  210. Process big file in multi_process.
  211. """
  212. return_code = self.convert_tool.compare_none_error
  213. for big_file in big_file_list:
  214. if hasattr(convert_obj, '_convert_format_for_one_file'):
  215. ret_bf, _ = getattr(convert_obj, '_convert_format_for_one_file')(big_file)
  216. else:
  217. ret_bf, _ = getattr(convert_obj, 'convert_format_for_one_file')(big_file)
  218. if hasattr(convert_obj, 'multi_process'):
  219. getattr(convert_obj.multi_process, '_handle_result_callback')([ret_bf, big_file])
  220. else:
  221. getattr(convert_obj, '_handle_result_callback')([ret_bf, big_file])
  222. if ret_bf != self.convert_tool.compare_none_error:
  223. return_code = ret_bf
  224. return return_code