You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

dump_test_utils.py 8.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. Utils for testing dump feature.
  17. """
import copy
import json
import os
  20. async_dump_dict = {
  21. "common_dump_settings": {
  22. "dump_mode": 0,
  23. "path": "",
  24. "net_name": "Net",
  25. "iteration": "0",
  26. "input_output": 2,
  27. "kernels": ["Default/TensorAdd-op3"],
  28. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  29. "op_debug_mode": 0
  30. }
  31. }
  32. e2e_dump_dict = {
  33. "common_dump_settings": {
  34. "dump_mode": 0,
  35. "path": "",
  36. "net_name": "Net",
  37. "iteration": "0",
  38. "input_output": 0,
  39. "kernels": ["Default/Conv-op12"],
  40. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  41. "op_debug_mode": 0
  42. },
  43. "e2e_dump_settings": {
  44. "enable": True,
  45. "trans_flag": False
  46. }
  47. }
  48. async_dump_dict_2 = {
  49. "common_dump_settings": {
  50. "dump_mode": 0,
  51. "path": "/tmp/async_dump/test_async_dump_net_multi_layer_mode1",
  52. "net_name": "test",
  53. "iteration": "0",
  54. "input_output": 2,
  55. "kernels": [
  56. "default/TensorAdd-op10",
  57. "Gradients/Default/network-WithLossCell/_backbone-ReLUReduceMeanDenseRelu/dense-Dense/gradBiasAdd/"\
  58. "BiasAddGrad-op8",
  59. "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op5",
  60. "Default/optimizer-Momentum/tuple_getitem-op29",
  61. "Default/optimizer-Momentum/ApplyMomentum-op12"
  62. ],
  63. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  64. "op_debug_mode": 0
  65. }
  66. }
  67. e2e_dump_dict_2 = {
  68. "common_dump_settings": {
  69. "dump_mode": 0,
  70. "path": "",
  71. "net_name": "Net",
  72. "iteration": "all",
  73. "input_output": 0,
  74. "kernels": ["Default/Conv-op12"],
  75. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  76. "op_debug_mode": 0
  77. },
  78. "e2e_dump_settings": {
  79. "enable": True,
  80. "trans_flag": False
  81. }
  82. }
  83. async_dump_dict_3 = {
  84. "common_dump_settings": {
  85. "dump_mode": 0,
  86. "path": "",
  87. "net_name": "Net",
  88. "iteration": "all",
  89. "input_output": 2,
  90. "kernels": ["Default/TensorAdd-op3"],
  91. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  92. "op_debug_mode": 0
  93. }
  94. }
  95. def generate_dump_json(dump_path, json_file_name, test_key):
  96. """
  97. Util function to generate dump configuration json file.
  98. """
  99. if test_key == "test_async_dump":
  100. data = async_dump_dict
  101. data["common_dump_settings"]["path"] = dump_path
  102. elif test_key == "test_e2e_dump":
  103. data = e2e_dump_dict
  104. data["common_dump_settings"]["path"] = dump_path
  105. elif test_key == "test_async_dump_net_multi_layer_mode1":
  106. data = async_dump_dict_2
  107. data["common_dump_settings"]["path"] = dump_path
  108. elif test_key in ("test_GPU_e2e_multi_root_graph_dump", "test_Ascend_e2e_multi_root_graph_dump"):
  109. data = e2e_dump_dict_2
  110. data["common_dump_settings"]["path"] = dump_path
  111. elif test_key == "test_Ascend_async_multi_root_graph_dump":
  112. data = async_dump_dict_3
  113. data["common_dump_settings"]["path"] = dump_path
  114. elif test_key == "test_async_dump_npy":
  115. data = async_dump_dict
  116. data["common_dump_settings"]["path"] = dump_path
  117. data["common_dump_settings"]["file_format"] = "npy"
  118. elif test_key == "test_async_dump_bin":
  119. data = async_dump_dict
  120. data["common_dump_settings"]["path"] = dump_path
  121. data["common_dump_settings"]["file_format"] = "bin"
  122. else:
  123. raise ValueError(
  124. "Failed to generate dump json file. The test name value " + test_key + " is invalid.")
  125. with open(json_file_name, 'w') as f:
  126. json.dump(data, f)
  127. def generate_dump_json_with_overflow(dump_path, json_file_name, test_key, op):
  128. """
  129. Util function to generate dump configuration json file.
  130. """
  131. if test_key == "test_async_dump":
  132. data = async_dump_dict
  133. data["common_dump_settings"]["path"] = dump_path
  134. data["common_dump_settings"]["op_debug_mode"] = op
  135. elif test_key == "test_async_dump_npy":
  136. data = async_dump_dict
  137. data["common_dump_settings"]["path"] = dump_path
  138. data["common_dump_settings"]["op_debug_mode"] = op
  139. data["common_dump_settings"]["file_format"] = "npy"
  140. else:
  141. raise ValueError(
  142. "Failed to generate dump json file. Overflow only support in async dump")
  143. with open(json_file_name, 'w') as f:
  144. json.dump(data, f)
  145. def generate_statistic_dump_json(dump_path, json_file_name, test_key, saved_data):
  146. """
  147. Util function to generate dump configuration json file for statistic dump.
  148. """
  149. if test_key == "test_gpu_e2e_dump":
  150. data = e2e_dump_dict
  151. elif test_key == "test_async_dump":
  152. data = async_dump_dict
  153. data["common_dump_settings"]["input_output"] = 0
  154. data["common_dump_settings"]["file_format"] = "npy"
  155. else:
  156. raise ValueError(
  157. "Failed to generate statistic dump json file. The test name value " + test_key + " is invalid.")
  158. data["common_dump_settings"]["path"] = dump_path
  159. data["common_dump_settings"]["saved_data"] = saved_data
  160. with open(json_file_name, 'w') as f:
  161. json.dump(data, f)
  162. def generate_cell_dump_json(dump_path, json_file_name, test_key, dump_mode):
  163. """
  164. Util function to generate dump configuration json file.
  165. """
  166. if test_key == "test_async_dump":
  167. data = async_dump_dict
  168. data["common_dump_settings"]["path"] = dump_path
  169. data["common_dump_settings"]["dump_mode"] = dump_mode
  170. else:
  171. raise ValueError(
  172. "Failed to generate dump json file. Overflow only support in async dump")
  173. with open(json_file_name, 'w') as f:
  174. json.dump(data, f)
  175. def check_dump_structure(dump_path, json_file_path, num_card, num_graph, num_iteration):
  176. """
  177. Util to check if the dump structure is correct.
  178. """
  179. with open(json_file_path) as f:
  180. data = json.load(f)
  181. net_name = data["common_dump_settings"]["net_name"]
  182. assert os.path.isdir(dump_path)
  183. for rank_id in range(num_card):
  184. rank_path = os.path.join(dump_path, "rank_"+str(rank_id))
  185. assert os.path.exists(rank_path)
  186. net_name_path = os.path.join(rank_path, net_name)
  187. assert os.path.exists(net_name_path)
  188. graph_path = os.path.join(rank_path, "graphs")
  189. assert os.path.exists(graph_path)
  190. execution_order_path = os.path.join(rank_path, "execution_order")
  191. assert os.path.exists(execution_order_path)
  192. for graph_id in range(num_graph):
  193. graph_id_path = os.path.join(net_name_path, str(graph_id))
  194. assert os.path.exists(graph_id_path)
  195. graph_pb_file = os.path.join(graph_path, "ms_output_trace_code_graph_" + str(graph_id) + ".pb")
  196. graph_ir_file = os.path.join(graph_path, "ms_output_trace_code_graph_" + str(graph_id) + ".ir")
  197. assert os.path.exists(graph_pb_file)
  198. assert os.path.exists(graph_ir_file)
  199. execution_order_file = os.path.join(execution_order_path, "ms_execution_order_graph_"
  200. + str(graph_id) + ".csv")
  201. assert os.path.exists(execution_order_file)
  202. for iteration_id in range(num_iteration):
  203. it_id_path = os.path.join(graph_id_path, str(iteration_id))
  204. assert os.path.isdir(it_id_path)
  205. def find_nth_pos(string, substring, n):
  206. start = string.find(substring)
  207. while n > 1 and start >= 0:
  208. start = string.find(substring, start + len(substring))
  209. n -= 1
  210. return start