You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dump_test_utils.py 7.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. Utils for testing dump feature.
  17. """
  18. import json
  19. import os
  20. async_dump_dict = {
  21. "common_dump_settings": {
  22. "dump_mode": 0,
  23. "path": "",
  24. "net_name": "Net",
  25. "iteration": "0",
  26. "input_output": 2,
  27. "kernels": ["Default/TensorAdd-op3"],
  28. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  29. "op_debug_mode": 0
  30. }
  31. }
  32. e2e_dump_dict = {
  33. "common_dump_settings": {
  34. "dump_mode": 0,
  35. "path": "",
  36. "net_name": "Net",
  37. "iteration": "0",
  38. "input_output": 0,
  39. "kernels": ["Default/Conv-op12"],
  40. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  41. "op_debug_mode": 0
  42. },
  43. "e2e_dump_settings": {
  44. "enable": True,
  45. "trans_flag": False
  46. }
  47. }
  48. async_dump_dict_2 = {
  49. "common_dump_settings": {
  50. "dump_mode": 0,
  51. "path": "/tmp/async_dump/test_async_dump_net_multi_layer_mode1",
  52. "net_name": "test",
  53. "iteration": "0",
  54. "input_output": 2,
  55. "kernels": [
  56. "default/TensorAdd-op10",
  57. "Gradients/Default/network-WithLossCell/_backbone-ReLUReduceMeanDenseRelu/dense-Dense/gradBiasAdd/"\
  58. "BiasAddGrad-op8",
  59. "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op5",
  60. "Default/optimizer-Momentum/tuple_getitem-op29",
  61. "Default/optimizer-Momentum/ApplyMomentum-op12"
  62. ],
  63. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  64. "op_debug_mode": 0
  65. }
  66. }
  67. e2e_dump_dict_2 = {
  68. "common_dump_settings": {
  69. "dump_mode": 0,
  70. "path": "",
  71. "net_name": "Net",
  72. "iteration": "all",
  73. "input_output": 0,
  74. "kernels": ["Default/Conv-op12"],
  75. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  76. "op_debug_mode": 0
  77. },
  78. "e2e_dump_settings": {
  79. "enable": True,
  80. "trans_flag": False
  81. }
  82. }
  83. async_dump_dict_3 = {
  84. "common_dump_settings": {
  85. "dump_mode": 0,
  86. "path": "",
  87. "net_name": "Net",
  88. "iteration": "all",
  89. "input_output": 2,
  90. "kernels": ["Default/TensorAdd-op3"],
  91. "support_device": [0, 1, 2, 3, 4, 5, 6, 7],
  92. "op_debug_mode": 0
  93. }
  94. }
  95. def generate_dump_json(dump_path, json_file_name, test_key):
  96. """
  97. Util function to generate dump configuration json file.
  98. """
  99. if test_key == "test_async_dump":
  100. data = async_dump_dict
  101. data["common_dump_settings"]["path"] = dump_path
  102. elif test_key == "test_e2e_dump":
  103. data = e2e_dump_dict
  104. data["common_dump_settings"]["path"] = dump_path
  105. elif test_key == "test_async_dump_net_multi_layer_mode1":
  106. data = async_dump_dict_2
  107. data["common_dump_settings"]["path"] = dump_path
  108. elif test_key in ("test_GPU_e2e_multi_root_graph_dump", "test_Ascend_e2e_multi_root_graph_dump"):
  109. data = e2e_dump_dict_2
  110. data["common_dump_settings"]["path"] = dump_path
  111. elif test_key == "test_Ascend_async_multi_root_graph_dump":
  112. data = async_dump_dict_3
  113. data["common_dump_settings"]["path"] = dump_path
  114. elif test_key == "test_async_dump_file_format":
  115. data = async_dump_dict
  116. data["common_dump_settings"]["path"] = dump_path
  117. data["common_dump_settings"]["file_format"] = "npy"
  118. else:
  119. raise ValueError(
  120. "Failed to generate dump json file. The test name value " + test_key + " is invalid.")
  121. with open(json_file_name, 'w') as f:
  122. json.dump(data, f)
  123. def generate_dump_json_with_overflow(dump_path, json_file_name, test_key, op):
  124. """
  125. Util function to generate dump configuration json file.
  126. """
  127. if test_key == "test_async_dump":
  128. data = async_dump_dict
  129. data["common_dump_settings"]["path"] = dump_path
  130. data["common_dump_settings"]["op_debug_mode"] = op
  131. else:
  132. raise ValueError(
  133. "Failed to generate dump json file. Overflow only support in async dump")
  134. with open(json_file_name, 'w') as f:
  135. json.dump(data, f)
  136. def generate_statistic_dump_json(dump_path, json_file_name, test_key, saved_data):
  137. """
  138. Util function to generate dump configuration json file for statistic dump.
  139. """
  140. if test_key == "test_gpu_e2e_dump":
  141. data = e2e_dump_dict
  142. data["common_dump_settings"]["path"] = dump_path
  143. data["common_dump_settings"]["saved_data"] = saved_data
  144. else:
  145. raise ValueError(
  146. "Failed to generate statistic dump json file. The test name value " + test_key + " is invalid.")
  147. with open(json_file_name, 'w') as f:
  148. json.dump(data, f)
  149. def generate_cell_dump_json(dump_path, json_file_name, test_key, dump_mode):
  150. """
  151. Util function to generate dump configuration json file.
  152. """
  153. if test_key == "test_async_dump":
  154. data = async_dump_dict
  155. data["common_dump_settings"]["path"] = dump_path
  156. data["common_dump_settings"]["dump_mode"] = dump_mode
  157. else:
  158. raise ValueError(
  159. "Failed to generate dump json file. Overflow only support in async dump")
  160. with open(json_file_name, 'w') as f:
  161. json.dump(data, f)
  162. def check_dump_structure(dump_path, json_file_path, num_card, num_graph, num_iteration):
  163. """
  164. Util to check if the dump structure is correct.
  165. """
  166. with open(json_file_path) as f:
  167. data = json.load(f)
  168. net_name = data["common_dump_settings"]["net_name"]
  169. assert os.path.isdir(dump_path)
  170. for rank_id in range(num_card):
  171. rank_path = os.path.join(dump_path, "rank_"+str(rank_id))
  172. assert os.path.exists(rank_path)
  173. net_name_path = os.path.join(rank_path, net_name)
  174. assert os.path.exists(net_name_path)
  175. graph_path = os.path.join(rank_path, "graphs")
  176. assert os.path.exists(graph_path)
  177. execution_order_path = os.path.join(rank_path, "execution_order")
  178. assert os.path.exists(execution_order_path)
  179. for graph_id in range(num_graph):
  180. graph_id_path = os.path.join(net_name_path, str(graph_id))
  181. assert os.path.exists(graph_id_path)
  182. graph_pb_file = os.path.join(graph_path, "ms_output_trace_code_graph_" + str(graph_id) + ".pb")
  183. graph_ir_file = os.path.join(graph_path, "ms_output_trace_code_graph_" + str(graph_id) + ".ir")
  184. assert os.path.exists(graph_pb_file)
  185. assert os.path.exists(graph_ir_file)
  186. execution_order_file = os.path.join(execution_order_path, "ms_execution_order_graph_"
  187. + str(graph_id) + ".csv")
  188. assert os.path.exists(execution_order_file)
  189. for iteration_id in range(num_iteration):
  190. it_id_path = os.path.join(graph_id_path, str(iteration_id))
  191. assert os.path.isdir(it_id_path)
  192. def find_nth_pos(string, substring, n):
  193. start = string.find(substring)
  194. while n > 1 and start >= 0:
  195. start = string.find(substring, start + len(substring))
  196. n -= 1
  197. return start