You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dump_test_utils.py 3.0 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. Utils for testing offline debugger.
  17. """
  18. import os
  19. import tempfile
  20. import bisect
  21. import csv
  22. import numpy as np
  def build_dump_structure(path, tensor_name_list, tensor_list, net_name, tensor_info_list):
      """Build a dump directory tree on disk from in-memory tensors.

      A fresh temporary directory (prefixed with ``net_name``) is created under
      ``path``. Each tensor is saved as a ``.npy`` file inside
      ``<temp_dir>/rank_<rank_id>/<net_name>/<root_graph_id>/<iteration>/``.
      The iterations seen for every (rank, root graph) pair are collected and
      passed to ``build_global_execution_order`` once all tensors are written.

      Args:
          path: Directory in which the temporary dump root is created.
          tensor_name_list: File-name prefixes, one per tensor.
          tensor_list: Arrays to store with ``np.save``, one per tensor.
          net_name: Used both as the temp-dir prefix and as a path component.
          tensor_info_list: Objects exposing ``slot``, ``iteration``,
              ``rank_id``, ``root_graph_id`` and ``is_output`` attributes,
              one per tensor.

      The three lists are zipped together, so extra entries in a longer list
      are ignored.

      Returns:
          Path of the temporary dump root directory.
      """
      ranks_run_history = {}
      temp_dir = tempfile.mkdtemp(prefix=net_name, dir=path)
      for tensor_name, tensor, tensor_info in zip(tensor_name_list, tensor_list, tensor_info_list):
          slot = str(tensor_info.slot)
          iteration = str(tensor_info.iteration)
          rank_id = str(tensor_info.rank_id)
          root_graph_id = str(tensor_info.root_graph_id)
          is_output = str(tensor_info.is_output)

          # Fetch the history for *this* rank on every pass. Reusing the dict
          # bound when a rank was first seen would update the wrong rank's
          # history whenever tensors from different ranks are interleaved.
          graphs_run_history = ranks_run_history.setdefault(rank_id, {})
          iterations = graphs_run_history.setdefault(root_graph_id, [])
          if iteration not in iterations:
              # NOTE(review): iterations are kept as strings, so the sorted
              # order is lexicographic ("10" before "2") - confirm whether
              # numeric order is intended.
              bisect.insort(iterations, iteration)

          # Separate local name so the ``path`` argument is not overwritten.
          dump_dir = os.path.join(temp_dir, "rank_" + rank_id, net_name, root_graph_id, iteration)
          os.makedirs(dump_dir, exist_ok=True)
          direction = "output" if is_output == "True" else "input"
          fd, _ = tempfile.mkstemp(prefix=tensor_name,
                                   suffix="." + direction + "." + slot + ".DefaultFormat.npy",
                                   dir=dump_dir)
          # mkstemp hands back an already-open descriptor; write through it and
          # close it so one descriptor is not leaked per tensor.
          with os.fdopen(fd, "wb") as npy_file:
              np.save(npy_file, tensor)
      build_global_execution_order(temp_dir, ranks_run_history)
      return temp_dir
  def build_global_execution_order(path, ranks_run_history):
      """Write one execution-order CSV file per (rank, graph) pair.

      For every rank in ``ranks_run_history`` the directory
      ``<path>/rank_<rank_id>/execution_order`` is created if it does not
      exist. For each graph of that rank, the file
      ``ms_global_execution_order_graph_<graph>.csv`` is written inside it
      with one iteration per row, in the order given.

      Args:
          path: Root directory that holds the ``rank_<rank_id>`` folders.
          ranks_run_history: Mapping of rank id (str) to a mapping of graph id
              (str) to the list of iterations to record.
      """
      for rank_id, graphs_run_history in ranks_run_history.items():
          exec_order_path = os.path.join(path, "rank_" + rank_id, "execution_order")
          os.makedirs(exec_order_path, exist_ok=True)
          for graph, iterations in graphs_run_history.items():
              full_path = os.path.join(exec_order_path, "ms_global_execution_order_graph_" + graph + ".csv")
              with open(full_path, 'w', newline='') as csv_file:
                  writer = csv.writer(csv_file)
                  # writerows expects an iterable of rows. Passing bare strings
                  # makes every character its own field ("12" -> "1,2"), so
                  # wrap each iteration in a single-field row.
                  writer.writerows([iteration] for iteration in iterations)