|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- """
- Utils for testing offline debugger.
- """
-
- import os
- import tempfile
- import bisect
- import csv
- import numpy as np
-
-
def write_watchpoint_to_json(watchpoint_hits):
    """Collect the fields of a watchpoint hit into a plain dict.

    Args:
        watchpoint_hits: Object exposing ``name``, ``slot``, ``condition``,
            ``watchpoint_id``, ``parameters``, ``error_code``, ``rank_id``
            and ``root_graph_id`` attributes. Each element of ``parameters``
            exposes ``name``, ``disabled``, ``value``, ``hit`` and
            ``actual_value``.

    Returns:
        dict: The hit's attributes keyed by name. The ``'parameter'`` entry
        is a list holding one single-key dict per parameter, keyed
        ``parameter<index>`` (``parameter0``, ``parameter1``, ...).
    """
    def _parameter_fields(param):
        # Copy the attributes of one watchpoint parameter into a dict.
        return {
            'name': param.name,
            'disabled': param.disabled,
            'value': param.value,
            'hit': param.hit,
            'actual_value': param.actual_value,
        }

    hit_parameters = [
        {"parameter" + str(index): _parameter_fields(param)}
        for index, param in enumerate(watchpoint_hits.parameters)
    ]
    return {
        'name': watchpoint_hits.name,
        'slot': watchpoint_hits.slot,
        'condition': watchpoint_hits.condition,
        'watchpoint_id': watchpoint_hits.watchpoint_id,
        'parameter': hit_parameters,
        'error_code': watchpoint_hits.error_code,
        'rank_id': watchpoint_hits.rank_id,
        'root_graph_id': watchpoint_hits.root_graph_id,
    }
-
def write_tensor_to_json(tensor_info, tensor_data):
    """Collect a tensor's metadata and raw bytes into a plain dict.

    Args:
        tensor_info: Object exposing ``node_name``, ``slot``, ``iteration``,
            ``rank_id``, ``root_graph_id`` and ``is_output`` attributes.
        tensor_data: Object exposing ``data_ptr`` (a buffer), ``data_size``,
            ``dtype`` and ``shape`` attributes.

    Returns:
        dict: ``{'tensor_info': {...}, 'tensor_data': {...}}`` where
        ``tensor_data['data']`` is the first ``data_size`` bytes of
        ``data_ptr`` as a list of unsigned 8-bit integers.
    """
    raw_buffer = tensor_data.data_ptr
    declared_size = tensor_data.data_size

    # Decode the buffer first; the size comparison below is only a diagnostic.
    byte_values = np.frombuffer(raw_buffer, np.uint8, declared_size).tolist()

    actual_size = len(raw_buffer)
    if declared_size != actual_size:
        print(f"The python byte size of {actual_size!s}"
              f" does not match the C++ byte size of {declared_size!s}\n")

    info_fields = {
        'node_name': tensor_info.node_name,
        'slot': tensor_info.slot,
        'iteration': tensor_info.iteration,
        'rank_id': tensor_info.rank_id,
        'root_graph_id': tensor_info.root_graph_id,
        'is_output': tensor_info.is_output,
    }
    data_fields = {
        'data': byte_values,
        'size_in_bytes': tensor_data.data_size,
        'debugger_dtype': tensor_data.dtype,
        'shape': tensor_data.shape,
    }
    return {'tensor_info': info_fields, 'tensor_data': data_fields}
-
def _iteration_sort_key(iteration):
    """Return a sort key that orders numeric iteration labels by value.

    Labels that cannot be parsed as integers sort after all numeric ones,
    in plain string order.
    """
    try:
        return (0, int(iteration), "")
    except ValueError:
        return (1, 0, iteration)


def build_dump_structure(path, tensor_name_list, tensor_list, net_name, tensor_info_list):
    """Build dump file structure from tensor_list.

    A fresh temporary directory (prefixed with ``net_name``) is created under
    ``path``. Each tensor is saved with ``np.save`` to
    ``<temp_dir>/rank_<rank_id>/<net_name>/<root_graph_id>/<iteration>/``
    as ``<tensor_name>.<input|output>.<slot>.DefaultFormat.npy``. The
    iterations seen per rank and root graph are then written out through
    ``build_global_execution_order``.

    Args:
        path: Existing directory in which the temporary dump directory is created.
        tensor_name_list: Names used as the file-name stem for each tensor.
        tensor_list: Arrays to save, parallel to ``tensor_name_list``.
        net_name: Network name; used as temp-dir prefix and as a path component.
        tensor_info_list: Objects parallel to ``tensor_list`` exposing ``slot``,
            ``iteration``, ``rank_id``, ``root_graph_id`` and ``is_output``.

    Returns:
        str: Path of the created temporary dump directory.
    """
    ranks_run_history = {}
    temp_dir = tempfile.mkdtemp(prefix=net_name, dir=path)
    for tensor_name, tensor, tensor_info in zip(tensor_name_list, tensor_list, tensor_info_list):
        slot = str(tensor_info.slot)
        iteration = str(tensor_info.iteration)
        rank_id = str(tensor_info.rank_id)
        root_graph_id = str(tensor_info.root_graph_id)
        direction = "output" if str(tensor_info.is_output) == "True" else "input"

        # Record each iteration once per (rank, root graph); ordering is
        # applied after the loop so it can be numeric rather than lexicographic.
        iterations = ranks_run_history.setdefault(rank_id, {}).setdefault(root_graph_id, [])
        if iteration not in iterations:
            iterations.append(iteration)

        # Use a dedicated local so the ``path`` argument is not shadowed.
        dump_dir = os.path.join(temp_dir, "rank_" + rank_id, net_name, root_graph_id, iteration)
        os.makedirs(dump_dir, exist_ok=True)
        file_name = f'{tensor_name}.{direction}.{slot}.DefaultFormat.npy'
        np.save(os.path.join(dump_dir, file_name), tensor)

    # Sorting the stringified iterations directly would put "10" before "2".
    for graphs_run_history in ranks_run_history.values():
        for iterations in graphs_run_history.values():
            iterations.sort(key=_iteration_sort_key)

    build_global_execution_order(temp_dir, ranks_run_history)
    return temp_dir


def build_global_execution_order(path, ranks_run_history):
    """Build global execution order.

    For every rank and graph in ``ranks_run_history`` a file
    ``<path>/rank_<rank_id>/execution_order/ms_global_execution_order_graph_<graph>.csv``
    is written containing one iteration per line, in the order given.

    Args:
        path: Root dump directory.
        ranks_run_history: Mapping ``rank_id -> {graph_id -> [iteration, ...]}``.
    """
    for rank_id, graphs_run_history in ranks_run_history.items():
        exec_order_path = os.path.join(path, f"rank_{rank_id}", "execution_order")
        os.makedirs(exec_order_path, exist_ok=True)
        for graph, iterations in graphs_run_history.items():
            full_path = os.path.join(exec_order_path, f"ms_global_execution_order_graph_{graph}.csv")
            with open(full_path, 'w+', newline='') as csv_file:
                writer = csv.writer(csv_file)
                # writerows() treats each item as a row of fields; wrapping each
                # iteration in a list keeps "12" from being written as "1,2".
                writer.writerows([iteration] for iteration in iterations)
|