Browse Source

Change fullname_with_scope in dump and online debugger

tags/v1.4.0
TinaMengtingZhang 4 years ago
parent
commit
b9e391b6ae
11 changed files with 53 additions and 36 deletions
  1. +9
    -0
      mindspore/ccsrc/debug/anf_ir_utils.cc
  2. +2
    -0
      mindspore/ccsrc/debug/anf_ir_utils.h
  3. +5
    -4
      mindspore/ccsrc/debug/data_dump/cpu_e2e_dump.cc
  4. +5
    -4
      mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
  5. +4
    -2
      mindspore/ccsrc/debug/data_dump/dump_utils.cc
  6. +9
    -13
      mindspore/ccsrc/debug/data_dump/e2e_dump.cc
  7. +3
    -2
      mindspore/ccsrc/debug/debug_services.cc
  8. +6
    -5
      mindspore/ccsrc/debug/debugger/debugger.cc
  9. +4
    -3
      mindspore/ccsrc/debug/debugger/debugger_utils.cc
  10. +4
    -2
      mindspore/ccsrc/debug/debugger/proto_exporter.cc
  11. +2
    -1
      mindspore/ccsrc/debug/dump_proto.cc

+ 9
- 0
mindspore/ccsrc/debug/anf_ir_utils.cc View File

@@ -46,6 +46,15 @@ using mindspore::tensor::TensorPy;

namespace mindspore {

// Resolve the name used to identify a node in dump and debugger output.
// Starts from the node's fullname_with_scope(); when that name is empty, the
// node's ToString() representation is used instead.
std::string GetKernelNodeName(const AnfNodePtr &anf_node) {
  auto node_name = anf_node->fullname_with_scope();
  if (node_name.empty()) {
    // No scoped name available: fall back to the string form of the node.
    node_name = anf_node->ToString();
  }
  MS_LOG(DEBUG) << "Full scope kernel name is " << node_name << ".";
  return node_name;
}

// ============================================= MindSpore IR Exporter =============================================

std::string AnfExporter::GetNodeType(const AnfNodePtr &nd) {


+ 2
- 0
mindspore/ccsrc/debug/anf_ir_utils.h View File

@@ -111,6 +111,8 @@ class AnfExporter {

void ExportIR(const std::string &filename, const FuncGraphPtr &func_graph);
void ExportIR(const std::string &filename, const std::vector<TaggedGraph> &graphs);

// Returns the node's fullname_with_scope(), or its ToString() value when that name is empty.
std::string GetKernelNodeName(const AnfNodePtr &anf_node);
} // namespace mindspore

#endif // MINDSPORE_CCSRC_DEBUG_ANF_IR_UTILS_H_

+ 5
- 4
mindspore/ccsrc/debug/data_dump/cpu_e2e_dump.cc View File

@@ -17,12 +17,13 @@
#include "debug/data_dump/cpu_e2e_dump.h"
#include <map>
#include "backend/session/anf_runtime_algorithm.h"
#include "debug/anf_ir_utils.h"

namespace mindspore {
void CPUE2eDump::DumpCNodeData(const CNodePtr &node, uint32_t graph_id) {
MS_EXCEPTION_IF_NULL(node);
auto &dump_json_parser = DumpJsonParser::GetInstance();
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
if (!dump_json_parser.NeedDump(kernel_name)) {
return;
}
@@ -40,7 +41,7 @@ void CPUE2eDump::DumpCNodeData(const CNodePtr &node, uint32_t graph_id) {

void CPUE2eDump::DumpCNodeInputs(const CNodePtr &node, const std::string &dump_path) {
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
MS_LOG(DEBUG) << "Start e2e dump CNode inputs data: " << kernel_name;
DumpJsonParser::GetInstance().MatchKernel(kernel_name);
DumpInputImpl(node, dump_path, &kernel_name);
@@ -48,7 +49,7 @@ void CPUE2eDump::DumpCNodeInputs(const CNodePtr &node, const std::string &dump_p

void CPUE2eDump::DumpCNodeOutputs(const CNodePtr &node, const std::string &dump_path) {
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
MS_LOG(DEBUG) << "Start e2e dump CNode outputs data: " << kernel_name;
DumpJsonParser::GetInstance().MatchKernel(kernel_name);
DumpOutputImpl(node, dump_path, &kernel_name);
@@ -113,7 +114,7 @@ void CPUE2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t outp
if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) {
return;
}
std::string node_name = anf_node->fullname_with_scope();
std::string node_name = GetKernelNodeName(anf_node);
std::string dump_name = node_name;
if (anf_node->isa<ValueNode>()) {
auto iter = const_map->find(node_name);


+ 5
- 4
mindspore/ccsrc/debug/data_dump/dump_json_parser.cc View File

@@ -21,6 +21,7 @@
#include "utils/convert_utils_base.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "debug/data_dump/npy_header.h"
#include "debug/anf_ir_utils.h"

namespace {
constexpr auto kCommonDumpSettings = "common_dump_settings";
@@ -544,15 +545,15 @@ void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *>
for (const auto &kernel : kernel_graph->execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) {
DumpJsonParser::GetInstance().NeedDump(GetKernelNodeName(kernel))) {
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
for (size_t i = 0; i < input_size; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
auto input = input_with_index.first;
if (input->isa<CNode>()) {
MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope()
<< " Input:" << input->fullname_with_scope();
update_kernels.try_emplace(input->fullname_with_scope(), 0);
MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << GetKernelNodeName(kernel)
<< " Input:" << GetKernelNodeName(input);
update_kernels.try_emplace(GetKernelNodeName(input), 0);
}
}
}


+ 4
- 2
mindspore/ccsrc/debug/data_dump/dump_utils.cc View File

@@ -20,6 +20,7 @@

#include "common/trans.h"
#include "utils/ms_context.h"
#include "debug/anf_ir_utils.h"
#include "debug/data_dump/dump_json_parser.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime_manager.h"
@@ -62,10 +63,11 @@ void SetConstNodeId(const AnfNodePtr &node, std::map<std::string, size_t> *const
if (!node->isa<ValueNode>()) {
return;
}
auto iter = const_map->find(node->fullname_with_scope());
std::string node_name = GetKernelNodeName(node);
auto iter = const_map->find(node_name);
if (iter == const_map->end()) {
auto const_idx = const_map->size() + 1;
(*const_map)[node->fullname_with_scope()] = const_idx;
(*const_map)[node_name] = const_idx;
}
}



+ 9
- 13
mindspore/ccsrc/debug/data_dump/e2e_dump.cc View File

@@ -22,6 +22,7 @@

#include "debug/data_dump/dump_json_parser.h"
#include "common/trans.h"
#include "debug/anf_ir_utils.h"
#include "debug/common.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/ms_context.h"
@@ -67,7 +68,7 @@ void E2eDump::DumpOutput(const session::KernelGraph *graph, const std::string &d
const auto &apply_kernels = graph->execution_order();
for (const auto &node : apply_kernels) {
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
if (!dump_json_parser.NeedDump(kernel_name)) {
continue;
}
@@ -83,7 +84,7 @@ void E2eDump::DumpOutputSingleNode(const CNodePtr &node, const std::string &dump
}
bool trans_flag = dump_json_parser.trans_flag();
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
if (!dump_json_parser.NeedDump(kernel_name)) {
return;
}
@@ -115,7 +116,7 @@ void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::s
std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
std::to_string(j);
if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, node->fullname_with_scope(), *addr, int_shapes, type, device_type, trans_flag, j,
DumpGPUMemToFile(file_path, GetKernelNodeName(node), *addr, int_shapes, type, device_type, trans_flag, j,
debugger);
} else {
DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
@@ -134,7 +135,7 @@ void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &du
const auto &apply_kernels = graph->execution_order();
for (const auto &node : apply_kernels) {
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
if (!dump_json_parser.NeedDump(kernel_name)) {
continue;
}
@@ -150,7 +151,7 @@ void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_
}
bool trans_flag = dump_json_parser.trans_flag();
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
if (!dump_json_parser.NeedDump(kernel_name)) {
return;
}
@@ -177,11 +178,11 @@ void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::st
size_t slot;
if (IsDeviceTargetGPU()) {
auto input_kernel = node->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
std::string input_kernel_name = GetKernelNodeName(input_kernel);
tensor_name = input_kernel_name;
slot = 0;
} else {
tensor_name = node->fullname_with_scope();
tensor_name = GetKernelNodeName(node);
slot = j;
}
ShapeVector int_shapes;
@@ -210,7 +211,7 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_
if ((!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) || IsValueNode<StringImm>(anf_node)) {
return;
}
std::string node_name = anf_node->fullname_with_scope();
std::string node_name = GetKernelNodeName(anf_node);
std::string dump_name = node_name;
if (anf_node->isa<ValueNode>()) {
auto iter = const_map->find(node_name);
@@ -220,11 +221,6 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_
dump_name = std::string("cst") + std::to_string(iter->second);
}

// Some parameter nodes have no name. Take the whole string value as the name when dumpping if it's missing.
if (dump_name.empty()) {
dump_name = anf_node->ToString();
}

if (!dump_json_parser.NeedDump(node_name)) {
return;
}


+ 3
- 2
mindspore/ccsrc/debug/debug_services.cc View File

@@ -26,6 +26,7 @@
#include <unordered_set>
#include "pybind11/embed.h"
#ifdef ONLINE_DBG_MODE
#include "debug/anf_ir_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
#endif
#include "debug/debugger/tensor_summary.h"
@@ -971,7 +972,7 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
for (size_t j = 0; j < input_size; ++j) {
auto input_kernel = kernel->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
std::string input_kernel_name = GetKernelNodeName(input_kernel);
auto found = w_name.find_last_of('/');
if (found != std::string::npos && w_name.substr(found + 1) == input_kernel_name) return true;
}
@@ -1030,7 +1031,7 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNod
MS_EXCEPTION_IF_NULL(kernel);
std::vector<std::shared_ptr<TensorData>> result;
auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
auto kernel_name = kernel->fullname_with_scope();
auto kernel_name = GetKernelNodeName(kernel);
for (size_t j = 0; j < output_size; ++j) {
auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j);
auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot);


+ 6
- 5
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -37,6 +37,7 @@
#include "utils/comm_manager.h"
#include "runtime/hardware/device_context_manager.h"
#include "debug/anf_ir_dump.h"
#include "debug/anf_ir_utils.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/proto_exporter.h"
#else
@@ -599,13 +600,13 @@ void Debugger::CheckDatasetGraph() {
// print parameter node names
const auto &params = graph_ptr_->inputs();
for (const auto &param : params) {
MS_LOG(INFO) << "param: " << param->fullname_with_scope();
MS_LOG(INFO) << "param: " << GetKernelNodeName(param);
}
// check if there is GetNext or InitDataSetQueue node
const auto &nodes = graph_ptr_->execution_order();
for (const auto &node : nodes) {
auto node_name = AnfAlgo::GetCNodeName(node);
MS_LOG(INFO) << "node: " << node->fullname_with_scope();
MS_LOG(INFO) << "node: " << GetKernelNodeName(node);
if (node_name == "GetNext" || node_name == "InitDataSetQueue") {
MS_LOG(INFO) << "Not enabling debugger for graph " << graph_ptr_->graph_id() << ": found dataset graph node "
<< node_name;
@@ -1294,7 +1295,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
}
// for parameters and value nodes, set its execution order to be 0;
int exec_order = 0;
std::string node_name = anf_node->fullname_with_scope();
std::string node_name = GetKernelNodeName(anf_node);
GetFileKernelName(NOT_NULL(&node_name));
// check if output addr exists, if not, return;
if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
@@ -1367,7 +1368,7 @@ void Debugger::LoadGraphOutputs() {
int exec_order = 1;
for (const auto &node : apply_kernels) {
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = node->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(node);
auto output_size = AnfAlgo::GetOutputTensorNum(node);
if (partial_memory_) {
if (!debug_services_->IsWatchPoint(kernel_name, node)) {
@@ -1376,7 +1377,7 @@ void Debugger::LoadGraphOutputs() {
}
for (size_t j = 0; j < output_size; ++j) {
if (!AnfAlgo::OutputAddrExist(node, j)) {
MS_LOG(INFO) << "Cannot find output addr for slot " << j << " for " << node->fullname_with_scope();
MS_LOG(INFO) << "Cannot find output addr for slot " << j << " for " << kernel_name;
continue;
}
auto addr = AnfAlgo::GetOutputAddr(node, j);


+ 4
- 3
mindspore/ccsrc/debug/debugger/debugger_utils.cc View File

@@ -19,6 +19,7 @@
#include <vector>
#include <memory>
#include <string>
#include "debug/anf_ir_utils.h"
#include "debug/debugger/debugger.h"
#include "runtime/device/gpu/gpu_device_address.h"
#include "debug/data_dump/dump_json_parser.h"
@@ -58,7 +59,7 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uin
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
for (size_t j = 0; j < input_size; ++j) {
auto input_kernel = cnode->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
std::string input_kernel_name = GetKernelNodeName(input_kernel);
auto addr = kernel_inputs[j];
auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
// For example, this happens with the Depend op
@@ -84,7 +85,7 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, ui
auto kernel_outputs = launch_info_->outputs_;
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
auto node_name = AnfAlgo::GetCNodeName(cnode);
std::string kernel_name = cnode->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(cnode);
std::vector<int> real_outputs = CheckRealOutput(node_name, output_size);

for (int j : real_outputs) {
@@ -116,7 +117,7 @@ bool CheckReadData(const CNodePtr &cnode) {
bool read_data = false;
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = debugger->DumpDataEnabledIteration();
std::string kernel_name = cnode->fullname_with_scope();
std::string kernel_name = GetKernelNodeName(cnode);
if (dump_enabled) {
auto dump_mode = dump_json_parser.dump_mode();
// dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list


+ 4
- 2
mindspore/ccsrc/debug/debugger/proto_exporter.cc View File

@@ -23,6 +23,7 @@
#include <utility>
#include <algorithm>

#include "debug/anf_ir_utils.h"
#include "debug/common.h"
#include "debug/debugger/debugger.h"
#include "debug/data_dump/dump_json_parser.h"
@@ -436,8 +437,9 @@ void DebuggerProtoExporter::ExportCNode(const FuncGraphPtr &func_graph, const CN
node_proto->set_scope(node->scope()->name());

// add full_name for debugger
node_proto->set_full_name(node->fullname_with_scope());
MS_LOG(INFO) << "full_name: " << node->fullname_with_scope();
std::string full_name = GetKernelNodeName(node);
node_proto->set_full_name(full_name);
MS_LOG(INFO) << "full_name: " << full_name;

std::ostringstream buffer;
auto traces = mindspore::trace::GetSourceLineList(node);


+ 2
- 1
mindspore/ccsrc/debug/dump_proto.cc View File

@@ -22,6 +22,7 @@
#include <utility>
#include <vector>

#include "debug/anf_ir_utils.h"
#include "debug/common.h"
#include "proto/anf_ir.pb.h"
#include "ir/graph_utils.h"
@@ -468,7 +469,7 @@ void ProtoExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &
GetOpNodeTypeAndAttrs(func_graph, op, node_proto);
node_proto->set_name(std::to_string(apply_idx));
node_proto->set_scope(node->scope()->name());
node_proto->set_full_name(node->fullname_with_scope());
node_proto->set_full_name(GetKernelNodeName(node));

// process OP inputs
for (size_t i = 1; i < inputs.size(); ++i) {


Loading…
Cancel
Save