Browse Source

!6198 [Data Dump] Bug fix

Merge pull request !6198 from caifubi/dump
tags/v1.0.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
fe735a0f83
9 changed files with 20 additions and 88 deletions
  1. +1
    -1
      config/data_dump.json
  2. +0
    -22
      config/e2e_dump_config.json
  3. +0
    -22
      config/e2e_dump_config_0.json
  4. +0
    -22
      config/e2e_dump_config_1.json
  5. +1
    -0
      mindspore/ccsrc/backend/session/ascend_session.cc
  6. +9
    -13
      mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
  7. +1
    -0
      mindspore/ccsrc/debug/data_dump/dump_json_parser.h
  8. +7
    -4
      mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc
  9. +1
    -4
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc

+ 1
- 1
config/data_dump.json View File

@@ -4,7 +4,7 @@
"path": "/test",
"net_name": "ResNet50",
"iteration": 0,
"input_output": 0,
"input_output": 2,
"kernels": ["Default/Conv-op12"],
"support_device": [0,1,2,3,4,5,6,7]
},


+ 0
- 22
config/e2e_dump_config.json View File

@@ -1,22 +0,0 @@
{
"DumpSettings": {
"enable": false,
"trans_flag": false,
"path": "/tmp/net/",
"net_name": "ResNet50",
"mode": 0,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},

"DumpSettingsSpec": {
"enable": "true: dump enable, false: dump disable",
"trans_flag": "true: trans to host format, false: not trans format",
"path": "the dump file folder",
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "0: all iterations, others: the specified iteration",
"kernels": "full scope names of the ops that need to be dumped"
},
"other": {}
}

+ 0
- 22
config/e2e_dump_config_0.json View File

@@ -1,22 +0,0 @@
{
"DumpSettings": {
"enable": false,
"trans_flag": false,
"path": "/tmp/hccllog/0",
"net_name": "ResNet50",
"mode": 0,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},

"DumpSettingsSpec": {
"enable": "true: dump enable, false: dump disable",
"trans_flag": "true: trans to host format, false: not trans format",
"path": "the dump file folder",
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "0: all iterations, others: the specified iteration",
"kernels": "full scope names of the ops that need to be dumped"
},
"other": {}
}

+ 0
- 22
config/e2e_dump_config_1.json View File

@@ -1,22 +0,0 @@
{
"DumpSettings": {
"enable": false,
"trans_flag": false,
"path": "/tmp/hccllog/1",
"net_name": "ResNet50",
"mode": 0,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},

"DumpSettingsSpec": {
"enable": "true: dump enable, false: dump disable",
"trans_flag": "true: trans to host format, false: not trans format",
"path": "the dump file folder",
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "0: all iterations, others: the specified iteration",
"kernels": "full scope names of the ops that need to be dumped"
},
"other": {}
}

+ 1
- 0
mindspore/ccsrc/backend/session/ascend_session.cc View File

@@ -442,6 +442,7 @@ void AscendSession::InitRuntimeResource() {
if (!runtime_instance->Init()) {
MS_LOG(EXCEPTION) << "Kernel runtime init error.";
}
DumpJsonParser::GetInstance().Parse();
MS_LOG(INFO) << "Finish!";
}



+ 9
- 13
mindspore/ccsrc/debug/data_dump/dump_json_parser.cc View File

@@ -74,6 +74,10 @@ bool DumpJsonParser::IsDumpEnabled() {

void DumpJsonParser::Parse() {
std::lock_guard<std::mutex> guard(lock_);
if (already_parsed_) {
return;
}
already_parsed_ = true;
if (!IsDumpEnabled()) {
return;
}
@@ -305,6 +309,8 @@ void DumpJsonParser::JudgeDumpEnabled() {
MS_LOG(WARNING) << "Dump not enabled. device_id:" << device_id << " not support";
}
context->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !e2e_dump_enabled_);
MS_LOG(INFO) << "Dump status, e2e_dump_enabled:" << e2e_dump_enabled_
<< " async_dump_enabled:" << async_dump_enabled_;
}

bool DumpJsonParser::NeedDump(const std::string &op_full_name) const {
@@ -325,6 +331,9 @@ void DumpJsonParser::MatchKernel(const std::string &kernel_name) {
}

void DumpJsonParser::PrintUnusedKernel() {
if (!e2e_dump_enabled_ && !async_dump_enabled_) {
return;
}
for (const auto &iter : kernels_) {
if (iter.second == 0) {
MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << iter.first;
@@ -362,16 +371,6 @@ bool DumpJsonParser::OutputNeedDump() const {
return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly;
}

// Returns whether `kernel` should be added to the async-dump kernel list.
//
// Only TBE, AICPU and AKG kernels are candidates; any other kernel type yields
// false. For a candidate kernel the decision is delegated to
// DumpJsonParser::NeedDump() using the kernel's full scope name.
//
// Raises (via MS_EXCEPTION_IF_NULL) when `kernel` is null.
bool NeedAsyncDump(const CNodePtr &kernel) {
  // Validate the pointer before its first use; the guard previously ran only
  // after the kernel had already been handed to AnfAlgo::GetKernelType().
  MS_EXCEPTION_IF_NULL(kernel);
  // Query the kernel type once instead of once per comparison.
  const auto kernel_type = AnfAlgo::GetKernelType(kernel);
  if (kernel_type != TBE_KERNEL && kernel_type != AICPU_KERNEL && kernel_type != AKG_KERNEL) {
    return false;
  }
  // Original note: dump all kernels if mode is set to 0 in data_dump.json
  // (that policy lives in NeedDump(), which is not visible here — confirm).
  return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope());
}

void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph) {
if (e2e_dump_enabled_) {
MS_LOG(INFO) << "E2e dump no need to update dump kernel list";
@@ -391,9 +390,6 @@ void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *>
update_kernels.try_emplace(input->fullname_with_scope(), 0);
}
}
} else if (NeedAsyncDump(kernel)) {
MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope();
update_kernels.try_emplace(kernel->fullname_with_scope(), 0);
}
}
kernels_.insert(update_kernels.begin(), update_kernels.end());


+ 1
- 0
mindspore/ccsrc/debug/data_dump/dump_json_parser.h View File

@@ -72,6 +72,7 @@ class DumpJsonParser {
uint32_t op_debug_mode_{0};
bool trans_flag_{false};
uint32_t cur_dump_iter_{0};
bool already_parsed_{false};

void ParseCommonDumpSetting(const nlohmann::json &content);
void ParseAsyncDumpSetting(const nlohmann::json &content);


+ 7
- 4
mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc View File

@@ -206,14 +206,17 @@ bool E2eDumpUtil::DumpData(const session::KernelGraph *graph, Debugger *debugger
}
}
MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);

std::string net_name = dump_json_parser.net_name();
std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
std::string dump_path = dump_json_parser.path();
if (dump_path.back() == '/') {
dump_path = dump_path + net_name + '/' + iterator;
} else {
dump_path = dump_path + '/' + net_name + '/' + iterator;
if (dump_path.back() != '/') {
dump_path += "/";
}
dump_path += (net_name + "/device_" + std::to_string(device_id) + "/iteration_" + iterator);
DumpInput(graph, dump_path, debugger);
DumpOutput(graph, dump_path, debugger);
DumpParameters(graph, dump_path, debugger);


+ 1
- 4
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc View File

@@ -206,11 +206,8 @@ bool AscendKernelRuntime::Init() {
SetContext();
return true;
}
bool ret = false;

DumpJsonParser::GetInstance().Parse();
// Start up profiling before rtSetDevice
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
bool ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) {
MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed.";
}


Loading…
Cancel
Save