Browse Source

Debugger support ascend mindrt

tags/v1.6.0
Parastoo Ashtari 4 years ago
parent
commit
76f074b77e
6 changed files with 54 additions and 25 deletions
  1. +6
    -1
      mindspore/ccsrc/debug/data_dump/e2e_dump.cc
  2. +14
    -7
      mindspore/ccsrc/debug/debug_services.cc
  3. +28
    -12
      mindspore/ccsrc/debug/debugger/debugger.cc
  4. +1
    -1
      mindspore/ccsrc/debug/debugger/debugger.h
  5. +4
    -1
      mindspore/ccsrc/debug/debugger/debugger_utils.cc
  6. +1
    -3
      mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc

+ 6
- 1
mindspore/ccsrc/debug/data_dump/e2e_dump.cc View File

@@ -380,7 +380,12 @@ void E2eDump::DumpSetup(const session::KernelGraph *graph) {
}
}

void E2eDump::UpdateIterGPUDump() { DumpJsonParser::GetInstance().UpdateDumpIter(); }
// Calls DumpJsonParser::UpdateDumpIter() when the device target is GPU;
// returns without doing anything on any other target.
void E2eDump::UpdateIterGPUDump() {
// Guard: skip the iteration update unless IsDeviceTargetGPU() reports GPU.
if (!IsDeviceTargetGPU()) {
return;
}
DumpJsonParser::GetInstance().UpdateDumpIter();
}

void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
auto &json_parser = DumpJsonParser::GetInstance();


+ 14
- 7
mindspore/ccsrc/debug/debug_services.cc View File

@@ -770,7 +770,8 @@ void DebugServices::ProcessConvertToHostFormat(const std::vector<std::string> &f
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d_handle);
return;
} else if (S_ISREG(st.st_mode)) {
}
if (S_ISREG(st.st_mode)) {
std::string candidate = dir->d_name;
for (const std::string &file_to_find : files_after_convert_in_dir) {
std::string file_n = file_to_find;
@@ -888,7 +889,8 @@ void DebugServices::ProcessConvertList(const std::string &prefix_dump_file_name,
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
} else if (!(S_ISREG(st.st_mode))) {
}
if (!(S_ISREG(st.st_mode))) {
continue;
}
std::string file_name = dir->d_name;
@@ -991,7 +993,8 @@ std::vector<uint32_t> DebugServices::GetDumpRankIdList() {
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d_handle);
return rank_id_list;
} else if (S_ISDIR(st.st_mode)) {
}
if (S_ISDIR(st.st_mode)) {
std::string rank_dir_name = dir->d_name;
if (GetRankOrGraphId("rank", rank_dir_name) != UINT32_MAX) {
rank_id_list.push_back(GetRankOrGraphId("rank", rank_dir_name));
@@ -1022,7 +1025,8 @@ void DebugServices::CheckDumpGraphIdList(std::vector<uint32_t> rank_id_list) {
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d_handle_rank);
return;
} else if (S_ISDIR(st.st_mode)) {
}
if (S_ISDIR(st.st_mode)) {
std::string graph_dir = direc->d_name;
if (graph_dir == "." || graph_dir == "..") {
continue;
@@ -1266,7 +1270,8 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
} else if (S_ISREG(st.st_mode)) {
}
if (S_ISREG(st.st_mode)) {
std::string file_name = dir->d_name;
std::string stripped_file_name = GetStrippedFilename(file_name);
if (stripped_file_name.empty()) {
@@ -1408,7 +1413,8 @@ void DebugServices::ProcessTensorDataSync(const std::vector<std::tuple<std::stri
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
} else if (S_ISREG(st.st_mode)) {
}
if (S_ISREG(st.st_mode)) {
std::string file_name = dir->d_name;
for (auto &node : proto_to_dump) {
std::string dump_name = std::get<1>(node);
@@ -1641,7 +1647,8 @@ void DebugServices::AddOpOverflowOpNames(const std::string overflow_bin_path, st
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
} else if (S_ISREG(st.st_mode)) {
}
if (S_ISREG(st.st_mode)) {
// form fully qualified filename
std::string file_path = name;
std::string file_name = dir->d_name;


+ 28
- 12
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -199,7 +199,7 @@ void Debugger::EnableDebugger() {

void Debugger::CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {
bool sink_mode = ConfigManager::GetInstance().dataset_mode() || graph_ptr->IsDatasetGraph();
if (CheckDebuggerDumpEnabled() && sink_mode) {
if (CheckDebuggerDumpEnabled() && sink_mode && device_target_ == kGPUDevice) {
MS_EXCEPTION(NotSupportError)
<< "e2e_dump is not supported on GPU with dataset_sink_mode=True. Please set dataset_sink_mode=False";
}
@@ -212,8 +212,11 @@ void Debugger::CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {

bool Debugger::CheckDebuggerDumpEnabled() const {
// see if dump is enabled
auto &dump_json_parser = DumpJsonParser::GetInstance();
if (device_target_ == kGPUDevice) {
return device::KernelRuntime::DumpDataEnabled();
return dump_json_parser.e2e_dump_enabled();
} else if (device_target_ == kAscendDevice) {
return dump_json_parser.async_dump_enabled() || dump_json_parser.e2e_dump_enabled();
}
return false;
}
@@ -281,8 +284,8 @@ void Debugger::Reset() {
}

void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs) {
// Only GPU is supported for MindRTBackend
if (device_target_ != kGPUDevice) {
// MindRTBackend for GPU and Ascend
if (device_target_ == kCPUDevice) {
return;
}
// Store graphs that are run in one step.
@@ -421,7 +424,11 @@ uint32_t Debugger::GetRankID() {
return rank_id;
}

void Debugger::Dump(const KernelGraphPtr &kernel_graph) const {
void Debugger::DumpGPU(const KernelGraphPtr &kernel_graph) const {
// only for GPU mindrt
if (device_target_ != kGPUDevice) {
return;
}
uint32_t rank_id = GetRankID();
E2eDump::DumpRunIter(kernel_graph, rank_id);
if (debugger_ && debugger_->DebuggerBackendEnabled()) {
@@ -469,21 +476,26 @@ void Debugger::PostExecuteGraphDebugger() {
DumpJsonParser::GetInstance().UpdateDumpIter();
return;
}
// Only GPU is supported for MindRTBackend
if (device_target_ != kGPUDevice) {
return;
for (const auto &graph_ptr : debugger_->GetGraphPtrList()) {
if (device_target_ == kAscendDevice) {
debugger_->SetGraphPtr(graph_ptr);
// load output for Ascend
debugger_->LoadGraphOutputs();
// load parameters for Ascend
debugger_->LoadParametersAndConst();
}
}
// LoadParametersAndConst for all the graphs that have been run in the current step
if (debugger_) {
if (debugger_ && device_target_ == kGPUDevice) {
for (auto graph : graph_ptr_step_vec_) {
debugger_->LoadParametersAndConst(graph);
}
}
// debug used for dump
if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) {
if (debugger_ && debugger_->CheckDebuggerDumpEnabled() && device_target_ == kGPUDevice) {
// Dump Parameters and consts
for (auto graph : graph_ptr_step_vec_) {
debugger_->Dump(graph);
debugger_->DumpGPU(graph);
if (!debugger_->debugger_enabled()) {
debugger_->ClearCurrentData();
}
@@ -521,7 +533,11 @@ void Debugger::PostExecute() {
// GPU ResetLoadedTensors for old runtime happens in preExecute
if ((device_target_ == kGPUDevice && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) ||
device_target_ == kAscendDevice) {
debug_services_->ResetLoadedTensors();
if (debug_services_ != nullptr) {
debug_services_->ResetLoadedTensors();
} else {
MS_LOG(ERROR) << "debug_services_ is nullptr";
}
}
}
}


+ 1
- 1
mindspore/ccsrc/debug/debugger/debugger.h View File

@@ -95,7 +95,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

static uint32_t GetRankID();

void Dump(const KernelGraphPtr &kernel_graph) const;
void DumpGPU(const KernelGraphPtr &kernel_graph) const;

void DumpSingleNode(const CNodePtr &node, uint32_t graph_id);



+ 4
- 1
mindspore/ccsrc/debug/debugger/debugger_utils.cc View File

@@ -172,7 +172,10 @@ std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {
std::string error_info = "";
bool sink_mode = ConfigManager::GetInstance().dataset_mode() || graph_ptr->IsDatasetGraph();
auto debugger = Debugger::GetInstance();
if (debugger->CheckDebuggerDumpEnabled() && sink_mode) {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
bool is_gpu = (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice);
if (debugger->CheckDebuggerDumpEnabled() && sink_mode && is_gpu) {
error_info = "e2e_dump is not supported on GPU with dataset_sink_mode=True. Please set dataset_sink_mode=False";
}
if (debugger->CheckDebuggerEnabled() && sink_mode) {


+ 1
- 3
mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc View File

@@ -557,6 +557,7 @@ bool AscendDeviceContext::ExecuteGraph(const KernelGraphPtr &graph) const {
}
#ifndef ENABLE_SECURITY
Dump(graph, GetRankID());
DumpSetup(graph);
#endif
#if defined(_WIN32) || defined(_WIN64)
auto end_time = std::chrono::steady_clock::now();
@@ -566,9 +567,6 @@ bool AscendDeviceContext::ExecuteGraph(const KernelGraphPtr &graph) const {
(void)gettimeofday(&end_time, nullptr);
uint64_t cost = kUSecondInSecond * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
cost += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
#ifndef ENABLE_SECURITY
DumpSetup(graph);
#endif
MS_LOG(INFO) << "Call MS Run Success in " << cost << " us";
#endif
} else {


Loading…
Cancel
Save