Browse Source

!15799 Fix multi-graph suspend for the debugger on GPU

From: @parastooashtari
Reviewed-by: @john_tzanakakis,@tom__chen
Signed-off-by: @john_tzanakakis
pull/15799/MERGE
mindspore-ci-bot Gitee 4 years ago
parent
commit
d7f9423e9f
3 changed files with 11 additions and 8 deletions
  1. +1
    -1
      mindspore/ccsrc/backend/session/gpu_session.cc
  2. +9
    -6
      mindspore/ccsrc/debug/debugger/debugger.cc
  3. +1
    -1
      mindspore/ccsrc/debug/debugger/debugger.h

+ 1
- 1
mindspore/ccsrc/backend/session/gpu_session.cc View File

@@ -443,7 +443,7 @@ void GPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_gra
DumpJsonParser::GetInstance().UpdateDumpIter();
}
if (debugger_) {
debugger_->PostExecute();
debugger_->PostExecute(kernel_graph);
}
}



+ 9
- 6
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -328,9 +328,6 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
graph_ptr_ = dbg_graph_ptr;
SendMultiGraphsAndSuspend(graph_proto_list_, graph_sum);
graph_proto_list_.clear();
} else if (graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) {
// stop only when receive the first sub run graph for each step
CommandLoop();
}
}
}
@@ -343,7 +340,7 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
}
}

void Debugger::PostExecute() {
void Debugger::PostExecute(const KernelGraphPtr &graph_ptr) {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
if (pipeline::ExecutorPy::GetDebugTerminate()) {
@@ -355,9 +352,15 @@ void Debugger::PostExecute() {
if (device_target_ != kGPUDevice) {
num_step_++;
}

SendWatchpoints(CheckWatchpoints());
CommandLoop();
if (graph_ptr != nullptr && device_target_ == kGPUDevice) {
auto graph_id = graph_ptr->graph_id();
if (graph_id == rungraph_id_list_.front()) {
CommandLoop();
}
} else {
CommandLoop();
}
}
// Only keep parameters in the current map
debug_services_->ResetLoadedTensors();


+ 1
- 1
mindspore/ccsrc/debug/debugger/debugger.h View File

@@ -80,7 +80,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

// analyze tensors and wait for command
// don't need a graph_ptr because it is saved during pre_execute
void PostExecute();
void PostExecute(const KernelGraphPtr &graph_ptr = nullptr);

bool ReadNodeDataRequired(const CNodePtr &kernel);



Loading…
Cancel
Save