From 7b9a73fb1b6f8f9c4828f4307bd472abd5c66bf9 Mon Sep 17 00:00:00 2001 From: Parastoo Ashtari Date: Tue, 27 Apr 2021 15:03:28 -0400 Subject: [PATCH] Fix multi-graph suspend for debugger on GPU. Removed the suspend from the PreExecute function to avoid double stopping in multi-graph models; replaced else if with else in PostExecute; added an else if to check the smoke Ascend test; improved the format. --- mindspore/ccsrc/backend/session/gpu_session.cc | 2 +- mindspore/ccsrc/debug/debugger/debugger.cc | 15 +++++++++------ mindspore/ccsrc/debug/debugger/debugger.h | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index 9d79ea19dc..9b44a46b3c 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -443,7 +443,7 @@ void GPUSession::PostExecuteGraph(const std::shared_ptr &kernel_gra DumpJsonParser::GetInstance().UpdateDumpIter(); } if (debugger_) { - debugger_->PostExecute(); + debugger_->PostExecute(kernel_graph); } } diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc index 008c23165a..e7f29727e7 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.cc +++ b/mindspore/ccsrc/debug/debugger/debugger.cc @@ -328,9 +328,6 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) { graph_ptr_ = dbg_graph_ptr; SendMultiGraphsAndSuspend(graph_proto_list_, graph_sum); graph_proto_list_.clear(); - } else if (graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) { - // stop only when receive the first sub run graph for each step - CommandLoop(); } } } @@ -343,7 +340,7 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) { } } -void Debugger::PostExecute() { +void Debugger::PostExecute(const KernelGraphPtr &graph_ptr) { // access lock for public method std::lock_guard a_lock(access_lock_); if 
(pipeline::ExecutorPy::GetDebugTerminate()) { @@ -355,9 +352,15 @@ void Debugger::PostExecute() { if (device_target_ != kGPUDevice) { num_step_++; } - SendWatchpoints(CheckWatchpoints()); - CommandLoop(); + if (graph_ptr != nullptr && device_target_ == kGPUDevice) { + auto graph_id = graph_ptr->graph_id(); + if (graph_id == rungraph_id_list_.front()) { + CommandLoop(); + } + } else { + CommandLoop(); + } } // Only keep parameters in the current map debug_services_->ResetLoadedTensors(); diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h index db6eda97b2..b53b087a08 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.h +++ b/mindspore/ccsrc/debug/debugger/debugger.h @@ -80,7 +80,7 @@ class Debugger : public std::enable_shared_from_this { // analyze tensors and wait for command // don't need a graph_ptr because it is saved during pre_execute - void PostExecute(); + void PostExecute(const KernelGraphPtr &graph_ptr = nullptr); bool ReadNodeDataRequired(const CNodePtr &kernel);