Browse Source

Support multi-network dump on GPU

tags/v1.6.0
Parastoo Ashtari 4 years ago
parent
commit
7c9ecff30b
4 changed files with 21 additions and 6 deletions
  1. +9
    -1
      mindspore/ccsrc/debug/data_dump/e2e_dump.cc
  2. +3
    -0
      mindspore/ccsrc/debug/data_dump/e2e_dump.h
  3. +7
    -5
      mindspore/ccsrc/debug/debugger/debugger.cc
  4. +2
    -0
      mindspore/ccsrc/debug/debugger/debugger.h

+ 9
- 1
mindspore/ccsrc/debug/data_dump/e2e_dump.cc View File

@@ -279,7 +279,9 @@ void E2eDump::UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_m
if (IsDeviceTargetGPU()) { if (IsDeviceTargetGPU()) {
if (starting_graph_id == INT32_MAX) { if (starting_graph_id == INT32_MAX) {
starting_graph_id = graph_id; starting_graph_id = graph_id;
} else if (starting_graph_id == graph_id) {
} else if (starting_graph_id == graph_id && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
// With the MindRT runtime enabled, the dump iteration is updated by UpdateIterGPUDump() instead.
// This branch updates the dump iteration for the old GPU runtime only.
dump_json_parser.UpdateDumpIter(); dump_json_parser.UpdateDumpIter();
} }
return; return;
@@ -307,6 +309,12 @@ void E2eDump::DumpSetup(const session::KernelGraph *graph, uint32_t rank_id) {
} }
} }


// Advances the dump iteration counter held by the DumpJsonParser singleton.
// Does nothing while starting_graph_id still equals INT32_MAX.
void E2eDump::UpdateIterGPUDump() {
  if (starting_graph_id == INT32_MAX) {
    return;
  }
  auto &&json_parser = DumpJsonParser::GetInstance();
  json_parser.UpdateDumpIter();
}

void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) { void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
bool success = false; bool success = false;


+ 3
- 0
mindspore/ccsrc/debug/data_dump/e2e_dump.h View File

@@ -36,6 +36,9 @@ class E2eDump {
E2eDump() = default; E2eDump() = default;
~E2eDump() = default; ~E2eDump() = default;
static void DumpSetup(const session::KernelGraph *graph, uint32_t rank_id); static void DumpSetup(const session::KernelGraph *graph, uint32_t rank_id);

static void UpdateIterGPUDump();

static void DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger = nullptr); static void DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger = nullptr);


static bool DumpParametersAndConstData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger); static bool DumpParametersAndConstData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger);


+ 7
- 5
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -274,6 +274,7 @@ void Debugger::Reset() {
debug_services_ = nullptr; debug_services_ = nullptr;
graph_proto_list_.clear(); graph_proto_list_.clear();
graph_ptr_list_.clear(); graph_ptr_list_.clear();
graph_ptr_step_vec_.clear();
MS_LOG(INFO) << "Release Debugger resource."; MS_LOG(INFO) << "Release Debugger resource.";
} }


@@ -282,6 +283,9 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
if (device_target_ != kGPUDevice) { if (device_target_ != kGPUDevice) {
return; return;
} }
E2eDump::UpdateIterGPUDump();
// Store graphs that are run in one step.
graph_ptr_step_vec_ = graphs;
for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) { for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) {
const auto &graph = graphs[graph_index]; const auto &graph = graphs[graph_index];
if (debugger_) { if (debugger_) {
@@ -430,15 +434,13 @@ void Debugger::PostExecuteGraphDebugger() {
return; return;
} }
// LoadParametersAndConst for all the graphs // LoadParametersAndConst for all the graphs
if (debugger_) {
for (auto graph : graph_ptr_list_) {
debugger_->LoadParametersAndConst(graph);
}
for (auto graph : graph_ptr_step_vec_) {
debugger_->LoadParametersAndConst(graph);
} }
// debug used for dump // debug used for dump
if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) { if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) {
// Dump Parameters and consts // Dump Parameters and consts
for (auto graph : graph_ptr_list_) {
for (auto graph : graph_ptr_step_vec_) {
debugger_->Dump(graph); debugger_->Dump(graph);
if (!debugger_->debugger_enabled()) { if (!debugger_->debugger_enabled()) {
debugger_->ClearCurrentData(); debugger_->ClearCurrentData();


+ 2
- 0
mindspore/ccsrc/debug/debugger/debugger.h View File

@@ -265,6 +265,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {


std::list<GraphProto> graph_proto_list_; std::list<GraphProto> graph_proto_list_;
std::list<KernelGraphPtr> graph_ptr_list_; std::list<KernelGraphPtr> graph_ptr_list_;
// The vector of graph pointers that have been run in the current step.
std::vector<KernelGraphPtr> graph_ptr_step_vec_;


// singleton // singleton
static std::mutex instance_lock_; static std::mutex instance_lock_;


Loading…
Cancel
Save