Browse Source

support multi network dump in GPU

tags/v1.6.0
Parastoo Ashtari 4 years ago
parent
commit
7c9ecff30b
4 changed files with 21 additions and 6 deletions
  1. +9
    -1
      mindspore/ccsrc/debug/data_dump/e2e_dump.cc
  2. +3
    -0
      mindspore/ccsrc/debug/data_dump/e2e_dump.h
  3. +7
    -5
      mindspore/ccsrc/debug/debugger/debugger.cc
  4. +2
    -0
      mindspore/ccsrc/debug/debugger/debugger.h

+ 9
- 1
mindspore/ccsrc/debug/data_dump/e2e_dump.cc View File

@@ -279,7 +279,9 @@ void E2eDump::UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_m
if (IsDeviceTargetGPU()) {
if (starting_graph_id == INT32_MAX) {
starting_graph_id = graph_id;
} else if (starting_graph_id == graph_id) {
} else if (starting_graph_id == graph_id && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
// Update dump iter for mindrt runtime is done using UpdateIterGPUDump().
// Update dump iter for GPU old runtime.
dump_json_parser.UpdateDumpIter();
}
return;
@@ -307,6 +309,12 @@ void E2eDump::DumpSetup(const session::KernelGraph *graph, uint32_t rank_id) {
}
}

// Advances the dump iteration counter through DumpJsonParser, but only once
// starting_graph_id no longer holds INT32_MAX.
// NOTE(review): INT32_MAX looks like the "no starting graph recorded yet"
// sentinel (UpdateIterDumpSetup replaces it with a graph id on GPU) - confirm.
void E2eDump::UpdateIterGPUDump() {
  if (starting_graph_id == INT32_MAX) {
    // starting_graph_id is still at its sentinel value: leave the iteration untouched.
    return;
  }
  auto &json_parser = DumpJsonParser::GetInstance();
  json_parser.UpdateDumpIter();
}

void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph);
bool success = false;


+ 3
- 0
mindspore/ccsrc/debug/data_dump/e2e_dump.h View File

@@ -36,6 +36,9 @@ class E2eDump {
E2eDump() = default;
~E2eDump() = default;
static void DumpSetup(const session::KernelGraph *graph, uint32_t rank_id);

static void UpdateIterGPUDump();

static void DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger = nullptr);

static bool DumpParametersAndConstData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger);


+ 7
- 5
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -274,6 +274,7 @@ void Debugger::Reset() {
debug_services_ = nullptr;
graph_proto_list_.clear();
graph_ptr_list_.clear();
graph_ptr_step_vec_.clear();
MS_LOG(INFO) << "Release Debugger resource.";
}

@@ -282,6 +283,9 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
if (device_target_ != kGPUDevice) {
return;
}
E2eDump::UpdateIterGPUDump();
// Store graphs that are run in one step.
graph_ptr_step_vec_ = graphs;
for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) {
const auto &graph = graphs[graph_index];
if (debugger_) {
@@ -430,15 +434,13 @@ void Debugger::PostExecuteGraphDebugger() {
return;
}
// LoadParametersAndConst for all the graphs
if (debugger_) {
for (auto graph : graph_ptr_list_) {
debugger_->LoadParametersAndConst(graph);
}
for (auto graph : graph_ptr_step_vec_) {
debugger_->LoadParametersAndConst(graph);
}
// debug used for dump
if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) {
// Dump Parameters and consts
for (auto graph : graph_ptr_list_) {
for (auto graph : graph_ptr_step_vec_) {
debugger_->Dump(graph);
if (!debugger_->debugger_enabled()) {
debugger_->ClearCurrentData();


+ 2
- 0
mindspore/ccsrc/debug/debugger/debugger.h View File

@@ -265,6 +265,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

std::list<GraphProto> graph_proto_list_;
std::list<KernelGraphPtr> graph_ptr_list_;
// The vector of graph pointers that have been run in the current step.
std::vector<KernelGraphPtr> graph_ptr_step_vec_;

// singleton
static std::mutex instance_lock_;


Loading…
Cancel
Save