Browse Source

Fix GPU sink_mode True and Dump multi graph ERROR issue

tags/v1.6.0
Parastoo Ashtari 4 years ago
parent
commit
9c036d1229
6 changed files with 34 additions and 7 deletions
  1. +2
    -2
      mindspore/ccsrc/debug/debug_services.cc
  2. +5
    -2
      mindspore/ccsrc/debug/debugger/debugger.cc
  3. +3
    -3
      mindspore/ccsrc/debug/debugger/debugger.h
  4. +16
    -0
      mindspore/ccsrc/debug/debugger/debugger_utils.cc
  5. +2
    -0
      mindspore/ccsrc/debug/debugger/debugger_utils.h
  6. +6
    -0
      mindspore/ccsrc/runtime/framework/actor/debug_actor.cc

+ 2
- 2
mindspore/ccsrc/debug/debug_services.cc View File

@@ -1359,7 +1359,7 @@ void DebugServices::ProcessTensorDataSync(const std::vector<std::tuple<std::stri
bool error_on_no_value) {
DIR *d = opendir(abspath.c_str());
if (d == nullptr) {
MS_LOG(ERROR) << "Directory " << specific_dump_dir.c_str() << " does not exist in ReadNeededDumpedTensors.";
MS_LOG(INFO) << "Directory " << specific_dump_dir.c_str() << " does not exist in ReadNeededDumpedTensors.";
} else {
struct dirent *dir = nullptr;
while ((dir = readdir(d)) != nullptr) {
@@ -1771,7 +1771,7 @@ std::string DebugServices::RealPath(const std::string &input_path) {
MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX;
}
if (realpath(prefix_path.c_str(), real_path) == nullptr) {
MS_LOG(ERROR) << "The dir " << prefix_path << " does not exist.";
MS_LOG(INFO) << "The dir " << prefix_path << " does not exist.";
return "";
}



+ 5
- 2
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -201,7 +201,7 @@ void Debugger::CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {
bool sink_mode = ConfigManager::GetInstance().dataset_mode() || graph_ptr->IsDatasetGraph();
if (CheckDebuggerDumpEnabled() && sink_mode) {
MS_EXCEPTION(NotSupportError)
<< "e2e_dump not supported on GPU with dataset_sink_mode=True. Please set dataset_sink_mode=False";
<< "e2e_dump is not supported on GPU with dataset_sink_mode=True. Please set dataset_sink_mode=False";
}

if (CheckDebuggerEnabled() && sink_mode) {
@@ -331,7 +331,10 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
MS_EXCEPTION_IF_NULL(graph_ptr);
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
CheckDatasetSinkMode(graph_ptr);
if (!MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
// Checking dataset_sink_mode for mindRT is done in debug_actor
CheckDatasetSinkMode(graph_ptr);
}
auto graph_id = graph_ptr->graph_id();
MS_LOG(DEBUG) << "PreExecute for graph: " << graph_id << " in step: " << num_step_ << ".";
StoreRunGraphIdList(graph_id);


+ 3
- 3
mindspore/ccsrc/debug/debugger/debugger.h View File

@@ -171,6 +171,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// check if dump using debugger backend is enabled
bool CheckDebuggerDumpEnabled() const;

// check if debugger is enabled
bool CheckDebuggerEnabled() const;

#ifdef ENABLE_D
std::shared_ptr<DumpDataBuilder> LoadDumpDataBuilder(const std::string &node_name);

@@ -186,9 +189,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// read env variable for grpc client
void EnableDebugger();

// check if debugger enabled
bool CheckDebuggerEnabled() const;

void CheckDebuggerEnabledParam() const;

bool CheckDebuggerPartialMemoryEnabled() const;


+ 16
- 0
mindspore/ccsrc/debug/debugger/debugger_utils.cc View File

@@ -29,6 +29,9 @@
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/kernel.h"
#include "debug/data_dump/e2e_dump.h"
#include "utils/config_manager.h"

constexpr int kFailure = 1;

using mindspore::kernel::AddressPtr;
using mindspore::kernel::KernelLaunchInfo;
@@ -165,6 +168,19 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_
debugger->PostExecuteNode(cnode, last_kernel);
}

// Checks whether e2e dump or the debugger is being used together with dataset sink mode.
//
// Sink mode is considered active when either the global ConfigManager dataset mode is set or
// the given graph reports itself as a dataset graph.
//
// Returns a non-empty, user-facing error message when the combination is unsupported, and an
// empty string otherwise. If both e2e dump and the debugger are enabled, the debugger message
// is the one returned (it overwrites the dump message).
//
// Note: graph_ptr is dereferenced whenever the global dataset mode is off, so callers must pass
// a non-null graph.
std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {
  std::string error_info = "";
  bool sink_mode = ConfigManager::GetInstance().dataset_mode() || graph_ptr->IsDatasetGraph();
  auto debugger = Debugger::GetInstance();
  // The debugger instance may be absent (callers guard it with a nullptr check); without a
  // debugger neither dump nor debugging can be enabled, so there is nothing to report.
  if (debugger == nullptr) {
    return error_info;
  }
  if (debugger->CheckDebuggerDumpEnabled() && sink_mode) {
    error_info = "e2e_dump is not supported on GPU with dataset_sink_mode=True. Please set dataset_sink_mode=False";
  }
  if (debugger->CheckDebuggerEnabled() && sink_mode) {
    error_info = "Debugger is not supported with dataset_sink_mode=True. Please set dataset_sink_mode=False";
  }
  return error_info;
}

#ifdef ENABLE_D
int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size) {
MS_LOG(DEBUG) << "ADX DumpDataCallBack is called";


+ 2
- 0
mindspore/ccsrc/debug/debugger/debugger_utils.h View File

@@ -40,6 +40,8 @@ bool CheckReadData(const CNodePtr &cnode);

void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);

// Returns an error message if e2e dump or the debugger is enabled while dataset sink mode is
// active (global dataset mode set, or graph_ptr is a dataset graph); returns an empty string
// otherwise.
std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr);

#ifdef ENABLE_D
// Callback function to dump ascend async mode
int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size);


+ 6
- 0
mindspore/ccsrc/runtime/framework/actor/debug_actor.cc View File

@@ -80,6 +80,12 @@ void DebugActor::DebugOnStepBegin(std::vector<KernelGraphPtr> graphs, std::vecto
MS_EXCEPTION_IF_NULL(op_context);
MS_EXCEPTION_IF_NULL(from_aid);
#ifdef ENABLE_DEBUGGER
// First graph is the dataset graph when dataset_sink_mode = True
auto graph = graphs[0];
std::string error_info = CheckDatasetSinkMode(graph);
if (!error_info.empty()) {
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*op_context), error_info);
}
auto debugger = Debugger::GetInstance();
if (debugger != nullptr && debugger->DebuggerBackendEnabled()) {
debugger->PreExecuteGraphDebugger(graphs);


Loading…
Cancel
Save