Browse Source

!6605 [Data Dump] Fix GPU dump bug

Merge pull request !6605 from caifubi/dump2
tags/v1.0.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
406052eb58
4 changed files with 32 additions and 9 deletions
  1. +2
    -1
      mindspore/ccsrc/backend/session/gpu_session.cc
  2. +25
    -3
      mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
  3. +1
    -0
      mindspore/ccsrc/debug/data_dump/dump_json_parser.h
  4. +4
    -5
      mindspore/ccsrc/debug/tensor_load.h

+ 2
- 1
mindspore/ccsrc/backend/session/gpu_session.cc View File

@@ -45,6 +45,7 @@
#include "backend/optimizer/graph_kernel/composite_ops_fusion.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "utils/ms_utils.h"
#include "utils/config_manager.h"
#include "common/trans.h"
#include "utils/ms_context.h"
#include "debug/data_dump/e2e_dump_util.h"
@@ -375,7 +376,7 @@ void GPUSession::PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph)
bool dump_enabled = DumpDataEnabledIteration();
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (dump_enabled && context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
if (dump_enabled && ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE) {
MS_EXCEPTION(NotSupportError) << "Don't support set dataset_sink_mode to True when using e2e_dump";
}



+ 25
- 3
mindspore/ccsrc/debug/data_dump/dump_json_parser.cc View File

@@ -283,12 +283,35 @@ void DumpJsonParser::ParseOpDebugMode(const nlohmann::json &content) {
}
}

void DumpJsonParser::JsonConfigToString() {
  // Builds a single human-readable summary of the parsed dump configuration
  // and emits it at INFO level, so the effective settings are visible in logs.
  // Fields are space-separated "key:value" pairs.
  std::string cur_config;
  cur_config.append("dump_mode:");
  cur_config.append(std::to_string(dump_mode_));
  cur_config.append(" path:");
  cur_config.append(path_);
  cur_config.append(" net_name:");
  cur_config.append(net_name_);
  cur_config.append(" iteration:");
  cur_config.append(std::to_string(iteration_));
  cur_config.append(" input_output:");
  cur_config.append(std::to_string(input_output_));
  // Fix: the original omitted the leading space here, fusing this field with
  // the previous one in the log output (e.g. "input_output:0e2e_enable:1").
  cur_config.append(" e2e_enable:");
  cur_config.append(std::to_string(e2e_dump_enabled_));
  cur_config.append(" async_dump_enable:");
  cur_config.append(std::to_string(async_dump_enabled_));
  MS_LOG(INFO) << cur_config;
}

void DumpJsonParser::JudgeDumpEnabled() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);

if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice) {
async_dump_enabled_ = false;
// GPU not support dump kernel inputs
if (input_output_ != kDumpOutputOnly) {
MS_LOG(WARNING) << "Data dump only support dump kernel output when device target is GPU";
input_output_ = kDumpOutputOnly;
}
}

if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice) {
@@ -309,8 +332,7 @@ void DumpJsonParser::JudgeDumpEnabled() {
MS_LOG(WARNING) << "Dump not enabled. device_id:" << device_id << " not support";
}
context->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !e2e_dump_enabled_);
MS_LOG(INFO) << "Dump status, e2e_dump_enabled:" << e2e_dump_enabled_
<< " async_dump_enabled:" << async_dump_enabled_;
JsonConfigToString();
}

bool DumpJsonParser::NeedDump(const std::string &op_full_name) const {


+ 1
- 0
mindspore/ccsrc/debug/data_dump/dump_json_parser.h View File

@@ -92,6 +92,7 @@ class DumpJsonParser {
void ParseOpDebugMode(const nlohmann::json &content);

void JudgeDumpEnabled();
void JsonConfigToString();
};
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_

+ 4
- 5
mindspore/ccsrc/debug/tensor_load.h View File

@@ -99,10 +99,9 @@ class TensorLoader {
bool DumpTensorToFile(std::string tensor_name, bool trans_flag, const std::string &filepath,
const std::string &host_fmt, const std::vector<int> &host_shape, TypeId host_type,
TypeId addr_type_id, std::string addr_format, size_t slot) const {
bool ret = false;
if (filepath.empty()) {
MS_LOG(ERROR) << "Dump file path is null!";
return ret;
return false;
}
std::string shape = "shape";
if (host_shape.size()) {
@@ -129,10 +128,10 @@ class TensorLoader {
mindspore::tensor::TensorPtr out_tensor = node->GetTensor();
size_t host_size = out_tensor->data().nbytes();

ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size);
return DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size);
}
return ret;
MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map";
return true;
}

private:


Loading…
Cancel
Save