From 46d78efb0ce3de009258d38ccf28df451e075707 Mon Sep 17 00:00:00 2001 From: louei5 Date: Thu, 11 Mar 2021 14:38:29 +0800 Subject: [PATCH] fix saving too much gpu memory info and do not skip saving kernel graph in protobuf type --- mindspore/ccsrc/backend/session/ascend_session.cc | 4 +++- mindspore/ccsrc/debug/rdr/mem_address_recorder.cc | 14 +++----------- mindspore/ccsrc/debug/rdr/mem_address_recorder.h | 4 ++-- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index c98d2463a0..bad064c9a5 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -65,6 +65,7 @@ #include "toolchain/adx_datadump_server.h" #ifdef ENABLE_DUMP_IR #include "debug/rdr/running_data_recorder.h" +#include "debug/rdr/recorder_manager.h" #include "runtime/device/ascend/ascend_bucket.h" #endif #if ENABLE_CPU && ENABLE_D @@ -1000,7 +1001,8 @@ void AscendSession::DumpAllGraphs(const std::vector &all_graphs) bool save_graphs = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG); auto &json_parser = DumpJsonParser::GetInstance(); json_parser.Parse(); - if (!save_graphs && !json_parser.e2e_dump_enabled() && !json_parser.async_dump_enabled()) { + if (!save_graphs && !json_parser.e2e_dump_enabled() && !json_parser.async_dump_enabled() && + !mindspore::RecorderManager::Instance().RdrEnable()) { return; } auto kernel_runtime = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); diff --git a/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc b/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc index 67ad103cbf..2a2f12b4eb 100644 --- a/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc +++ b/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc @@ -49,14 +49,7 @@ void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInf mem_info_stream << MemInfo2String("kernel_outputs", *outputs); 
mem_info_stream << std::endl; std::string mem_info_str = mem_info_stream.str(); - size_t length = mem_info_append_str_.size() + mem_info_str.size(); - // set maximum length of one memory info recorder is 10 percent of string::max_size - if (length < 0.1 * mem_info_str.max_size()) { - mem_info_append_str_ += mem_info_str; - } else { - mem_infos_.push_back(mem_info_append_str_); - mem_info_append_str_ = mem_info_str; - } + mem_info_container_[op_name] = mem_info_str; } void MemAddressRecorder::Export() { @@ -72,10 +65,9 @@ void MemAddressRecorder::Export() { MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'."; return; } - for (auto &info : mem_infos_) { - fout << info; + for (auto &info : mem_info_container_) { + fout << info.second; } - fout << mem_info_append_str_; fout.close(); ChangeFileMode(file_path, S_IRUSR); } diff --git a/mindspore/ccsrc/debug/rdr/mem_address_recorder.h b/mindspore/ccsrc/debug/rdr/mem_address_recorder.h index 971873c6a0..81092af5f4 100644 --- a/mindspore/ccsrc/debug/rdr/mem_address_recorder.h +++ b/mindspore/ccsrc/debug/rdr/mem_address_recorder.h @@ -17,6 +17,7 @@ #define MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_ #include #include +#include #include #include @@ -48,8 +49,7 @@ class MemAddressRecorder : public BaseRecorder { mutable std::mutex mtx_; - std::string mem_info_append_str_; - std::vector mem_infos_; + std::map mem_info_container_; }; using MemAddressRecorderPtr = std::shared_ptr; } // namespace mindspore