Browse Source

!12894 [MS][RDR] Fix bugs in recording gpu memory information by RDR

From: @louie5
Reviewed-by: 
Signed-off-by:
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 5 years ago
parent
commit
52909e6d64
3 changed files with 22 additions and 5 deletions
  1. +16
    -3
      mindspore/ccsrc/debug/rdr/mem_address_recorder.cc
  2. +3
    -1
      mindspore/ccsrc/debug/rdr/mem_address_recorder.h
  3. +3
    -1
      mindspore/ccsrc/debug/rdr/running_data_recorder.cc

+ 16
- 3
mindspore/ccsrc/debug/rdr/mem_address_recorder.cc View File

@@ -25,7 +25,9 @@ std::string MemInfo2String(const std::string &label, const AddressPtrList &info)
std::ostringstream ss;
ss << label << " " << info.size() << std::endl;
for (size_t i = 0; i < info.size(); i++) {
ss << "&" << info[i]->addr << " #" << info[i]->size << std::endl;
if (info[i] != nullptr) {
ss << "&" << info[i]->addr << " #" << info[i]->size << std::endl;
}
}
return ss.str();
}
@@ -46,7 +48,15 @@ void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInf
auto outputs = mem_info.outputs_;
mem_info_stream << MemInfo2String("kernel_outputs", *outputs);
mem_info_stream << std::endl;
mem_info_str_ += mem_info_stream.str();
std::string mem_info_str = mem_info_stream.str();
size_t length = mem_info_append_str_.size() + mem_info_str.size();
// Cap the appended memory-info buffer at 10 percent of std::string::max_size(); once the cap would be reached, the buffer is pushed to mem_infos_ and a new one is started.
if (length < 0.1 * mem_info_str.max_size()) {
mem_info_append_str_ += mem_info_str;
} else {
mem_infos_.push_back(mem_info_append_str_);
mem_info_append_str_ = mem_info_str;
}
}

void MemAddressRecorder::Export() {
@@ -62,7 +72,10 @@ void MemAddressRecorder::Export() {
MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'.";
return;
}
fout << mem_info_str_;
for (auto &info : mem_infos_) {
fout << info;
}
fout << mem_info_append_str_;
fout.close();
ChangeFileMode(file_path, S_IRUSR);
}


+ 3
- 1
mindspore/ccsrc/debug/rdr/mem_address_recorder.h View File

@@ -46,7 +46,9 @@ class MemAddressRecorder : public BaseRecorder {
MemAddressRecorder &operator=(const MemAddressRecorder &recorder);

mutable std::mutex mtx_;
std::string mem_info_str_;

std::string mem_info_append_str_;
std::vector<std::string> mem_infos_;
};
using MemAddressRecorderPtr = std::shared_ptr<MemAddressRecorder>;
} // namespace mindspore


+ 3
- 1
mindspore/ccsrc/debug/rdr/running_data_recorder.cc View File

@@ -131,8 +131,10 @@ bool RecordMemAddressInfo(const SubModuleId module, const std::string &tag, cons
return false;
}
std::string submodule_name = std::string(GetSubModuleName(module));
std::string directory = mindspore::EnvConfigParser::GetInstance().rdr_path();
MemAddressRecorder::Instance().SetModule(submodule_name);
MemAddressRecorder::Instance().SetTag(tag);
MemAddressRecorder::Instance().SetFilename(tag); // set filename using tag
MemAddressRecorder::Instance().SetDirectory(directory);
MemAddressRecorder::Instance().SaveMemInfo(op_name, mem_info);
return true;
}


Loading…
Cancel
Save