| @@ -373,6 +373,9 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) { | |||
| std::string name = "graph_build"; | |||
| DumpGraphParams dump_params = {true, static_cast<int>(kWholeStack)}; | |||
| mindspore::RDR::RecordAnfGraph(SubModuleId::SM_SESSION, name, graph, dump_params, ".ir,.pb"); | |||
| auto &kernels = graph->execution_order(); | |||
| std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id()); | |||
| mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_SESSION, exec_order_name, kernels); | |||
| #endif | |||
| // Get summary nodes. | |||
| SetSummaryNodes(graph.get()); | |||
| @@ -33,23 +33,19 @@ std::string MemInfo2String(const std::string &label, const AddressPtrList &info) | |||
| } | |||
| } // namespace | |||
| void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info) { | |||
| std::lock_guard<std::mutex> lock(mtx_); | |||
| std::ostringstream mem_info_stream; | |||
| auto inputs = mem_info.inputs_; | |||
| mem_info_stream << op_name << std::endl; | |||
| mem_info_stream << MemInfo2String("kernel_inputs", *inputs); | |||
| auto workspaces = mem_info.workspaces_; | |||
| mem_info_stream << MemInfo2String("kernel_workspaces", *workspaces); | |||
| auto outputs = mem_info.outputs_; | |||
| mem_info_stream << MemInfo2String("kernel_outputs", *outputs); | |||
| mem_info_stream << std::endl; | |||
| void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id) { | |||
| if (op_names_.size() <= id) { | |||
| return; | |||
| } | |||
| std::string mem_info_str = mem_info_stream.str(); | |||
| mem_info_container_[op_name] = mem_info_str; | |||
| std::lock_guard<std::mutex> lock(mtx_); | |||
| op_names_[id] = op_name; | |||
| mem_info_inputs_[id] = *(mem_info.inputs_); | |||
| mem_info_workspaces_[id] = *(mem_info.workspaces_); | |||
| mem_info_outputs_[id] = *(mem_info.outputs_); | |||
| } | |||
| void MemAddressRecorder::Export() { | |||
| void GPUMemAddressRecorder::Export() { | |||
| auto realpath = GetFileRealPath(); | |||
| if (!realpath.has_value()) { | |||
| return; | |||
| @@ -62,18 +58,19 @@ void MemAddressRecorder::Export() { | |||
| MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'."; | |||
| return; | |||
| } | |||
| for (auto &info : mem_info_container_) { | |||
| fout << info.second; | |||
| std::ostringstream mem_info_stream; | |||
| for (size_t i = 0; i < op_names_.size(); i++) { | |||
| mem_info_stream << op_names_[i] << std::endl; | |||
| auto inputs = mem_info_inputs_[i]; | |||
| mem_info_stream << MemInfo2String("kernel_inputs", inputs); | |||
| auto workspaces = mem_info_workspaces_[i]; | |||
| mem_info_stream << MemInfo2String("kernel_workspaces", workspaces); | |||
| auto outputs = mem_info_outputs_[i]; | |||
| mem_info_stream << MemInfo2String("kernel_outputs", outputs); | |||
| mem_info_stream << std::endl; | |||
| } | |||
| fout << mem_info_stream.str(); | |||
| fout.close(); | |||
| ChangeFileMode(file_path, S_IRUSR); | |||
| } | |||
| void MemAddressRecorder::UpdateInfo(const BaseRecorder &recorder) { | |||
| const MemAddressRecorder *mem_recorder = reinterpret_cast<const MemAddressRecorder *>(&recorder); | |||
| std::map<std::string, std::string> mem_info = mem_recorder->MemInfo(); | |||
| for (const auto &info : mem_info) { | |||
| mem_info_container_[info.first] = info.second; | |||
| } | |||
| } | |||
| } // namespace mindspore | |||
| @@ -34,22 +34,28 @@ struct GPUMemInfo { | |||
| AddressPtrList *workspaces_; | |||
| AddressPtrList *outputs_; | |||
| }; | |||
| class MemAddressRecorder : public BaseRecorder { | |||
| class GPUMemAddressRecorder : public BaseRecorder { | |||
| public: | |||
| MemAddressRecorder() {} | |||
| MemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {} | |||
| ~MemAddressRecorder() {} | |||
| GPUMemAddressRecorder() {} | |||
| GPUMemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {} | |||
| ~GPUMemAddressRecorder() {} | |||
| virtual void Export(); | |||
| void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info); | |||
| void UpdateInfo(const BaseRecorder &recorder); | |||
| std::map<std::string, std::string> MemInfo() const { return mem_info_container_; } | |||
| void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id); | |||
| void Reset(size_t nsize) { | |||
| op_names_.resize(nsize); | |||
| mem_info_inputs_.resize(nsize); | |||
| mem_info_workspaces_.resize(nsize); | |||
| mem_info_outputs_.resize(nsize); | |||
| } | |||
| private: | |||
| mutable std::mutex mtx_; | |||
| std::map<std::string, std::string> mem_info_container_; | |||
| std::vector<std::string> op_names_; | |||
| std::vector<AddressPtrList> mem_info_inputs_; | |||
| std::vector<AddressPtrList> mem_info_workspaces_; | |||
| std::vector<AddressPtrList> mem_info_outputs_; | |||
| }; | |||
| using MemAddressRecorderPtr = std::shared_ptr<MemAddressRecorder>; | |||
| using GPUMemAddressRecorderPtr = std::shared_ptr<GPUMemAddressRecorder>; | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_ | |||
| @@ -42,7 +42,7 @@ void RecorderManager::UpdateRdrEnable() { | |||
| updated = true; | |||
| } | |||
| bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool &replace) { | |||
| bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder) { | |||
| if (!rdr_enable_) { | |||
| return false; | |||
| } | |||
| @@ -55,20 +55,19 @@ bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool & | |||
| std::string name = recorder->GetName(); | |||
| std::pair<std::string, std::string> recorder_key(module, name); | |||
| std::lock_guard<std::mutex> lock(mtx_); | |||
| if (replace) { | |||
| recorder_container_[recorder_key] = recorder; | |||
| return true; | |||
| } | |||
| std::unordered_map<std::pair<std::string, std::string>, BaseRecorderPtr, pair_hash>::iterator item = | |||
| recorder_container_.find(recorder_key); | |||
| if (item == recorder_container_.end()) { | |||
| recorder_container_[recorder_key] = recorder; | |||
| } else { | |||
| recorder_container_[recorder_key]->UpdateInfo(*recorder); | |||
| } | |||
| recorder_container_[recorder_key] = recorder; | |||
| return true; | |||
| } | |||
| BaseRecorderPtr RecorderManager::GetRecorder(std::string module, std::string name) { | |||
| std::pair<std::string, std::string> recorder_key(module, name); | |||
| auto item = recorder_container_.find(recorder_key); | |||
| if (item != recorder_container_.end()) { | |||
| return item->second; | |||
| } | |||
| return nullptr; | |||
| } | |||
| void RecorderManager::TriggerAll() { | |||
| if (!rdr_enable_) { | |||
| return; | |||
| @@ -62,7 +62,8 @@ class RecorderManager { | |||
| void UpdateRdrEnable(); | |||
| bool RdrEnable() const { return rdr_enable_; } | |||
| bool RecordObject(const BaseRecorderPtr &recorder, const bool &replace = true); | |||
| bool RecordObject(const BaseRecorderPtr &recorder); | |||
| BaseRecorderPtr GetRecorder(std::string module, std::string name); | |||
| void TriggerAll(); | |||
| void ClearAll(); | |||
| @@ -123,15 +123,30 @@ bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, co | |||
| return ans; | |||
| } | |||
| bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||
| const GPUMemInfo &mem_info) { | |||
| bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize) { | |||
| if (!mindspore::RecorderManager::Instance().RdrEnable()) { | |||
| return false; | |||
| } | |||
| std::string submodule_name = std::string(GetSubModuleName(module)); | |||
| MemAddressRecorderPtr mem_info_recorder = std::make_shared<MemAddressRecorder>(submodule_name, name); | |||
| mem_info_recorder->SaveMemInfo(op_name, mem_info); | |||
| bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder), false); | |||
| GPUMemAddressRecorderPtr mem_info_recorder = std::make_shared<GPUMemAddressRecorder>(submodule_name, name); | |||
| mem_info_recorder->Reset(nsize); | |||
| bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder)); | |||
| return ans; | |||
| } | |||
| bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||
| const GPUMemInfo &mem_info, size_t id) { | |||
| if (!mindspore::RecorderManager::Instance().RdrEnable()) { | |||
| return false; | |||
| } | |||
| std::string submodule_name = std::string(GetSubModuleName(module)); | |||
| auto recorder = mindspore::RecorderManager::Instance().GetRecorder(submodule_name, name); | |||
| bool ans = false; | |||
| if (recorder != nullptr) { | |||
| auto mem_recorder = std::dynamic_pointer_cast<GPUMemAddressRecorder>(recorder); | |||
| mem_recorder->SaveMemInfo(op_name, mem_info, id); | |||
| ans = true; | |||
| } | |||
| return ans; | |||
| } | |||
| @@ -52,8 +52,9 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &name, | |||
| const std::vector<CNodePtr> &final_exec_order); | |||
| bool RecordString(SubModuleId module, const std::string &name, const std::string &data); | |||
| bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, const std::vector<CNodePtr> &exec_order); | |||
| bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||
| const GPUMemInfo &mem_info); | |||
| bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize); | |||
| bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||
| const GPUMemInfo &mem_info, size_t id); | |||
| #ifdef ENABLE_D | |||
| bool RecordTaskDebugInfo(SubModuleId module, const std::string &name, | |||
| const std::vector<TaskDebugInfoPtr> &task_debug_info_list); | |||
| @@ -43,6 +43,7 @@ | |||
| #endif | |||
| #ifdef ENABLE_DUMP_IR | |||
| #include "debug/rdr/running_data_recorder.h" | |||
| #include "debug/rdr/recorder_manager.h" | |||
| #include "debug/rdr/mem_address_recorder.h" | |||
| #endif | |||
| @@ -649,8 +650,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo | |||
| auto &kernels = graph->execution_order(); | |||
| int exec_order = 1; | |||
| #ifdef ENABLE_DUMP_IR | |||
| std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id()); | |||
| mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_KERNEL, exec_order_name, kernels); | |||
| std::string name = "mem_address_list"; | |||
| mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size()); | |||
| size_t id = 0; | |||
| #endif | |||
| auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(profiler_inst); | |||
| @@ -694,9 +696,8 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo | |||
| } | |||
| #ifdef ENABLE_DUMP_IR | |||
| GPUMemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs}; | |||
| std::string name = "mem_address_list"; | |||
| std::string op_name = kernel->fullname_with_scope(); | |||
| mindspore::RDR::RecordMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info); | |||
| mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++); | |||
| #endif | |||
| if (!mock) { | |||
| if (!profiling) { | |||