| @@ -373,6 +373,9 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) { | |||||
| std::string name = "graph_build"; | std::string name = "graph_build"; | ||||
| DumpGraphParams dump_params = {true, static_cast<int>(kWholeStack)}; | DumpGraphParams dump_params = {true, static_cast<int>(kWholeStack)}; | ||||
| mindspore::RDR::RecordAnfGraph(SubModuleId::SM_SESSION, name, graph, dump_params, ".ir,.pb"); | mindspore::RDR::RecordAnfGraph(SubModuleId::SM_SESSION, name, graph, dump_params, ".ir,.pb"); | ||||
| auto &kernels = graph->execution_order(); | |||||
| std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id()); | |||||
| mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_SESSION, exec_order_name, kernels); | |||||
| #endif | #endif | ||||
| // Get summary nodes. | // Get summary nodes. | ||||
| SetSummaryNodes(graph.get()); | SetSummaryNodes(graph.get()); | ||||
| @@ -33,23 +33,19 @@ std::string MemInfo2String(const std::string &label, const AddressPtrList &info) | |||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info) { | |||||
| std::lock_guard<std::mutex> lock(mtx_); | |||||
| std::ostringstream mem_info_stream; | |||||
| auto inputs = mem_info.inputs_; | |||||
| mem_info_stream << op_name << std::endl; | |||||
| mem_info_stream << MemInfo2String("kernel_inputs", *inputs); | |||||
| auto workspaces = mem_info.workspaces_; | |||||
| mem_info_stream << MemInfo2String("kernel_workspaces", *workspaces); | |||||
| auto outputs = mem_info.outputs_; | |||||
| mem_info_stream << MemInfo2String("kernel_outputs", *outputs); | |||||
| mem_info_stream << std::endl; | |||||
| void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id) { | |||||
| if (op_names_.size() <= id) { | |||||
| return; | |||||
| } | |||||
| std::string mem_info_str = mem_info_stream.str(); | |||||
| mem_info_container_[op_name] = mem_info_str; | |||||
| std::lock_guard<std::mutex> lock(mtx_); | |||||
| op_names_[id] = op_name; | |||||
| mem_info_inputs_[id] = *(mem_info.inputs_); | |||||
| mem_info_workspaces_[id] = *(mem_info.workspaces_); | |||||
| mem_info_outputs_[id] = *(mem_info.outputs_); | |||||
| } | } | ||||
| void MemAddressRecorder::Export() { | |||||
| void GPUMemAddressRecorder::Export() { | |||||
| auto realpath = GetFileRealPath(); | auto realpath = GetFileRealPath(); | ||||
| if (!realpath.has_value()) { | if (!realpath.has_value()) { | ||||
| return; | return; | ||||
| @@ -62,18 +58,19 @@ void MemAddressRecorder::Export() { | |||||
| MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'."; | MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'."; | ||||
| return; | return; | ||||
| } | } | ||||
| for (auto &info : mem_info_container_) { | |||||
| fout << info.second; | |||||
| std::ostringstream mem_info_stream; | |||||
| for (size_t i = 0; i < op_names_.size(); i++) { | |||||
| mem_info_stream << op_names_[i] << std::endl; | |||||
| auto inputs = mem_info_inputs_[i]; | |||||
| mem_info_stream << MemInfo2String("kernel_inputs", inputs); | |||||
| auto workspaces = mem_info_workspaces_[i]; | |||||
| mem_info_stream << MemInfo2String("kernel_workspaces", workspaces); | |||||
| auto outputs = mem_info_outputs_[i]; | |||||
| mem_info_stream << MemInfo2String("kernel_outputs", outputs); | |||||
| mem_info_stream << std::endl; | |||||
| } | } | ||||
| fout << mem_info_stream.str(); | |||||
| fout.close(); | fout.close(); | ||||
| ChangeFileMode(file_path, S_IRUSR); | ChangeFileMode(file_path, S_IRUSR); | ||||
| } | } | ||||
| void MemAddressRecorder::UpdateInfo(const BaseRecorder &recorder) { | |||||
| const MemAddressRecorder *mem_recorder = reinterpret_cast<const MemAddressRecorder *>(&recorder); | |||||
| std::map<std::string, std::string> mem_info = mem_recorder->MemInfo(); | |||||
| for (const auto &info : mem_info) { | |||||
| mem_info_container_[info.first] = info.second; | |||||
| } | |||||
| } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -34,22 +34,28 @@ struct GPUMemInfo { | |||||
| AddressPtrList *workspaces_; | AddressPtrList *workspaces_; | ||||
| AddressPtrList *outputs_; | AddressPtrList *outputs_; | ||||
| }; | }; | ||||
| class MemAddressRecorder : public BaseRecorder { | |||||
| class GPUMemAddressRecorder : public BaseRecorder { | |||||
| public: | public: | ||||
| MemAddressRecorder() {} | |||||
| MemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {} | |||||
| ~MemAddressRecorder() {} | |||||
| GPUMemAddressRecorder() {} | |||||
| GPUMemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {} | |||||
| ~GPUMemAddressRecorder() {} | |||||
| virtual void Export(); | virtual void Export(); | ||||
| void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info); | |||||
| void UpdateInfo(const BaseRecorder &recorder); | |||||
| std::map<std::string, std::string> MemInfo() const { return mem_info_container_; } | |||||
| void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id); | |||||
| void Reset(size_t nsize) { | |||||
| op_names_.resize(nsize); | |||||
| mem_info_inputs_.resize(nsize); | |||||
| mem_info_workspaces_.resize(nsize); | |||||
| mem_info_outputs_.resize(nsize); | |||||
| } | |||||
| private: | private: | ||||
| mutable std::mutex mtx_; | mutable std::mutex mtx_; | ||||
| std::map<std::string, std::string> mem_info_container_; | |||||
| std::vector<std::string> op_names_; | |||||
| std::vector<AddressPtrList> mem_info_inputs_; | |||||
| std::vector<AddressPtrList> mem_info_workspaces_; | |||||
| std::vector<AddressPtrList> mem_info_outputs_; | |||||
| }; | }; | ||||
| using MemAddressRecorderPtr = std::shared_ptr<MemAddressRecorder>; | |||||
| using GPUMemAddressRecorderPtr = std::shared_ptr<GPUMemAddressRecorder>; | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_ | #endif // MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_ | ||||
| @@ -42,7 +42,7 @@ void RecorderManager::UpdateRdrEnable() { | |||||
| updated = true; | updated = true; | ||||
| } | } | ||||
| bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool &replace) { | |||||
| bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder) { | |||||
| if (!rdr_enable_) { | if (!rdr_enable_) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -55,20 +55,19 @@ bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool & | |||||
| std::string name = recorder->GetName(); | std::string name = recorder->GetName(); | ||||
| std::pair<std::string, std::string> recorder_key(module, name); | std::pair<std::string, std::string> recorder_key(module, name); | ||||
| std::lock_guard<std::mutex> lock(mtx_); | std::lock_guard<std::mutex> lock(mtx_); | ||||
| if (replace) { | |||||
| recorder_container_[recorder_key] = recorder; | |||||
| return true; | |||||
| } | |||||
| std::unordered_map<std::pair<std::string, std::string>, BaseRecorderPtr, pair_hash>::iterator item = | |||||
| recorder_container_.find(recorder_key); | |||||
| if (item == recorder_container_.end()) { | |||||
| recorder_container_[recorder_key] = recorder; | |||||
| } else { | |||||
| recorder_container_[recorder_key]->UpdateInfo(*recorder); | |||||
| } | |||||
| recorder_container_[recorder_key] = recorder; | |||||
| return true; | return true; | ||||
| } | } | ||||
| // Looks up the recorder registered under the (module, name) key. | |||||
| // Returns the stored recorder, or nullptr when nothing is registered under that key. | |||||
| BaseRecorderPtr RecorderManager::GetRecorder(std::string module, std::string name) { | |||||
| std::pair<std::string, std::string> recorder_key(module, name); | |||||
| // RecordObject writes recorder_container_ while holding mtx_; take the same lock for the lookup | |||||
| // so a concurrent insert cannot rehash the map underneath the find(). | |||||
| std::lock_guard<std::mutex> lock(mtx_); | |||||
| auto item = recorder_container_.find(recorder_key); | |||||
| if (item != recorder_container_.end()) { | |||||
| return item->second; | |||||
| } | |||||
| return nullptr; | |||||
| } | |||||
| void RecorderManager::TriggerAll() { | void RecorderManager::TriggerAll() { | ||||
| if (!rdr_enable_) { | if (!rdr_enable_) { | ||||
| return; | return; | ||||
| @@ -62,7 +62,8 @@ class RecorderManager { | |||||
| void UpdateRdrEnable(); | void UpdateRdrEnable(); | ||||
| bool RdrEnable() const { return rdr_enable_; } | bool RdrEnable() const { return rdr_enable_; } | ||||
| bool RecordObject(const BaseRecorderPtr &recorder, const bool &replace = true); | |||||
| bool RecordObject(const BaseRecorderPtr &recorder); | |||||
| BaseRecorderPtr GetRecorder(std::string module, std::string name); | |||||
| void TriggerAll(); | void TriggerAll(); | ||||
| void ClearAll(); | void ClearAll(); | ||||
| @@ -123,15 +123,30 @@ bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, co | |||||
| return ans; | return ans; | ||||
| } | } | ||||
| bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||||
| const GPUMemInfo &mem_info) { | |||||
| bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize) { | |||||
| if (!mindspore::RecorderManager::Instance().RdrEnable()) { | if (!mindspore::RecorderManager::Instance().RdrEnable()) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| std::string submodule_name = std::string(GetSubModuleName(module)); | std::string submodule_name = std::string(GetSubModuleName(module)); | ||||
| MemAddressRecorderPtr mem_info_recorder = std::make_shared<MemAddressRecorder>(submodule_name, name); | |||||
| mem_info_recorder->SaveMemInfo(op_name, mem_info); | |||||
| bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder), false); | |||||
| GPUMemAddressRecorderPtr mem_info_recorder = std::make_shared<GPUMemAddressRecorder>(submodule_name, name); | |||||
| mem_info_recorder->Reset(nsize); | |||||
| bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder)); | |||||
| return ans; | |||||
| } | |||||
| // Saves one kernel's memory info into slot `id` of the GPUMemAddressRecorder registered under (module, name). | |||||
| // Returns false when RDR is disabled, when no recorder is registered under that key, or when the | |||||
| // registered recorder is not a GPUMemAddressRecorder; returns true once SaveMemInfo has been called. | |||||
| // Note: SaveMemInfo itself silently ignores an `id` that is not below the size set via Reset(). | |||||
| bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||||
| const GPUMemInfo &mem_info, size_t id) { | |||||
| if (!mindspore::RecorderManager::Instance().RdrEnable()) { | |||||
| return false; | |||||
| } | |||||
| std::string submodule_name = std::string(GetSubModuleName(module)); | |||||
| auto recorder = mindspore::RecorderManager::Instance().GetRecorder(submodule_name, name); | |||||
| // The cast yields nullptr both for a missing recorder and for a recorder of a different type, | |||||
| // so checking the cast result (not the base pointer) guards the dereference below. | |||||
| auto mem_recorder = std::dynamic_pointer_cast<GPUMemAddressRecorder>(recorder); | |||||
| bool ans = false; | |||||
| if (mem_recorder != nullptr) { | |||||
| mem_recorder->SaveMemInfo(op_name, mem_info, id); | |||||
| ans = true; | |||||
| } | |||||
| return ans; | return ans; | ||||
| } | } | ||||
| @@ -52,8 +52,9 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &name, | |||||
| const std::vector<CNodePtr> &final_exec_order); | const std::vector<CNodePtr> &final_exec_order); | ||||
| bool RecordString(SubModuleId module, const std::string &name, const std::string &data); | bool RecordString(SubModuleId module, const std::string &name, const std::string &data); | ||||
| bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, const std::vector<CNodePtr> &exec_order); | bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, const std::vector<CNodePtr> &exec_order); | ||||
| bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||||
| const GPUMemInfo &mem_info); | |||||
| bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize); | |||||
| bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, | |||||
| const GPUMemInfo &mem_info, size_t id); | |||||
| #ifdef ENABLE_D | #ifdef ENABLE_D | ||||
| bool RecordTaskDebugInfo(SubModuleId module, const std::string &name, | bool RecordTaskDebugInfo(SubModuleId module, const std::string &name, | ||||
| const std::vector<TaskDebugInfoPtr> &task_debug_info_list); | const std::vector<TaskDebugInfoPtr> &task_debug_info_list); | ||||
| @@ -43,6 +43,7 @@ | |||||
| #endif | #endif | ||||
| #ifdef ENABLE_DUMP_IR | #ifdef ENABLE_DUMP_IR | ||||
| #include "debug/rdr/running_data_recorder.h" | #include "debug/rdr/running_data_recorder.h" | ||||
| #include "debug/rdr/recorder_manager.h" | |||||
| #include "debug/rdr/mem_address_recorder.h" | #include "debug/rdr/mem_address_recorder.h" | ||||
| #endif | #endif | ||||
| @@ -649,8 +650,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo | |||||
| auto &kernels = graph->execution_order(); | auto &kernels = graph->execution_order(); | ||||
| int exec_order = 1; | int exec_order = 1; | ||||
| #ifdef ENABLE_DUMP_IR | #ifdef ENABLE_DUMP_IR | ||||
| std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id()); | |||||
| mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_KERNEL, exec_order_name, kernels); | |||||
| std::string name = "mem_address_list"; | |||||
| mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size()); | |||||
| size_t id = 0; | |||||
| #endif | #endif | ||||
| auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(profiler_inst); | MS_EXCEPTION_IF_NULL(profiler_inst); | ||||
| @@ -694,9 +696,8 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo | |||||
| } | } | ||||
| #ifdef ENABLE_DUMP_IR | #ifdef ENABLE_DUMP_IR | ||||
| GPUMemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs}; | GPUMemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs}; | ||||
| std::string name = "mem_address_list"; | |||||
| std::string op_name = kernel->fullname_with_scope(); | std::string op_name = kernel->fullname_with_scope(); | ||||
| mindspore::RDR::RecordMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info); | |||||
| mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++); | |||||
| #endif | #endif | ||||
| if (!mock) { | if (!mock) { | ||||
| if (!profiling) { | if (!profiling) { | ||||