Merge pull request !3338 from zhoufeng/debug-op-overflow
@@ -1,14 +1,16 @@
 {
     "DumpSettings": {
         "net_name": "ResNet50",
-        "mode": 1,
+        "dump_mode": 1,
+        "op_debug_mode": 3,
         "iteration": 0,
         "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
    },
    "DumpSettingsSpec": {
        "net_name": "net name eg:ResNet50",
-        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "dump_mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "op_debug_mode": "0: close debug, 1: debug ai-core overflow, 2: debug atomic overflow, 3: debug all overflow",
        "iteration": "specified iteration ",
        "kernels": "op's full scope name which need to be dump"
    }
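For reference, the new op_debug_mode key is read by name in the data_dump_parser changes further down. A minimal standalone sketch of consuming a config shaped like the one above with nlohmann::json; the local file name data_dump.json and the bare-bones error handling are illustrative only (in the patch the path comes from the DATA_DUMP_CONFIG_PATH environment variable):

// Sketch: parse the dump settings above and read the new op_debug_mode field.
#include <cstdint>
#include <fstream>
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
  std::ifstream file("data_dump.json");  // hypothetical local copy of the config
  nlohmann::json config = nlohmann::json::parse(file);
  const auto &settings = config.at("DumpSettings");
  uint32_t dump_mode = settings.at("dump_mode");          // 0: dump all kernels, 1: dump kernels in list
  uint32_t op_debug_mode = settings.at("op_debug_mode");  // 0: off, 1: ai-core, 2: atomic, 3: all overflow
  std::cout << "dump_mode=" << dump_mode << ", op_debug_mode=" << op_debug_mode << std::endl;
  for (const auto &kernel : settings.at("kernels")) {
    std::cout << "dump kernel: " << kernel.get<std::string>() << std::endl;
  }
  return 0;
}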
@@ -1 +1 @@
-Subproject commit 103f2d1019dc50d781d7a964551d9f1f50b3b009
+Subproject commit 6d12411003164d88eaed62e1ead33761cbfa15ef
@@ -20,9 +20,15 @@
 #include "utils/context/ms_context.h"
 #include "debug/common.h"
 
-constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
-constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
-constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
+static constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
+static constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
+static constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
+static constexpr auto kConfigDumpMode = "dump_mode";
+static constexpr auto kConfigOpDebugMode = "op_debug_mode";
+static constexpr auto kConfigNetName = "net_name";
+static constexpr auto kConfigIteration = "iteration";
+static constexpr auto kConfigKernels = "kernels";
 
 namespace mindspore {
 void DataDumpParser::ResetParam() {
   enable_ = false;
@@ -132,8 +138,11 @@ bool DataDumpParser::NeedDump(const std::string &op_full_name) const {
 }
 
 bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
-  if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() ||
-      dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) {
+  if (dump_settings.find(kConfigDumpMode) == dump_settings.end() ||
+      dump_settings.find(kConfigNetName) == dump_settings.end() ||
+      dump_settings.find(kConfigOpDebugMode) == dump_settings.end() ||
+      dump_settings.find(kConfigIteration) == dump_settings.end() ||
+      dump_settings.find(kConfigKernels) == dump_settings.end()) {
     MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist.";
     return false;
   }
@@ -141,10 +150,11 @@ bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
 }
 
 bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
-  auto mode = dump_settings.at("mode");
-  auto net_name = dump_settings.at("net_name");
-  auto iteration = dump_settings.at("iteration");
-  auto kernels = dump_settings.at("kernels");
+  auto mode = dump_settings.at(kConfigDumpMode);
+  auto op_debug_mode = dump_settings.at(kConfigOpDebugMode);
+  auto net_name = dump_settings.at(kConfigNetName);
+  auto iteration = dump_settings.at(kConfigIteration);
+  auto kernels = dump_settings.at(kConfigKernels);
   if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) {
     MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
     enable_ = false;
@@ -155,6 +165,7 @@ bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   dump_mode_ = mode;
+  op_debug_mode_ = op_debug_mode;
   net_name_ = net_name;
   dump_step_ = iteration;
   for (const auto &kernel : kernels) {
@@ -38,6 +38,7 @@ class DataDumpParser {
   bool enable() const { return enable_; }
   const std::string &net_name() const { return net_name_; }
   uint32_t dump_mode() const { return dump_mode_; }
+  uint32_t op_debug_mode() const { return op_debug_mode_; }
   uint32_t dump_step() const { return dump_step_; }
   void MatchKernel(const std::string &kernel_name);
   void PrintUnusedKernel();
@@ -54,6 +55,7 @@ class DataDumpParser {
   std::mutex lock_;
   bool enable_{false};
   std::string net_name_;
+  uint32_t op_debug_mode_{0};
   uint32_t dump_mode_{0};
   uint32_t dump_step_{0};
   std::map<std::string, uint32_t> kernel_map_;
@@ -97,7 +97,10 @@ AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); }
 void AscendKernelRuntime::ClearGraphModelMap() {
   for (auto &iter : graph_data_dumper_) {
     MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first;
-    iter.second->UnloadDumpInfo();
+    auto &data_dumper = iter.second;
+    MS_EXCEPTION_IF_NULL(data_dumper);
+    data_dumper->UnloadDumpInfo();
+    data_dumper->OpDebugUnregister();
   }
   graph_data_dumper_.clear();
   // tell users which dump kernel name not used
@@ -113,18 +116,29 @@ void AscendKernelRuntime::ClearGraphModelMap() {
 }
 
 void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
-  MS_LOG(DEBUG) << "Clear graph:" << graph_id << " runtime resource";
-  auto iter = graph_model_map_.find(graph_id);
-  if (iter == graph_model_map_.end()) {
+  MS_LOG(DEBUG) << "Clear graph:" << graph_id << " data dumper";
+  if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) {
+    MS_LOG(DEBUG) << "Unload dump info " << graph_id;
+    auto &data_dumper = dumper_iter->second;
+    MS_EXCEPTION_IF_NULL(data_dumper);
+    data_dumper->UnloadDumpInfo();
+    data_dumper->OpDebugUnregister();
+    graph_data_dumper_.erase(dumper_iter);
+  } else {
     MS_LOG(DEBUG) << "GraphId:" << graph_id << " not found";
-    return;
   }
-  MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first;
-  auto ret = ModelRunner::Instance().UnloadModel(iter->first);
-  if (!ret) {
-    MS_LOG(ERROR) << "UnloadModel failed";
+
+  MS_LOG(DEBUG) << "Clear graph:" << graph_id << " runtime resource";
+  if (auto model_iter = graph_model_map_.find(graph_id); model_iter != graph_model_map_.end()) {
+    MS_LOG(DEBUG) << "Ge UnloadModel " << graph_id;
+    auto ret = ModelRunner::Instance().UnloadModel(graph_id);
+    if (!ret) {
+      MS_LOG(ERROR) << "UnloadModel failed";
+    }
+    graph_model_map_.erase(model_iter);
+  } else {
+    MS_LOG(DEBUG) << "GraphId:" << graph_id << " not found";
   }
-  graph_model_map_.erase(iter);
 }
 
 bool AscendKernelRuntime::NeedDestroyHccl() {
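The rewritten ClearGraphRuntimeResource above leans on the C++17 if-with-initializer form to look up and erase the dumper entry and the model entry independently. A small self-contained sketch of that pattern, with a toy map standing in for graph_data_dumper_ (the map contents here are illustrative only):

// Sketch: C++17 if-with-initializer lookup-then-erase, as used in ClearGraphRuntimeResource.
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <string>

int main() {
  std::map<uint32_t, std::shared_ptr<std::string>> dumpers;  // toy stand-in for graph_data_dumper_
  dumpers.emplace(1, std::make_shared<std::string>("graph-1 dumper"));

  uint32_t graph_id = 1;
  if (auto iter = dumpers.find(graph_id); iter != dumpers.end()) {
    std::cout << "unloading " << *iter->second << std::endl;  // release per-graph resources
    dumpers.erase(iter);                                      // then drop the entry
  } else {
    std::cout << "GraphId:" << graph_id << " not found" << std::endl;
  }
  return 0;
}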
@@ -505,15 +519,25 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
   bool status =
     ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener);
   if (!status) {
-    MS_LOG(EXCEPTION) << "Load Task Failed";
+    MS_LOG(EXCEPTION) << "Load Model Failed";
+  }
+
+  std::function<void *()> model_handle =
+    std::bind(&ModelRunner::GetModelHandle, &ModelRunner::Instance(), model_iter->first);
+  DistributeDebugTask(NOT_NULL(graph), NOT_NULL(model_handle));
+
+  status = ModelRunner::Instance().DistributeTask(model_iter->first);
+  if (!status) {
+    MS_LOG(EXCEPTION) << "Distribute Task Failed";
   }
 
   if (ProfilingManager::GetInstance().IsProfiling()) {
     auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
     auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
     ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph));
   }
-  LaunchDataDump(NOT_NULL(graph));
+  LaunchDataDump(graph->graph_id());
 
   if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) {
     MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed";
@@ -522,20 +546,35 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
   return true;
 }
 
-void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) {
+void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
+                                              NotNull<std::function<void *()>> model_handle) {
   if (!DataDumpParser::GetInstance().DumpEnabled()) {
     return;
   }
-  auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id());
-  auto data_dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map);
+  auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle);
   MS_EXCEPTION_IF_NULL(data_dumper);
-  data_dumper->LoadDumpInfo();
   auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper);
+  data_dumper->OpDebugRegister();
   if (!ret.second) {
     MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
   }
 }
 
+void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {
+  if (!DataDumpParser::GetInstance().DumpEnabled()) {
+    return;
+  }
+  auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id);
+  if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) {
+    auto &data_dumper = dumper_iter->second;
+    MS_EXCEPTION_IF_NULL(data_dumper);
+    data_dumper->set_runtime_info(runtime_info_map);
+    data_dumper->LoadDumpInfo();
+  } else {
+    MS_LOG(EXCEPTION) << "GraphId:" << graph_id << " not found";
+  }
+}
+
 void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
   auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id);
   for (auto iter : runtime_info_map) {
@@ -63,12 +63,13 @@ class AscendKernelRuntime : public KernelRuntime {
   bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
   bool CheckGraphIdValid(GraphId graph_id) const;
   static void DebugTaskIdName(GraphId graph_id);
+  void DistributeDebugTask(NotNull<const session::KernelGraph *> graph, NotNull<std::function<void *()>> model_handle);
+  void LaunchDataDump(GraphId graph_id);
   rtContext_t rt_context_{nullptr};
   bool initialized_{false};
   unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_;
   unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_;
-  void LaunchDataDump(NotNull<const session::KernelGraph *> graph);
   unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_;
 };
@@ -22,36 +22,53 @@
 #include "backend/session/anf_runtime_algorithm.h"
 #include "runtime/mem.h"
 #include "runtime/kernel.h"
+#include "runtime/rt_model.h"
 #include "runtime/device/ascend/dump/ge_dump.h"
 #include "proto/op_mapping_info.pb.h"
 #include "utils/context/ms_context.h"
 #include "debug/data_dump_parser.h"
 
-constexpr uint32_t kAicpuLoadFlag = 1;
-constexpr uint32_t kAicpuUnloadFlag = 0;
-constexpr uint32_t kTupleTaskId = 0;
-constexpr uint32_t kTupleStreamId = 1;
-constexpr uint32_t kTupleArgs = 2;
-constexpr uint32_t kCurrentStepTensorIndex = 0;
-constexpr uint32_t kCurrentEpochTensorIndex = 1;
-constexpr uint32_t kStepsPerEpochTensorIndex = 2;
+static constexpr uint32_t kAicpuLoadFlag = 1;
+static constexpr uint32_t kAicpuUnloadFlag = 0;
+static constexpr uint32_t kTupleTaskId = 0;
+static constexpr uint32_t kTupleStreamId = 1;
+static constexpr uint32_t kTupleArgs = 2;
+static constexpr uint32_t kCurrentStepTensorIndex = 0;
+static constexpr uint32_t kCurrentEpochTensorIndex = 1;
+static constexpr uint32_t kStepsPerEpochTensorIndex = 2;
+static constexpr uint64_t kOpDebugShape = 2048;
+static constexpr uint64_t kOpDebugHostMemSize = 2048;
+static constexpr uint64_t kOpDebugDevMemSize = sizeof(void *);
+static constexpr uint8_t kNoOverflow = 0;
+static constexpr uint8_t kAiCoreOverflow = (0x1 << 0);
+static constexpr uint8_t kAtomicOverflow = (0x1 << 1);
+static constexpr uint8_t kAllOverflow = (kAiCoreOverflow | kAtomicOverflow);
+static const std::map<uint32_t, std::string> kOverflowModeStr = {{kNoOverflow, "NoOverflow"},
+                                                                 {kAiCoreOverflow, "AiCoreOverflow"},
+                                                                 {kAtomicOverflow, "AtomicOverflow"},
+                                                                 {kAllOverflow, "AllOverflow"}};
+constexpr const char *kNodeNameOpDebug = "Node_OpDebug";
+constexpr const char *kOpTypeOpDebug = "Opdebug";
 
 namespace mindspore {
 namespace device {
 namespace ascend {
-void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
-void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
-void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr);
+static void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
+static void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
+static void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr);
 
 DataDumper::~DataDumper() {
   ReleaseDevMem(&dev_load_mem_);
   ReleaseDevMem(&dev_unload_mem_);
+  ReleaseDevMem(&op_debug_buffer_addr_);
+  ReleaseDevMem(&op_debug_dump_args_);
 }
 
 void DataDumper::LoadDumpInfo() {
   MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
   MS_EXCEPTION_IF_NULL(kernel_graph_);
   aicpu::dump::OpMappingInfo dump_info;
+  SetOpDebugMappingInfo(NOT_NULL(&dump_info));
   SetOpMappingInfo(NOT_NULL(&dump_info));
 
   auto kernels = kernel_graph_->execution_order();
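The overflow constants introduced above form a two-bit mask: ai-core overflow is bit 0, atomic overflow is bit 1, and op_debug_mode 3 is their OR. A tiny sketch that decomposes each mode value accordingly (the two constants are copied from the patch; the loop is just for illustration):

// Sketch: op_debug_mode values 0..3 decomposed into the ai-core and atomic overflow bits.
#include <cstdint>
#include <iostream>

int main() {
  constexpr uint8_t kAiCoreOverflow = 0x1 << 0;  // op_debug_mode 1
  constexpr uint8_t kAtomicOverflow = 0x1 << 1;  // op_debug_mode 2
  for (uint32_t mode = 0; mode <= 3; ++mode) {
    std::cout << "mode " << mode
              << ": ai-core=" << ((mode & kAiCoreOverflow) != 0)
              << " atomic=" << ((mode & kAtomicOverflow) != 0) << std::endl;
  }
  return 0;
}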
@@ -134,7 +151,7 @@ bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
 void DataDumper::UnloadDumpInfo() {
   if (!load_flag_) {
-    MS_LOG(WARNING) << "Load not success, no need to unload";
+    MS_LOG(WARNING) << "[DataDump] Load not success, no need to unload";
     return;
   }
   MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << graph_id_;
@@ -194,6 +211,84 @@ void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aic
   DumpKernelInput(kernel, args, dump_task);
 }
 
+void DataDumper::SetOpDebugMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
+  MS_LOG(INFO) << "[DataDump] Add op debug info to OpMappingInfo, task id = " << debug_task_id_
+               << ", stream id = " << debug_stream_id_;
+  aicpu::dump::Task task;
+  task.set_end_graph(false);
+  task.set_task_id(debug_task_id_);
+  task.set_stream_id(debug_stream_id_);
+  task.mutable_op()->set_op_name(kNodeNameOpDebug);
+  task.mutable_op()->set_op_type(kOpTypeOpDebug);
+
+  aicpu::dump::Output output;
+  output.set_data_type(ge::proto::DataType::DT_UINT8);
+  output.set_format(GeFormat::kFormat_ND);
+  output.mutable_shape()->add_dim(kOpDebugShape);
+
+  output.set_original_name(kNodeNameOpDebug);
+  output.set_original_output_index(0);
+  output.set_original_output_format(GeFormat::kFormat_ND);
+  output.set_original_output_data_type(ge::proto::DataType::DT_UINT8);
+  // due to lhisi virtual addr bug, cannot use args now
+  output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_dump_args_)));
+  output.set_size(kOpDebugHostMemSize);
+
+  task.mutable_output()->Add(std::move(output));
+  dump_info->mutable_task()->Add(std::move(task));
+}
+
+void DataDumper::OpDebugRegister() {
+  uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode();
+  auto iter = kOverflowModeStr.find(op_debug_mode);
+  if (iter == kOverflowModeStr.end()) {
+    MS_LOG(EXCEPTION) << "Invalid op debug mode " << op_debug_mode;
+  }
+  MS_LOG(INFO) << "[DataDump] Op debug mode is " << iter->second;
+  if (op_debug_mode == kNoOverflow) {
+    return;
+  }
+
+  rtError_t rt_ret = rtMalloc(&op_debug_buffer_addr_, kOpDebugHostMemSize, RT_MEMORY_DDR);
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
+  }
+
+  rt_ret = rtMalloc(&op_debug_dump_args_, kOpDebugDevMemSize, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
+  }
+
+  rt_ret =
+    rtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed, ret = " << rt_ret;
+  }
+
+  rt_ret = rtDebugRegister(model_handle_(), op_debug_mode, op_debug_buffer_addr_, &debug_stream_id_, &debug_task_id_);
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugRegister failed, ret = " << rt_ret;
+  }
+  MS_LOG(INFO) << "[DataDump] Distribute op debug task, task id = " << debug_task_id_
+               << ", stream id = " << debug_stream_id_;
+}
+
+void DataDumper::OpDebugUnregister() {
+  uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode();
+  if (op_debug_mode == kNoOverflow) {
+    MS_LOG(INFO) << "[DataDump] Op debug mode is no overflow, no need to unregister.";
+    return;
+  }
+
+  MS_LOG(INFO) << "[DataDump] Start.";
+  rtError_t rt_ret = rtDebugUnRegister(model_handle_());
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugUnRegister failed, ret = " << rt_ret;
+  }
+}
+
 void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
   std::string proto_str;
   size_t proto_size = dump_info.ByteSizeLong();
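OpDebugRegister above sets up two allocations: a 2048-byte buffer that receives overflow status, and a pointer-sized cell that stores that buffer's address (the cell, not the buffer, is what SetOpDebugMappingInfo publishes as the task output address). A sketch of that layout using plain host memory as a stand-in for rtMalloc/rtMemcpy, purely to illustrate the indirection:

// Sketch: the two-level layout behind op_debug_buffer_addr_ / op_debug_dump_args_.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  constexpr uint64_t kOpDebugHostMemSize = 2048;           // overflow status buffer size from the patch
  std::vector<uint8_t> debug_buffer(kOpDebugHostMemSize);  // stand-in for op_debug_buffer_addr_
  void *buffer_addr = debug_buffer.data();

  void *dump_args_cell = nullptr;  // stand-in for op_debug_dump_args_ (pointer-sized cell)
  std::memcpy(&dump_args_cell, &buffer_addr, sizeof(void *));

  // The dump task output address points at the cell; the cell in turn holds the buffer address.
  std::cout << "buffer at " << buffer_addr << ", cell holds " << dump_args_cell << std::endl;
  return 0;
}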
@@ -21,6 +21,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <functional>
 #include "backend/session/kernel_graph.h"
 
 namespace aicpu {
@@ -36,25 +37,38 @@ namespace ascend {
 using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;
 class DataDumper {
  public:
-  DataDumper(const session::KernelGraph *kernel_graph,
-             const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map)
-    : load_flag_(false),
+  DataDumper(const session::KernelGraph *kernel_graph, NotNull<std::function<void *()>> model_handle)
+    : model_handle_(model_handle),
+      debug_task_id_(-1),
+      debug_stream_id_(-1),
+      op_debug_buffer_addr_(nullptr),
+      op_debug_dump_args_(nullptr),
+      load_flag_(false),
       dev_load_mem_(nullptr),
       dev_unload_mem_(nullptr),
       graph_id_(UINT32_MAX),
-      kernel_graph_(kernel_graph),
-      runtime_info_map_(runtime_info_map) {}
+      kernel_graph_(kernel_graph) {}
   ~DataDumper();
+  void set_runtime_info(const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info) {
+    runtime_info_map_ = runtime_info;
+  }
 
   void LoadDumpInfo();
   void UnloadDumpInfo();
+  void OpDebugRegister();
+  void OpDebugUnregister();
 
  private:
   void ReleaseDevMem(void **ptr) const;
   bool KernelNeedDump(const CNodePtr &kernel) const;
   void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
+  void SetOpDebugMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
   void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;
 
+  std::function<void *()> model_handle_;
+  uint32_t debug_task_id_;
+  uint32_t debug_stream_id_;
+  void *op_debug_buffer_addr_;
+  void *op_debug_dump_args_;
   bool load_flag_;
   void *dev_load_mem_;
   void *dev_unload_mem_;
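Taken together with the runtime changes above, the new DataDumper interface is driven in a fixed order: construct with the model-handle getter, OpDebugRegister before DistributeTask, set_runtime_info plus LoadDumpInfo after task distribution, and UnloadDumpInfo plus OpDebugUnregister on teardown. A call-order sketch with a simplified stand-in class; LifecycleDumper mirrors only the method names and is not the real DataDumper:

// Sketch: the register -> load -> unload -> unregister call order used by AscendKernelRuntime.
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct LifecycleDumper {  // simplified stand-in, not the real DataDumper
  explicit LifecycleDumper(std::function<void *()> model_handle) : model_handle_(std::move(model_handle)) {}
  void OpDebugRegister() { std::cout << "register overflow debug, model handle " << model_handle_() << "\n"; }
  void set_runtime_info(const std::map<std::string, int> &info) { runtime_info_ = info; }
  void LoadDumpInfo() { std::cout << "load dump info for " << runtime_info_.size() << " kernel(s)\n"; }
  void UnloadDumpInfo() { std::cout << "unload dump info\n"; }
  void OpDebugUnregister() { std::cout << "unregister overflow debug\n"; }
  std::function<void *()> model_handle_;
  std::map<std::string, int> runtime_info_;
};

int main() {
  auto dumper = std::make_shared<LifecycleDumper>(
    [] { return reinterpret_cast<void *>(static_cast<uintptr_t>(0x1000)); });
  dumper->OpDebugRegister();                              // DistributeDebugTask, before DistributeTask
  dumper->set_runtime_info({{"Default/Conv2D-op2", 0}});  // LaunchDataDump, after tasks are distributed
  dumper->LoadDumpInfo();
  dumper->UnloadDumpInfo();                               // ClearGraphModelMap / ClearGraphRuntimeResource
  dumper->OpDebugUnregister();
  return 0;
}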
@@ -38,6 +38,10 @@ bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, g
   return true;
 }
 
+void *ModelRunner::GetModelHandle(uint32_t model_id) const { return nullptr; }
+
+bool ModelRunner::DistributeTask(uint32_t model_id) { return true; }
+
 const std::vector<uint32_t> &ModelRunner::GetTaskIdList(uint32_t model_id) const {
   static std::vector<uint32_t> task_id_list;
   return task_id_list;
@@ -28,6 +28,8 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
 } // namespace tasksink
 void DataDumper::LoadDumpInfo() {}
 void DataDumper::UnloadDumpInfo() {}
+void DataDumper::OpDebugRegister() {}
+void DataDumper::OpDebugUnregister() {}
 DataDumper::~DataDumper() {}
 } // namespace ascend
 } // namespace device