From b05916b5972a06fd4fbbe48e37525572d69b15f1 Mon Sep 17 00:00:00 2001 From: jjfeing Date: Mon, 30 Nov 2020 09:32:04 +0800 Subject: [PATCH] add trace when build operator failed --- .../tbe/tbe_kernel_parallel_build.cc | 15 +++++++++++++-- .../tbe/tbe_kernel_parallel_build.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc index 98e86d815d..e26954f35b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc @@ -28,6 +28,7 @@ #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include "backend/kernel_compiler/tbe/tbe_utils.h" #include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h" +#include "utils/trace_base.h" namespace mindspore { namespace kernel { @@ -77,11 +78,13 @@ bool TbeOpParallelBuild(const std::vector &anf_nodes) { std::string build_result; auto ret = build_manger->WaitOne(&task_id, &task_result, &build_result); if (!ret) { - MS_EXCEPTION(ArgumentError) << "Build Failed. 
wait one ret:" << ret << ", task id:" << task_id + << " trace: " << trace::DumpSourceLines(build_manger->GetAnfNodeByTaskID(task_id)); } if (task_result != "Success") { - MS_EXCEPTION(ArgumentError) << "task compile Failed, task id:" << task_id << ", cause:" << task_result; + MS_EXCEPTION(ArgumentError) << "task compile Failed, task id:" << task_id << ", cause:" << task_result + << " trace: " << trace::DumpSourceLines(build_manger->GetAnfNodeByTaskID(task_id)); } (void)build_manger->TaskFinishProcess(task_id, build_result); } @@ -248,6 +251,14 @@ void ParallelBuildManager::ResetTaskInfo() { AscendKernelBuildClient::Instance().TbeReset(); } +AnfNodePtr ParallelBuildManager::GetAnfNodeByTaskID(int32_t task_id) { + auto find_iter = task_map_.find(task_id); + if (find_iter != task_map_.end()) { + return find_iter->second.node; + } + return nullptr; +} + std::string ParallelBuildManager::ProcessBuildRetStr(const std::string &build_result) { std::string start_flag = "fusion_pattern_start"; std::string end_flag = "fusion_pattern_end"; diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h index 48d9833f06..7928c0df9b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h @@ -64,6 +64,7 @@ class ParallelBuildManager { static int StartCompileOp(const nlohmann::json &kernel_json); static bool WaitOne(int *task_id, std::string *task_result, std::string *build_result); void ResetTaskInfo(); + AnfNodePtr GetAnfNodeByTaskID(int32_t task_id); private: std::string ProcessBuildRetStr(const std::string &build_result);