From 1d3385f9943d4c9b9f7e3e5d5b73a8d8e29b1c88 Mon Sep 17 00:00:00 2001 From: liudingyan Date: Fri, 12 Mar 2021 16:42:09 +0800 Subject: [PATCH 1/2] modify geloge and report errmsg --- ge/omm/csa_interact.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ge/omm/csa_interact.cc b/ge/omm/csa_interact.cc index 1b33ddbd..98e23a5b 100644 --- a/ge/omm/csa_interact.cc +++ b/ge/omm/csa_interact.cc @@ -79,6 +79,7 @@ Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, ErrorModule error_module) { if (!is_init_) { GELOGE(INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState"); + REPORT_INNER_ERROR("E19999", "CsaInteract has not init, can't WriteJobState"); return INTERNAL_ERROR; } if ((curr_state_ == JOBSTATE_FAILED) || (curr_state_ == JOBSTATE_KILLED)) { @@ -107,7 +108,9 @@ Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, content = content_json.dump(); } catch (const nlohmann::json::exception &e) { + GELOGE(INTERNAL_ERROR, "construct json object failed."); GELOGE(INTERNAL_ERROR, "build jobstate content json string failed, exception:%s job_state:%u", e.what(), job_state); + REPORT_INNER_ERROR("E19999", "construct json object failed. exception:%s job_state:%u", e.what(), job_state); return INTERNAL_ERROR; } @@ -169,6 +172,7 @@ void CsaInteract::WriteInternalErrorCode() { Status CsaInteract::WriteHcomDetection(const std::string &content) { if (!is_init_) { GELOGE(INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState"); + REPORT_INNER_ERROR("E19999", "WriteHcomDetection failed. 
CsaInteract has not init, can't WriteJobState"); return INTERNAL_ERROR; } @@ -192,28 +196,38 @@ Status CsaInteract::WriteFile(const std::string &file_name, const std::string &c int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); if (fd == EN_ERROR) { if (MakePath(file_name) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "MakePath failed."); GELOGE(INTERNAL_ERROR, "csainteract create file path fail, errno is %d", errno); + REPORT_CALL_ERROR("E19999", "MakePath failed. create file path fail, errno is %d", errno); return INTERNAL_ERROR; } fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); if (fd == EN_ERROR) { + GELOGE(INTERNAL_ERROR, "mmOpen2 failed."); GELOGE(INTERNAL_ERROR, "open file fail, errno is %d", errno); + REPORT_CALL_ERROR("E19999", "mmOpen2 failed. open file fail, errno is %d", errno); return INTERNAL_ERROR; } } mmSsize_t ret = mmWrite(fd, reinterpret_cast(const_cast(content.c_str())), content.length()); if (ret == EN_ERROR) { + GELOGE(INTERNAL_ERROR, "mmWrite failed."); GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); + REPORT_CALL_ERROR("E19999", "mmWrite failed. write file fail, errno is %d", errno); ret = mmClose(fd); if (ret == EN_ERROR) { + GELOGE(INTERNAL_ERROR, "mmClose failed."); GELOGE(INTERNAL_ERROR, "close file fail, error is %d", errno); + REPORT_CALL_ERROR("E19999", "mmClose failed. close file fail, error is %d", errno); } return INTERNAL_ERROR; } ret = mmClose(fd); if (ret == EN_ERROR) { + GELOGE(INTERNAL_ERROR, "mmClose failed."); GELOGE(INTERNAL_ERROR, "close file fail, error is %d", errno); + REPORT_CALL_ERROR("E19999", "mmClose failed. 
close file fail, error is %d", errno); return INTERNAL_ERROR; } @@ -242,7 +256,9 @@ Status CsaInteract::MakePath(const std::string &file_name) { std::string pre_path = file_path.substr(0, found + 1); if (mmAccess(pre_path.c_str()) != EN_OK) { if (mmMkdir(pre_path.c_str(), M_IRWXU) != EN_OK) { + GELOGE(INTERNAL_ERROR, "mmMkdir failed."); GELOGE(INTERNAL_ERROR, "csainteract mkdir fail, errno is %d", errno); + REPORT_CALL_ERROR("E19999", "mmMkdir failed. mkdir fail, errno is %d", errno); return INTERNAL_ERROR; } } From 6ae54b2794bbbb08c8fa9d30b463841a5787d2ed Mon Sep 17 00:00:00 2001 From: liudingyan Date: Fri, 12 Mar 2021 17:42:31 +0800 Subject: [PATCH 2/2] modify geloge and report errmsg --- ge/host_cpu_engine/engine/host_cpu_engine.cc | 1 + .../host_cpu_ops_kernel_builder.cc | 7 +++++ ge/plugin/engine/engine_manage.cc | 27 ++++++++++++------- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.cc b/ge/host_cpu_engine/engine/host_cpu_engine.cc index cdbad1ed..f5a5032a 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.cc +++ b/ge/host_cpu_engine/engine/host_cpu_engine.cc @@ -35,6 +35,7 @@ Status HostCpuEngine::Initialize(const std::map &options) { ops_kernel_store_ = MakeShared(); if (ops_kernel_store_ == nullptr) { GELOGE(FAILED, "Make HostCpuOpsKernelInfoStore failed."); + REPORT_INNER_ERROR("E19999", "Initialize FAILED. 
Make HostCpuOpsKernelInfoStore failed."); return FAILED; } } diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc index adb252bc..70a4a802 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc @@ -73,9 +73,13 @@ Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { GeShape output_shape = output_tensor.GetShape(); if ((TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size) != GRAPH_SUCCESS) || (output_mem_size < 0)) { + GELOGE(FAILED, "CalcTensorMemSize failed."); GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); + REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed. op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", + name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); return FAILED; } GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", @@ -84,8 +88,11 @@ Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { TensorUtils::SetSize(output_tensor, output_mem_size); if (op_desc->UpdateOutputDesc(static_cast(i), output_tensor) != GRAPH_SUCCESS) { + GELOGE(FAILED, "UpdateOutputDesc failed."); GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); + REPORT_CALL_ERROR("E19999", "UpdateOutputDesc failed. 
op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, + TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); return FAILED; } } diff --git a/ge/plugin/engine/engine_manage.cc b/ge/plugin/engine/engine_manage.cc index a14c92ea..370021a5 100644 --- a/ge/plugin/engine/engine_manage.cc +++ b/ge/plugin/engine/engine_manage.cc @@ -29,7 +29,8 @@ std::unique_ptr> EngineManager::engine_map_; Status EngineManager::RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr) { if (engine_ptr == nullptr) { - GELOGE(FAILED, "enginePtr is nullptr"); + GELOGE(FAILED, "RegisterEngine failed. as input param engine_ptr is nullptr"); + REPORT_INNER_ERROR("E19999", "RegisterEngine failed. as input param engine_ptr is nullptr"); return FAILED; } @@ -64,7 +65,8 @@ void RegisterAiCoreEngine() { DNNEngineAttribute attr_aicore = {ai_core, mem_type_aicore, COST_0, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr aicore_engine_ptr = MakeShared(attr_aicore); if (aicore_engine_ptr == nullptr) { - GELOGE(ge::FAILED, "make aiCoreEnginePtr failed"); + GELOGE(ge::FAILED, "RegisterAiCoreEngine failed. as make aiCoreEnginePtr failed"); + REPORT_INNER_ERROR("E19999", "RegisterAiCoreEngine failed. as make aiCoreEnginePtr failed"); return; } if (EngineManager::RegisterEngine(ai_core, aicore_engine_ptr) != SUCCESS) { @@ -80,7 +82,8 @@ void RegisterVectorEngine() { DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr vectorcore_engine_ptr = MakeShared(attr_vector_core); if (vectorcore_engine_ptr == nullptr) { - GELOGE(ge::FAILED, "make vectorCoreEnginePtr failed"); + GELOGE(ge::FAILED, "RegisterVectorEngine failed. as make vectorCoreEnginePtr failed"); + REPORT_INNER_ERROR("E19999", "RegisterVectorEngine failed. 
as make vectorCoreEnginePtr failed"); return; } if (EngineManager::RegisterEngine(vector_core, vectorcore_engine_ptr) != SUCCESS) { @@ -95,7 +98,8 @@ void RegisterAiCpuEngine() { DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_3, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu); if (vm_engine_ptr == nullptr) { - GELOGE(ge::FAILED, "make vm_engine_ptr failed"); + GELOGE(ge::FAILED, "RegisterAiCpuEngine failed. as make vm_engine_ptr failed"); + REPORT_INNER_ERROR("E19999", "RegisterAiCpuEngine failed. as make vm_engine_ptr failed"); return; } if (EngineManager::RegisterEngine(vm_aicpu, vm_engine_ptr) != SUCCESS) { @@ -110,7 +114,8 @@ void RegisterAiCpuTFEngine() { DNNEngineAttribute attr_aicpu_tf = {vm_aicpu_tf, mem_type_aicpu_tf, COST_2, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu_tf); if (vm_engine_ptr == nullptr) { - GELOGE(ge::FAILED, "make vm_engine_ptr failed"); + GELOGE(ge::FAILED, "RegisterAiCpuTFEngine failed. as make vm_engine_ptr failed"); + REPORT_INNER_ERROR("E19999", "RegisterAiCpuTFEngine failed. as make vm_engine_ptr failed"); return; } if (EngineManager::RegisterEngine(vm_aicpu_tf, vm_engine_ptr) != SUCCESS) { @@ -126,7 +131,8 @@ void RegisterGeLocalEngine() { DNNEngineAttribute attr_ge_local = {vm_ge_local, mem_type_ge_local, COST_9, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr ge_local_engine = MakeShared(attr_ge_local); if (ge_local_engine == nullptr) { - GELOGE(ge::FAILED, "make ge_local_engine failed"); + GELOGE(ge::FAILED, "RegisterGeLocalEngine failed. as make ge_local_engine failed"); + REPORT_INNER_ERROR("E19999", "RegisterGeLocalEngine failed. 
as make ge_local_engine failed"); return; } if (EngineManager::RegisterEngine(vm_ge_local, ge_local_engine) != SUCCESS) { @@ -142,7 +148,8 @@ void RegisterHostCpuEngine() { DNNEngineAttribute attr_host_cpu = {vm_host_cpu, mem_type_host_cpu, COST_10, HOST, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr host_cpu_engine = MakeShared(attr_host_cpu); if (host_cpu_engine == nullptr) { - GELOGE(ge::FAILED, "make host_cpu_engine failed"); + GELOGE(ge::FAILED, "RegisterHostCpuEngine failed. as make host_cpu_engine failed"); + REPORT_INNER_ERROR("E19999", "RegisterHostCpuEngine failed. as make host_cpu_engine failed"); return; } if (EngineManager::RegisterEngine(vm_host_cpu, host_cpu_engine) != SUCCESS) { @@ -157,7 +164,8 @@ void RegisterRtsEngine() { DNNEngineAttribute attr_rts = {vm_rts, mem_type_rts, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr rts_engine = MakeShared(attr_rts); if (rts_engine == nullptr) { - GELOGE(ge::FAILED, "make rts_engine failed"); + GELOGE(ge::FAILED, "RegisterRtsEngine failed. as make rts_engine failed"); + REPORT_INNER_ERROR("E19999", "RegisterRtsEngine failed. as make rts_engine failed"); return; } if (EngineManager::RegisterEngine(vm_rts, rts_engine) != SUCCESS) { @@ -172,7 +180,8 @@ void RegisterHcclEngine() { DNNEngineAttribute attr_hccl = {dnn_hccl, mem_type_hccl, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr hccl_engine = MakeShared(attr_hccl); if (hccl_engine == nullptr) { - GELOGE(ge::FAILED, "make hccl_engine failed"); + GELOGE(ge::FAILED, "RegisterHcclEngine failed. as make hccl_engine failed"); + REPORT_INNER_ERROR("E19999", "RegisterHcclEngine failed. as make hccl_engine failed"); return; } if (EngineManager::RegisterEngine(dnn_hccl, hccl_engine) != SUCCESS) {