diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.cc b/ge/host_cpu_engine/engine/host_cpu_engine.cc index 7d4b9df7..62dd9ec2 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.cc +++ b/ge/host_cpu_engine/engine/host_cpu_engine.cc @@ -35,6 +35,7 @@ Status HostCpuEngine::Initialize(const std::map &options) { ops_kernel_store_ = MakeShared(); if (ops_kernel_store_ == nullptr) { GELOGE(FAILED, "[Init][HostCpuEngine] fail for new HostCpuOpsKernelInfoStore."); + REPORT_INNER_ERROR("E19999", "HostCpuEngine Initialize failed. fail for new HostCpuOpsKernelInfoStore."); return FAILED; } } diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 03870522..add130c4 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -1,564 +1,212 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "init/gelib.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "common/ge/ge_util.h" -#include "common/ge/plugin_manager.h" -#include "common/profiling/profiling_manager.h" -#include "common/properties_manager.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" -#include "framework/common/util.h" -#include "framework/omg/ge_init.h" -#include "analyzer/analyzer.h" -#include "ge/ge_api_types.h" -#include "ge_local_engine/engine/host_cpu_engine.h" -#include "graph/common/ge_call_wrapper.h" -#include "graph/ge_context.h" -#include "graph/ge_global_options.h" -#include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_manager.h" -#include "graph/manager/graph_var_manager.h" -#include "omm/csa_interact.h" -#include "runtime/kernel.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" -#include "external/runtime/rt_error_codes.h" - -using Json = nlohmann::json; - -namespace ge { -namespace { -const int kDecimal = 10; -const int kSocVersionLen = 50; -const int kDefaultDeviceIdForTrain = 0; -const int kDefaultDeviceIdForInfer = -1; -const char *const kGlobalOptionFpCeilingModeDefault = "2"; -} // namespace -static std::shared_ptr instancePtr_ = nullptr; - -// Initial each module of GE, if one failed, release all -Status GELib::Initialize(const map &options) { - - - GELOGI("initial start"); - GEEVENT("[GEPERFTRACE] GE Init Start"); - // Multiple initializations are not allowed - instancePtr_ = MakeShared(); - if (instancePtr_ == nullptr) { - GELOGE(GE_CLI_INIT_FAILED, "[Init][GeLib] failed for new GELib."); - return GE_CLI_INIT_FAILED; - } - - ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kSystemInit); - map new_options; - Status ret = instancePtr_->SetRTSocVersion(options, new_options); - if (ret != SUCCESS) { - GELOGE(ret, "[Set][RTSocVersion] failed."); - return ret; - } 
- - ret = instancePtr_->SetAiCoreNum(new_options); - if (ret != SUCCESS) { - GELOGE(ret, "[Set][AiCoreNum] failed."); - return ret; - } - - instancePtr_->SetDefaultPrecisionMode(new_options); - - if (new_options.find("ge.fpCeilingMode") == new_options.end()) { - new_options["ge.fpCeilingMode"] = kGlobalOptionFpCeilingModeDefault; - } - - GetMutableGlobalOptions().insert(new_options.begin(), new_options.end()); - GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); - GE_TIMESTAMP_START(Init); - ret = instancePtr_->InnerInitialize(new_options); - if (ret != SUCCESS) { - GELOGE(ret, "[Init][GELib] failed."); - instancePtr_ = nullptr; - return ret; - } - GE_TIMESTAMP_EVENT_END(Init, "GELib::Initialize"); - return SUCCESS; -} - -Status GELib::InnerInitialize(const map &options) { - // Multiple initializations are not allowed - if (init_flag_) { - GELOGW("multi initializations"); - return SUCCESS; - } - - ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kSystemInit); - GELOGI("GE System initial."); - GE_TIMESTAMP_START(SystemInitialize); - Status initSystemStatus = SystemInitialize(options); - GE_TIMESTAMP_END(SystemInitialize, "InnerInitialize::SystemInitialize"); - if (initSystemStatus != SUCCESS) { - GELOGE(initSystemStatus, "[Init][System]failed."); - RollbackInit(); - return initSystemStatus; - } - - ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kEngineInit); - GELOGI("engineManager initial."); - GE_TIMESTAMP_START(EngineInitialize); - Status initEmStatus = engineManager_.Initialize(options); - GE_TIMESTAMP_END(EngineInitialize, "InnerInitialize::EngineInitialize"); - if (initEmStatus != SUCCESS) { - GELOGE(initEmStatus, "[Init][Enginemanager] failed, options invalid. 
"); - RollbackInit(); - return initEmStatus; - } - - ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kOpsKernelInit); - GELOGI("opsManager initial."); - GE_TIMESTAMP_START(OpsManagerInitialize); - Status initOpsStatus = opsManager_.Initialize(options); - GE_TIMESTAMP_END(OpsManagerInitialize, "InnerInitialize::OpsManagerInitialize"); - if (initOpsStatus != SUCCESS) { - GELOGE(initOpsStatus, "[Init][OpsKernelManager] failed. "); - RollbackInit(); - return initOpsStatus; - } - - ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kOpsKernelBuilderInit); - GELOGI("opsBuilderManager initial."); - GE_TIMESTAMP_START(OpsKernelBuilderManagerInitialize); - Status initOpsBuilderStatus = OpsKernelBuilderManager::Instance().Initialize(options); - GE_TIMESTAMP_END(OpsKernelBuilderManagerInitialize, "InnerInitialize::OpsKernelBuilderManager"); - if (initOpsBuilderStatus != SUCCESS) { - GELOGE(initOpsBuilderStatus, "[Init][OpsKernelBuilderManager] failed."); - RollbackInit(); - return initOpsBuilderStatus; - } - - ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kOther); - GELOGI("sessionManager initial."); - GE_TIMESTAMP_START(SessionManagerInitialize); - Status initSmStatus = sessionManager_.Initialize(options); - GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize"); - if (initSmStatus != SUCCESS) { - GELOGE(initSmStatus, "[Init][SessionManager] failed."); - RollbackInit(); - return initSmStatus; - } - - GELOGI("Start to initialize HostCpuEngine"); - GE_TIMESTAMP_START(HostCpuEngineInitialize); - Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); - GE_TIMESTAMP_END(HostCpuEngineInitialize, "InnerInitialize::HostCpuEngineInitialize"); - if (initHostCpuEngineStatus != SUCCESS) { - GELOGE(initHostCpuEngineStatus, "[Init][HostCpuEngine] failed."); - RollbackInit(); - return initHostCpuEngineStatus; - } - - GELOGI("Start to init 
Analyzer!"); - Status init_analyzer_status = ge::Analyzer::GetInstance()->Initialize(); - if (init_analyzer_status != SUCCESS) { - GELOGE(init_analyzer_status, "[Init][HostCpuEngine] failed."); - RollbackInit(); - return init_analyzer_status; - } - - init_flag_ = true; - return SUCCESS; -} - -Status GELib::SystemInitialize(const map &options) { - Status status = FAILED; - auto iter = options.find(OPTION_GRAPH_RUN_MODE); - if (iter != options.end()) { - if (GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= TRAIN) { - is_train_mode_ = true; - } - } - - InitOptions(options); - - // In train and infer, profiling is always needed. - InitProfiling(this->options_); - auto model_manager = ModelManager::GetInstance(); - GE_CHECK_NOTNULL(model_manager); - GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS, - GELOGE(FAILED, "[Enable][ExceptionDump] failed."); - return FAILED); - // 1.`is_train_mode_` means case: train - // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer - // these two case with logical device id - if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - status = InitSystemWithOptions(this->options_); - } else { - status = InitSystemWithoutOptions(); - } - return status; -} - -void GELib::InitProfiling(Options &options) { - GELOGI("Init Profiling. 
session Id: %ld, device id:%d ", options.session_id, options.device_id); - std::lock_guard lock(status_mutex_); - GetContext().Init(); - // Profiling init - if (ProfilingManager::Instance().Init(options) != SUCCESS) { - GELOGW("Profiling init failed."); - } -} - -void GELib::SetDefaultPrecisionMode(map &new_options) { - auto iter = new_options.find(PRECISION_MODE); - if (iter != new_options.end()) { - GELOGI("Find precision_mode in options, value is %s", iter->second.c_str()); - return; - } - iter = new_options.find(OPTION_GRAPH_RUN_MODE); - if (iter != new_options.end()) { - if (GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= TRAIN) { - // only train mode need to be set allow_fp32_to_fp16. - GELOGI("This is train mode, precision_mode need to be set allow_fp32_to_fp16"); - new_options.insert(std::make_pair(PRECISION_MODE, "allow_fp32_to_fp16")); - return; - } - } - GELOGI("This is not train mode, precision_mode need to be set force_fp16"); - new_options.insert(std::make_pair(PRECISION_MODE, "force_fp16")); - return; -} - -Status GELib::SetRTSocVersion(const map &options, map &new_options) { - GELOGI("Start to set SOC_VERSION"); - new_options.insert(options.begin(), options.end()); - auto it = new_options.find(ge::SOC_VERSION); - if (it != new_options.end()) { - GE_CHK_RT_RET(rtSetSocVersion(it->second.c_str())); - GELOGI("Succeeded in setting SOC_VERSION[%s] to runtime.", it->second.c_str()); - } else { - GELOGI("SOC_VERSION is not exist in options"); - char version[kSocVersionLen] = {0}; - rtError_t rt_ret = rtGetSocVersion(version, kSocVersionLen); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "[Get][SocVersion] failed."); return FAILED;) - GELOGI("Succeeded in getting SOC_VERSION[%s] from runtime.", version); - new_options.insert(std::make_pair(ge::SOC_VERSION, version)); - } - return SUCCESS; -} - -Status GELib::SetAiCoreNum(map &options) { - // Already set or get AICORE_NUM from options in offline mode - if 
(options.find(AICORE_NUM) != options.end()) { - return SUCCESS; - } - - uint32_t aicore_num = 0; - rtError_t ret = rtGetAiCoreCount(&aicore_num); - if (ret == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) { // offline without ATC Input of AiCoreNum - return SUCCESS; - } else if (ret == RT_ERROR_NONE) { // online-mode - options.emplace(std::make_pair(AICORE_NUM, std::to_string(aicore_num))); - return SUCCESS; - } - GELOGE(FAILED, "[Get][AiCoreCount] failed."); - return FAILED; -} - -void GELib::InitOptions(const map &options) { - this->options_.session_id = 0; - auto iter = options.find(OPTION_EXEC_SESSION_ID); - if (iter != options.end()) { - this->options_.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); - } - this->options_.device_id = is_train_mode_ ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer; - iter = options.find(OPTION_EXEC_DEVICE_ID); - if (iter != options.end()) { - this->options_.device_id = static_cast(std::strtol(iter->second.c_str(), nullptr, kDecimal)); - } - iter = options.find(OPTION_EXEC_JOB_ID); - if (iter != options.end()) { - this->options_.job_id = iter->second.c_str(); - } - this->options_.isUseHcom = false; - iter = options.find(OPTION_EXEC_IS_USEHCOM); - if (iter != options.end()) { - std::istringstream(iter->second) >> this->options_.isUseHcom; - } - this->options_.isUseHvd = false; - iter = options.find(OPTION_EXEC_IS_USEHVD); - if (iter != options.end()) { - std::istringstream(iter->second) >> this->options_.isUseHvd; - } - this->options_.deployMode = false; - iter = options.find(OPTION_EXEC_DEPLOY_MODE); - if (iter != options.end()) { - std::istringstream(iter->second) >> this->options_.deployMode; - } - iter = options.find(OPTION_EXEC_POD_NAME); - if (iter != options.end()) { - this->options_.podName = iter->second.c_str(); - } - iter = options.find(OPTION_EXEC_PROFILING_MODE); - if (iter != options.end()) { - this->options_.profiling_mode = iter->second.c_str(); - } - iter = options.find(OPTION_EXEC_PROFILING_OPTIONS); 
- if (iter != options.end()) { - this->options_.profiling_options = iter->second.c_str(); - } - iter = options.find(OPTION_EXEC_RANK_ID); - if (iter != options.end()) { - this->options_.rankId = std::strtoll(iter->second.c_str(), nullptr, kDecimal); - } - iter = options.find(OPTION_EXEC_RANK_TABLE_FILE); - if (iter != options.end()) { - this->options_.rankTableFile = iter->second.c_str(); - } - this->options_.enable_atomic = true; - iter = options.find(OPTION_EXEC_ATOMIC_FLAG); - GE_IF_BOOL_EXEC(iter != options.end(), - this->options_.enable_atomic = std::strtol(iter->second.c_str(), nullptr, kDecimal)); - GELOGI("ge InnerInitialize, the enable_atomic_flag in options_ is %d", this->options_.enable_atomic); -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOptions(Options &options) { - std::string mode = is_train_mode_ ? "Training" : "Online infer"; - GELOGI("%s init GELib. session Id:%ld, device id :%d ", mode.c_str(), options.session_id, options.device_id); - GEEVENT("System init with options begin, job id %s", options.job_id.c_str()); - std::lock_guard lock(status_mutex_); - GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown, - GELOGW("System init with options is already inited and not shutdown."); - return SUCCESS); - - std::vector mem_type; - mem_type.push_back(RT_MEMORY_HBM); - mem_type.push_back(RT_MEMORY_P2P_DDR); - Status initMmStatus = MemManager::Instance().Initialize(mem_type); - if (initMmStatus != SUCCESS) { - GELOGE(initMmStatus, "[Init][MemManager] failed."); - return initMmStatus; - } - - GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); - // Update CSA file - CsaInteract::GetInstance().Init(options.device_id, GetContext().TraceId()); - Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT); - GE_LOGE_IF(ret != SUCCESS, "[Write][JobState] failed, ret=%u , jobstate_running=%d , substate_env_init=%d .", - ret, JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT); - - // set device id - 
GELOGI("set logical device id:%u", options.device_id); - GetContext().SetCtxDeviceId(static_cast(options.device_id)); - GE_CHK_RT_RET(rtSetDevice(options.device_id)); - - // In the scenario that the automatic add fusion is set, but there is no cleanaddr operator, - // maybe need to check it - is_system_inited = true; - is_shutdown = false; - - GELOGI("%s init GELib success.", mode.c_str()); - - return SUCCESS; -} - -Status GELib::SystemShutdownWithOptions(const Options &options) { - std::string mode = is_train_mode_ ? "Training" : "Online infer"; - GELOGI("%s finalize GELib begin.", mode.c_str()); - std::lock_guard lock(status_mutex_); - GE_IF_BOOL_EXEC(is_shutdown || !is_system_inited, - GELOGW("System Shutdown with options is already is_shutdown or system does not inited. " - "is_shutdown:%d is_omm_inited:%d", - is_shutdown, is_system_inited); - return SUCCESS); - - GE_CHK_RT(rtDeviceReset(options.device_id)); - - // Update CSA file - Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_SUCCEED); - GE_LOGE_IF(ret != SUCCESS, "[Write][JobState] failed, ret=%u jobstate_succeed=%d .", ret, JOBSTATE_SUCCEED); - - is_system_inited = false; - is_shutdown = true; - GELOGI("%s finalize GELib success.", mode.c_str()); - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithoutOptions() { - GELOGI("Inference Init GELib begin."); - - std::vector mem_type; - mem_type.push_back(RT_MEMORY_HBM); - mem_type.push_back(RT_MEMORY_P2P_DDR); - Status initMmStatus = MemManager::Instance().Initialize(mem_type); - if (initMmStatus != SUCCESS) { - GELOGE(initMmStatus, "[Init][MemManager] failed."); - return initMmStatus; - } - GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); - - static bool is_inited = false; - if (is_inited) { - GELOGW("System init without options is already inited, don't need to init again."); - return SUCCESS; - } - is_inited = true; - GELOGI("Inference init GELib success."); - - return SUCCESS; -} - 
-string GELib::GetPath() { return PluginManager::GetPath(); } - -// Finalize all modules -Status GELib::Finalize() { - ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize); - GELOGI("finalization start"); - // Finalization is not allowed before initialization - if (!init_flag_) { - GELOGW("not initialize"); - return SUCCESS; - } - if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - GE_CHK_RT_RET(rtSetDevice(options_.device_id)); - } - Status final_state = SUCCESS; - Status mid_state; - GELOGI("engineManager finalization."); - mid_state = engineManager_.Finalize(); - if (mid_state != SUCCESS) { - GELOGW("engineManager finalize failed"); - final_state = mid_state; - } - GELOGI("sessionManager finalization."); - mid_state = sessionManager_.Finalize(); - if (mid_state != SUCCESS) { - GELOGW("sessionManager finalize failed"); - final_state = mid_state; - } - - GELOGI("opsBuilderManager finalization."); - mid_state = OpsKernelBuilderManager::Instance().Finalize(); - if (mid_state != SUCCESS) { - GELOGW("opsBuilderManager finalize failed"); - final_state = mid_state; - } - GELOGI("opsManager finalization."); - mid_state = opsManager_.Finalize(); - if (mid_state != SUCCESS) { - GELOGW("opsManager finalize failed"); - final_state = mid_state; - } - - GELOGI("VarManagerPool finalization."); - VarManagerPool::Instance().Destory(); - - GELOGI("MemManager finalization."); - MemManager::Instance().Finalize(); - - GELOGI("HostMemManager finalization."); - HostMemManager::Instance().Finalize(); - - GELOGI("HostCpuEngine finalization."); - HostCpuEngine::GetInstance().Finalize(); - - GELOGI("Analyzer finalization"); - Analyzer::GetInstance()->Finalize(); - - // Shut down profiling - ShutDownProfiling(); - - if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - GELOGI("System ShutDown."); - mid_state = SystemShutdownWithOptions(this->options_); - if (mid_state != SUCCESS) { - GELOGW("System shutdown with 
options failed"); - final_state = mid_state; - } - } - - is_train_mode_ = false; - - GetMutableGlobalOptions().erase(ENABLE_SINGLE_STREAM); - - if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - GE_CHK_RT_RET(rtDeviceReset(options_.device_id)); - } - - instancePtr_ = nullptr; - init_flag_ = false; - if (final_state != SUCCESS) { - GELOGE(FAILED, "[Check][State]finalization failed, because no init before. "); - return final_state; - } - GELOGI("finalization success."); - return SUCCESS; -} - -void GELib::ShutDownProfiling() { - std::lock_guard lock(status_mutex_); - - if (ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().StopProfiling(); - ProfilingManager::Instance().PluginUnInit(); - } -} - -// Get Singleton Instance -std::shared_ptr GELib::GetInstance() { return instancePtr_; } - -void GELib::RollbackInit() { - if (engineManager_.init_flag_) { - (void)engineManager_.Finalize(); - } - if (opsManager_.init_flag_) { - (void)opsManager_.Finalize(); - } - if (sessionManager_.init_flag_) { - (void)sessionManager_.Finalize(); - } - MemManager::Instance().Finalize(); - HostMemManager::Instance().Finalize(); - VarManagerPool::Instance().Destory(); -} - -Status GEInit::Initialize(const map &options) { - Status ret = SUCCESS; - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - ret = GELib::Initialize(options); - } - return ret; -} - -Status GEInit::Finalize() { - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr != nullptr) { - return instance_ptr->Finalize(); - } - return SUCCESS; -} - -string GEInit::GetPath() { - return GELib::GetPath(); -} -} // namespace ge +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/engine/engine_manage.h" + +#include +#include +#include + +#include "common/ge/ge_util.h" +#include "framework/common/debug/ge_log.h" +#include "plugin/engine/dnnengines.h" + +namespace ge { +std::unique_ptr> EngineManager::engine_map_; + +Status EngineManager::RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr) { + if (engine_ptr == nullptr) { + GELOGE(FAILED, "[Register][Engine] failed, as input engine_ptr is nullptr"); + REPORT_INNER_ERROR("E19999", "RegisterEngine failed, as input engine_ptr is nullptr"); + return FAILED; + } + + if (engine_map_ == nullptr) { + engine_map_.reset(new (std::nothrow) std::map()); + } + + auto it = engine_map_->find(engine_name); + if (it != engine_map_->end()) { + GELOGW("engine %s already exist.", engine_name.c_str()); + return FAILED; + } + engine_map_->emplace(engine_name, engine_ptr); + return SUCCESS; +} + +DNNEnginePtr EngineManager::GetEngine(const std::string &engine_name) { + auto it = engine_map_->find(engine_name); + if (it == engine_map_->end()) { + GELOGW("engine %s not exist.", engine_name.c_str()); + return nullptr; + } + + auto engine = it->second; + return engine; +} + +void RegisterAiCoreEngine() { + const std::string ai_core = "AIcoreEngine"; + std::vector mem_type_aicore; + mem_type_aicore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + DNNEngineAttribute attr_aicore = {ai_core, mem_type_aicore, COST_0, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr aicore_engine_ptr = MakeShared(attr_aicore); + if (aicore_engine_ptr == nullptr) { + 
GELOGE(ge::FAILED, "[Register][AiCoreEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterAiCoreEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(ai_core, aicore_engine_ptr) != SUCCESS) { + GELOGW("register ai_core failed"); + } +} + +void RegisterVectorEngine() { + const std::string vector_core = "VectorEngine"; + std::vector mem_type_aivcore; + mem_type_aivcore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + DNNEngineAttribute attr_vector_core = {vector_core, mem_type_aivcore, COST_1, + DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr vectorcore_engine_ptr = MakeShared(attr_vector_core); + if (vectorcore_engine_ptr == nullptr) { + GELOGE(ge::FAILED, "[Register][VectorEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterVectorEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(vector_core, vectorcore_engine_ptr) != SUCCESS) { + GELOGW("register vector_core failed"); + } +} + +void RegisterAiCpuEngine() { + const std::string vm_aicpu = "DNN_VM_AICPU_ASCEND"; + std::vector mem_type_aicpu; + mem_type_aicpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_3, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu); + if (vm_engine_ptr == nullptr) { + GELOGE(ge::FAILED, "[Register][AiCpuEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterAiCpuEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(vm_aicpu, vm_engine_ptr) != SUCCESS) { + GELOGW("register vmAicpuEngine failed"); + } +} + +void RegisterAiCpuTFEngine() { + const std::string vm_aicpu_tf = "DNN_VM_AICPU"; + std::vector mem_type_aicpu_tf; + mem_type_aicpu_tf.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + DNNEngineAttribute attr_aicpu_tf = {vm_aicpu_tf, mem_type_aicpu_tf, 
COST_2, DEVICE, + FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu_tf); + if (vm_engine_ptr == nullptr) { + GELOGE(ge::FAILED, "[Register][AiCpuTFEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterAiCpuTFEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(vm_aicpu_tf, vm_engine_ptr) != SUCCESS) { + GELOGW("register vmAicpuTFEngine failed"); + } +} + +void RegisterGeLocalEngine() { + const std::string vm_ge_local = "DNN_VM_GE_LOCAL"; + std::vector mem_type_ge_local; + mem_type_ge_local.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + // GeLocal use minimum priority, set it as 9 + DNNEngineAttribute attr_ge_local = {vm_ge_local, mem_type_ge_local, COST_9, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr ge_local_engine = MakeShared(attr_ge_local); + if (ge_local_engine == nullptr) { + GELOGE(ge::FAILED, "[Register][GeLocalEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterGeLocalEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(vm_ge_local, ge_local_engine) != SUCCESS) { + GELOGW("register ge_local_engine failed"); + } +} + +void RegisterHostCpuEngine() { + const std::string vm_host_cpu = "DNN_VM_HOST_CPU"; + std::vector mem_type_host_cpu; + mem_type_host_cpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + // HostCpu use minimum priority, set it as 10 + DNNEngineAttribute attr_host_cpu = {vm_host_cpu, mem_type_host_cpu, COST_10, + HOST, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr host_cpu_engine = MakeShared(attr_host_cpu); + if (host_cpu_engine == nullptr) { + GELOGE(ge::FAILED, "[Register][HostCpuEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterHostCpuEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(vm_host_cpu, host_cpu_engine) != SUCCESS) { + GELOGW("register 
host_cpu_engine failed"); + } +} + +void RegisterRtsEngine() { + const std::string vm_rts = "DNN_VM_RTS"; + std::vector mem_type_rts; + mem_type_rts.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + DNNEngineAttribute attr_rts = {vm_rts, mem_type_rts, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr rts_engine = MakeShared(attr_rts); + if (rts_engine == nullptr) { + GELOGE(ge::FAILED, "[Register][RtsEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterRtsEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(vm_rts, rts_engine) != SUCCESS) { + GELOGW("register rts_engine failed"); + } +} + +void RegisterHcclEngine() { + const std::string dnn_hccl = "DNN_HCCL"; + std::vector mem_type_hccl; + mem_type_hccl.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + DNNEngineAttribute attr_hccl = {dnn_hccl, mem_type_hccl, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr hccl_engine = MakeShared(attr_hccl); + if (hccl_engine == nullptr) { + GELOGE(ge::FAILED, "[Register][HcclEngine] failed, as malloc shared_ptr failed."); + REPORT_INNER_ERROR("E19999", "RegisterHcclEngine failed, as malloc shared_ptr failed."); + return; + } + if (EngineManager::RegisterEngine(dnn_hccl, hccl_engine) != SUCCESS) { + GELOGW("register hccl_engine failed"); + } +} + +void GetDNNEngineObjs(std::map &engines) { + RegisterAiCoreEngine(); + RegisterVectorEngine(); + RegisterAiCpuTFEngine(); + RegisterAiCpuEngine(); + RegisterGeLocalEngine(); + RegisterHostCpuEngine(); + RegisterRtsEngine(); + RegisterHcclEngine(); + + for (auto it = EngineManager::engine_map_->begin(); it != EngineManager::engine_map_->end(); ++it) { + GELOGI("get engine %s from engine plugin.", it->first.c_str()); + engines.emplace(std::pair(it->first, it->second)); + } + + GELOGI("after get engine, engine size: %zu", engines.size()); + return; +} +} // namespace ge diff --git a/ge/ir_build/atc_ir_common.cc 
b/ge/ir_build/atc_ir_common.cc index 54793e6b..237839a2 100755 --- a/ge/ir_build/atc_ir_common.cc +++ b/ge/ir_build/atc_ir_common.cc @@ -78,7 +78,7 @@ Status CheckInputFormat(const string &input_format) { if (!ge::TypeUtils::IsFormatValid(input_format.c_str())) { ErrorManager::GetInstance().ATCReportErrMessage( "E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"}); - GELOGE(ge::PARAM_INVALID, "[Check][InputFormat] [%s] is invalid!", input_format.c_str()); + GELOGE(ge::PARAM_INVALID, "[Check][InputFormat] --input_format[%s] is invalid!", input_format.c_str()); return ge::PARAM_INVALID; } return ge::SUCCESS; @@ -189,7 +189,7 @@ bool CheckDynamicDimsInputShapeValid(const map> &shape_m ErrorManager::GetInstance().ATCReportErrMessage( "E10001", {"parameter", "value", "reason"}, {"--input_format", input_format.c_str(), "input_format must be ND when set dynamic_dims"}); - GELOGE(ge::PARAM_INVALID, "[Check][Param]input_format must be ND when set dynamic_dims."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]--input_format must be ND when set dynamic_dims."); return false; } @@ -211,7 +211,7 @@ bool CheckDynamicDimsInputShapeValid(const map> &shape_m "E10001", {"parameter", "value", "reason"}, {"--input_shape's dynamic dim num", "0", "at least one dim should be -1 when set dynamic_dims"}); GELOGE(ge::PARAM_INVALID, - "[Check][Param]input_shape invalid, at least one dim should be -1 when set dynamic_dims."); + "[Check][Param]--input_shape invalid, at least one dim should be -1 when set dynamic_dims."); return false; } @@ -229,7 +229,7 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims ErrorManager::GetInstance().ATCReportErrMessage( "E10001", {"parameter", "value", "reason"}, {"--dynamic_dims", dynamic_dims.c_str(), "dynamic_dims can not be empty"}); - GELOGE(ge::PARAM_INVALID, "[Check][Param]dynamic_dims can not be empty."); + GELOGE(ge::PARAM_INVALID, "[Check][Param]--dynamic_dims can not be 
empty."); return false; } // Different parameter sets are split by ';' @@ -259,7 +259,7 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims "E10001", {"parameter", "value", "reason"}, {"--dynamic_dims's parameter", dim.c_str(), "must be positive integer"}); GELOGE(ge::PARAM_INVALID, - "[Check][Param]dynamic_dims:%s parameter must be positive integer.", dynamic_dims.c_str()); + "[Check][Param]--dynamic_dims:%s parameter must be positive integer.", dynamic_dims.c_str()); return false; } } @@ -294,8 +294,8 @@ bool ParseSingleShapeRange(std::string &shape_range, vector(!dynamic_batch_size.empty()) + static_cast(!dynamic_image_size.empty()) + static_cast(!dynamic_dims.empty()); if (param_size > 1) { - ErrorManager::GetInstance().ATCReportErrMessage("E10009", {"parameter0", "parameter1", "parameter2"}, + ErrorManager::GetInstance().ATCReportErrMessage("E10009", {"parameter0", "parameter1", "parameter2"}, {"dynamic_batch_size", "dynamic_image_size", "dynamic_dims"}); - GELOGE(ge::PARAM_INVALID, - "[Parse][Param]dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one"); - return ge::PARAM_INVALID; + GELOGE(ge::PARAM_INVALID, + "[Parse][Param]dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one"); + return ge::PARAM_INVALID; } if (param_size == 0) { @@ -770,7 +770,7 @@ Status UpdateDynamicInputShapeRange(const ge::ComputeGraphPtr &compute_graph, co GE_CHECK_NOTNULL(op); if (op->GetType() == DATA) { if (UpdateDataOpShapeRange(op, shape_range_map) != SUCCESS) { - GELOGE(FAILED, "[Update][Data] op[%s] invalid.", op->GetName().c_str()); + GELOGE(FAILED, "[Update][InputShapeRange] op[%s] invalid.", op->GetName().c_str()); return FAILED; } } diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index 22a62cad..a556f740 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ 
-129,7 +129,7 @@ Status OpsKernelBuilderManager::CalcOpRunningParam(Node &node) const { const std::string &lib_name = op_desc->GetOpKernelLibName(); auto it = ops_kernel_builders_.find(lib_name); if (it == ops_kernel_builders_.end()) { - GELOGE(INTERNAL_ERROR,"[Check][Lib_name] libName = %s, node = %s not exist.", + GELOGE(INTERNAL_ERROR,"[Check][Lib_Name] libName = %s, node = %s not exist.", lib_name.c_str(), op_desc->GetName().c_str()); REPORT_INNER_ERROR("E19999", "CalcOpRunningParam failed, libName = %s, node = %s not exist.", lib_name.c_str(), op_desc->GetName().c_str()); diff --git a/ge/opskernel_manager/ops_kernel_manager.cc b/ge/opskernel_manager/ops_kernel_manager.cc index 3de80805..57379552 100644 --- a/ge/opskernel_manager/ops_kernel_manager.cc +++ b/ge/opskernel_manager/ops_kernel_manager.cc @@ -183,16 +183,20 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, return GE_GRAPH_OPTIONS_INVALID; } } catch (std::invalid_argument &) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Data] failed, its value %s is invalid_argument, it must be 0 or 1.", + GELOGE(GE_GRAPH_OPTIONS_INVALID, + "[Check][Data] failed, option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", iter->second.c_str()); - REPORT_INNER_ERROR("E19999", "ParsePluginOptions failed, its value %s is invalid_argument, it must be 0 or 1.", + REPORT_INNER_ERROR("E19999", + "ParsePluginOptions failed, option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (std::out_of_range &) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Data]failed, its value %s is out of range, it must be 0 or 1.", - iter->second.c_str()); - REPORT_INNER_ERROR("E19999", "ParsePluginOptions failed, its value %s is out of range, it must be 0 or 1.", + GELOGE(GE_GRAPH_OPTIONS_INVALID, + "[Check][Data]failed, option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", iter->second.c_str()); + 
REPORT_INNER_ERROR("E19999", + "ParsePluginOptions failed, option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", + iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (...) { GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Data]option_key:%s, its value %s is invalid, it must be 0 or 1.", diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt index a984384e..3aace4ac 100644 --- a/ge/plugin/engine/CMakeLists.txt +++ b/ge/plugin/engine/CMakeLists.txt @@ -41,7 +41,7 @@ target_link_options(engine PRIVATE target_link_libraries(engine PRIVATE $ -Wl,--no-as-needed - c_sec + c_sec slog -Wl,--as-needed -lrt