|
|
|
@@ -1,564 +1,212 @@ |
|
|
|
/** |
|
|
|
* Copyright 2020 Huawei Technologies Co., Ltd |
|
|
|
* |
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
* you may not use this file except in compliance with the License. |
|
|
|
* You may obtain a copy of the License at |
|
|
|
* |
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
|
|
* |
|
|
|
* Unless required by applicable law or agreed to in writing, software |
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
|
* See the License for the specific language governing permissions and |
|
|
|
* limitations under the License. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "init/gelib.h" |
|
|
|
|
|
|
|
#include <dlfcn.h> |
|
|
|
#include <cstdlib> |
|
|
|
#include <mutex> |
|
|
|
#include <set> |
|
|
|
#include <sstream> |
|
|
|
#include <string> |
|
|
|
#include <utility> |
|
|
|
|
|
|
|
#include "common/ge/ge_util.h" |
|
|
|
#include "common/ge/plugin_manager.h" |
|
|
|
#include "common/profiling/profiling_manager.h" |
|
|
|
#include "common/properties_manager.h" |
|
|
|
#include "framework/common/debug/ge_log.h" |
|
|
|
#include "framework/common/debug/log.h" |
|
|
|
#include "framework/common/util.h" |
|
|
|
#include "framework/omg/ge_init.h" |
|
|
|
#include "analyzer/analyzer.h" |
|
|
|
#include "ge/ge_api_types.h" |
|
|
|
#include "ge_local_engine/engine/host_cpu_engine.h" |
|
|
|
#include "graph/common/ge_call_wrapper.h" |
|
|
|
#include "graph/ge_context.h" |
|
|
|
#include "graph/ge_global_options.h" |
|
|
|
#include "graph/load/model_manager/model_manager.h" |
|
|
|
#include "graph/manager/graph_mem_allocator.h" |
|
|
|
#include "graph/manager/host_mem_manager.h" |
|
|
|
#include "graph/manager/graph_var_manager.h" |
|
|
|
#include "omm/csa_interact.h" |
|
|
|
#include "runtime/kernel.h" |
|
|
|
#include "opskernel_manager/ops_kernel_builder_manager.h" |
|
|
|
#include "external/runtime/rt_error_codes.h" |
|
|
|
|
|
|
|
using Json = nlohmann::json; |
|
|
|
|
|
|
|
namespace ge { |
|
|
|
namespace {
// Radix handed to strtol/strtoll when numeric option strings are parsed.
constexpr int kDecimal = 10;
// Size of the character buffer passed to rtGetSocVersion.
constexpr int kSocVersionLen = 50;
// Device id used in train mode when the options carry no device id.
constexpr int kDefaultDeviceIdForTrain = 0;
// Device id used outside train mode when the options carry no device id.
constexpr int kDefaultDeviceIdForInfer = -1;
// Value stored under "ge.fpCeilingMode" when the caller did not provide one.
const char *const kGlobalOptionFpCeilingModeDefault = "2";
}  // namespace
|
|
|
// File-local holder of the GELib singleton. It is assigned by GELib::Initialize,
// cleared by GELib::Finalize, and handed out by GELib::GetInstance.
static std::shared_ptr<GELib> instancePtr_;
|
|
|
|
|
|
|
// Initial each module of GE, if one failed, release all |
|
|
|
Status GELib::Initialize(const map<string, string> &options) { |
|
|
|
|
|
|
|
|
|
|
|
GELOGI("initial start"); |
|
|
|
GEEVENT("[GEPERFTRACE] GE Init Start"); |
|
|
|
// Multiple initializations are not allowed |
|
|
|
instancePtr_ = MakeShared<GELib>(); |
|
|
|
if (instancePtr_ == nullptr) { |
|
|
|
GELOGE(GE_CLI_INIT_FAILED, "[Init][GeLib] failed for new GELib."); |
|
|
|
return GE_CLI_INIT_FAILED; |
|
|
|
} |
|
|
|
|
|
|
|
ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kSystemInit); |
|
|
|
map<string, string> new_options; |
|
|
|
Status ret = instancePtr_->SetRTSocVersion(options, new_options); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "[Set][RTSocVersion] failed."); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
ret = instancePtr_->SetAiCoreNum(new_options); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "[Set][AiCoreNum] failed."); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
instancePtr_->SetDefaultPrecisionMode(new_options); |
|
|
|
|
|
|
|
if (new_options.find("ge.fpCeilingMode") == new_options.end()) { |
|
|
|
new_options["ge.fpCeilingMode"] = kGlobalOptionFpCeilingModeDefault; |
|
|
|
} |
|
|
|
|
|
|
|
GetMutableGlobalOptions().insert(new_options.begin(), new_options.end()); |
|
|
|
GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); |
|
|
|
GE_TIMESTAMP_START(Init); |
|
|
|
ret = instancePtr_->InnerInitialize(new_options); |
|
|
|
if (ret != SUCCESS) { |
|
|
|
GELOGE(ret, "[Init][GELib] failed."); |
|
|
|
instancePtr_ = nullptr; |
|
|
|
return ret; |
|
|
|
} |
|
|
|
GE_TIMESTAMP_EVENT_END(Init, "GELib::Initialize"); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
// Initializes the GE sub-modules in a fixed order: system, engine manager,
// ops kernel manager, ops kernel builder manager, session manager, host CPU
// engine and analyzer.
//
// If the instance is already initialized, a warning is logged and SUCCESS is
// returned. When any step fails, RollbackInit() is called and the status of the
// failing step is returned; init_flag_ is only set after every step succeeded.
Status GELib::InnerInitialize(const map<string, string> &options) {
  // Multiple initializations are not allowed
  if (init_flag_) {
    GELOGW("multi initializations");
    return SUCCESS;
  }

  ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kSystemInit);
  GELOGI("GE System initial.");
  GE_TIMESTAMP_START(SystemInitialize);
  Status initSystemStatus = SystemInitialize(options);
  GE_TIMESTAMP_END(SystemInitialize, "InnerInitialize::SystemInitialize");
  if (initSystemStatus != SUCCESS) {
    GELOGE(initSystemStatus, "[Init][System]failed.");
    RollbackInit();
    return initSystemStatus;
  }

  ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kEngineInit);
  GELOGI("engineManager initial.");
  GE_TIMESTAMP_START(EngineInitialize);
  Status initEmStatus = engineManager_.Initialize(options);
  GE_TIMESTAMP_END(EngineInitialize, "InnerInitialize::EngineInitialize");
  if (initEmStatus != SUCCESS) {
    GELOGE(initEmStatus, "[Init][Enginemanager] failed, options invalid. ");
    RollbackInit();
    return initEmStatus;
  }

  ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kOpsKernelInit);
  GELOGI("opsManager initial.");
  GE_TIMESTAMP_START(OpsManagerInitialize);
  Status initOpsStatus = opsManager_.Initialize(options);
  GE_TIMESTAMP_END(OpsManagerInitialize, "InnerInitialize::OpsManagerInitialize");
  if (initOpsStatus != SUCCESS) {
    GELOGE(initOpsStatus, "[Init][OpsKernelManager] failed. ");
    RollbackInit();
    return initOpsStatus;
  }

  ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kOpsKernelBuilderInit);
  GELOGI("opsBuilderManager initial.");
  GE_TIMESTAMP_START(OpsKernelBuilderManagerInitialize);
  Status initOpsBuilderStatus = OpsKernelBuilderManager::Instance().Initialize(options);
  GE_TIMESTAMP_END(OpsKernelBuilderManagerInitialize, "InnerInitialize::OpsKernelBuilderManager");
  if (initOpsBuilderStatus != SUCCESS) {
    GELOGE(initOpsBuilderStatus, "[Init][OpsKernelBuilderManager] failed.");
    RollbackInit();
    return initOpsBuilderStatus;
  }

  ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kOther);
  GELOGI("sessionManager initial.");
  GE_TIMESTAMP_START(SessionManagerInitialize);
  Status initSmStatus = sessionManager_.Initialize(options);
  GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize");
  if (initSmStatus != SUCCESS) {
    GELOGE(initSmStatus, "[Init][SessionManager] failed.");
    RollbackInit();
    return initSmStatus;
  }

  GELOGI("Start to initialize HostCpuEngine");
  GE_TIMESTAMP_START(HostCpuEngineInitialize);
  Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
  GE_TIMESTAMP_END(HostCpuEngineInitialize, "InnerInitialize::HostCpuEngineInitialize");
  if (initHostCpuEngineStatus != SUCCESS) {
    GELOGE(initHostCpuEngineStatus, "[Init][HostCpuEngine] failed.");
    RollbackInit();
    return initHostCpuEngineStatus;
  }

  GELOGI("Start to init Analyzer!");
  Status init_analyzer_status = ge::Analyzer::GetInstance()->Initialize();
  if (init_analyzer_status != SUCCESS) {
    // Name the module that actually failed; this message used to be a copy of
    // the HostCpuEngine one above.
    GELOGE(init_analyzer_status, "[Init][Analyzer] failed.");
    RollbackInit();
    return init_analyzer_status;
  }

  init_flag_ = true;
  return SUCCESS;
}
|
|
|
|
|
|
|
// Performs the system-level part of initialization.
//
// Detects train mode from OPTION_GRAPH_RUN_MODE, fills options_ from the option
// map, starts profiling, enables the exception dump and finally initializes the
// system either with options (train mode, or a device id different from the
// infer default) or without them.
//
// Returns the status of the selected system initialization, or FAILED when the
// exception dump cannot be enabled.
Status GELib::SystemInitialize(const map<string, string> &options) {
  const auto run_mode_it = options.find(OPTION_GRAPH_RUN_MODE);
  if ((run_mode_it != options.end()) &&
      (GraphRunMode(std::strtol(run_mode_it->second.c_str(), nullptr, kDecimal)) >= TRAIN)) {
    is_train_mode_ = true;
  }

  // Must run after the train-mode detection: the default device id depends on it.
  InitOptions(options);

  // In train and infer, profiling is always needed.
  InitProfiling(this->options_);
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS,
                  GELOGE(FAILED, "[Enable][ExceptionDump] failed.");
                  return FAILED);

  // Two cases work with a logical device id:
  //   1. train (`is_train_mode_`)
  //   2. online infer (not train, but a device id other than the infer default)
  const bool with_logical_device = is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer);
  if (!with_logical_device) {
    return InitSystemWithoutOptions();
  }
  return InitSystemWithOptions(this->options_);
}
|
|
|
|
|
|
|
// Initializes the GE context and the profiling manager while holding
// status_mutex_. A profiling failure is only logged as a warning; it does not
// abort initialization.
void GELib::InitProfiling(Options &options) {
  GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id);
  std::lock_guard<std::mutex> guard(status_mutex_);
  GetContext().Init();

  // Profiling init
  const auto profiling_status = ProfilingManager::Instance().Init(options);
  if (profiling_status != SUCCESS) {
    GELOGW("Profiling init failed.");
  }
}
|
|
|
|
|
|
|
// Makes sure new_options contains a PRECISION_MODE entry.
//
// An entry that is already present is left untouched. Otherwise the mode is
// "allow_fp32_to_fp16" when OPTION_GRAPH_RUN_MODE selects train mode and
// "force_fp16" in every other case.
void GELib::SetDefaultPrecisionMode(map<string, string> &new_options) {
  const auto precision_it = new_options.find(PRECISION_MODE);
  if (precision_it != new_options.end()) {
    GELOGI("Find precision_mode in options, value is %s", precision_it->second.c_str());
    return;
  }

  const auto run_mode_it = new_options.find(OPTION_GRAPH_RUN_MODE);
  const bool train_mode =
      (run_mode_it != new_options.end()) &&
      (GraphRunMode(std::strtol(run_mode_it->second.c_str(), nullptr, kDecimal)) >= TRAIN);

  if (train_mode) {
    // only train mode need to be set allow_fp32_to_fp16.
    GELOGI("This is train mode, precision_mode need to be set allow_fp32_to_fp16");
    new_options.insert(std::make_pair(PRECISION_MODE, "allow_fp32_to_fp16"));
  } else {
    GELOGI("This is not train mode, precision_mode need to be set force_fp16");
    new_options.insert(std::make_pair(PRECISION_MODE, "force_fp16"));
  }
}
|
|
|
|
|
|
|
// Copies `options` into `new_options` and synchronizes the SoC version with the
// runtime.
//
// When the options name a SOC_VERSION it is pushed to the runtime with
// rtSetSocVersion; otherwise the version is read with rtGetSocVersion and added
// to new_options.
//
// Returns SUCCESS, FAILED when the runtime query fails, or whatever
// GE_CHK_RT_RET returns when rtSetSocVersion fails.
Status GELib::SetRTSocVersion(const map<string, string> &options, map<string, string> &new_options) {
  GELOGI("Start to set SOC_VERSION");
  new_options.insert(options.begin(), options.end());

  const auto soc_it = new_options.find(ge::SOC_VERSION);
  if (soc_it == new_options.end()) {
    GELOGI("SOC_VERSION is not exist in options");
    char version[kSocVersionLen] = {0};
    rtError_t rt_ret = rtGetSocVersion(version, kSocVersionLen);
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "[Get][SocVersion] failed."); return FAILED;)
    GELOGI("Succeeded in getting SOC_VERSION[%s] from runtime.", version);
    new_options.insert(std::make_pair(ge::SOC_VERSION, version));
    return SUCCESS;
  }

  GE_CHK_RT_RET(rtSetSocVersion(soc_it->second.c_str()));
  GELOGI("Succeeded in setting SOC_VERSION[%s] to runtime.", soc_it->second.c_str());
  return SUCCESS;
}
|
|
|
|
|
|
|
// Ensures the AI Core count is available in `options` when the runtime can
// provide it.
//
// Nothing is done when AICORE_NUM is already present. Otherwise
// rtGetAiCoreCount is queried: ACL_ERROR_RT_FEATURE_NOT_SUPPORT is accepted
// without adding an entry, RT_ERROR_NONE stores the count as a decimal string,
// and any other result is reported as FAILED.
Status GELib::SetAiCoreNum(map<string, string> &options) {
  // Already set or get AICORE_NUM from options in offline mode
  if (options.count(AICORE_NUM) != 0) {
    return SUCCESS;
  }

  uint32_t aicore_num = 0;
  const rtError_t ret = rtGetAiCoreCount(&aicore_num);
  const bool not_supported = (ret == ACL_ERROR_RT_FEATURE_NOT_SUPPORT);  // offline without ATC Input of AiCoreNum
  if (!not_supported && (ret != RT_ERROR_NONE)) {
    GELOGE(FAILED, "[Get][AiCoreCount] failed.");
    return FAILED;
  }

  if (!not_supported) {  // online-mode
    options.emplace(std::make_pair(AICORE_NUM, std::to_string(aicore_num)));
  }
  return SUCCESS;
}
|
|
|
|
|
|
|
void GELib::InitOptions(const map<string, string> &options) { |
|
|
|
this->options_.session_id = 0; |
|
|
|
auto iter = options.find(OPTION_EXEC_SESSION_ID); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); |
|
|
|
} |
|
|
|
this->options_.device_id = is_train_mode_ ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer; |
|
|
|
iter = options.find(OPTION_EXEC_DEVICE_ID); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal)); |
|
|
|
} |
|
|
|
iter = options.find(OPTION_EXEC_JOB_ID); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.job_id = iter->second.c_str(); |
|
|
|
} |
|
|
|
this->options_.isUseHcom = false; |
|
|
|
iter = options.find(OPTION_EXEC_IS_USEHCOM); |
|
|
|
if (iter != options.end()) { |
|
|
|
std::istringstream(iter->second) >> this->options_.isUseHcom; |
|
|
|
} |
|
|
|
this->options_.isUseHvd = false; |
|
|
|
iter = options.find(OPTION_EXEC_IS_USEHVD); |
|
|
|
if (iter != options.end()) { |
|
|
|
std::istringstream(iter->second) >> this->options_.isUseHvd; |
|
|
|
} |
|
|
|
this->options_.deployMode = false; |
|
|
|
iter = options.find(OPTION_EXEC_DEPLOY_MODE); |
|
|
|
if (iter != options.end()) { |
|
|
|
std::istringstream(iter->second) >> this->options_.deployMode; |
|
|
|
} |
|
|
|
iter = options.find(OPTION_EXEC_POD_NAME); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.podName = iter->second.c_str(); |
|
|
|
} |
|
|
|
iter = options.find(OPTION_EXEC_PROFILING_MODE); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.profiling_mode = iter->second.c_str(); |
|
|
|
} |
|
|
|
iter = options.find(OPTION_EXEC_PROFILING_OPTIONS); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.profiling_options = iter->second.c_str(); |
|
|
|
} |
|
|
|
iter = options.find(OPTION_EXEC_RANK_ID); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.rankId = std::strtoll(iter->second.c_str(), nullptr, kDecimal); |
|
|
|
} |
|
|
|
iter = options.find(OPTION_EXEC_RANK_TABLE_FILE); |
|
|
|
if (iter != options.end()) { |
|
|
|
this->options_.rankTableFile = iter->second.c_str(); |
|
|
|
} |
|
|
|
this->options_.enable_atomic = true; |
|
|
|
iter = options.find(OPTION_EXEC_ATOMIC_FLAG); |
|
|
|
GE_IF_BOOL_EXEC(iter != options.end(), |
|
|
|
this->options_.enable_atomic = std::strtol(iter->second.c_str(), nullptr, kDecimal)); |
|
|
|
GELOGI("ge InnerInitialize, the enable_atomic_flag in options_ is %d", this->options_.enable_atomic); |
|
|
|
} |
|
|
|
|
|
|
|
// System initialization for the cases that run with a logical device id
// (training and online inference).
//
// While holding status_mutex_ it initializes the HBM / P2P DDR memory managers
// and the host memory manager, writes the job state to the CSA file (a write
// failure is only logged), records the device id in the context and binds the
// device with rtSetDevice.
//
// Returns SUCCESS when the system is already initialized and not shut down, or
// after a complete initialization; otherwise the status of the failing step.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOptions(Options &options) {
  std::string mode = is_train_mode_ ? "Training" : "Online infer";
  GELOGI("%s init GELib. session Id:%ld, device id :%d ", mode.c_str(), options.session_id, options.device_id);
  GEEVENT("System init with options begin, job id %s", options.job_id.c_str());
  std::lock_guard<std::mutex> lock(status_mutex_);
  GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown,
                  GELOGW("System init with options is already inited and not shutdown.");
                  return SUCCESS);

  std::vector<rtMemType_t> mem_type;
  mem_type.push_back(RT_MEMORY_HBM);
  mem_type.push_back(RT_MEMORY_P2P_DDR);
  Status initMmStatus = MemManager::Instance().Initialize(mem_type);
  if (initMmStatus != SUCCESS) {
    GELOGE(initMmStatus, "[Init][MemManager] failed.");
    return initMmStatus;
  }

  GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize());
  // Update CSA file
  CsaInteract::GetInstance().Init(options.device_id, GetContext().TraceId());
  Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT);
  GE_LOGE_IF(ret != SUCCESS, "[Write][JobState] failed, ret=%u , jobstate_running=%d , substate_env_init=%d .",
             ret, JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT);

  // set device id
  // device_id is a signed value (it is logged with %d above), so %d is used
  // here as well instead of %u.
  GELOGI("set logical device id:%d", options.device_id);
  GetContext().SetCtxDeviceId(static_cast<uint32_t>(options.device_id));
  GE_CHK_RT_RET(rtSetDevice(options.device_id));

  // In the scenario that the automatic add fusion is set, but there is no cleanaddr operator,
  // maybe need to check it
  is_system_inited = true;
  is_shutdown = false;

  GELOGI("%s init GELib success.", mode.c_str());

  return SUCCESS;
}
|
|
|
|
|
|
|
// Counterpart of InitSystemWithOptions: resets the device and records the final
// job state in the CSA file while holding status_mutex_.
//
// Returns SUCCESS in all visible paths; when the system is already shut down or
// was never initialized only a warning is logged. A failed job-state write is
// logged but not propagated.
Status GELib::SystemShutdownWithOptions(const Options &options) {
  const char *const mode = is_train_mode_ ? "Training" : "Online infer";
  GELOGI("%s finalize GELib begin.", mode);

  std::lock_guard<std::mutex> guard(status_mutex_);
  GE_IF_BOOL_EXEC(is_shutdown || !is_system_inited,
                  GELOGW("System Shutdown with options is already is_shutdown or system does not inited. "
                         "is_shutdown:%d is_omm_inited:%d",
                         is_shutdown, is_system_inited);
                  return SUCCESS);

  GE_CHK_RT(rtDeviceReset(options.device_id));

  // Update CSA file
  Status write_status = CsaInteract::GetInstance().WriteJobState(JOBSTATE_SUCCEED);
  GE_LOGE_IF(write_status != SUCCESS, "[Write][JobState] failed, ret=%u jobstate_succeed=%d .", write_status,
             JOBSTATE_SUCCEED);

  is_system_inited = false;
  is_shutdown = true;
  GELOGI("%s finalize GELib success.", mode);
  return SUCCESS;
}
|
|
|
|
|
|
|
// System initialization for inference without a logical device id.
//
// The HBM / P2P DDR memory managers and the host memory manager are initialized
// on every call; a function-local flag only decides whether the call is logged
// as a first initialization or as a repeated one.
//
// Returns SUCCESS, or the status of the memory manager that failed.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithoutOptions() {
  GELOGI("Inference Init GELib begin.");

  std::vector<rtMemType_t> memory_types;
  memory_types.push_back(RT_MEMORY_HBM);
  memory_types.push_back(RT_MEMORY_P2P_DDR);
  const Status mem_status = MemManager::Instance().Initialize(memory_types);
  if (mem_status != SUCCESS) {
    GELOGE(mem_status, "[Init][MemManager] failed.");
    return mem_status;
  }
  GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize());

  static bool is_inited = false;
  if (!is_inited) {
    is_inited = true;
    GELOGI("Inference init GELib success.");
    return SUCCESS;
  }

  GELOGW("System init without options is already inited, don't need to init again.");
  return SUCCESS;
}
|
|
|
|
|
|
|
// Returns the path reported by PluginManager::GetPath().
string GELib::GetPath() {
  return PluginManager::GetPath();
}
|
|
|
|
|
|
|
// Finalize all modules |
|
|
|
Status GELib::Finalize() { |
|
|
|
ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize); |
|
|
|
GELOGI("finalization start"); |
|
|
|
// Finalization is not allowed before initialization |
|
|
|
if (!init_flag_) { |
|
|
|
GELOGW("not initialize"); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { |
|
|
|
GE_CHK_RT_RET(rtSetDevice(options_.device_id)); |
|
|
|
} |
|
|
|
Status final_state = SUCCESS; |
|
|
|
Status mid_state; |
|
|
|
GELOGI("engineManager finalization."); |
|
|
|
mid_state = engineManager_.Finalize(); |
|
|
|
if (mid_state != SUCCESS) { |
|
|
|
GELOGW("engineManager finalize failed"); |
|
|
|
final_state = mid_state; |
|
|
|
} |
|
|
|
GELOGI("sessionManager finalization."); |
|
|
|
mid_state = sessionManager_.Finalize(); |
|
|
|
if (mid_state != SUCCESS) { |
|
|
|
GELOGW("sessionManager finalize failed"); |
|
|
|
final_state = mid_state; |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("opsBuilderManager finalization."); |
|
|
|
mid_state = OpsKernelBuilderManager::Instance().Finalize(); |
|
|
|
if (mid_state != SUCCESS) { |
|
|
|
GELOGW("opsBuilderManager finalize failed"); |
|
|
|
final_state = mid_state; |
|
|
|
} |
|
|
|
GELOGI("opsManager finalization."); |
|
|
|
mid_state = opsManager_.Finalize(); |
|
|
|
if (mid_state != SUCCESS) { |
|
|
|
GELOGW("opsManager finalize failed"); |
|
|
|
final_state = mid_state; |
|
|
|
} |
|
|
|
|
|
|
|
GELOGI("VarManagerPool finalization."); |
|
|
|
VarManagerPool::Instance().Destory(); |
|
|
|
|
|
|
|
GELOGI("MemManager finalization."); |
|
|
|
MemManager::Instance().Finalize(); |
|
|
|
|
|
|
|
GELOGI("HostMemManager finalization."); |
|
|
|
HostMemManager::Instance().Finalize(); |
|
|
|
|
|
|
|
GELOGI("HostCpuEngine finalization."); |
|
|
|
HostCpuEngine::GetInstance().Finalize(); |
|
|
|
|
|
|
|
GELOGI("Analyzer finalization"); |
|
|
|
Analyzer::GetInstance()->Finalize(); |
|
|
|
|
|
|
|
// Shut down profiling |
|
|
|
ShutDownProfiling(); |
|
|
|
|
|
|
|
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { |
|
|
|
GELOGI("System ShutDown."); |
|
|
|
mid_state = SystemShutdownWithOptions(this->options_); |
|
|
|
if (mid_state != SUCCESS) { |
|
|
|
GELOGW("System shutdown with options failed"); |
|
|
|
final_state = mid_state; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
is_train_mode_ = false; |
|
|
|
|
|
|
|
GetMutableGlobalOptions().erase(ENABLE_SINGLE_STREAM); |
|
|
|
|
|
|
|
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { |
|
|
|
GE_CHK_RT_RET(rtDeviceReset(options_.device_id)); |
|
|
|
} |
|
|
|
|
|
|
|
instancePtr_ = nullptr; |
|
|
|
init_flag_ = false; |
|
|
|
if (final_state != SUCCESS) { |
|
|
|
GELOGE(FAILED, "[Check][State]finalization failed, because no init before. "); |
|
|
|
return final_state; |
|
|
|
} |
|
|
|
GELOGI("finalization success."); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
void GELib::ShutDownProfiling() { |
|
|
|
std::lock_guard<std::mutex> lock(status_mutex_); |
|
|
|
|
|
|
|
if (ProfilingManager::Instance().ProfilingOn()) { |
|
|
|
ProfilingManager::Instance().StopProfiling(); |
|
|
|
ProfilingManager::Instance().PluginUnInit(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// Get Singleton Instance |
|
|
|
std::shared_ptr<GELib> GELib::GetInstance() { return instancePtr_; } |
|
|
|
|
|
|
|
void GELib::RollbackInit() { |
|
|
|
if (engineManager_.init_flag_) { |
|
|
|
(void)engineManager_.Finalize(); |
|
|
|
} |
|
|
|
if (opsManager_.init_flag_) { |
|
|
|
(void)opsManager_.Finalize(); |
|
|
|
} |
|
|
|
if (sessionManager_.init_flag_) { |
|
|
|
(void)sessionManager_.Finalize(); |
|
|
|
} |
|
|
|
MemManager::Instance().Finalize(); |
|
|
|
HostMemManager::Instance().Finalize(); |
|
|
|
VarManagerPool::Instance().Destory(); |
|
|
|
} |
|
|
|
|
|
|
|
// Initializes GELib unless an initialized instance already exists.
//
// Returns SUCCESS without doing anything when the singleton is present and its
// InitFlag() is set; otherwise returns the result of GELib::Initialize(options).
Status GEInit::Initialize(const map<string, string> &options) {
  const std::shared_ptr<GELib> current = ge::GELib::GetInstance();
  const bool already_initialized = (current != nullptr) && current->InitFlag();
  if (already_initialized) {
    return SUCCESS;
  }
  return GELib::Initialize(options);
}
|
|
|
|
|
|
|
// Finalizes the GELib singleton if one exists and returns its result; returns
// SUCCESS when there is no instance.
Status GEInit::Finalize() {
  // The local shared_ptr keeps the object alive for the whole call, even though
  // GELib::Finalize clears the global instance pointer.
  const std::shared_ptr<GELib> current = ge::GELib::GetInstance();
  if (current == nullptr) {
    return SUCCESS;
  }
  return current->Finalize();
}
|
|
|
|
|
|
|
// Forwards to GELib::GetPath().
string GEInit::GetPath() {
  string path = GELib::GetPath();
  return path;
}
|
|
|
} // namespace ge |
|
|
|
/**
|
|
|
|
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "plugin/engine/engine_manage.h"
|
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <string>
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
#include "common/ge/ge_util.h"
|
|
|
|
#include "framework/common/debug/ge_log.h"
|
|
|
|
#include "plugin/engine/dnnengines.h"
|
|
|
|
|
|
|
|
namespace ge {
|
|
|
|
// Registry of DNN engines keyed by engine name. The pointer starts out null and
// the map is allocated by RegisterEngine the first time it finds it null.
std::unique_ptr<std::map<std::string, DNNEnginePtr>> EngineManager::engine_map_;
|
|
|
|
|
|
|
|
// Adds `engine_ptr` to the engine registry under `engine_name`.
//
// The registry map is created on first use. Returns SUCCESS when the engine was
// added and FAILED when `engine_ptr` is null, when the registry cannot be
// allocated, or when an engine with the same name is already registered (the
// existing entry is kept).
Status EngineManager::RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr) {
  if (engine_ptr == nullptr) {
    GELOGE(FAILED, "[Register][Engine] failed, as input engine_ptr is nullptr");
    REPORT_INNER_ERROR("E19999", "RegisterEngine failed, as input engine_ptr is nullptr");
    return FAILED;
  }

  if (engine_map_ == nullptr) {
    engine_map_.reset(new (std::nothrow) std::map<std::string, DNNEnginePtr>());
    // new (std::nothrow) yields nullptr instead of throwing, so the result has
    // to be checked before the map is used.
    if (engine_map_ == nullptr) {
      GELOGE(FAILED, "[Register][Engine] failed, as malloc engine map failed.");
      REPORT_INNER_ERROR("E19999", "RegisterEngine failed, as malloc engine map failed.");
      return FAILED;
    }
  }

  auto it = engine_map_->find(engine_name);
  if (it != engine_map_->end()) {
    GELOGW("engine %s already exist.", engine_name.c_str());
    return FAILED;
  }
  engine_map_->emplace(engine_name, engine_ptr);
  return SUCCESS;
}
|
|
|
|
|
|
|
|
// Looks up a registered engine by name.
//
// Returns the engine, or nullptr (after logging a warning) when no engine with
// that name exists. This includes the case where nothing has been registered
// yet and the registry map has therefore not been created.
DNNEnginePtr EngineManager::GetEngine(const std::string &engine_name) {
  // The registry is only allocated by the first RegisterEngine call.
  if (engine_map_ == nullptr) {
    GELOGW("engine %s not exist.", engine_name.c_str());
    return nullptr;
  }

  auto it = engine_map_->find(engine_name);
  if (it == engine_map_->end()) {
    GELOGW("engine %s not exist.", engine_name.c_str());
    return nullptr;
  }

  return it->second;
}
|
|
|
|
|
|
|
|
void RegisterAiCoreEngine() {
|
|
|
|
const std::string ai_core = "AIcoreEngine";
|
|
|
|
std::vector<std::string> mem_type_aicore;
|
|
|
|
mem_type_aicore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
DNNEngineAttribute attr_aicore = {ai_core, mem_type_aicore, COST_0, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr aicore_engine_ptr = MakeShared<AICoreDNNEngine>(attr_aicore);
|
|
|
|
if (aicore_engine_ptr == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][AiCoreEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterAiCoreEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(ai_core, aicore_engine_ptr) != SUCCESS) {
|
|
|
|
GELOGW("register ai_core failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void RegisterVectorEngine() {
|
|
|
|
const std::string vector_core = "VectorEngine";
|
|
|
|
std::vector<std::string> mem_type_aivcore;
|
|
|
|
mem_type_aivcore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
DNNEngineAttribute attr_vector_core = {vector_core, mem_type_aivcore, COST_1,
|
|
|
|
DEVICE, FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr vectorcore_engine_ptr = MakeShared<VectorCoreDNNEngine>(attr_vector_core);
|
|
|
|
if (vectorcore_engine_ptr == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][VectorEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterVectorEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(vector_core, vectorcore_engine_ptr) != SUCCESS) {
|
|
|
|
GELOGW("register vector_core failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void RegisterAiCpuEngine() {
|
|
|
|
const std::string vm_aicpu = "DNN_VM_AICPU_ASCEND";
|
|
|
|
std::vector<std::string> mem_type_aicpu;
|
|
|
|
mem_type_aicpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_3, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr vm_engine_ptr = MakeShared<AICpuDNNEngine>(attr_aicpu);
|
|
|
|
if (vm_engine_ptr == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][AiCpuEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterAiCpuEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(vm_aicpu, vm_engine_ptr) != SUCCESS) {
|
|
|
|
GELOGW("register vmAicpuEngine failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void RegisterAiCpuTFEngine() {
|
|
|
|
const std::string vm_aicpu_tf = "DNN_VM_AICPU";
|
|
|
|
std::vector<std::string> mem_type_aicpu_tf;
|
|
|
|
mem_type_aicpu_tf.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
DNNEngineAttribute attr_aicpu_tf = {vm_aicpu_tf, mem_type_aicpu_tf, COST_2, DEVICE,
|
|
|
|
FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr vm_engine_ptr = MakeShared<AICpuTFDNNEngine>(attr_aicpu_tf);
|
|
|
|
if (vm_engine_ptr == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][AiCpuTFEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterAiCpuTFEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(vm_aicpu_tf, vm_engine_ptr) != SUCCESS) {
|
|
|
|
GELOGW("register vmAicpuTFEngine failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void RegisterGeLocalEngine() {
|
|
|
|
const std::string vm_ge_local = "DNN_VM_GE_LOCAL";
|
|
|
|
std::vector<std::string> mem_type_ge_local;
|
|
|
|
mem_type_ge_local.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
// GeLocal use minimum priority, set it as 9
|
|
|
|
DNNEngineAttribute attr_ge_local = {vm_ge_local, mem_type_ge_local, COST_9, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr ge_local_engine = MakeShared<GeLocalDNNEngine>(attr_ge_local);
|
|
|
|
if (ge_local_engine == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][GeLocalEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterGeLocalEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(vm_ge_local, ge_local_engine) != SUCCESS) {
|
|
|
|
GELOGW("register ge_local_engine failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void RegisterHostCpuEngine() {
|
|
|
|
const std::string vm_host_cpu = "DNN_VM_HOST_CPU";
|
|
|
|
std::vector<std::string> mem_type_host_cpu;
|
|
|
|
mem_type_host_cpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
// HostCpu use minimum priority, set it as 10
|
|
|
|
DNNEngineAttribute attr_host_cpu = {vm_host_cpu, mem_type_host_cpu, COST_10,
|
|
|
|
HOST, FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr host_cpu_engine = MakeShared<HostCpuDNNEngine>(attr_host_cpu);
|
|
|
|
if (host_cpu_engine == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][HostCpuEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterHostCpuEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(vm_host_cpu, host_cpu_engine) != SUCCESS) {
|
|
|
|
GELOGW("register host_cpu_engine failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void RegisterRtsEngine() {
|
|
|
|
const std::string vm_rts = "DNN_VM_RTS";
|
|
|
|
std::vector<std::string> mem_type_rts;
|
|
|
|
mem_type_rts.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
DNNEngineAttribute attr_rts = {vm_rts, mem_type_rts, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr rts_engine = MakeShared<RtsDNNEngine>(attr_rts);
|
|
|
|
if (rts_engine == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][RtsEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterRtsEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(vm_rts, rts_engine) != SUCCESS) {
|
|
|
|
GELOGW("register rts_engine failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void RegisterHcclEngine() {
|
|
|
|
const std::string dnn_hccl = "DNN_HCCL";
|
|
|
|
std::vector<std::string> mem_type_hccl;
|
|
|
|
mem_type_hccl.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM);
|
|
|
|
DNNEngineAttribute attr_hccl = {dnn_hccl, mem_type_hccl, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED};
|
|
|
|
DNNEnginePtr hccl_engine = MakeShared<HcclDNNEngine>(attr_hccl);
|
|
|
|
if (hccl_engine == nullptr) {
|
|
|
|
GELOGE(ge::FAILED, "[Register][HcclEngine] failed, as malloc shared_ptr failed.");
|
|
|
|
REPORT_INNER_ERROR("E19999", "RegisterHcclEngine failed, as malloc shared_ptr failed.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (EngineManager::RegisterEngine(dnn_hccl, hccl_engine) != SUCCESS) {
|
|
|
|
GELOGW("register hccl_engine failed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines) {
|
|
|
|
RegisterAiCoreEngine();
|
|
|
|
RegisterVectorEngine();
|
|
|
|
RegisterAiCpuTFEngine();
|
|
|
|
RegisterAiCpuEngine();
|
|
|
|
RegisterGeLocalEngine();
|
|
|
|
RegisterHostCpuEngine();
|
|
|
|
RegisterRtsEngine();
|
|
|
|
RegisterHcclEngine();
|
|
|
|
|
|
|
|
for (auto it = EngineManager::engine_map_->begin(); it != EngineManager::engine_map_->end(); ++it) {
|
|
|
|
GELOGI("get engine %s from engine plugin.", it->first.c_str());
|
|
|
|
engines.emplace(std::pair<std::string, DNNEnginePtr>(it->first, it->second));
|
|
|
|
}
|
|
|
|
|
|
|
|
GELOGI("after get engine, engine size: %zu", engines.size());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} // namespace ge
|