| @@ -144,6 +144,7 @@ set(TRAIN_SRC_LIST | |||
| "graph/passes/atomic_addr_clean_pass.cc" | |||
| "graph/passes/mark_same_addr_pass.cc" | |||
| "graph/passes/mark_graph_unknown_status_pass.cc" | |||
| "graph/passes/dynamic_single_op_reset_shape_pass.cc" | |||
| "graph/passes/mark_agnostic_pass.cc" | |||
| "graph/partition/dynamic_shape_partition.cc" | |||
| "graph/partition/stage_partition.cc" | |||
| @@ -434,6 +435,7 @@ set(INFER_SRC_LIST | |||
| "graph/passes/atomic_addr_clean_pass.cc" | |||
| "graph/passes/mark_same_addr_pass.cc" | |||
| "graph/passes/mark_graph_unknown_status_pass.cc" | |||
| "graph/passes/dynamic_single_op_reset_shape_pass.cc" | |||
| "graph/passes/mark_agnostic_pass.cc" | |||
| "graph/common/omg_util.cc" | |||
| "graph/common/bcast.cc" | |||
| @@ -130,6 +130,7 @@ target_compile_definitions(ge_common_static PRIVATE | |||
| google=ascend_private | |||
| $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | |||
| $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | |||
| LOG_CPP | |||
| ) | |||
| target_compile_options(ge_common_static PRIVATE | |||
| @@ -94,6 +94,9 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) { | |||
| for (auto dim : output_descs.at(i).GetShape().GetDims()) { | |||
| output.mutable_shape()->add_dim(dim); | |||
| } | |||
| for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) { | |||
| output.mutable_origin_shape()->add_dim(dim); | |||
| } | |||
| int64_t output_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||
| @@ -118,6 +121,9 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) { | |||
| for (auto dim : input_descs.at(i).GetShape().GetDims()) { | |||
| input.mutable_shape()->add_dim(dim); | |||
| } | |||
| for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) { | |||
| input.mutable_origin_shape()->add_dim(dim); | |||
| } | |||
| int64_t input_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||
| @@ -214,8 +220,15 @@ Status DumpOp::LaunchDumpOp() { | |||
| SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||
| GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||
| dump_path.c_str()); | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret); | |||
| } | |||
| aicpu::dump::Task task; | |||
| task.set_task_id(task_id); | |||
| task.set_stream_id(stream_id); | |||
| task.mutable_op()->set_op_name(op_desc_->GetName()); | |||
| task.mutable_op()->set_op_type(op_desc_->GetType()); | |||
| if (dump_properties_.GetDumpMode() == kDumpOutput) { | |||
| @@ -181,12 +181,19 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { | |||
| void TBEPluginManager::LoadCustomOpLib() { | |||
| LoadPluginSo(options_); | |||
| std::string fmk_type = std::to_string(domi::TENSORFLOW); | |||
| auto it = options_.find(ge::FRAMEWORK_TYPE); | |||
| if (it != options_.end()) { | |||
| fmk_type = it->second; | |||
| } | |||
| std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | |||
| GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | |||
| for (OpRegistrationData reg_data : registration_datas) { | |||
| GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), | |||
| TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); | |||
| domi::OpRegistry::Instance()->Register(reg_data); | |||
| if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) { | |||
| GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), | |||
| TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); | |||
| (void)domi::OpRegistry::Instance()->Register(reg_data); | |||
| } | |||
| } | |||
| } | |||
| @@ -112,7 +112,6 @@ ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { | |||
| if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { | |||
| GELOGW("Msprof ctrl callback is exist, just ignore it."); | |||
| } else { | |||
| GELOGI("GE register Msprof ctrl callback."); | |||
| ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); | |||
| } | |||
| return ge::SUCCESS; | |||
| @@ -124,7 +123,6 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| // Pass MsprofSetDeviceCallback to runtime | |||
| GELOGI("GE pass setdevice callback to runtime."); | |||
| ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func)); | |||
| if (rt_ret != ge::SUCCESS) { | |||
| GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); | |||
| @@ -158,7 +156,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||
| if (type != kProfCommandhandleFinalize) { | |||
| GE_CHECK_NOTNULL(data); | |||
| } | |||
| ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data; | |||
| ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data); | |||
| auto iter = kProfCommandTypeMap.find(type); | |||
| if (iter == kProfCommandTypeMap.end()) { | |||
| GELOGW("The prof comand type is invalid."); | |||
| @@ -183,7 +181,8 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||
| if (type != kProfCommandhandleFinalize) { | |||
| command.module_index = prof_config_param->profSwitch; | |||
| } | |||
| GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index); | |||
| GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(), | |||
| command.module_index); | |||
| if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | |||
| GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | |||
| } | |||
| @@ -38,10 +38,8 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
| } // namespace | |||
| namespace ge { | |||
| ProfilingManager::ProfilingManager() : is_load_profiling_(false), | |||
| is_execute_profiling_(false), | |||
| is_training_trace_(false), | |||
| subscribe_count_(0) { | |||
| ProfilingManager::ProfilingManager() | |||
| : is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) { | |||
| prof_cb_.msprofCtrlCallback = nullptr; | |||
| prof_cb_.msprofReporterCallback = nullptr; | |||
| } | |||
| @@ -102,8 +100,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt | |||
| return INTERNAL_ERROR; | |||
| } | |||
| is_execute_profiling_ = true; | |||
| GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), | |||
| prof_conf.options, options.profiling_options.c_str()); | |||
| GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options, | |||
| options.profiling_options.c_str()); | |||
| } else { | |||
| (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); | |||
| (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); | |||
| @@ -215,7 +213,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
| uint32_t task_id = task.task_id; | |||
| uint32_t stream_id = task.stream_id; | |||
| std::string shape_type = task.shape_type; | |||
| uint64_t cur_iter_num = task.cur_iter_num; | |||
| int64_t cur_iter_num = task.cur_iter_num; | |||
| data = model_name.append(" ") | |||
| .append(op_name).append(" ") | |||
| .append(std::to_string(block_dim)).append(" ") | |||
| @@ -809,32 +807,46 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP | |||
| if (!fp_point_.empty() && !bp_point_.empty()) { | |||
| fp_point = fp_point_; | |||
| bp_point = bp_point_; | |||
| GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); | |||
| GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), | |||
| fp_point.c_str()); | |||
| return; | |||
| } | |||
| // ProfApi mode and training trace is set | |||
| try { | |||
| char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; | |||
| // Parse options first | |||
| char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; | |||
| bool is_profiling_valid = false; | |||
| std::string profiling_options; | |||
| if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS && | |||
| !profiling_options.empty()) { | |||
| is_profiling_valid = true; | |||
| } else { | |||
| INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX); | |||
| if (ret != EN_OK) { | |||
| GELOGI("PROFILING_OPTIONS env is not exist."); | |||
| return; | |||
| } | |||
| GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); | |||
| Json prof_options = Json::parse(env_profiling_options); | |||
| profiling_options = env_profiling_options; | |||
| is_profiling_valid = true; | |||
| } | |||
| if (is_profiling_valid) { | |||
| try { | |||
| Json prof_options = Json::parse(profiling_options); | |||
| fp_point_ = prof_options[kFpPoint]; | |||
| bp_point_ = prof_options[kBpPoint]; | |||
| fp_point_ = prof_options[kFpPoint]; | |||
| bp_point_ = prof_options[kBpPoint]; | |||
| fp_point = fp_point_; | |||
| bp_point = bp_point_; | |||
| if (!fp_point_.empty() && !bp_point_.empty()) { | |||
| GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); | |||
| fp_point = fp_point_; | |||
| bp_point = bp_point_; | |||
| if (!fp_point_.empty() && !bp_point_.empty()) { | |||
| GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); | |||
| } | |||
| } catch (...) { | |||
| GELOGW("Json prof options is invalid."); | |||
| return; | |||
| } | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Json prof options is invalid."); | |||
| return; | |||
| } | |||
| return; | |||
| } | |||
| @@ -36,21 +36,21 @@ using Json = nlohmann::json; | |||
| namespace { | |||
| const std::string GE_PROFILING_MODULE = "Framework"; | |||
// DataTypeConfig MASK
// Typed 64-bit masks. They are deliberately NOT also defined as macros: a same-named #define
// would textually replace the identifiers in the declarations below and break compilation.
// PROF_MODEL_LOAD_MASK is the most significant bit of the 64-bit value.
const uint64_t PROF_ACL_API_MASK = 0x0001;
const uint64_t PROF_TASK_TIME_MASK = 0x0002;
const uint64_t PROF_AICORE_METRICS_MASK = 0x0004;
const uint64_t PROF_AICPU_TRACE_MASK = 0x0008;
const uint64_t PROF_MODEL_EXECUTE_MASK = 0x0010;
const uint64_t PROF_RUNTIME_API_MASK = 0x0020;
const uint64_t PROF_RUNTIME_TRACE_MASK = 0x0040;
const uint64_t PROF_SCHEDULE_TIMELINE_MASK = 0x0080;
const uint64_t PROF_SCHEDULE_TRACE_MASK = 0x0100;
const uint64_t PROF_AIVECTORCORE_METRICS_MASK = 0x0200;
const uint64_t PROF_SUBTASK_TIME_MASK = 0x0400;
const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800;
const uint64_t PROF_HCCL_TRACE_MASK = 0x1000;
const uint64_t PROF_DATA_PROCESS_MASK = 0x2000;
const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000ULL;
| } // namespace | |||
| namespace ge { | |||
| @@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
| bool ProfilingTrainingTraceOn() const { return is_training_trace_; } | |||
| bool ProfilingModelLoadOn() const { return is_load_profiling_; } | |||
| bool ProfilingModelExecuteOn() const; | |||
| bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env | |||
| // is_execute_profiling_ only used by ge option and env | |||
| bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | |||
| void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||
| void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| @@ -15,6 +15,7 @@ message Output { | |||
| int32 original_output_data_type = 7; | |||
| int32 original_output_format = 8; | |||
| uint64 size = 9; | |||
| Shape origin_shape = 10; | |||
| } | |||
| message Input { | |||
| @@ -23,6 +24,7 @@ message Input { | |||
| Shape shape = 3; | |||
| uint64 address = 4; | |||
| uint64 size = 5; | |||
| Shape origin_shape = 6; | |||
| } | |||
| enum BufferType { | |||
| @@ -173,6 +173,7 @@ target_compile_definitions(ge_executor PRIVATE | |||
| google=ascend_private | |||
| $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | |||
| $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | |||
| LOG_CPP | |||
| ) | |||
| target_include_directories(ge_executor PRIVATE | |||
| @@ -209,19 +209,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, | |||
| namespace ge { | |||
| bool GeExecutor::isInit_ = false; | |||
| class ModelListenerAdapter : public ModelListener { | |||
| public: | |||
| domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode, | |||
| std::vector<ge::OutputTensorInfo> &outputs) { | |||
| if (listener == nullptr) { | |||
| GELOGE(ge::FAILED, "listener is null."); | |||
| return FAILED; | |||
| } | |||
| return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs); | |||
| } | |||
| std::shared_ptr<ge::ModelListener> listener; | |||
| }; | |||
| static void InitOpsProtoManger() { | |||
| string opsproto_path; | |||
| @@ -573,60 +560,6 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||
| return SUCCESS; | |||
| } | |||
| // Load model | |||
| Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, | |||
| int32_t priority, std::shared_ptr<ge::ModelListener> listener) { | |||
| GELOGI("load model offline begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| string filePath = RealPath(path.c_str()); | |||
| if (filePath.empty()) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, | |||
| "File path is invalid. please check your text file '%s'.", path.c_str()); | |||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | |||
| } | |||
| std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>(); | |||
| if (listener_adapter == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| listener_adapter->listener = listener; | |||
| Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[GeExecutor] LoadModelFromFile failed"); | |||
| return ACL_ERROR_GE_LOAD_MODEL; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, | |||
| std::shared_ptr<ge::ModelListener> listener) { | |||
| GELOGI("Load model begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>(); | |||
| if (listener_adapter == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| listener_adapter->listener = listener; | |||
| Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[GeExecutor] LoadModel failed."); | |||
| return ACL_ERROR_GE_LOAD_MODEL; | |||
| } | |||
| return ret; | |||
| } | |||
| Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
| GELOGD("unload model %u begin.", model_id); | |||
| if (!isInit_) { | |||
| @@ -659,21 +592,6 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
| return SUCCESS; | |||
| } | |||
| Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) { | |||
| GELOGI("run model begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| InputData inputs; | |||
| GetDomiInputData(input_data, inputs); | |||
| OutputData outputs; | |||
| GetDomiOutputData(output_data, outputs); | |||
| return GraphExecutor::DataInput(inputs, outputs); | |||
| } | |||
| // Get input and output descriptor | |||
| Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | |||
| @@ -15,6 +15,7 @@ message Output { | |||
| int32 original_output_data_type = 7; | |||
| int32 original_output_format = 8; | |||
| uint64 size = 9; | |||
| Shape origin_shape = 10; | |||
| } | |||
| message Input { | |||
| @@ -23,6 +24,7 @@ message Input { | |||
| Shape shape = 3; | |||
| uint64 address = 4; | |||
| uint64 size = 5; | |||
| Shape origin_shape = 6; | |||
| } | |||
| enum BufferType { | |||
| @@ -109,6 +109,7 @@ OMG_HOST_SRC_FILES := \ | |||
| graph/passes/atomic_addr_clean_pass.cc \ | |||
| graph/passes/mark_same_addr_pass.cc \ | |||
| graph/passes/mark_graph_unknown_status_pass.cc \ | |||
| graph/passes/dynamic_single_op_reset_shape_pass.cc \ | |||
| graph/passes/mark_agnostic_pass.cc \ | |||
| graph/common/omg_util.cc \ | |||
| graph/common/bcast.cc \ | |||
| @@ -203,6 +203,7 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE | |||
| target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | |||
| google=ascend_private | |||
| LOG_CPP | |||
| ) | |||
| target_include_directories(ge_local_opskernel_builder_static PRIVATE | |||
| @@ -113,6 +113,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/passes/atomic_addr_clean_pass.cc \ | |||
| graph/passes/mark_same_addr_pass.cc \ | |||
| graph/passes/mark_graph_unknown_status_pass.cc \ | |||
| graph/passes/dynamic_single_op_reset_shape_pass.cc \ | |||
| graph/passes/mark_agnostic_pass.cc \ | |||
| graph/partition/dynamic_shape_partition.cc \ | |||
| graph/partition/stage_partition.cc \ | |||
| @@ -47,6 +47,8 @@ const char *const kEngineNameDefault = "default"; | |||
| const char *const kVectorEngine = "VectorEngine"; | |||
| const char *const kAIcoreEngine = "AIcoreEngine"; | |||
| const char *const kFileNameSuffix = "online"; | |||
| const size_t kDynamicDimSize = 1; | |||
| const int64_t kDynamicDimValue = -2; | |||
| std::map<ge::OpEngineType, std::string> engine_type_map{ | |||
| {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; | |||
| @@ -248,6 +250,43 @@ static void GetOpsProtoPath(string &opsproto_path) { | |||
| opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | |||
| } | |||
| static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag) { | |||
| GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | |||
| change_shape_flag = false; | |||
| for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) { | |||
| auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||
| GE_CHECK_NOTNULL(input_desc); | |||
| // pass scalar input desc | |||
| auto dims = input_desc->GetShape().GetDims(); | |||
| if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) { | |||
| change_shape_flag = true; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) { | |||
| for (auto input : inputs) { | |||
| auto input_desc = input.GetTensorDesc(); | |||
| GeShape shape_ori = input_desc.GetShape(); | |||
| std::vector<int64_t> dynamic_shape_dims = {kDynamicDimValue}; | |||
| GeShape dynamic_shape(dynamic_shape_dims); | |||
| ge::GeTensor inputTensor; | |||
| ge::GeTensorDesc desc(input_desc); | |||
| bool is_const = false; | |||
| (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); | |||
| if (!is_const && shape_ori.GetDims().size() > 0) { | |||
| desc.SetShape(dynamic_shape); | |||
| } | |||
| inputTensor.SetTensorDesc(desc); | |||
| inputs_dynamic.push_back(inputTensor); | |||
| } | |||
| } | |||
| class GeGenerator::Impl { | |||
| public: | |||
| Impl(OmgContext &omg_context) : omg_context_(omg_context) {} | |||
| @@ -638,7 +677,18 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| } | |||
| GeModelPtr &ge_model = name_to_ge_model.begin()->second; | |||
| GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
| bool dynamic_flag = false; | |||
| if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) { | |||
| vector<GeTensor> inputs_dynamic; | |||
| vector<GeTensor> outputs_dynamic; | |||
| ResetTensorVecShape(inputs, inputs_dynamic); | |||
| ResetTensorVecShape(outputs, outputs_dynamic); | |||
| GE_CHK_STATUS_RET_NOLOG( | |||
| impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | |||
| return SUCCESS; | |||
| } | |||
| @@ -18,6 +18,7 @@ target_compile_options(ge_memory PRIVATE | |||
| target_compile_definitions(ge_memory PRIVATE | |||
| google=ascend_private | |||
| LOG_CPP | |||
| ) | |||
| target_link_libraries(ge_memory PRIVATE | |||
| @@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() { | |||
| MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); | |||
| memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); | |||
| if (mem_assigner->GetP2PMemOffset() > 0) { | |||
| if (mem_assigner->GetP2PMemOffset() >= 0) { | |||
| MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); | |||
| memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); | |||
| } | |||
| @@ -48,26 +48,41 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap | |||
| } | |||
| } | |||
| bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) { | |||
| bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph) { | |||
| if (comp_graph == nullptr) { | |||
| return false; | |||
| } | |||
| std::set<int64_t> stream_set; | |||
| std::set<std::string> label_set; | |||
| for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) { | |||
| GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue); | |||
| int64_t stream_id = cur_node->GetOpDesc()->GetStreamId(); | |||
| if (stream_id == kInvalidStream) { | |||
| continue; | |||
| } | |||
| GELOGD("Node %s in subgraph %s stream id is: %ld, node num: %zu", cur_node->GetName().c_str(), | |||
| comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize()); | |||
| stream_set.insert(stream_id); | |||
| std::string batch_label; | |||
| if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { | |||
| label_set.insert(batch_label); | |||
| } else { | |||
| GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(), | |||
| cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id); | |||
| continue; | |||
| } | |||
| GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(), | |||
| comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize()); | |||
| } | |||
| if (stream_set.size() > 1) { | |||
| GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.", | |||
| if (stream_set.size() > 1 || label_set.size() > 1) { | |||
| GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.", | |||
| comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size()); | |||
| return false; | |||
| } | |||
| if (!label_set.empty()) { | |||
| (void)AttrUtils::SetStr(comp_graph, ATTR_NAME_BATCH_LABEL, *label_set.begin()); | |||
| } | |||
| return true; | |||
| } | |||
| @@ -99,8 +114,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||
| continue; | |||
| } | |||
| if (!IsSameStreamId(subgraph)) { | |||
| GELOGI("There are more than one stream in subgraph %s", subgraph->GetName().c_str()); | |||
| if (!IsSameStreamIdOrBatchLabel(subgraph)) { | |||
| GELOGI("There are more than one stream or batch_label in subgraph %s", subgraph->GetName().c_str()); | |||
| continue; | |||
| } | |||
| OpDescPtr op_desc = nodes.at(0)->GetOpDesc(); | |||
| @@ -112,9 +127,11 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||
| return FAILED; | |||
| } | |||
| run_context.stream = run_context.graphStreamList[stream_id]; | |||
| GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu.", | |||
| subgraph->GetName().c_str(), engine_name.c_str(), stream_id, | |||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream))); | |||
| std::string batch_label; | |||
| (void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label); | |||
| GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, " | |||
| "batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id, | |||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str()); | |||
| for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) { | |||
| GE_CHECK_NOTNULL(*iter); | |||
| Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context); | |||
| @@ -41,7 +41,7 @@ class StreamGraphOptimizer { | |||
| private: | |||
| void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map); | |||
| bool IsSameStreamId(const ComputeGraphPtr &comp_graph); | |||
| bool IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_ | |||
| @@ -567,7 +567,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_ | |||
| continue; | |||
| } | |||
| string op_type = op_desc->GetType(); | |||
| if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) { | |||
| if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) { | |||
| continuous_op_lists.emplace_back(vector<OpDescPtr>()); | |||
| } else { | |||
| continuous_op_lists.back().emplace_back(op_desc); | |||
| @@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||
| ModelData &model_data) { | |||
| Status ret; | |||
| if (!CheckInputPathValid(path)) { | |||
| GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | |||
| return GE_EXEC_MODEL_PATH_INVALID; | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | |||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | |||
| } | |||
| GELOGI("Load model begin, model path is: %s", path.c_str()); | |||
| if (!key_path.empty() && !CheckInputPathValid(key_path)) { | |||
| GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||
| return GE_EXEC_MODEL_KEY_PATH_INVALID; | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | |||
| @@ -144,63 +144,6 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||
| return SUCCESS; | |||
| } | |||
| Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, | |||
| const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) { | |||
| Status ret; | |||
| ModelData model_data; | |||
| ret = LoadDataFromFile(path, key_path, priority, model_data); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); | |||
| if (model_data.model_data != nullptr) { | |||
| delete[] static_cast<char *>(model_data.model_data); | |||
| model_data.model_data = nullptr; | |||
| } | |||
| return ret; | |||
| } | |||
| ret = LoadModel(model_data, listener, model_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "LoadModel: Load failed. ret = %u", ret); | |||
| if (model_data.model_data != nullptr) { | |||
| delete[] static_cast<char *>(model_data.model_data); | |||
| model_data.model_data = nullptr; | |||
| } | |||
| } | |||
| if (model_data.model_data != nullptr) { | |||
| delete[] static_cast<char *>(model_data.model_data); | |||
| model_data.model_data = nullptr; | |||
| } | |||
| return ret; | |||
| } | |||
| Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener, | |||
| uint32_t &model_id) { | |||
| GELOGI("Load model begin, model_id:%u.", model_id); | |||
| // For GeOp, Open Device 0 here. | |||
| GE_CHK_RT_RET(rtSetDevice(0)); | |||
| auto model_manager = ModelManager::GetInstance(); | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| Status ret = model_manager->LoadModelOffline(model_id, model_data, listener); | |||
| if (ret != SUCCESS) { | |||
| GE_CHK_RT(rtDeviceReset(0)); | |||
| GELOGE(ret, "LoadModel: Load failed."); | |||
| return ret; | |||
| } | |||
| ret = model_manager->Start(model_id); | |||
| if (ret != SUCCESS) { | |||
| if (model_manager->Unload(model_id) != SUCCESS) { | |||
| GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start."); | |||
| } | |||
| GELOGE(ret, "LoadModel: Start failed."); | |||
| return ret; | |||
| } | |||
| GELOGI("LoadModel: Start model success, model_id:%u.", model_id); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphLoader::CommandHandle(const Command &command) { | |||
| try { | |||
| auto model_manager = ModelManager::GetInstance(); | |||
| @@ -225,16 +168,16 @@ Status GraphLoader::CommandHandle(const Command &command) { | |||
| } | |||
| Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, | |||
| size_t memsize, void *weight_ptr, size_t weightsize) { | |||
| size_t mem_size, void *weight_ptr, size_t weight_size) { | |||
| GELOGI("Load model begin, model_id:%u.", model_id); | |||
| // For ACL, Open Device from App. | |||
| auto model_manager = ModelManager::GetInstance(); | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| Status ret = model_manager->LoadModelOffline( | |||
| model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); | |||
| model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Load model failed, model_id:%u.", model_id); | |||
| return ret; | |||
| GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id); | |||
| return ACL_ERROR_GE_LOAD_MODEL; | |||
| } | |||
| GELOGI("Load model success, model_id:%u.", model_id); | |||
| return SUCCESS; | |||
| @@ -259,8 +202,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id); | |||
| return ret; | |||
| GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id); | |||
| return ACL_ERROR_GE_LOAD_MODEL; | |||
| } | |||
| GELOGI("Load model with queue success, model_id:%u.", model_id); | |||
| @@ -44,12 +44,6 @@ class GraphLoader { | |||
| static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size); | |||
| static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener, | |||
| uint32_t &model_id); | |||
| static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, | |||
| const std::shared_ptr<ModelListener> &listener, uint32_t &model_id); | |||
| static Status CommandHandle(const Command &command); | |||
| static Status GetMemoryInfo(int64_t &free); | |||
| @@ -319,6 +319,9 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis | |||
| for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | |||
| output.mutable_shape()->add_dim(dim); | |||
| } | |||
| for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { | |||
| output.mutable_origin_shape()->add_dim(dim); | |||
| } | |||
| int64_t output_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||
| @@ -476,6 +479,9 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor | |||
| for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | |||
| input.mutable_shape()->add_dim(dim); | |||
| } | |||
| for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { | |||
| input.mutable_origin_shape()->add_dim(dim); | |||
| } | |||
| int64_t input_size = 0; | |||
| if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | |||
| GELOGI("Get aipp input size according to attr is %ld", input_size); | |||
| @@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh | |||
| if (weight_ptr == nullptr) { | |||
| weights_mem_base_ = MallocWeightsMem(weights_size); | |||
| if (weights_mem_base_ == nullptr) { | |||
| GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||
| return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| is_inner_weight_base_ = true; | |||
| } | |||
| @@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh | |||
| Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||
| if (is_feature_map_mem_has_inited_) { | |||
| GELOGE(FAILED, "call InitFeatureMapMem more than once ."); | |||
| return FAILED; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once ."); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| is_feature_map_mem_has_inited_ = true; | |||
| @@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||
| std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; | |||
| if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { | |||
| GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); | |||
| return FAILED; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| mem_base_ = static_cast<uint8_t *>(dev_ptr); | |||
| @@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||
| if (TotalMemSize() && mem_base_ == nullptr) { | |||
| mem_base_ = MallocFeatureMapMem(data_size); | |||
| if (mem_base_ == nullptr) { | |||
| GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); | |||
| return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", | |||
| runtime_param_.graph_id, mem_base_, data_size); | |||
| @@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||
| if (p2p_data_size != 0) { | |||
| p2p_mem_base_ = MallocP2PMem(p2p_data_size); | |||
| if (p2p_mem_base_ == nullptr) { | |||
| GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); | |||
| return GE_EXEC_ALLOC_P2P_MEM_FAILED; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||
| p2p_mem_base_, p2p_data_size); | |||
| @@ -710,6 +710,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| } | |||
| // collect profiling for ge | |||
| GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed"); | |||
| auto &profiling_manager = ProfilingManager::Instance(); | |||
| if (profiling_manager.ProfilingModelLoadOn()) { | |||
| Status p_ret = ReportProfilingData(); | |||
| @@ -970,7 +971,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
| uint32_t parent_index = 0; // Ignore subgraph Data Node. | |||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
| GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); | |||
| return InitInputBatchLabel(node); | |||
| return SUCCESS; | |||
| } | |||
| data_op_list_.push_back(op_desc); | |||
| @@ -1011,10 +1012,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
| } | |||
| data_op_index++; | |||
| if (InitInputZeroCopyNodes(node) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); | |||
| return PARAM_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -1036,39 +1033,6 @@ void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_inde | |||
| } | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief input zero copy node Initialize. | |||
| /// @param [in] NodePtr: Data Op. | |||
| /// @return Status | |||
| /// | |||
| Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) { | |||
| auto out_data_anchor = node->GetOutDataAnchor(kDataIndex); | |||
| if (out_data_anchor == nullptr) { | |||
| GELOGE(FAILED, "Out data anchor is nullptr"); | |||
| return FAILED; | |||
| } | |||
| for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
| auto node = peer_in_data_anchor->GetOwnerNode(); | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| GELOGE(FAILED, "Op desc is nullptr"); | |||
| return FAILED; | |||
| } | |||
| string batch_label; | |||
| (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); | |||
| if (batch_label.empty()) { | |||
| batch_label = kDefaultBatchLable; | |||
| } | |||
| if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { | |||
| zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_desc->GetId(), batch_label)); | |||
| GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", op_desc->GetName().c_str(), | |||
| op_desc->GetId(), batch_label.c_str()); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||
| bool getnext_sink_dynamic = false; | |||
| if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { | |||
| @@ -1094,7 +1058,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||
| if (owner_graph->GetParentGraph() != nullptr) { | |||
| GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); | |||
| op_list_.erase(op_desc->GetId()); | |||
| return InitOutputBatchLabel(node); | |||
| return SUCCESS; | |||
| } | |||
| output_op_list_.push_back(op_desc); | |||
| @@ -1146,8 +1110,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||
| } | |||
| } | |||
| GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS, | |||
| GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;); | |||
| GetAllGearsInfo(node); | |||
| if (is_getnext_sink_dynamic_) { | |||
| GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, | |||
| @@ -1343,121 +1305,6 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info | |||
| } | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief output zero copy node Initialize. | |||
| /// @param [in] NodePtr: netoutput Op. | |||
| /// @return Status | |||
| /// | |||
| Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) { | |||
| set<NodePtr> nodes_need_record; | |||
| for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
| auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
| if (peer_out_data_anchor == nullptr) { | |||
| continue; | |||
| } | |||
| auto peer_node = peer_out_data_anchor->GetOwnerNode(); | |||
| nodes_need_record.emplace(peer_node); | |||
| // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios | |||
| if (peer_node->GetType() == MERGE) { | |||
| for (const auto &merge_peer_in_data_anchor : peer_node->GetAllInDataAnchors()) { | |||
| auto merge_peer_out_data_anchor = merge_peer_in_data_anchor->GetPeerOutAnchor(); | |||
| if (merge_peer_out_data_anchor == nullptr) { | |||
| continue; | |||
| } | |||
| auto merge_peer_node = merge_peer_out_data_anchor->GetOwnerNode(); | |||
| nodes_need_record.emplace(merge_peer_node); | |||
| } | |||
| } else { | |||
| for (const auto &other_in_data_anchor : peer_out_data_anchor->GetPeerInDataAnchors()) { | |||
| auto other_in_node = other_in_data_anchor->GetOwnerNode(); | |||
| if (other_in_node->GetType() != NETOUTPUT) { | |||
| nodes_need_record.emplace(other_in_node); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| for (const auto &node_need_record : nodes_need_record) { | |||
| auto op_desc = node_need_record->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| string batch_label; | |||
| (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); | |||
| if (batch_label.empty()) { | |||
| batch_label = kDefaultBatchLable; | |||
| } | |||
| if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { | |||
| zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_desc->GetId(), batch_label)); | |||
| GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", | |||
| op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief input zero copy node Initialize. | |||
| /// @param [in] NodePtr: Data Op. | |||
| /// @return Status | |||
| /// | |||
| Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { | |||
| string batch_label; | |||
| if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { | |||
| return SUCCESS; // Not Multi-batch. | |||
| } | |||
| const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex); | |||
| GE_CHECK_NOTNULL(out_data_anchor); | |||
| for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
| const auto &node = peer_in_data_anchor->GetOwnerNode(); | |||
| const auto &op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { | |||
| zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; | |||
| GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(), | |||
| op_desc->GetId(), batch_label.c_str()); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief output zero copy node Initialize for Case. | |||
| /// @param [in] NodePtr: netoutput Op. | |||
| /// @return Status | |||
| /// | |||
| Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) { | |||
| string batch_label; | |||
| if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { | |||
| return SUCCESS; // Not Multi-batch. | |||
| } | |||
| for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
| const auto &peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
| if (peer_out_data_anchor == nullptr) { | |||
| continue; | |||
| } | |||
| const auto &peer_node = peer_out_data_anchor->GetOwnerNode(); | |||
| const auto &op_desc = peer_node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { | |||
| zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; | |||
| GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s", | |||
| op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| /// @ingroup ge | |||
| /// @brief LabelSet Op Initialize. | |||
| /// @param [in] op_desc: LabelSet Op descriptor. | |||
| @@ -2240,12 +2087,61 @@ Status DavinciModel::SyncVarData() { | |||
| return ret; | |||
| } | |||
| inline int64_t SumSize(const vector<int64_t> &size_list) { | |||
| int64_t sum_size = 0; | |||
| for (const int64_t &size : size_list) { | |||
| sum_size += size; | |||
| Status DavinciModel::InitModelProfile() { | |||
| for (const auto &task : task_list_) { | |||
| GE_CHECK_NOTNULL(task); | |||
| const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); | |||
| // when type is RT_MODEL_TASK_KERNEL, ctx is not null | |||
| if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { | |||
| continue; | |||
| } | |||
| GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size()); | |||
| op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID())); | |||
| } | |||
| std::set<uint32_t> task_id_set; | |||
| using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | |||
| using Range = std::pair<CIT, CIT>; | |||
| for (const auto &task : task_list_) { | |||
| GE_CHECK_NOTNULL(task); | |||
| const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); | |||
| if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { | |||
| continue; | |||
| } | |||
| if (task_id_set.count(task->GetTaskID()) > 0) { | |||
| continue; | |||
| } | |||
| const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); | |||
| GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index); | |||
| ProfileInfo profile; | |||
| profile.fusion_info = *fusion_op_info; | |||
| Range range = op_id_map_.equal_range(fusion_op_info->op_index); | |||
| for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { | |||
| profile.task_count++; | |||
| task_id_set.insert(range_idx->second); | |||
| } | |||
| // memory info | |||
| TaskMemInfo &mem_info = profile.memory_info; | |||
| const auto input_size = ModelUtils::GetInputSize(op_desc); | |||
| const auto output_size = ModelUtils::GetOutputSize(op_desc); | |||
| const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc); | |||
| const auto weight_size = ModelUtils::GetWeightSize(op_desc); | |||
| mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0); | |||
| mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0); | |||
| mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0); | |||
| mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0); | |||
| mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size; | |||
| profile_list_.emplace_back(profile); | |||
| } | |||
| return sum_size; | |||
| GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size()); | |||
| return SUCCESS; | |||
| } | |||
| Status DavinciModel::SinkModelProfile() { | |||
| @@ -2253,18 +2149,12 @@ Status DavinciModel::SinkModelProfile() { | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| ReporterData reporter_data{}; | |||
| // report model data tag name | |||
| std::string tag_name; | |||
| tag_name.append("model_load_info_").append(std::to_string(this->Id())); | |||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||
| return FAILED, "Sink model tag memcpy error."); | |||
| // Model Header | |||
| string name; | |||
| if (!om_name_.empty()) { | |||
| name = om_name_; | |||
| } else { | |||
| name = name_; | |||
| } | |||
| std::string name = om_name_.empty() ? name_ : om_name_; | |||
| size_t name_len = name.size(); | |||
| reporter_data.deviceId = device_id_; | |||
| reporter_data.data = (unsigned char *)&name_len; | |||
| @@ -2296,128 +2186,71 @@ Status DavinciModel::SinkModelProfile() { | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| int32_t task_num = task_list_.size(); | |||
| std::multimap<uint32_t, uint32_t> op_id_map; | |||
| std::set<uint32_t> task_id_set; | |||
| for (int32_t i = 0; i < task_num; i++) { | |||
| auto task = task_list_[i]; | |||
| GE_CHECK_NOTNULL(task); | |||
| auto fusion_op_info = task->GetFusionOpInfo(); | |||
| // when type is RT_MODEL_TASK_KERNEL, ctx is not null | |||
| if (fusion_op_info != nullptr) { | |||
| uint32_t op_num = fusion_op_info->original_op_names.size(); | |||
| uint32_t task_id = task->GetTaskID(); | |||
| if (op_num > 0) { | |||
| GELOGI("task.id = %u, opNum = %u", task_id, op_num); | |||
| op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); | |||
| } | |||
| } | |||
| } | |||
| struct memoryInfo { | |||
| int64_t input_size; | |||
| int64_t output_size; | |||
| int64_t weight_size; | |||
| int64_t workspace_size; | |||
| int64_t total_size; | |||
| memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {} | |||
| }; | |||
| using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | |||
| using Range = std::pair<CIT, CIT>; | |||
| for (int32_t i = 0; i < task_num; i++) { | |||
| auto task = task_list_[i]; | |||
| GE_CHECK_NOTNULL(task); | |||
| auto fusion_op_info = task->GetFusionOpInfo(); | |||
| if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) { | |||
| uint32_t task_id = task->GetTaskID(); | |||
| uint32_t op_num = fusion_op_info->original_op_names.size(); | |||
| uint32_t task_count = 0; | |||
| if (task_id_set.count(task_id) != 0) { | |||
| continue; | |||
| } | |||
| uint32_t op_id = fusion_op_info->op_index; | |||
| Range range = op_id_map.equal_range(op_id); | |||
| for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { | |||
| task_count++; | |||
| uint32_t task_id = range_idx->second; | |||
| task_id_set.insert(task_id); | |||
| } | |||
| // op name after fusion | |||
| string fusion_op_name = fusion_op_info->op_name; | |||
| int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||
| reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||
| reporter_data.dataLen = fusion_op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // original op name before fusion | |||
| reporter_data.data = (unsigned char *)&op_num; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| for (uint32_t k = 0; k < op_num; k++) { | |||
| std::string op_name = fusion_op_info->original_op_names[k]; | |||
| int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||
| reporter_data.data = (unsigned char *)&op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)op_name.c_str(); | |||
| reporter_data.dataLen = op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| } | |||
| // stream id info | |||
| uint32_t streamId = task->GetStreamId(); | |||
| reporter_data.data = (unsigned char *)&streamId; | |||
| for (const ProfileInfo &profile : profile_list_) { | |||
| // op name after fusion | |||
| string fusion_op_name = profile.fusion_info.op_name; | |||
| int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||
| reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||
| reporter_data.dataLen = fusion_op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // original op name before fusion | |||
| uint32_t op_num = profile.fusion_info.original_op_names.size(); | |||
| reporter_data.data = (unsigned char *)&op_num; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| for (uint32_t k = 0; k < op_num; k++) { | |||
| std::string op_name = profile.fusion_info.original_op_names[k]; | |||
| int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||
| reporter_data.data = (unsigned char *)&op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // memory info | |||
| struct memoryInfo memory_info; | |||
| uint32_t op_index = fusion_op_info->op_index; | |||
| auto iter = op_list_.find(op_index); | |||
| GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index); | |||
| auto op_desc = iter->second; | |||
| memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc)); | |||
| memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc)); | |||
| memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); | |||
| memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); | |||
| memory_info.total_size = | |||
| memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; | |||
| reporter_data.data = (unsigned char *)&memory_info; | |||
| reporter_data.dataLen = sizeof(struct memoryInfo); | |||
| reporter_data.data = (unsigned char *)op_name.c_str(); | |||
| reporter_data.dataLen = op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| } | |||
| // task info | |||
| reporter_data.data = (unsigned char *)&task_count; | |||
| // stream id info | |||
| uint32_t streamId = profile.fusion_info.stream_id; | |||
| reporter_data.data = (unsigned char *)&streamId; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // memory info | |||
| reporter_data.data = (unsigned char *)&profile.memory_info; | |||
| reporter_data.dataLen = sizeof(profile.memory_info); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // task info | |||
| reporter_data.data = (unsigned char *)&profile.task_count; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | |||
| for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | |||
| uint32_t task_id = idx->second; | |||
| reporter_data.data = (unsigned char *)&task_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| Range task_range = op_id_map.equal_range(op_id); | |||
| for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | |||
| uint32_t task_id = idx->second; | |||
| reporter_data.data = (unsigned char *)&task_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| } | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -3264,27 +3097,20 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||
| for (auto &input_outside_addrs : new_input_outside_addrs_) { | |||
| ZeroCopyOffset &input_outside = input_outside_addrs.second; | |||
| bool ret = input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||
| if (ret) { | |||
| void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen; | |||
| SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val)); | |||
| } | |||
| input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||
| } | |||
| for (auto &output_outside_addrs : new_output_outside_addrs_) { | |||
| ZeroCopyOffset &output_outside = output_outside_addrs.second; | |||
| bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||
| if (ret) { | |||
| void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen; | |||
| SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val)); | |||
| } | |||
| output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||
| } | |||
| } | |||
| auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); | |||
| if (it == zero_copy_op_id_batch_label_.end()) { | |||
| string batch_label; | |||
| if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) { | |||
| zero_copy_task.SetBatchLabel(kDefaultBatchLable); | |||
| } else { | |||
| zero_copy_task.SetBatchLabel(it->second); | |||
| zero_copy_task.SetBatchLabel(batch_label); | |||
| } | |||
| std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | |||
| @@ -3294,27 +3120,6 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||
| } | |||
| } | |||
| void DavinciModel::SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr) { | |||
| // Establish a mapping between batch label and zero copy address for multi-batch scenes | |||
| auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); | |||
| if (it == zero_copy_op_id_batch_label_.end()) { | |||
| return; | |||
| } | |||
| const string &batch_label = it->second; | |||
| auto iter = zero_copy_batch_label_addrs_.find(batch_label); | |||
| if (iter != zero_copy_batch_label_addrs_.end()) { | |||
| iter->second.insert(addr); | |||
| GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(), | |||
| op_desc->GetName().c_str()); | |||
| } else { | |||
| set<uintptr_t> addrs = {addr}; | |||
| zero_copy_batch_label_addrs_.emplace(pair<string, set<uintptr_t>>(batch_label, addrs)); | |||
| GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.", | |||
| batch_label.c_str(), op_desc->GetName().c_str()); | |||
| } | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Copy Check input size and model op size. | |||
| @@ -3448,15 +3253,15 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||
| void *addr = data.second.GetDataInfo().at(count).second; | |||
| void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data) + | |||
| data.second.GetRelativeOffset().at(count)); | |||
| GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(), | |||
| data.first, addr, size, buffer_addr); | |||
| GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s", | |||
| input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str()); | |||
| // For input data, just copy for rts task. | |||
| for (ZeroCopyTask &task : zero_copy_tasks_) { | |||
| if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { | |||
| continue; | |||
| } | |||
| uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr); | |||
| if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { | |||
| if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) { | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -3818,9 +3623,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
| GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | |||
| GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | |||
| is_dynamic_ = input_data.is_dynamic_batch; | |||
| if (!is_dynamic_) { | |||
| zero_copy_batch_label_addrs_.clear(); | |||
| } | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); | |||
| Status ret = CopyModelData(input_data, output_data, is_dynamic_); | |||
| @@ -76,6 +76,20 @@ struct timeInfo { | |||
| int64_t dumpEndTime; | |||
| }; | |||
/// Memory size counters kept for one task; every counter starts at 0.
/// NOTE(review): units are presumably bytes — confirm against the code that fills these in.
struct TaskMemInfo {
  int64_t input_size = 0;
  int64_t output_size = 0;
  int64_t weight_size = 0;
  int64_t workspace_size = 0;
  int64_t total_size = 0;
};
| struct ProfileInfo { | |||
| FusionOpInfo fusion_info; | |||
| TaskMemInfo memory_info; | |||
| uint32_t task_count{0}; | |||
| }; | |||
| enum ExecuteMode { | |||
| INITIALIZATION, | |||
| SYNCHRONIZATION, | |||
| @@ -226,8 +240,6 @@ class DavinciModel { | |||
| const vector<OpDescPtr> &GetDataList() const { return data_op_list_; } | |||
| // get Op | |||
| const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; } | |||
| OpDescPtr GetOpByIndex(uint32_t index) const { | |||
| if (op_list_.find(index) == op_list_.end()) { | |||
| return nullptr; | |||
| @@ -436,10 +448,6 @@ class DavinciModel { | |||
| int64_t GetLoadEndTime() { return load_end_time_; } | |||
| Status SinkModelProfile(); | |||
| Status SinkTimeProfile(const InputData ¤t_data); | |||
| Status ReportProfilingData(); | |||
| void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { | |||
| @@ -537,15 +545,6 @@ class DavinciModel { | |||
| struct timeInfo time_info_; | |||
| int32_t dataInputTid; | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Save Batch label Info. | |||
| /// @param [in] const OpDescPtr &op_desc | |||
| /// @param [in] uintptr_t addr: address value in args block. | |||
| /// @return None. | |||
| /// | |||
| void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Copy Check input size and model op size. | |||
| @@ -657,14 +656,6 @@ class DavinciModel { | |||
| /// | |||
| void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief input zero copy node Initialize. | |||
| /// @param [in] NodePtr: Data Op. | |||
| /// @return Status | |||
| /// | |||
| Status InitInputZeroCopyNodes(const NodePtr &node); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief NetOutput Op Initialize. | |||
| @@ -673,30 +664,6 @@ class DavinciModel { | |||
| /// | |||
| Status InitNetOutput(const NodePtr &node); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief output zero copy node Initialize. | |||
| /// @param [in] NodePtr: Data Op. | |||
| /// @return Status | |||
| /// | |||
| Status InitOutputZeroCopyNodes(const NodePtr &node); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief input zero copy node Initialize for Case. | |||
| /// @param [in] NodePtr: Data Op. | |||
| /// @return Status | |||
| /// | |||
| Status InitInputBatchLabel(const NodePtr &node); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief output zero copy node Initialize for Case. | |||
| /// @param [in] NodePtr: netoutput Op. | |||
| /// @return Status | |||
| /// | |||
| Status InitOutputBatchLabel(const NodePtr &node); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Constant Op Init. | |||
| @@ -845,6 +812,11 @@ class DavinciModel { | |||
| void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); | |||
| Status InitModelProfile(); | |||
| Status SinkModelProfile(); | |||
| Status SinkTimeProfile(const InputData ¤t_data); | |||
| Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | |||
| std::vector<ge::OutputTensorInfo> &outputs); | |||
| @@ -922,11 +894,6 @@ class DavinciModel { | |||
| std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | |||
| std::set<const void *> copy_only_addrs_; // Address need copy to original place. | |||
| // {op_id, batch_label} | |||
| std::map<int64_t, std::string> zero_copy_op_id_batch_label_; | |||
| // {batch_label, addrs} | |||
| std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_; | |||
| std::vector<TaskInfoPtr> task_list_; | |||
| // rt_model_handle | |||
| rtModel_t rt_model_handle_; | |||
| @@ -1026,6 +993,9 @@ class DavinciModel { | |||
| // key: input_index: input is merge node; value: each gear info and each output shape | |||
| std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | |||
| std::vector<std::vector<int64_t>> all_gears_info_; | |||
| std::multimap<uint32_t, uint32_t> op_id_map_; | |||
| std::vector<ProfileInfo> profile_list_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | |||
| @@ -89,6 +89,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||
| if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { | |||
| std::vector<uint64_t> v_aicpu_kernel; | |||
| std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| auto iter = model_aicpu_kernel_.find(model_key); | |||
| if (iter != model_aicpu_kernel_.end()) { | |||
| GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); | |||
| @@ -176,7 +177,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||
| } | |||
| void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||
| std::lock_guard<std::mutex> lock(sess_ids_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| auto it = sess_ids_.find(session_id); | |||
| if (it == sess_ids_.end()) { | |||
| GELOGI("The session: %lu not created.", session_id); | |||
| @@ -205,7 +206,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||
| } | |||
| ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| auto hybrid_davinci_model = hybrid_model_map_.find(model_id); | |||
| if (hybrid_davinci_model != hybrid_model_map_.end()) { | |||
| uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); | |||
| @@ -215,8 +216,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
| auto it = model_map_.find(model_id); | |||
| if (it == model_map_.end()) { | |||
| GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | |||
| return GE_EXEC_MODEL_ID_INVALID; | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | |||
| return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; | |||
| } | |||
| uint64_t session_id = it->second->GetSessionId(); | |||
| DestroyAicpuSession(session_id); | |||
| @@ -225,7 +226,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
| ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { | |||
| GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | |||
| if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | |||
| Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); | |||
| @@ -238,7 +239,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ | |||
| } | |||
| ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| std::vector<uint64_t> v_aicpu_kernel; | |||
| std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | |||
| if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | |||
| @@ -250,7 +251,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i | |||
| } | |||
| ModelManager::~ModelManager() { | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| model_map_.clear(); | |||
| model_aicpu_kernel_.clear(); | |||
| cust_aicpu_so_.clear(); | |||
| @@ -358,18 +359,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
// Stores davinci_model in model_map_ under key id while holding a lock on map_mutex_.
// The GE_CHK_BOOL_EXEC arguments pair a null check with `return` and a "ptr is null" message, so
// presumably nothing is inserted for a null model — confirm against the macro definition.
// NOTE(review): two lock_guard lines (std::mutex and std::recursive_mutex) appear back to back and
// declare the same name; this looks like the before/after pair of a diff hunk, and only one of them
// can remain in compilable code.
| void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) { | |||
| GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id); | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| model_map_[id] = davinci_model; | |||
| } | |||
// Stores hybrid_model in hybrid_model_map_ under key id while holding a lock on map_mutex_.
// The GE_CHK_BOOL_EXEC arguments pair a null check with `return` and a "ptr is null" message, so
// presumably nothing is inserted for a null model — confirm against the macro definition.
// NOTE(review): two lock_guard lines (std::mutex and std::recursive_mutex) appear back to back and
// declare the same name; this looks like the before/after pair of a diff hunk, and only one of them
// can remain in compilable code.
| void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) { | |||
| GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id); | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| hybrid_model_map_[id] = hybrid_model; | |||
| } | |||
| Status ModelManager::DeleteModel(uint32_t id) { | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| auto it = model_map_.find(id); | |||
| auto hybrid_model_it = hybrid_model_map_.find(id); | |||
| @@ -384,22 +385,22 @@ Status ModelManager::DeleteModel(uint32_t id) { | |||
| } else if (hybrid_model_it != hybrid_model_map_.end()) { | |||
| (void)hybrid_model_map_.erase(hybrid_model_it); | |||
| } else { | |||
| GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); | |||
| return GE_EXEC_MODEL_ID_INVALID; | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); | |||
| return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
// Looks up id in model_map_ while holding a lock on map_mutex_.
// Returns the stored shared_ptr, or nullptr when no entry exists for id.
// NOTE(review): two lock_guard lines (std::mutex and std::recursive_mutex) appear back to back and
// declare the same name; this looks like the before/after pair of a diff hunk, and only one of them
// can remain in compilable code.
| std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) { | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| auto it = model_map_.find(id); | |||
| return (it == model_map_.end()) ? nullptr : it->second; | |||
| } | |||
| std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) { | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| auto it = hybrid_model_map_.find(id); | |||
| return (it == hybrid_model_map_.end()) ? nullptr : it->second; | |||
| @@ -902,7 +903,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||
| } | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | |||
| davinci_model->SetModelDescVersion(new_model_desc); | |||
| @@ -970,8 +971,9 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, | |||
| } | |||
// Fetches the model registered under model_id via GetModel and forwards batch_info / dynamic_type
// to its GetCurShape; returns SUCCESS after that call.
// A null model is rejected before the forward: the GE_CHK_BOOL_RET_STATUS line names
// ACL_ERROR_GE_EXEC_MODEL_ID_INVALID as the status for that case.
// NOTE(review): davinci_model is declared twice and null-checked twice (GE_CHECK_NOTNULL and
// GE_CHK_BOOL_RET_STATUS); this looks like the before/after pair of a diff hunk, and only one
// declaration/check pair can remain in compilable code.
| Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| auto davinci_model = GetModel(model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "GetCurShape Failed, Invalid Model ID %u!", model_id); | |||
| davinci_model->GetCurShape(batch_info, dynamic_type); | |||
| return SUCCESS; | |||
| } | |||
| @@ -984,7 +986,8 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami | |||
| } | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "GetModelAttr Failed, Invalid Model ID %u!", model_id); | |||
| davinci_model->GetModelAttr(dynamic_output_shape_info); | |||
| return SUCCESS; | |||
| } | |||
| @@ -994,9 +997,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||
| std::vector<uint32_t> &inputFormats, | |||
| std::vector<uint32_t> &outputFormats) { | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", | |||
| model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | |||
| return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | |||
| } | |||
| @@ -1011,18 +1013,14 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||
// Fetches the model registered under model_id via GetModel and returns the result of its
// GetAIPPInfo(index, aipp_info). A null model is rejected first; the check names
// ACL_ERROR_GE_EXEC_MODEL_ID_INVALID as the status for that case.
// NOTE(review): the message argument of the check appears twice (split over two lines, then on one
// line); this looks like the before/after pair of a diff hunk, and only one form can remain.
| Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "GetAIPPInfo failed, invalid model_id is %u.", | |||
| model_id); | |||
| "GetAIPPInfo failed, invalid model_id is %u.", model_id); | |||
| return davinci_model->GetAIPPInfo(index, aipp_info); | |||
| } | |||
// Fetches the model registered under model_id via GetModel and returns the result of its
// GetAippType(index, type, aipp_index). A null model is rejected first; the check names
// ACL_ERROR_GE_EXEC_MODEL_ID_INVALID as the status for that case.
// NOTE(review): the failure message says "GetAIPPInfo failed" although this is GetAippType —
// looks copied from the function above; confirm whether the text should name GetAippType.
// NOTE(review): the message argument of the check appears twice (split over two lines, then on one
// line); this looks like the before/after pair of a diff hunk, and only one form can remain.
| Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "GetAIPPInfo failed, invalid model_id is %u.", | |||
| model_id); | |||
| "GetAIPPInfo failed, invalid model_id is %u.", model_id); | |||
| return davinci_model->GetAippType(index, type, aipp_index); | |||
| } | |||
| @@ -1059,8 +1057,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
| if (model_helper.GetModelType()) { | |||
| bool is_shape_unknown = false; | |||
| GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), | |||
| "CheckIsUnknownShape failed, model id:%u", | |||
| model_id); | |||
| "CheckIsUnknownShape failed, model id:%u", model_id); | |||
| if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||
| return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); | |||
| } | |||
| @@ -1078,8 +1075,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed"); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } catch (...) { | |||
| GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise"); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| ret = davinci_model->Assign(ge_model); | |||
| if (ret != SUCCESS) { | |||
| @@ -1091,7 +1088,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
| int32_t device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&device_id); | |||
| if (rt_ret != RT_ERROR_NONE || device_id < 0) { | |||
| GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||
| GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| davinci_model->SetDeviceId(device_id); | |||
| @@ -1246,7 +1243,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||
| } | |||
| Status ModelManager::CreateAicpuSession(uint64_t session_id) { | |||
| std::lock_guard<std::mutex> lock(sess_ids_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| auto it = sess_ids_.find(session_id); | |||
| // never been created by any model | |||
| if (it == sess_ids_.end()) { | |||
| @@ -1465,8 +1462,7 @@ void ModelManager::GenModelId(uint32_t *id) { | |||
| if (id == nullptr) { | |||
| return; | |||
| } | |||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||
| std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||
| *id = ++max_model_id_; | |||
| } | |||
| @@ -353,8 +353,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
| std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; | |||
| std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_; | |||
| uint32_t max_model_id_; | |||
| std::mutex map_mutex_; | |||
| std::mutex sess_ids_mutex_; | |||
| std::recursive_mutex map_mutex_; | |||
| std::mutex session_id_create_mutex_; | |||
| static::std::mutex exeception_infos_mutex_; | |||
| uint64_t session_id_bias_; | |||
| @@ -90,20 +90,18 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
| fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names; | |||
| fusion_op_info_.op_name = op_desc_->GetName()); | |||
| string session_graph_model_id; | |||
| davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | |||
| // get bin_file_key | |||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||
| // new aicpu kernel(rtCpuKernelLaunch) no need to check function | |||
| if (kernel_type_ == ccKernelType::CCE_AI_CORE) { | |||
| rtError_t rt_ret; | |||
| rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_); | |||
| rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", | |||
| kernel_def.stub_func().c_str()); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||
| } else if (kernel_type_ == ccKernelType::TE) { | |||
| rtError_t rt_ret; | |||
| rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); | |||
| // get bin_file_key | |||
| string session_graph_model_id; | |||
| davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | |||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||
| rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
| GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||
| @@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo | |||
| addr_count_ = out_count; | |||
| } | |||
// For each of the GetAddrCount() address maps, looks up outside_addr; on a hit it records
// (addr value, offset) on zero_copy_task via SetTaskArgsOffset and appends the pointer
// args + offset to the list stored under outside_addr in that map.
// NOTE(review): this span interleaves two versions of the function, presumably the before/after
// sides of a diff hunk — a bool-returning signature that tracks set_batch_label_flag and reads the
// maps through GetOutsideAddrs(), and a void signature that indexes outside_addrs_ directly.
// Only one signature and one lookup pair can remain in compilable code.
| bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | |||
| void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | |||
| const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | |||
| bool set_batch_label_flag = false; | |||
| for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | |||
| auto &addrs_mapping_list = GetOutsideAddrs(); | |||
| auto args_addrs = addrs_mapping_list[out_count].find(outside_addr); | |||
| if (args_addrs != addrs_mapping_list[out_count].end()) { | |||
| auto args_addrs = outside_addrs_[out_count].find(outside_addr); | |||
| if (args_addrs != outside_addrs_[out_count].end()) { | |||
| GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); | |||
| void *args_val = static_cast<uint8_t *>(args) + offset; | |||
| args_addrs->second.push_back(args_val); | |||
| GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, | |||
| args, offset); | |||
| set_batch_label_flag = true; | |||
| } | |||
| } | |||
| return set_batch_label_flag; | |||
| } | |||
| } // namespace ge | |||
| @@ -51,7 +51,7 @@ class ZeroCopyOffset { | |||
| const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag); | |||
| void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | |||
| std::vector<void *> &tensor_addrs); | |||
| bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); | |||
| void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); | |||
| // basic_addr of l2-fusion | |||
| void *GetBasicAddr() const { return basic_addr_; } | |||
| @@ -22,8 +22,6 @@ | |||
| #include "common/ge_compiler_options.h" | |||
| namespace ge { | |||
| const char *const kDefaultBatchLable = "Batch_default"; | |||
| ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size) | |||
| : name_(name), args_addr_(args), args_size_(size), is_updated_(false) {} | |||
| @@ -66,68 +64,23 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) { | |||
| const uint8_t *data = static_cast<const uint8_t *>(info); | |||
| args_info_.assign(data, data + size); | |||
| GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, | |||
| GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, | |||
| args_addr_, args_size_, size); | |||
| } | |||
| /** | |||
| * @ingroup ge | |||
| * @brief Check is dynamic batch node. | |||
| * @param [in] addr: virtual address value from Op. | |||
| * @param [in] data: data buffer from user. | |||
| * @param [in] batch_addrs: dynamic batch addr info. | |||
| * @param [in] batch_label: batch label. | |||
| * @return: true / false | |||
| */ | |||
| bool ZeroCopyTask::CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, | |||
| uintptr_t addr) { | |||
| // Used for dynamic batch / resolution scene | |||
| set<uintptr_t> dynamic_input_addrs; | |||
| auto dynamic_input_iter = batch_addrs.find(batch_label); | |||
| if (dynamic_input_iter != batch_addrs.end()) { | |||
| dynamic_input_addrs = dynamic_input_iter->second; | |||
| } | |||
| set<uintptr_t> fix_input_addrs; | |||
| auto fix_input_iter = batch_addrs.find(kDefaultBatchLable); | |||
| if (fix_input_iter != batch_addrs.end()) { | |||
| fix_input_addrs = fix_input_iter->second; | |||
| } | |||
| if (fix_input_addrs.empty()) { | |||
| if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end()) { | |||
| return false; | |||
| } | |||
| } else { | |||
| if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end() && | |||
| fix_input_addrs.find(addr) == fix_input_addrs.end()) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| /** | |||
| * @ingroup ge | |||
| * @brief Set user data addr to Task param. | |||
| * @param [in] addr: virtual address value from Op. | |||
| * @param [in] buffer_addr: real_data_buffer_addr from user. | |||
| * @param [in] batch_addrs: dynamic batch addr info. | |||
| * @param [in] batch_label: batch label. | |||
| * @return: void | |||
| */ | |||
| Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs, | |||
| const string &batch_label) { | |||
| Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) { | |||
| auto iter = task_addr_offset_.find(addr); | |||
| if (iter != task_addr_offset_.end()) { | |||
| auto &cur_pair = *iter; | |||
| uint8_t *args_info = args_info_.data(); | |||
| for (auto offset : cur_pair.second) { | |||
| if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast<uintptr_t>(args_addr_ + offset))) { | |||
| continue; | |||
| } | |||
| auto dst_addr = static_cast<uint8_t *>(buffer_addr); | |||
| GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", | |||
| name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); | |||
| @@ -67,12 +67,9 @@ class ZeroCopyTask { | |||
| * @brief Set user data addr to Task param. | |||
| * @param [in] addr: virtual address value from Op. | |||
| * @param [in] buffer_addr: data buffer_addr from user. | |||
| * @param [in] batch_addrs: dynamic batch addr info. | |||
| * @param [in] batch_label: batch label. | |||
| * @return: 0 SUCCESS / others FAILED | |||
| */ | |||
| ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs, | |||
| const string &batch_label); | |||
| ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr); | |||
| /** | |||
| * @ingroup ge | |||
| @@ -91,9 +88,6 @@ class ZeroCopyTask { | |||
| return batch_label_; | |||
| } | |||
| protected: | |||
| bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr); | |||
| private: | |||
| const string name_; | |||
| @@ -56,6 +56,7 @@ | |||
| #include "graph/passes/iterator_op_pass.h" | |||
| #include "graph/passes/link_gen_mask_nodes_pass.h" | |||
| #include "graph/passes/mark_graph_unknown_status_pass.h" | |||
| #include "graph/passes/dynamic_single_op_reset_shape_pass.h" | |||
| #include "graph/passes/merge_pass.h" | |||
| #include "graph/passes/merge_input_memcpy_pass.h" | |||
| #include "graph/passes/merge_to_stream_merge_pass.h" | |||
| @@ -631,11 +632,22 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ | |||
| Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| PassManager pass_for_dynamic_shape_reset_optimize; | |||
| GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass( | |||
| "SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass)) | |||
| GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize); | |||
| Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph); | |||
| GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph"); | |||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||
| GELOGE(ret, "Run passes when optimize subgraph failed"); | |||
| return ret; | |||
| } | |||
| auto sub_graph_map = partitioner.GetSubGraphMap(); | |||
| GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.", | |||
| options_.build_mode.c_str(), | |||
| options_.build_step.c_str()); | |||
| Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); | |||
| ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Multiply optimize subgraph failed"); | |||
| return ret; | |||
| @@ -63,7 +63,7 @@ Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t | |||
| }); | |||
| auto hcom_remote_mem_register = | |||
| (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register"); | |||
| (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "HcomRegRemoteAccessMem"); | |||
| if (hcom_remote_mem_register == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); | |||
| return FAILED; | |||
| @@ -0,0 +1,159 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/passes/dynamic_single_op_reset_shape_pass.h" | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "graph/utils/op_desc_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
namespace {
// Single dim value used to build the shape written by this pass.
// NOTE(review): -2 is presumably the framework's unknown-rank marker — confirm.
constexpr int64_t kDynamicShapeDim = -2;
// Engine names that identify a node as running on AICPU.
const char *const kEngineNameAiCpu = "DNN_VM_AICPU_ASCEND";
const char *const kEngineNameAiCpuTf = "DNN_VM_AICPU";
}  // namespace
| Status DynamicSingleOpResetShapePass::Run(ComputeGraphPtr graph) { | |||
| GE_CHECK_NOTNULL(graph); | |||
| std::shared_ptr<GELib> instance = ge::GELib::GetInstance(); | |||
| if (instance == nullptr || !instance->InitFlag()) { | |||
| GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Run CompileNodesPass failed."); | |||
| return ge::GE_CLI_GE_NOT_INITIALIZED; | |||
| } | |||
| // pass if graph has not aicpu node. | |||
| bool is_not_aicpu = false; | |||
| if (CheckAllAicpuNodes(graph, is_not_aicpu) != SUCCESS) { | |||
| GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Check if graph has not aicpu node failed."); | |||
| return ge::GE_CLI_GE_NOT_INITIALIZED; | |||
| } | |||
| if (is_not_aicpu) { | |||
| GELOGI("The graph [%s] has not aicpu node, whose aicpu nodes would not be reset dynamic shape", | |||
| graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
| // pass input and output node | |||
| if (node->GetType() == DATA || node->GetType() == CONSTANT || node->GetType() == CONSTANTOP || | |||
| node->GetType() == NETOUTPUT) { | |||
| continue; | |||
| } | |||
| // pass node without attr: ATTR_DYNAMIC_SHAPE_SINGLE_AICPU | |||
| bool single_aicpu_unknown = false; | |||
| if (!AttrUtils::GetBool(node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, single_aicpu_unknown) || | |||
| !single_aicpu_unknown) { | |||
| continue; | |||
| } | |||
| // reset aicpu shape to unknown shape | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (ResetOpShape(op_desc) != SUCCESS) { | |||
| GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Reset node[%s] dynamic shapr failed.", node->GetName().c_str()); | |||
| return ge::GE_CLI_GE_NOT_INITIALIZED; | |||
| } | |||
| GELOGD("Reset dynamic aicpu node [%s] shape success!", node->GetName().c_str()); | |||
| } | |||
| GELOGD("Reset dynamic aicpu nodes shape of graph [%s] success!", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| Status DynamicSingleOpResetShapePass::CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu) { | |||
| is_not_aicpu = false; | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
| // pass input and output node | |||
| if (node->GetType() == DATA || node->GetType() == CONSTANT || node->GetType() == CONSTANTOP || | |||
| node->GetType() == NETOUTPUT) { | |||
| continue; | |||
| } | |||
| // find if there are aicpu nodes. | |||
| auto op_desc = node->GetOpDesc(); | |||
| string engine_name = op_desc->GetOpEngineName(); | |||
| if (engine_name.empty()) { | |||
| GELOGE(GRAPH_FAILED, "Get engine failed of node[%s].", node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| if (engine_name != kEngineNameAiCpu && engine_name != kEngineNameAiCpuTf) { | |||
| is_not_aicpu = true; | |||
| return SUCCESS; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| bool DynamicSingleOpResetShapePass::CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc) { | |||
| bool is_const = false; | |||
| (void)AttrUtils::GetBool(input_tensor_desc, CONST_ATTR_NAME_INPUT, is_const); | |||
| return is_const; | |||
| } | |||
| Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) { | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| std::vector<int64_t> dynamic_shape_dims = {kDynamicShapeDim}; | |||
| GeShape dynamic_shape(dynamic_shape_dims); | |||
| bool reset_shape_flag = false; | |||
| if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) { | |||
| (void)ResetOutputTensorShape(op_desc, dynamic_shape); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, | |||
| bool &reset_shape_flag) { | |||
| reset_shape_flag = false; | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) { | |||
| auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||
| GE_CHECK_NOTNULL(input_desc); | |||
| // pass scalar input desc | |||
| auto dims_ori = input_desc->GetShape().GetDims(); | |||
| if (dims_ori.size() == 0) { | |||
| continue; | |||
| } | |||
| // pass const input | |||
| if (CheckIfConstInput(input_desc)) { | |||
| continue; | |||
| } | |||
| reset_shape_flag = true; | |||
| input_desc->SetShape(dynamic_shape); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status DynamicSingleOpResetShapePass::ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape) { | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) { | |||
| auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i)); | |||
| GE_CHECK_NOTNULL(output_desc); | |||
| // pass scalar input desc | |||
| auto output_dims_ori = output_desc->GetShape().GetDims(); | |||
| if (output_dims_ori.size() == 0) { | |||
| continue; | |||
| } | |||
| output_desc->SetShape(dynamic_shape); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_ | |||
| #define GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_ | |||
| #include "graph/graph.h" | |||
| #include "inc/graph_pass.h" | |||
| #include "init/gelib.h" | |||
| namespace ge { | |||
| class DynamicSingleOpResetShapePass : public GraphPass { | |||
| public: | |||
| Status Run(ComputeGraphPtr graph) override; | |||
| private: | |||
| Status ResetOpShape(OpDescPtr &op_desc); | |||
| Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag); | |||
| Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape); | |||
| Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu); | |||
| bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_ | |||
| @@ -1407,11 +1407,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { | |||
| } | |||
| Status ProcessMultiBatch(ComputeGraphPtr &graph) { | |||
| const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); | |||
| if (multi_batch_with_case != nullptr) { | |||
| PassManager pass_manager; | |||
| GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); | |||
| return pass_manager.Run(graph); | |||
| if (GetLocalOmgContext().dynamic_node_type.empty()) { | |||
| const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); | |||
| if (multi_batch_with_switchn == nullptr) { | |||
| PassManager pass_manager; | |||
| GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); | |||
| return pass_manager.Run(graph); | |||
| } | |||
| } | |||
| if (!GetLocalOmgContext().need_multi_batch) { | |||
| GELOGI("No need to process_multi for no_train graph."); | |||
| @@ -193,6 +193,7 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE | |||
| target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | |||
| google=ascend_private | |||
| LOG_CPP | |||
| ) | |||
| target_include_directories(host_cpu_opskernel_builder_static PRIVATE | |||
| @@ -42,10 +42,10 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||
| GELOGE(FAILED, "hccl handle is nullptr! "); | |||
| return FAILED; | |||
| } | |||
| auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function<void(HcclResult status)>))dlsym( | |||
| context.handle_, "EnqueueHcomOpertion"); | |||
| if (EnqueueHcomOpertion == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function."); | |||
| auto HcomExecEnqueueOperation = (HcclResult(*)(HcomOperation, std::function<void(HcclResult status)>))dlsym( | |||
| context.handle_, "HcomExecEnqueueOperation"); | |||
| if (HcomExecEnqueueOperation == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke HcomExecEnqueueOperation hcom unknown node function."); | |||
| if (dlclose(context.handle_) != 0) { | |||
| GELOGW("Failed to close handle %s", dlerror()); | |||
| } | |||
| @@ -70,7 +70,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||
| const OpDescPtr op_desc = node_item.GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| HcomOpertion op_info; | |||
| HcomOperation op_info; | |||
| op_info.hcclType = op_desc->GetType(); | |||
| op_info.inputPtr = inputs.empty() ? nullptr : inputs[0]; | |||
| op_info.outputPtr = outputs.empty() ? nullptr : outputs[0]; | |||
| @@ -96,7 +96,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||
| op_info.root = root_id; | |||
| auto callback = [this, op_desc](HcclResult status) { | |||
| if (status != HCCL_SUCCESS) { | |||
| GELOGE(HCCL_E_INTERNAL, "node %s call EnqueueHcomOpertion failed, ret: 0x%X", op_desc->GetName().c_str(), status); | |||
| GELOGE(HCCL_E_INTERNAL, "node %s call HcomExecEnqueueOperation failed, ret: 0x%X", op_desc->GetName().c_str(), status); | |||
| } | |||
| std::lock_guard<std::mutex> lock(this->hccl_mutex_); | |||
| this->cond_.notify_all(); | |||
| @@ -110,9 +110,9 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||
| context.GetNodeName(), op_info.hcclType.c_str(), count, op_info.dataType, op_info.opType, op_info.root); | |||
| op_info.count = count; | |||
| HcclResult hccl_ret = EnqueueHcomOpertion(op_info, callback); | |||
| HcclResult hccl_ret = HcomExecEnqueueOperation(op_info, callback); | |||
| if (hccl_ret != HCCL_SUCCESS) { | |||
| GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); | |||
| GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); | |||
| return HCCL_E_INTERNAL; | |||
| } | |||
| @@ -213,11 +213,11 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector<HcomRemoteAccess | |||
| Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) { | |||
| GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); | |||
| auto EnqueueRemoteAccess = | |||
| auto HcomExecEnqueueRemoteAccess = | |||
| (HcclResult(*)(const string &, const vector<HcomRemoteAccessAddrInfo> &, | |||
| std::function<void(HcclResult status)>))dlsym(context.handle_, "EnqueueRemoteAccess"); | |||
| if (EnqueueRemoteAccess == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function."); | |||
| std::function<void(HcclResult status)>))dlsym(context.handle_, "HcomExecEnqueueRemoteAccess"); | |||
| if (HcomExecEnqueueRemoteAccess == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke HcomExecEnqueueRemoteAccess hcom unknown node function."); | |||
| if (dlclose(context.handle_) != 0) { | |||
| GELOGW("Failed to close handle %s", dlerror()); | |||
| } | |||
| @@ -228,15 +228,15 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||
| auto callback = [this](HcclResult status) { | |||
| if (status != HCCL_SUCCESS) { | |||
| GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", status); | |||
| GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", status); | |||
| } | |||
| std::lock_guard<std::mutex> lock(this->hccl_mutex_); | |||
| this->cond_.notify_all(); | |||
| GELOGI("rdma callback success."); | |||
| }; | |||
| HcclResult hccl_ret = EnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); | |||
| HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); | |||
| if (hccl_ret != HCCL_SUCCESS) { | |||
| GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); | |||
| GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); | |||
| return HCCL_E_INTERNAL; | |||
| } | |||
| @@ -307,32 +307,32 @@ Status HcclNodeExecutor::Initialize() { | |||
| GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror()); | |||
| return FAILED; | |||
| } | |||
| auto HcomExcutorInitialize = (HcclResult(*)())dlsym(handle_, "HcomExcutorInitialize"); | |||
| if (HcomExcutorInitialize == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke HcomExcutorInitialize hcom unknown node function."); | |||
| auto HcomExecInitialize = (HcclResult(*)())dlsym(handle_, "HcomExecInitialize"); | |||
| if (HcomExecInitialize == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke HcomExecInitialize hcom unknown node function."); | |||
| return FAILED; | |||
| } | |||
| HcclResult hccl_ret = HcomExcutorInitialize(); | |||
| HcclResult hccl_ret = HcomExecInitialize(); | |||
| if (hccl_ret == HCCL_E_PTR) { | |||
| GELOGI("Hccl comm is null, hcom executor initialize is not required."); | |||
| } else if (hccl_ret == HCCL_SUCCESS) { | |||
| GELOGI("Hcom executor initialize success."); | |||
| } else { | |||
| GELOGE(FAILED, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); | |||
| GELOGE(FAILED, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status HcclNodeExecutor::Finalize() { | |||
| auto HcomExcutorFinalize = (HcclResult(*)())dlsym(handle_, "HcomExcutorFinalize"); | |||
| if (HcomExcutorFinalize == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke HcomExcutorFinalize hcom unknown node function."); | |||
| auto HcomExecFinalize = (HcclResult(*)())dlsym(handle_, "HcomExecFinalize"); | |||
| if (HcomExecFinalize == nullptr) { | |||
| GELOGE(FAILED, "Failed to invoke HcomExecFinalize hcom unknown node function."); | |||
| return FAILED; | |||
| } | |||
| HcclResult hccl_ret = HcomExcutorFinalize(); | |||
| HcclResult hccl_ret = HcomExecFinalize(); | |||
| if (hccl_ret != HCCL_SUCCESS) { | |||
| GELOGE(FAILED, "Call HcomExcutorFinalize failed, ret: 0x%X", hccl_ret); | |||
| GELOGE(FAILED, "Call HcomExecFinalize failed, ret: 0x%X", hccl_ret); | |||
| return FAILED; | |||
| } | |||
| // dlclose file handle | |||
| @@ -27,6 +27,7 @@ target_compile_definitions(atc PRIVATE | |||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
| COMPILE_OMG_PACKAGE | |||
| google=ascend_private | |||
| LOG_CPP | |||
| ) | |||
| target_include_directories(atc PRIVATE | |||
| @@ -87,6 +88,7 @@ target_compile_definitions(atc_atc.bin PRIVATE | |||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
| COMPILE_OMG_PACKAGE | |||
| google=ascend_private | |||
| LOG_CPP | |||
| ) | |||
| target_include_directories(atc_atc.bin PRIVATE | |||
| @@ -152,6 +154,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE | |||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
| COMPILE_OMG_PACKAGE | |||
| google=ascend_private | |||
| LOG_CPP | |||
| ) | |||
| target_include_directories(fwk_atc.bin PRIVATE | |||
| @@ -15,6 +15,7 @@ message Output { | |||
| int32 original_output_data_type = 7; | |||
| int32 original_output_format = 8; | |||
| uint64 size = 9; | |||
| Shape origin_shape = 10; | |||
| } | |||
| message Input { | |||
| @@ -23,6 +24,7 @@ message Input { | |||
| Shape shape = 3; | |||
| uint64 address = 4; | |||
| uint64 size = 5; | |||
| Shape origin_shape = 6; | |||
| } | |||
| enum BufferType { | |||
| @@ -395,19 +395,9 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, | |||
| OP_BANK_UPDATE}; | |||
| // for interface: aclgrphParse | |||
| const std::set<std::string> ir_parser_suppported_options = {INPUT_FORMAT, | |||
| INPUT_SHAPE, | |||
| OP_NAME_MAP, | |||
| IS_DYNAMIC_INPUT, | |||
| INPUT_FP16_NODES, | |||
| IS_INPUT_ADJUST_HW_LAYOUT, | |||
| IS_OUTPUT_ADJUST_HW_LAYOUT, | |||
| OUTPUT, | |||
| OUTPUT_TYPE, | |||
| OUT_NODES, | |||
| COMPRESS_WEIGHT_CONF, | |||
| ENABLE_SCOPE_FUSION_PASSES, | |||
| LOG_LEVEL}; | |||
| const std::set<std::string> ir_parser_suppported_options = { | |||
| INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, | |||
| OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES}; | |||
| // for interface: aclgrphBuildInitialize | |||
| const std::set<std::string> global_options = {CORE_TYPE, | |||
| @@ -37,6 +37,9 @@ enum FrameworkType { | |||
| MINDSPORE = 1, | |||
| TENSORFLOW = 3, | |||
| ANDROID_NN, | |||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||
| ONNX, | |||
| #endif | |||
| FRAMEWORK_RESERVED, | |||
| }; | |||
| @@ -20,7 +20,8 @@ | |||
| #include "ge/ge_api_error_codes.h" | |||
| #include "toolchain/prof_callback.h" | |||
| #define MAX_DEV_NUM (64) | |||
| const int MAX_DEV_NUM = 64; | |||
| enum ProfCommandHandleType { | |||
| kProfCommandhandleInit = 0, | |||
| kProfCommandhandleStart, | |||
| @@ -32,7 +33,7 @@ enum ProfCommandHandleType { | |||
| struct ProfCommandHandleData { | |||
| uint64_t profSwitch; | |||
| uint32_t devNums; // length of device id list | |||
| uint32_t devNums; // length of device id list | |||
| uint32_t devIdList[MAX_DEV_NUM]; | |||
| uint32_t modelId; | |||
| }; | |||
| @@ -30,8 +30,6 @@ | |||
| #include "runtime/base.h" | |||
| namespace ge { | |||
| class ModelListenerAdapter; | |||
| class SingleOp; | |||
| class DynamicSingleOp; | |||
| @@ -55,14 +53,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
| ge::Status Initialize(); | |||
| ge::Status Finalize(); | |||
| // Load model | |||
| ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority, | |||
| std::shared_ptr<ge::ModelListener> listener); | |||
| ge::Status UnloadModel(uint32_t modelId); | |||
| ge::Status RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data); | |||
| // Get input and output descriptor | |||
| ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false); | |||
| @@ -168,9 +160,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
| ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
| std::vector<ge::TensorDesc> &output_desc); | |||
| ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data, | |||
| std::shared_ptr<ge::ModelListener> listener); | |||
| ge::Status CommandHandle(const ge::Command &command); | |||
| ge::Status SetDump(const DumpConfig &dump_config); | |||
| @@ -297,8 +286,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
| private: | |||
| static bool isInit_; | |||
| }; | |||
| ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info); | |||
| } // namespace ge | |||
| #endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ | |||
| @@ -59,7 +59,7 @@ struct ParserContext { | |||
| bool train_flag = false; | |||
| domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; | |||
| domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||
| RunMode run_mode = ONLY_PRE_CHECK; | |||
| RunMode run_mode = GEN_OM_MODEL; | |||
| // save caffe custom proto path, used by caffe parse | |||
| std::string custom_proto_path; | |||
| // save caffe proto path, used by caffe parse | |||
| @@ -182,6 +182,7 @@ set(COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/graph/passes/atomic_addr_clean_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/mark_same_addr_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/mark_graph_unknown_status_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/mark_agnostic_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/dimension_compute_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/dimension_adjust_pass.cc" | |||
| @@ -18,9 +18,7 @@ | |||
| #define D_SYSLOG_H_ | |||
| #ifdef __cplusplus | |||
| #ifndef LOG_CPP | |||
| extern "C" { | |||
| #endif | |||
| #endif // __cplusplus | |||
| #ifndef LINUX | |||
| @@ -107,7 +105,6 @@ extern "C" { | |||
| #define SECURITY_LOG_MASK (0x00100000) | |||
| #define RUN_LOG_MASK (0x01000000) | |||
| #define OPERATION_LOG_MASK (0x10000000) | |||
| #define RESERVERD_LENGTH 52 | |||
| typedef struct tagDCODE { | |||
| const char *cName; | |||
| @@ -119,18 +116,6 @@ typedef struct tagKV { | |||
| char *value; | |||
| } KeyValue; | |||
| typedef enum { | |||
| APPLICATION = 0, | |||
| SYSTEM | |||
| } ProcessType; | |||
| typedef struct { | |||
| ProcessType type; | |||
| unsigned int pid; | |||
| unsigned int deviceId; | |||
| char reserved[RESERVERD_LENGTH]; | |||
| } LogAttr; | |||
| /** | |||
| * @ingroup slog | |||
| * | |||
| @@ -243,14 +228,6 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); | |||
| */ | |||
| DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); | |||
| /** | |||
| * @ingroup slog | |||
| * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION | |||
| * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) | |||
| * @return: 0: SUCCEED, others: FAILED | |||
| */ | |||
| DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
| /** | |||
| * @ingroup slog | |||
| * @brief dlog_error: print error log | |||
| @@ -390,8 +367,6 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); | |||
| void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | |||
| #ifdef __cplusplus | |||
| #ifndef LOG_CPP | |||
| } | |||
| #endif // LOG_CPP | |||
| #endif // __cplusplus | |||
| #endif // D_SYSLOG_H_ | |||