diff --git a/CMakeLists.txt b/CMakeLists.txt
index 776a3232..9194f119 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,7 +74,7 @@ if (ENABLE_OPEN_SRC)
         set(STATIC_ACL_LIB ${GE_LIB_PATH})
         find_module(slog libslog.so ${GE_LIB_PATH})
         find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
-        find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
+        find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH})
         find_module(hccl libhccl.so ${GE_LIB_PATH})
         find_module(adump_server libadump_server.a ${GE_LIB_PATH})
         find_module(runtime libruntime.so ${GE_LIB_PATH})
@@ -83,7 +83,7 @@ if (ENABLE_OPEN_SRC)
        find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
        find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
        find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
-        find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
+        find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
        #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
    elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
        add_subdirectory(tests)
@@ -97,7 +97,7 @@ if (ENABLE_OPEN_SRC)
            find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
            find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
            find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
-            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+            find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
            find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
            if(PRODUCT STREQUAL "flr3")
                message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
@@ -109,7 +109,7 @@ if (ENABLE_OPEN_SRC)
            find_module(resource libresource.so ${ASCEND_ATC_DIR})
            find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
            find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
-            find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
+            find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
            #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
            if(PRODUCT STREQUAL "flr3")
            elseif(PRODUCT STREQUAL "flr1")
@@ -120,7 +120,7 @@ if (ENABLE_OPEN_SRC)
                find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
            endif()
        elseif(PLATFORM STREQUAL "all")
-            find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
+            find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
            find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
            find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
            find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
@@ -128,7 +128,7 @@ if (ENABLE_OPEN_SRC)
            find_module(resource libresource.so ${ASCEND_ATC_DIR})
            find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
            find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
-            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+            find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
            find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
            #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
        else()
diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 0325a7de..8d9edb65 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -1,7 +1,6 @@
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
   add_subdirectory(common)
   add_subdirectory(plugin/engine)
-  add_subdirectory(graph/build/memory)
   add_subdirectory(ge_local_engine)
   add_subdirectory(host_cpu_engine)
   add_subdirectory(executor)
@@ -342,6 +341,13 @@ set(TRAIN_SRC_LIST
     "analyzer/analyzer.cc"
     "ir_build/ge_ir_build.cc"
"ir_build/atc_ir_common.cc" + "graph/build/memory/memory_assigner.cc" + "graph/build/memory/graph_mem_assigner.cc" + "graph/build/memory/binary_block_mem_assigner.cc" + "graph/build/memory/block_mem_assigner.cc" + "graph/build/memory/hybrid_mem_assigner.cc" + "graph/build/memory/max_block_mem_assigner.cc" + "graph/build/memory/var_mem_assign_util.cc" ) set(INFER_SRC_LIST @@ -611,11 +617,35 @@ set(INFER_SRC_LIST "graph/label/while_label_maker.cc" "graph/label/partitioned_call_label_maker.cc" "analyzer/analyzer.cc" + "graph/build/memory/memory_assigner.cc" + "graph/build/memory/graph_mem_assigner.cc" + "graph/build/memory/binary_block_mem_assigner.cc" + "graph/build/memory/block_mem_assigner.cc" + "graph/build/memory/hybrid_mem_assigner.cc" + "graph/build/memory/max_block_mem_assigner.cc" + "graph/build/memory/var_mem_assign_util.cc" ) if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ -add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) +add_library(ge_runner SHARED + ${TRAIN_SRC_LIST} + ${PROTO_SRCS} + ${PROTO_CLIENT_SRCS} + $,msprofiler_fwk,msprofiler_fwk_object>> +) + +add_library(msprofiler_fwk_object OBJECT IMPORTED GLOBAL) + +if (msprofiler_fwk_ext_LIBRARY_DIR) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) + execute_process( + COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object + ) + file(GLOB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) + set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) +endif() target_compile_definitions(ge_runner PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 @@ -660,12 +690,8 @@ target_include_directories(ge_runner PRIVATE target_link_libraries(ge_runner PRIVATE $ - ge_memory adump_server static_mmpa - -Wl,--whole-archive - msprofiler_fwk - -Wl,--no-whole-archive -Wl,--no-as-needed graph ge_common @@ -728,7 +754,6 @@ target_include_directories(ge_compiler PRIVATE target_link_libraries(ge_compiler PRIVATE $ - ge_memory static_mmpa -Wl,--no-as-needed graph @@ -755,7 +780,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object) if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a) execute_process( COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object ) file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o) else() @@ -764,8 +789,21 @@ endif() add_library(opensrc_ascendcl SHARED ${OBJECT_LIST} + $,msprofiler,msprofiler_object>> ) +add_library(msprofiler_object OBJECT IMPORTED GLOBAL) + +if (msprofiler_ext_LIBRARY_DIR) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object) + execute_process( + COMMAND ar x ${msprofiler_ext_LIBRARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object + ) + file(GLOB MSPROFILER_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) + set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_OBJECT_LIST}) +endif() + target_compile_definitions(opensrc_ascendcl PRIVATE google=ascend_private $<$:ONLY_COMPILE_OPEN_SRC> @@ -780,14 +818,7 @@ target_link_options(opensrc_ascendcl PRIVATE -Wl,--allow-multiple-definition -Wl,-z,muldefs -Wl,-Bsymbolic - -Wl,--exclude-libs,libascend_protobuf.a - -Wl,--exclude-libs,libge_executor.a - -Wl,--exclude-libs,libge_common.a - -Wl,--exclude-libs,libgraph.a - 
-Wl,--exclude-libs,libmmpa.a - -Wl,--exclude-libs,libregister.a - -Wl,--exclude-libs,liberror_manager.a - -Wl,--exclude-libs,libadump_server.a + -Wl,--exclude-libs,ALL ) target_link_libraries(opensrc_ascendcl PRIVATE -Wl,--whole-archive @@ -799,7 +830,6 @@ target_link_libraries(opensrc_ascendcl PRIVATE register_static error_manager_static adump_server - msprofiler -Wl,--no-whole-archive -Wl,--no-as-needed c_sec diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 92417286..aad2bbe3 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -302,6 +302,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } data.append(" model_id:").append(std::to_string(model_id)); + data.append(" task_id:").append(std::to_string(graph.task_id)); + data.append(" stream_id:").append(std::to_string(graph.stream_id)); data.append("\n"); GraphDescReport(device_id, data); diff --git a/ge/common/types.cc b/ge/common/types.cc index 1cc70347..268e7caa 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -480,6 +480,9 @@ REGISTER_OPTYPE_DEFINE(HVDWAIT, "HorovodWait"); // aicpu op for online_infer dynamic_dims REGISTER_OPTYPE_DEFINE(GETDYNAMICDIMS, "GetDynamicDims"); +// profiling training trace node +REGISTER_OPTYPE_DEFINE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); + const std::string MODEL_ATTR_TASKS = "tasks"; const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR = "task_gen_base_addr"; const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr"; diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index abdc0c3f..0ea0e66d 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -676,7 +676,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); return ACL_ERROR_GE_EXEC_NOT_INIT; } - Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); + Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info); if (ret != SUCCESS) { GELOGW("GetAIPPInfo is not success."); return ret; @@ -713,43 +713,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector &dyn return SUCCESS; } -Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, - std::vector &output_desc) { - GELOGI("get model desc info for zero copy begin."); - if (!isInit_) { - GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); - return ACL_ERROR_GE_EXEC_NOT_INIT; - } - - std::vector input_desc_infos; - std::vector output_desc_infos; - std::vector input_formats; - std::vector output_formats; - - Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos, - input_formats, output_formats); - if (ret != domi::SUCCESS) { - GELOGE(ret, "Get DescInfo from zero copy failed. 
ret = %u", ret); - return ACL_ERROR_GE_GET_TENSOR_INFO; - } - - if (input_formats.size() != input_desc_infos.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "input_formats.size() != input_desc_infos.size()."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - if (output_formats.size() != output_desc_infos.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats.size() != output_desc_infos.size()."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); - GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); - - GELOGI("get model desc info from zero copy end."); - return ge::SUCCESS; -} - Status GeExecutor::CommandHandle(const Command &command) { Status ret = GraphLoader::CommandHandle(command); if (ret != SUCCESS) { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index dce40c3e..143d5550 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -421,6 +421,52 @@ static Status GenerateTaskForConstant(const std::shared_ptr &graph return SUCCESS; } +Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { + bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); + com_graph->SetGraphUnknownFlag(false); + + GELOGD("Start to mark profiling task attr for fp and bp."); + TaskGenerator task_generator; + ProfilingPoint profiling_point; + std::vector all_reduce_node_index; + Status ret = task_generator.FindProfilingNodeIndex(com_graph, profiling_point, all_reduce_node_index); + com_graph->SetGraphUnknownFlag(original_unknown_shape_flag); + if (ret != SUCCESS) { + GELOGW("Find profiling node index failed."); + } + if (profiling_point.fp_index == 0 || profiling_point.bp_index == 0 || profiling_point.end_index.empty()) { + GELOGD("No need to mark fp bp profiling task attr."); + return SUCCESS; + } + // mark profiling task attr for node + uint32_t node_index = 0; + for (const auto &node : com_graph->GetAllNodes()) { + OpDescPtr op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(node->GetOpDesc()); + node_index++; + if (profiling_point.fp_index == node_index) { + GELOGI("The first fp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true); + } + if (profiling_point.bp_index == node_index) { + GELOGI("The bp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + } + for (size_t i = 0; i < all_reduce_node_index.size(); i++) { + if (all_reduce_node_index[i] == node_index) { + GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + continue; + } + } + if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) { + GELOGI("The end node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true); + } + } + return SUCCESS; +} + Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, @@ -437,6 +483,12 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } + // Set fp bp profiling task attr for graph + if 
(MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) { + GELOGE(FAILED, "Set fp bp profiling task attr for graph."); + return FAILED; + } + auto all_graphs = comp_graph->GetAllSubgraphs(); if (all_graphs.empty()) { all_graphs.push_back(comp_graph); diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index b828a80d..524b60e0 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -60,6 +60,7 @@ class GraphBuilder { Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list); + Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt deleted file mode 100644 index f6f56a54..00000000 --- a/ge/graph/build/memory/CMakeLists.txt +++ /dev/null @@ -1,45 +0,0 @@ -set(SRC_LIST - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - "var_mem_assign_util.cc" -) - -############ libge_memory.a ############ -add_library(ge_memory STATIC ${SRC_LIST}) - -target_compile_options(ge_memory PRIVATE - -Werror - -O2 - -fno-common -) - -target_compile_definitions(ge_memory PRIVATE - google=ascend_private - LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> -) - -target_link_libraries(ge_memory PRIVATE - $ - ascend_protobuf - c_sec -) - -target_include_directories(ge_memory PRIVATE - ${CMAKE_CURRENT_LIST_DIR} - ${GE_CODE_DIR}/ge - ${GE_CODE_DIR}/inc - ${GE_CODE_DIR}/inc/external - ${METADEF_DIR}/inc - ${METADEF_DIR}/inc/external - ${METADEF_DIR}/inc/external/graph - ${GE_CODE_DIR}/inc/framework - #### yellow zone #### - ${GE_CODE_DIR}/../inc - #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc -) diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 7e45ad61..21e82d11 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -274,6 +274,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra }; GE_MAKE_GUARD(release, callback); + uint64_t all_reduce_node_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -292,7 +293,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // Part2: Call auto fusion_task_info = FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -316,7 +317,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra type.c_str()); // Profiling task size_t task_list_size_before = task_def_list.size(); - GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); + 
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_node_idx)); int64_t op_id = op_desc->GetId(); // Compatible with dynamic shape scenes, the default is 0 int64_t stream_id = 0; @@ -336,8 +338,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra return ret; } // Profiling task - GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); - + GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_node_idx)); size_t task_list_size_after = task_def_list.size(); // If tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -380,6 +382,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info auto &op_name_map = fusion_task_info.op_name_map; auto &profiling_point = fusion_task_info.profiling_point; auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; + auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; // If op_desc have this attr, call nodes with same group key in a stream together if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && (fusion_nodes_seen.count(node.get()) == 0)) { @@ -426,7 +429,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return INTERNAL_ERROR; } // profiling task - (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); + (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_idx); run_context.stream = run_context.graphStreamList[stream_id]; GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); @@ -439,7 +443,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return ret; } // profiling task - (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); + (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_idx); size_t task_list_size_after = task_def_list.size(); // if tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -830,6 +835,11 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint return SUCCESS; } +Status TaskGenerator::FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, + std::vector &all_reduce_nodes) { + return FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes); +} + Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes) const { GE_CHECK_NOTNULL(graph); @@ -840,7 +850,6 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi GELOGD("Profiling is not open."); return SUCCESS; } - GELOGI("Start get FP/BP index."); std::string fp_point_str; std::string bp_point_str; @@ -878,18 +887,27 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } - Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list) { + vector &task_def_list, uint64_t 
&all_reduce_node_idx) { const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); - if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || - (profiling_point.end_index.empty())) { + bool is_insert_fp_profiling_task = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); + bool is_insert_bp_profiling_task = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); + bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || + (profiling_point.end_index.empty())) && + (!(is_insert_fp_profiling_task || is_insert_bp_profiling_task)); + if (!is_profiling || no_insert_profiling_task) { return SUCCESS; } - if (profiling_point.fp_index == node_index) { + GELOGD("Insert fp profiling task: %d, insert bp profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", + is_insert_fp_profiling_task, is_insert_bp_profiling_task, profiling_point.fp_index, profiling_point.bp_index, + profiling_point.end_index.size()); + + if ((profiling_point.fp_index == node_index) || is_insert_fp_profiling_task) { uint64_t jobid_log_id = ge::GetContext().TraceId(); GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id); @@ -913,22 +931,40 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const task_def_list.emplace_back(fp_task_def); } - for (size_t i = 0; i < all_reduce_nodes.size(); i++) { - if (all_reduce_nodes[i] != node_index) { - continue; + bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); + uint64_t all_reduce_task_idx = 0; + bool is_insert_all_reduce_task = false; + if (is_all_reduce && is_insert_bp_profiling_task) { + all_reduce_task_idx = all_reduce_node_idx; + is_insert_all_reduce_task = true; + } + if (is_all_reduce) { + all_reduce_node_idx++; + } + if (!is_insert_all_reduce_task) { + for (size_t i = 0; i < all_reduce_nodes.size(); i++) { + if (all_reduce_nodes[i] == node_index) { + all_reduce_task_idx = i; + is_insert_all_reduce_task = true; + break; + } } + } + + if (is_insert_all_reduce_task) { GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); TaskDef ar_task_def; ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); ar_task_def.set_stream_id(op_desc->GetStreamId()); LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); if (ar_log_def != nullptr) { - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), GELOGE(FAILED, "Multiply result is out of range."); return FAILED); - auto log_id = i * kProfilingArStep + kProfilingArStartLogid; + auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; ar_log_def->set_logid(log_id); ar_log_def->set_notify(false); + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); } task_def_list.push_back(ar_task_def); } @@ -937,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, 
-                                               vector<domi::TaskDef> &task_def_list) {
+                                               vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) {
   GE_CHECK_NOTNULL(op_desc);
   const char *profiling_mode = std::getenv(kProfilingMode);
   bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
                       ProfilingManager::Instance().ProfilingTrainingTraceOn();
-  if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
-      (profiling_point.end_index.empty())) {
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_insert_end_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
+  bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
+                                   (profiling_point.end_index.empty())) &&
+                                  (!(is_insert_bp_profiling_task || is_insert_end_profiling_task));
+  if (!is_profiling || no_insert_profiling_task) {
     return SUCCESS;
   }
-  if (profiling_point.bp_index == node_index) {
+  GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
+         is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
+         profiling_point.end_index.size());
+
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {
     GELOGI("The last BP operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef bp_task_def;
     bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -957,7 +1004,9 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
     bp_log_def->set_notify(false);
     task_def_list.emplace_back(bp_task_def);
   }
-  if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {
+
+  if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end() ||
+      is_insert_end_profiling_task) {
     GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef end_task_def;
     end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -969,20 +1018,32 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
     task_def_list.emplace_back(end_task_def);
   }
 
+  uint32_t all_reduce_task_idx = 0;
+  bool is_insert_all_reduce_task = false;
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    all_reduce_task_idx = all_reduce_node_idx;
+    is_insert_all_reduce_task = true;
+  }
+
   for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
-    if (all_reduce_nodes[i] != node_index) {
-      continue;
+    if (all_reduce_nodes[i] == node_index) {
+      all_reduce_task_idx = i;
+      is_insert_all_reduce_task = true;
+      break;
     }
+  }
+
+  if (is_insert_all_reduce_task) {
     GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef ar_task_def;
     ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
     ar_task_def.set_stream_id(op_desc->GetStreamId());
     LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
     GE_CHECK_NOTNULL(ar_log_def);
-    GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
+    GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
                     GELOGE(FAILED, "Multiply result is out of range.");
                     return FAILED);
-    auto log_id = i * kProfilingArStep + kProfilingArEndLogid;
+    auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid;
     ar_log_def->set_logid(log_id);
     ar_log_def->set_notify(false);
     task_def_list.emplace_back(ar_task_def);
diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h
index c93b2007..5970954c 100755
--- a/ge/graph/build/task_generator.h
+++ b/ge/graph/build/task_generator.h
@@ -51,6 +51,7 @@ struct FusionTaskInfo {
   std::map<uint32_t, string> &op_name_map;
   ProfilingPoint &profiling_point;
   vector<uint32_t> all_reduce_nodes;
+  uint64_t all_reduce_node_idx;
 };
 
 class TaskGenerator {
@@ -76,6 +77,8 @@ class TaskGenerator {
   ///
   Status GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context);
 
+  Status FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
+                                std::vector<uint32_t> &all_reduce_nodes);
  private:
   Status UpdateAnchorStatus(const NodePtr &node);
 
@@ -126,10 +129,10 @@ class TaskGenerator {
                                std::vector<uint32_t> &all_reduce_nodes) const;
   Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                    std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                   std::vector<domi::TaskDef> &task_def_list);
+                                   std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx);
   Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                   std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                  std::vector<domi::TaskDef> &task_def_list);
+                                  std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx);
 
   static bool IsProfPoint(const OpDescPtr &op, const std::string &name);
diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc
index 97e2fd1b..3c5618e8 100755
--- a/ge/graph/execute/graph_execute.cc
+++ b/ge/graph/execute/graph_execute.cc
@@ -560,34 +560,10 @@ Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynam
   return SUCCESS;
 }
 
-Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
-                                                        vector<InputOutputDescInfo> &output_desc,
-                                                        std::vector<uint32_t> &input_formats,
-                                                        std::vector<uint32_t> &out_formats) {
-  try {
-    auto model_manager = ge::ModelManager::GetInstance();
-    GE_CHECK_NOTNULL(model_manager);
-    Status ret =
-        model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats);
-    if (ret != SUCCESS) {
-      GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed.");
-      return ret;
-    }
-  } catch (std::bad_alloc &) {
-    GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfoForZeroCopy failed, bad memory allocation occur !");
-    return MEMALLOC_FAILED;
-  } catch (...) {
-    GELOGE(FAILED, "GetInputOutputDescInfoForZeroCopy failed, some exceptions occur !");
-    return FAILED;
-  }
-
-  return SUCCESS;
-}
-
-Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
+Status GraphExecutor::GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
   auto model_manager = ge::ModelManager::GetInstance();
   GE_CHECK_NOTNULL(model_manager);
-  Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info);
+  Status ret = model_manager->GetAippInfo(model_id, index, aipp_info);
   if (ret != SUCCESS) {
     GELOGW("GetAIPPInfo is not success.");
     return ret;
diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h
index efc30743..d2a92e47 100755
--- a/ge/graph/execute/graph_execute.h
+++ b/ge/graph/execute/graph_execute.h
@@ -73,7 +73,7 @@ class GraphExecutor {
                                          vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
                                          std::vector<uint32_t> &output_formats, bool new_model_desc = false);
 
-  static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
+  static Status GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
 
   static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
 
@@ -110,10 +110,6 @@ class GraphExecutor {
 
   static Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
 
-  static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
-                                                  vector<InputOutputDescInfo> &output_desc,
-                                                  std::vector<uint32_t> &input_formats,
-                                                  std::vector<uint32_t> &output_formats);
   static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
   static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
                                           std::vector<InputOutputDims> &output_dims);
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index f3d6f82b..37a39308 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -75,7 +75,6 @@ namespace ge {
 namespace {
 const uint32_t kDataIndex = 0;
-const uint32_t kOutputNum = 1;
 const uint32_t kTrueBranchStreamNum = 1;
 const uint32_t kGetDynamicDimsCount = 1;
 const uint32_t kThreadNum = 16;
@@ -87,6 +86,7 @@ const uint32_t kDumpL1FusionOpMByteSize = 2097152;  // 2 * 1024 * 1024
 const uint32_t kDumpFlagOfL1Fusion = 0;
 const char *const kDefaultBatchLable = "Batch_default";
 const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node";
+const char *const kMultiBatchNodePostfix = "_ascend_mbatch_batch_";
 const int32_t kInvalidStream = -1;
 const uint32_t kEndOfSequence = 0x0704000a;
 const uint32_t kEndOfSequenceNew = 507005;
@@ -155,7 +155,6 @@ DavinciModel::~DavinciModel() {
     GE_CHK_STATUS(ModelRunStop());
 
     op_list_.clear();
-    data_op_list_.clear();
     tensor_name_to_fixed_addr_size_.clear();
     tensor_name_to_peer_output_index_.clear();
     GE_DELETE_NEW_SINGLE(data_inputer_);
@@ -867,13 +866,17 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
        GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
        return PARAM_INVALID;
      }
+      if (InitRealSizeAndShapeInfo(compute_graph, node) != SUCCESS) {
+        GELOGE(PARAM_INVALID, "Init real size and shape failed, Name: %s", op_desc->GetName().c_str());
+        return PARAM_INVALID;
+      }
      continue;
    }
 
    auto it = op_desc_handle.find(op_desc->GetType());
    if (it != op_desc_handle.end()) {
      if ((this->*it->second)(op_desc) != SUCCESS) {
-        GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
+        GELOGE(PARAM_INVALID, "Node init failed, Name: %s", op_desc->GetName().c_str());
        return PARAM_INVALID;
      }
      continue;
@@ -926,7 +929,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
   GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc.");
   GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle.");
 
-  return OptInputOutputInfo(data_by_index, output_op_list);
+  return GenInputOutputInfo(data_by_index, output_op_list);
 }
 
 void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
@@ -969,9 +972,6 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
   }
 
   data_by_index[data_index] = op_desc;
-  auto data_op = AttrUtils::CopyOpDesc(op_desc);
-  GE_CHECK_NOTNULL(data_op);
-  data_op_list_.push_back(data_op);
   if (known_node_) {
     return SUCCESS;
   }
@@ -1017,23 +1017,18 @@
 /// @param [in] output_op_list: list of NetOutput op.
 /// @return Status
 ///
-Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index,
+Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index,
                                         const vector<OpDescPtr> &output_op_list) {
-  GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size());
-  if (data_by_index.size() != data_op_list_.size()) {
-    GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size());
-    return INTERNAL_ERROR;
-  }
-
-  data_op_list_.clear();
+  GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size());
   for (auto &item : data_by_index) {
-    auto data_op = AttrUtils::CopyOpDesc(item.second);
-    GE_CHECK_NOTNULL(data_op);
-    data_op_list_.emplace_back(data_op);
     auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
     GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
     input_addrs_list_.emplace_back(output_addrs);
+
+    GE_CHK_STATUS_RET(InitAippInfo(item.first, item.second), "Init AIPP Info failed");
+    GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed");
+    GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed");
+    GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed");
     if (item.second->GetType() == AIPP_DATA_TYPE) {
       GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str());
       is_dynamic_aipp_ = true;
@@ -1061,7 +1056,8 @@
     }
   }
 
-  return InitOutputDescInfo(output_op_list, output_descs_, output_formats_);
+  GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed");
+  return InitOutputDescInfo(output_op_list);
 }
 
 bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
@@ -1143,16 +1139,24 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
       real_virtual_addrs_.insert(real_addr);
     }
   }
+  return SUCCESS;
+}
+
+Status DavinciModel::InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node) {
+  if (node->GetName().find(kMultiBatchNodePostfix) != string::npos) {
+    GELOGD("No need to get size and shape of netoutput in subgraph.");
+    return SUCCESS;
+  }
+  GELOGD("Start init real size and shape info of %s.", node->GetName().c_str());
   GetAllGearsInfo(node);
   if (is_getnext_sink_dynamic_) {
     GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS,
                     GELOGE(PARAM_INVALID, "Failed to get info of getdynamicdims node.");
                     return PARAM_INVALID;);
   }
   if (is_online_infer_dynamic_) {
-    GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(input_count, node) != SUCCESS,
+    GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(compute_graph, node) != SUCCESS,
                     GELOGE(PARAM_INVALID, "Failed to get gear and real out size info.");
                     return PARAM_INVALID;);
-    GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
+    GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(compute_graph, node) != SUCCESS,
                     GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info.");
                     return PARAM_INVALID;);
   }
@@ -1171,7 +1175,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) {
     if (shape_str.empty()) {
       continue;
     }
-    std::vector<int64_t> gear_info;
+    std::vector<int32_t> gear_info;
     std::vector<std::string> dims = ge::StringUtils::Split(shape_str, ',');
     for (const auto &dim : dims) {
       if (dim.empty()) {
@@ -1187,6 +1191,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) {
     }
   }
 }
+
 Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) {
   GE_CHECK_NOTNULL(node->GetOpDesc());
   size_t input_count = node->GetAllInDataAnchors().size();
@@ -1224,11 +1229,11 @@ Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) {
   return SUCCESS;
 }
 
-Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node) {
-  GELOGD("Start get gear and real output size info of %s, input count is %zu.", node->GetName().c_str(), input_count);
+Status DavinciModel::GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node) {
+  GELOGD("Start get gear and real output size info of %s.", node->GetName().c_str());
   merge_nodes_gear_and_real_out_size_info_.clear();
-  for (size_t idx = 0; idx < input_count; ++idx) {
-    auto in_anchor = node->GetAllInDataAnchors().at(idx);
+  size_t idx = 0;
+  for (const auto &in_anchor : node->GetAllInDataAnchors()) {
     auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
     if (peer_out_anchor == nullptr) {
       continue;
@@ -1236,89 +1241,106 @@
     auto peer_node = peer_out_anchor->GetOwnerNode();
     auto op_desc = peer_node->GetOpDesc();
     GE_CHECK_NOTNULL(op_desc);
-    if ((peer_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) {
-      if (GetRealOutputSizeOfMerge(idx, peer_node) != SUCCESS) {
+    if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) {
+      if (GetRealOutputSizeOfCase(graph, idx, peer_node) != SUCCESS) {
         GELOGE(PARAM_INVALID, "Get real output size of %s failed.", peer_node->GetName().c_str());
         return PARAM_INVALID;
       }
     }
+    idx++;
   }
   return SUCCESS;
 }
 
-Status DavinciModel::GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node) {
-  GELOGD("Start get output size of %s, which is %zu input to netoutput.", merge_node->GetName().c_str(), input_index);
-  std::map<vector<int64_t>, int64_t> gear_and_real_out_size_info;
-  for (auto &in_anchor : merge_node->GetAllInDataAnchors()) {
-    auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
-    if (peer_out_anchor == nullptr) {
-      continue;
-    }
-    auto in_node = peer_out_anchor->GetOwnerNode();
-    GELOGD("Input node of merge is %s.", in_node->GetName().c_str());
-    auto op_desc = in_node->GetOpDesc();
-    GE_CHECK_NOTNULL(op_desc);
-    string batch_label;
-    if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
-      size_t batch_index = static_cast<size_t>(stoi(batch_label.substr(batch_label.rfind('_') + 1)));
-      GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index);
-      if (batch_index > all_gears_info_.size()) {
-        GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid.");
-        return PARAM_INVALID;
-      }
-
-      const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc);
-      int output_index = ge::AnchorUtils::GetIdx(peer_out_anchor);
-      auto tensor_desc = op_desc->GetOutputDescPtr(output_index);
-      GE_CHECK_NOTNULL(tensor_desc);
-      int64_t data_size = 0;
-      if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) {
-        GELOGE(FAILED, "Get tensor size in bytes failed.");
-        return FAILED;
+Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index,
+                                             const NodePtr &case_node) {
+  GELOGD("Start get output size of %s, which is %zu input to netoutput.", case_node->GetName().c_str(), input_index);
+  const auto &func_desc = case_node->GetOpDesc();
+  GE_CHECK_NOTNULL(func_desc);
+  std::map<vector<int32_t>, int64_t> gear_and_real_out_size_info;
+  for (const auto &name : func_desc->GetSubgraphInstanceNames()) {
+    const auto &subgraph = graph->GetSubgraph(name);
+    if (subgraph == nullptr) {
+      GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s.", name.c_str());
+      return GE_GRAPH_EMPTY_SUBGRAPH;
+    }
+    for (auto &node : subgraph->GetDirectNode()) {
+      if (node->GetType() == NETOUTPUT) {
+        auto op_desc = node->GetOpDesc();
+        GE_CHECK_NOTNULL(op_desc);
+        string batch_label;
+        if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
+          size_t batch_index = static_cast<size_t>(stoi(batch_label.substr(batch_label.rfind('_') + 1)));
+          GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index);
+          if (batch_index > all_gears_info_.size()) {
+            GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid.");
+            return PARAM_INVALID;
+          }
+
+          const vector<int64_t> input_size_list = ModelUtils::GetInputSize(op_desc);
+          auto tensor_desc = op_desc->GetInputDescPtr(input_index);
+          GE_CHECK_NOTNULL(tensor_desc);
+          int64_t data_size = 0;
+          if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) {
+            GELOGE(FAILED, "Get tensor size in bytes failed.");
+            return FAILED;
+          }
+          gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size;
+          GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld",
+                 batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(),
+                 input_size_list[input_index], data_size);
+        }
+        break;
       }
-      gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size;
-      GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld",
-             batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(),
-             output_size_list[output_index], data_size);
     }
   }
   merge_nodes_gear_and_real_out_size_info_[input_index] = gear_and_real_out_size_info;
   return SUCCESS;
 }
 
-Status DavinciModel::GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc) {
-  GELOGD("Start to get dynamic output dims of %s.", op_desc->GetName().c_str());
+Status DavinciModel::GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node) {
+  GELOGD("Start to get dynamic output dims of %s.", node->GetName().c_str());
   merge_nodes_gear_and_real_out_shape_info_.clear();
-  std::vector<std::string> dynamic_output_shape_info;
-  if (!AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) {
-    GELOGD("Can not get dynamic output dims attr");
-    return SUCCESS;
-  }
-  GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str());
-  std::vector<vector<int64_t>> dynamic_output_shape;
-  ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape);
-  // idx: input_index to netoutput
-  for (size_t idx = 0; idx < input_count; ++idx) {
-    std::map<vector<int64_t>, vector<int64_t>> gear_and_real_out_shape_info;
-    for (auto &it : dynamic_output_shape) {
-      auto gear_index = static_cast<size_t>(it[0]);
-      if (gear_index > all_gears_info_.size()) {
-        GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast<size_t>(it[0]));
-        return PARAM_INVALID;
+  size_t idx = 0;
+  for (const auto &in_anchor : node->GetAllInDataAnchors()) {
+    auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
+    if (peer_out_anchor == nullptr) {
+      continue;
+    }
+    auto peer_node = peer_out_anchor->GetOwnerNode();
+    auto op_desc = peer_node->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+    if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) {
+      std::vector<std::string> dynamic_output_shape_info;
+      if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) {
+        GELOGD("Can not get dynamic output dims attr from %s.", node->GetName().c_str());
+        return SUCCESS;
       }
+      GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str());
+      std::vector<vector<int64_t>> dynamic_output_shape;
+      ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape);
+      std::map<vector<int32_t>, vector<int64_t>> gear_and_real_out_shape_info;
+      for (auto &it : dynamic_output_shape) {
+        auto gear_index = static_cast<size_t>(it[0]);
+        if (gear_index > all_gears_info_.size()) {
+          GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast<size_t>(it[0]));
+          return PARAM_INVALID;
+        }
 
-      if (static_cast<size_t>(it[1]) == idx) {
-        vector<int64_t> output_shape;
-        for (size_t i = 2; i < it.size(); ++i) {
-          output_shape.emplace_back(it[i]);
+        if (static_cast<size_t>(it[1]) == idx) {
+          vector<int64_t> output_shape;
+          for (size_t i = 2; i < it.size(); ++i) {
+            output_shape.emplace_back(it[i]);
+          }
+          gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape;
+          GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.",
+                 gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(),
+                 formats::JoinToString(output_shape).c_str());
         }
-        gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape;
-        GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.",
-               gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(),
-               formats::JoinToString(output_shape).c_str());
       }
+      merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info;
     }
-    merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info;
+    idx++;
   }
   return SUCCESS;
 }
@@ -1760,73 +1782,101 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_inp
 ///
 /// @ingroup ge
 /// @brief Get AIPP input info
 /// @param [in] index
-/// @param [out] aipp_info
+/// @param [in] OpDescPtr
 /// @return execute result
 ///
-Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
-  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
-  OpDescPtr data_op = data_op_list_[index];
-  if (!data_op->HasAttr(ATTR_NAME_AIPP)) {
-    GELOGW("GetAIPPInfo: there is not AIPP related with index %u.", index);
-    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
+  if (!op_desc->HasAttr(ATTR_NAME_AIPP)) {
+    GELOGW("there is no AIPP related with index %u.", index);
+    return SUCCESS;
   }
 
-  std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams());
-  GE_CHECK_NOTNULL(aipp_params);
-
-  ge::GeAttrValue::NAMED_ATTRS aipp_attr;
-  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
+  domi::AippOpParams aipp_params;
+  GeAttrValue::NAMED_ATTRS aipp_attr;
+  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
                          "Data node do not contain param aipp!");
-  GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
-  GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u",
-         data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank());
+  GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed");
+  GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u",
+         op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank());
 
-  GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info),
+  AippConfigInfo aipp_info;
+  GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(&aipp_params, aipp_info),
                     "convert aipp params to aipp config info failed");
 
+  aipp_info_list_[index] = aipp_info;
   return SUCCESS;
 }
 
-Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) {
-  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
-  // Set default value
-  type = DATA_WITHOUT_AIPP;
-  aipp_index = 0xFFFFFFFF;  // default invalid value
-  OpDescPtr data_op = data_op_list_[index];
-  GE_CHECK_NOTNULL(data_op);
-  if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
+///
+/// @ingroup ge
+/// @brief Get AIPP input info
+/// @param [in] index
+/// @param [out] aipp_info
+/// @return execute result
+///
+Status DavinciModel::GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const {
+  const auto it = aipp_info_list_.find(index);
+  if (it == aipp_info_list_.end()) {
+    GELOGW("there is no AIPP related with index %u.", index);
+    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+  }
+
+  aipp_info = it->second;
+  return SUCCESS;
+}
+
+Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list) {
+  if (!op_desc->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
     GELOGW("There is no aipp releated info with index %u.", index);
     return SUCCESS;
   }
-  std::string data_mode;
-  (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
+
+  // Set default value
+  InputAippType aipp_type = DATA_WITHOUT_AIPP;
+  string data_mode;
+  (void)AttrUtils::GetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
   if (data_mode == "static_aipp") {
-    type = DATA_WITH_STATIC_AIPP;
+    aipp_type = DATA_WITH_STATIC_AIPP;
   } else if (data_mode == "dynamic_aipp") {
-    type = DATA_WITH_DYNAMIC_AIPP;
+    aipp_type = DATA_WITH_DYNAMIC_AIPP;
  } else if (data_mode == "dynamic_aipp_conf") {
-    type = DYNAMIC_AIPP_NODE;
+    aipp_type = DYNAMIC_AIPP_NODE;
  } else {
    GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "The info of aipp releated info %s is invalid with index %u.",
           data_mode.c_str(), index);
    return ACL_ERROR_GE_AIPP_MODE_INVALID;
  }
 
-  if (type == DATA_WITH_DYNAMIC_AIPP) {
+  size_t aipp_index = 0xFFFFFFFF;  // default invalid value
+  if (aipp_type == DATA_WITH_DYNAMIC_AIPP) {
     string releated_name;
-    (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
-    for (size_t i = 0; i < data_op_list_.size(); ++i) {
-      GE_CHECK_NOTNULL(data_op_list_[i]);
-      if (data_op_list_[i]->GetName() == releated_name) {
-        GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index);
-        aipp_index = i;
+    (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
+    for (const auto &item : data_list) {
+      if (item.second->GetName() == releated_name) {
+        GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index);
+        aipp_index = item.first;
       }
     }
+
     if (aipp_index == 0xFFFFFFFF) {
-      GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "Can not find aipp data node from index %u", index);
-      return ACL_ERROR_GE_AIPP_NOT_EXIST;
+      GELOGW("Can not find aipp data node from index %u", index);
+      return SUCCESS;
     }
   }
+
+  aipp_type_list_[index] = { aipp_type, aipp_index };
+  return SUCCESS;
+}
+
+Status DavinciModel::GetAippType(uint32_t index, InputAippType &aipp_type, size_t &aipp_index) const {
+  const auto it = aipp_type_list_.find(index);
+  if (it == aipp_type_list_.end()) {
+    GELOGW("There is no aipp releated info with index %u.", index);
+    return SUCCESS;
+  }
+
+  aipp_type = it->second.first;
+  aipp_index = it->second.second;
   return SUCCESS;
 }
@@ -1842,7 +1892,7 @@ void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_
   dynamic_type_ = dynamic_type;
 }
 
-void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
+void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type) const {
   if (batch_size_.empty()) {
     GELOGD("User does not set dynamic size");
   }
@@ -1854,38 +1904,10 @@ void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynami
   dynamic_type = dynamic_type_;
 }
 
-void DavinciModel::GetModelAttr(vector<string> &out_shape_info) {
+void DavinciModel::GetModelAttr(vector<string> &out_shape_info) const {
   out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end());
 }
 
-Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
-                                                       vector<InputOutputDescInfo> &output_desc,
-                                                       std::vector<uint32_t> &input_formats,
-                                                       std::vector<uint32_t> &output_formats) {
-  if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) {
-    GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!");
-    return FAILED;
-  }
-
-  GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");
-
-  GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed");
-
-  GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR,
-                         "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(),
-                         output_memory_size_list_.size());
-
-  /// For function zero copy,the momery should be aligned by 512 bytes.
-  /// And, because of the cce op limit, size should be lager than the real shape size. The memory should be padded by 32
-  /// bytes.
-  /// *size equals to ((tensorDesc->dataSize + 2 * 32 - 1) / 32) * 32;
-  for (size_t i = 0; i < output_memory_size_list_.size(); i++) {
-    output_desc[i].size = output_memory_size_list_[i];
-  }
-
-  return SUCCESS;
-}
-
 void DavinciModel::SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format,
                                     InputOutputDescInfo &input) {
   uint32_t n, c, h, w;
@@ -1935,24 +1957,30 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format,
   }
 }
 
-Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
-  for (size_t index = 0; index < data_op_list_.size(); ++index) {
-    InputOutputDescInfo input;
-    GE_CHECK_NOTNULL(data_op_list_[index]);
-    GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0));
+Status DavinciModel::InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index) {
+  for (const auto &item : data_by_index) {
+    const auto op_desc = item.second;
+    GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0));
 
-    Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat();
-    CreateInputDimsInfo(data_op_list_[index], format, input);
+    InputOutputDescInfo input;
+    Format format = op_desc->GetInputDescPtr(0)->GetFormat();
+    CreateInputDimsInfo(op_desc, format, input);
 
-    input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType();
-    input.name = data_op_list_[index]->GetName();
+    input.data_type = op_desc->GetInputDescPtr(0)->GetDataType();
+    input.name = op_desc->GetName();
     int64_t input_size = 0;
-    GE_CHK_STATUS_RET(TensorUtils::GetSize(*data_op_list_[index]->GetInputDescPtr(0), input_size),
-                      "get input size failed.");
+    GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed.");
     input.size = input_size;
-    formats.push_back(format);
-    input_desc.push_back(input);
+    input_formats_.push_back(format);
+    input_descs_.push_back(input);
   }
+  return SUCCESS;
+}
+
+Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_descs, vector<uint32_t> &input_formats) {
+  input_descs.insert(input_descs.end(), input_descs_.begin(), input_descs_.end());
+  input_formats.insert(input_formats.end(), input_formats_.begin(), input_formats_.end());
+
   // cause GetInputDescInfo called not only once, set is_new_model_desc_ to false after calc the model input dims
   is_new_model_desc_ = false;
   return SUCCESS;
@@ -1962,7 +1990,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO
                                 uint32_t &format_result) {
   /// netoutput input tensor desc
   GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr");
-                  return );
+                  return);
   Format format = op_desc->GetInputDescPtr(index)->GetFormat();
   GeShape shape = op_desc->GetInputDescPtr(index)->GetShape();
   DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType();
@@ -2011,8 +2039,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO
   output.data_type = op_desc->GetInputDescPtr(index)->GetDataType();
 }
 
-Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list,
-                                        vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) {
+Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) {
   GELOGD("Output node size: %zu", output_op_list.size());
   for (const auto &op_desc : output_op_list) {
     uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
@@ -2037,28 +2064,20 @@
                       std::to_string(src_index[index]);
       }
       output.name = output_name;
-      output_descs.push_back(output);
-      output_formats.push_back(format_result);
+      output_descs_.push_back(output);
+      output_formats_.push_back(format_result);
     }
   }
   return SUCCESS;
 }
 
-Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) {
+Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs,
+                                       vector<uint32_t> &output_formats) const {
   output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end());
   output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end());
   return SUCCESS;
 }
 
-ge::Format DavinciModel::GetFormat() {
-  if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) {
-    GELOGW("OP List Pointer is null or input_desc size is not 1!");
-    return FORMAT_NCHW;
-  }
-
-  return data_op_list_[0]->GetInputDescPtr(0)->GetFormat();
-}
-
 Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) {
   rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE;
   const std::vector<DataBuffer> &blobs = input_data.blobs;
@@ -2567,7 +2586,7 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b
       GELOGD("Reinit cur dynamic dims when getnext sink dynamic.");
       cur_dynamic_dims_.clear();
       cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_);
-      auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t),
+      auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int32_t),
                           netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST);
       GE_CHK_RT_RET(ret);
     }
@@ -2668,11 +2687,11 @@ void *DavinciModel::Run(DavinciModel *model) {
         GE_IF_BOOL_EXEC(current_data.blobs.empty(), break);
         auto shape_data_buffer_data = current_data.blobs.back().data;
         auto shape_data_buffer_length = current_data.blobs.back().length;
-        model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data),
-                                        reinterpret_cast<int64_t *>(shape_data_buffer_data) +
-                                        shape_data_buffer_length / sizeof(int64_t));
+        model->cur_dynamic_dims_.assign(reinterpret_cast<int32_t *>(shape_data_buffer_data),
+                                        reinterpret_cast<int32_t *>(shape_data_buffer_data) +
+                                        shape_data_buffer_length / sizeof(int32_t));
         GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str());
-        delete[] reinterpret_cast<int64_t *>(current_data.blobs.back().data);
+        delete[] reinterpret_cast<int32_t *>(current_data.blobs.back().data);
         current_data.blobs.pop_back();
       }
       GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END));
@@ -3082,6 +3101,8 @@ Status DavinciModel::DistributeTask() {
     task_desc_info.stream_id = task->GetStreamId();
     task_desc_info.shape_type = "static";
     task_desc_info.cur_iter_num = 0;
+    profiler_report_op_info_[task_desc_info.op_name] =
+        std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
     task_desc_info_.emplace_back(task_desc_info);
     if (flag) {
       if (task->GetSktTaskID() != 0xFFFFFFFF) {
@@ -3089,6 +3110,8 @@ Status DavinciModel::DistributeTask() {
         string op_name = "super_kernel_" + to_string(task_index);
         task_desc_info.op_name = op_name;
         task_desc_info.task_id = task->GetSktTaskID();
+        profiler_report_op_info_[task_desc_info.op_name] =
+            std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
         task_desc_info_.emplace_back(task_desc_info);
       }
     }
@@ -3960,7 +3983,15 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des
     compute_graph_info.output_format = op_desc.output_format;
     compute_graph_info.output_shape = op_desc.output_shape;
     compute_graph_info.output_data_type = op_desc.output_data_type;
-
+    uint32_t task_id = 
0; + uint32_t stream_id = 0; + auto iter = profiler_report_op_info_.find(op_desc.op_name); + if (iter != profiler_report_op_info_.end()) { + task_id = iter->second.first; + stream_id = iter->second.second; + } + compute_graph_info.task_id = task_id; + compute_graph_info.stream_id = stream_id; graph_desc_info.emplace_back(compute_graph_info); } return SUCCESS; @@ -3973,25 +4004,45 @@ void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_s } } -Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) { - GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); - OpDescPtr data_op = data_op_list_[index]; - if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index); - return ACL_ERROR_GE_AIPP_NOT_EXIST; +Status DavinciModel::InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc) { + if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGI("There is no AIPP related with index %u, node: %s.", index, op_desc->GetName().c_str()); + return SUCCESS; } - vector inputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + vector inputs; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { std::string input = inputs[kAippOriginInputIndex]; - GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str()); + GELOGI("origin input str: %s", input.c_str()); std::vector infos = ge::StringUtils::Split(input, ':'); if (infos.size() != kAippInfoNum) { - GELOGW("origin input str is invalid."); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); + return ACL_ERROR_GE_AIPP_MODE_INVALID; } - orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); - orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); - orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + + OriginInputInfo input_info; + input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); + input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); + input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + orig_input_info_[index] = input_info; + } else { + OriginInputInfo input_info = { FORMAT_RESERVED, DT_UNDEFINED, 0 }; + orig_input_info_[index] = input_info; + } + + return SUCCESS; +} + +Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const { + const auto it = orig_input_info_.find(index); + if (it == orig_input_info_.end()) { + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "There is no AIPP related with index %u.", index); + return ACL_ERROR_GE_AIPP_NOT_EXIST; + } + + const OriginInputInfo &input_info = it->second; + if (input_info.format != FORMAT_RESERVED || input_info.data_type != DT_UNDEFINED) { + orig_input_info = input_info; } return SUCCESS; @@ -4001,7 +4052,8 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str()); std::vector infos = ge::StringUtils::Split(in_out_info, ':'); if (infos.size() != kAippInfoNum) { - GELOGW("origin input str is invalid."); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", 
infos.size(), kAippInfoNum); + return; } dims_info.name = infos[kAippInfoTensorName]; dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal); @@ -4016,47 +4068,58 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ } } -Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, - std::vector &output_dims) { - GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); - OpDescPtr data_op = data_op_list_[index]; - if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index); - return ACL_ERROR_GE_AIPP_NOT_EXIST; +Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc) { + if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGI("There is no AIPP related with index %u.", index); + return SUCCESS; } - vector inputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { - GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size()); + vector inputs; + vector input_dims; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + GELOGI("Data: %s has %zu related aippInfo.", op_desc->GetName().c_str(), inputs.size()); for (auto it : inputs) { InputOutputDims input_info; ParseAIPPInfo(it, input_info); input_dims.emplace_back(input_info); - GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str()); + GELOGD("Aipp origin input dims info: %s", it.c_str()); - ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex); int64_t data_input_size; - (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); - GELOGD( - "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: " - "%s, data_type: %s, shape: %s .", - index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, - TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), - TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), - formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); + ConstGeTensorDescPtr data_input_desc = op_desc->GetInputDescPtr(kDataIndex); + (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); + GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", + index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, + TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), + TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), + formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); } } - vector outputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { + vector outputs; + vector output_dims; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { for (auto it : outputs) { InputOutputDims output_info; ParseAIPPInfo(it, output_info); output_dims.emplace_back(output_info); - GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str()); + GELOGD("Aipp output dims info: %s", it.c_str()); } } + 
aipp_dims_info_[index] = { input_dims, output_dims }; + return SUCCESS; +} + +Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, vector &input_dims, + vector &output_dims) const { + const auto it = aipp_dims_info_.find(index); + if (it == aipp_dims_info_.end()) { + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "There is no AIPP related with index %u.", index); + return ACL_ERROR_GE_AIPP_NOT_EXIST; + } + + input_dims = it->second.first; + output_dims = it->second.second; return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 6b930b05..4d5d2252 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -286,13 +286,6 @@ class DavinciModel { // Modified from KernelTaskInfo. SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } - /// - /// @ingroup ge - /// @brief get model input and output format - /// @return ccTensorFormat_t current model input and output format - /// - Format GetFormat(); - rtModel_t GetRtModelHandle() const { return rt_model_handle_; } rtStream_t GetRtModelStream() const { return rt_model_stream_; } @@ -326,7 +319,7 @@ class DavinciModel { Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc); Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc, - vector &inputFormats, vector &output_formats); + vector &input_formats, vector &output_formats); /// /// @ingroup ge @@ -347,9 +340,9 @@ class DavinciModel { void GetUserDesignateShapeOrder(vector &user_input_shape_order) const; - void GetCurShape(vector &batch_info, int32_t &dynamic_type); + void GetCurShape(vector &batch_info, int32_t &dynamic_type) const; - void GetModelAttr(vector &dynamic_output_shape_info); + void GetModelAttr(vector &dynamic_output_shape_info) const; /// /// @ingroup ge @@ -358,9 +351,9 @@ class DavinciModel { /// @param [out] aipp_info /// @return execute result /// - Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); + Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const; - Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index); + Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const; /// /// @ingroup ge @@ -378,17 +371,6 @@ class DavinciModel { /// void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification); - /// - /// @ingroup ge - /// @brief get model input and output desc for zero copy - /// @param [out] input_shape model input size - /// @param [out] output_shape model output size - /// @return execute result - /// - Status GetInputOutputDescInfoForZeroCopy(vector &input_desc, - vector &output_desc, - vector &inputFormats, vector &output_formats); - Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); Status ReturnNoOutput(uint32_t data_id); @@ -538,9 +520,9 @@ class DavinciModel { Status UpdateKnownZeroCopyAddr(vector &total_io_addrs, bool update_args = true); void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } - Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); + Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const; Status GetAllAippInputOutputDims(uint32_t index, vector &input_dims, - vector &output_dims); + vector &output_dims) const; void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } // om file name void 
SetOmName(string om_name) { om_name_ = om_name; } @@ -626,7 +608,7 @@ class DavinciModel { void SetInputDimsInfo(const vector &model_input_dims, Format &format, InputOutputDescInfo &input); Status GetInputDescInfo(vector &input_desc, vector &input_formats); - Status GetOutputDescInfo(vector &output_desc, vector &output_formats); + Status GetOutputDescInfo(vector &output_desc, vector &output_formats) const; Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); @@ -688,7 +670,7 @@ class DavinciModel { /// @param [in] output_op_list: list of NetOutput op. /// @return Status /// - Status OptInputOutputInfo(const map &data_by_index, const vector &output_op_list); + Status GenInputOutputInfo(const map &data_by_index, const vector &output_op_list); /// /// @ingroup ge @@ -856,19 +838,26 @@ class DavinciModel { Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - Status InitOutputDescInfo(const vector &output_op_list, - vector &output_desc, vector &formats); + Status InitInputDescInfo(const map &data_by_index); + Status InitOutputDescInfo(const vector &output_op_list); + + Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); + Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); + Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map &data_list); + Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc); void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); void SetLabelForDynamic(const NodePtr &node); void ParseDynamicOutShape(const vector &str_info, vector> &vec_info); bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); + + Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node); void GetAllGearsInfo(const NodePtr &node); Status GetGetDynamicDimsNodeInfo(const NodePtr &node); - Status GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node); - Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); - Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); + Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node); + Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); + Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); bool is_weight_mem_has_inited_; bool is_feature_map_mem_has_inited_; @@ -888,9 +877,6 @@ class DavinciModel { map op_list_; // release after DavinciModel::Init - // data op_desc - vector data_op_list_; - vector variable_op_list_; map new_input_data_info_; @@ -976,6 +962,8 @@ class DavinciModel { // for profiling task and graph info vector task_desc_info_; + std::map> profiler_report_op_info_; + int64_t maxDumpOpNum_; // for data dump DataDumper data_dumper_; @@ -1021,15 +1009,15 @@ class DavinciModel { bool is_new_model_desc_{false}; bool is_online_infer_dynamic_ = false; bool is_getnext_sink_dynamic_ = false; - vector cur_dynamic_dims_; + vector cur_dynamic_dims_; void *netoutput_last_input_addr_ = nullptr; int64_t netoutput_last_input_size_ = 0; size_t shape_of_cur_dynamic_dims_ = 0; // key: input_index: input is merge node; value: each gear info and each output size - map, int64_t>> merge_nodes_gear_and_real_out_size_info_; + map, int64_t>> merge_nodes_gear_and_real_out_size_info_; // key: input_index: input is merge node; value: each gear info and each output shape - map, vector>> 
merge_nodes_gear_and_real_out_shape_info_; - vector> all_gears_info_; + map, vector>> merge_nodes_gear_and_real_out_shape_info_; + vector> all_gears_info_; multimap op_id_map_; vector profile_list_; @@ -1046,6 +1034,13 @@ class DavinciModel { vector output_buffer_size_; vector output_shape_info_; + map orig_input_info_; + map aipp_info_list_; + map> aipp_type_list_; + map, vector>> aipp_dims_info_; + + vector input_descs_; + vector input_formats_; vector output_descs_; vector output_formats_; }; diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/new_model_manager/davinci_model_parser.cc index 34180d08..76526de2 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/new_model_manager/davinci_model_parser.cc @@ -16,82 +16,7 @@ #include "graph/load/new_model_manager/davinci_model_parser.h" -#include -#include -#include -#include "securec.h" - -#include "common/debug/log.h" -#include "graph/load/new_model_manager/davinci_model.h" - namespace ge { -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelInfoParser(const ModelData &model, ModelInfo &model_info) { - GE_CHK_RT_RET(rtSetDevice(0)); - try { - uint32_t model_len = 0; - uint8_t *model_data = nullptr; - - Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); return ret, "Parse model failed"); - - auto *file_header = reinterpret_cast(model.model_data); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_header == nullptr, GE_CHK_RT(rtDeviceReset(0)); - return PARAM_INVALID, "file_header is null."); - - model_info.version = file_header->version; - model_info.is_encrypt = false; - GE_IF_BOOL_EXEC(ENCRYPTED == file_header->is_encrypt, model_info.is_encrypt = true); - - std::shared_ptr davinci_model = - std::shared_ptr(new (std::nothrow) DavinciModel(model.priority, nullptr)); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(davinci_model == nullptr, GE_CHK_RT(rtDeviceReset(0)); - return PARAM_INVALID, "davinci_model is null."); - - GE_MAKE_GUARD(davinci_model, [&] { davinci_model = nullptr; }); - - ModelHelper model_helper; - ret = model_helper.LoadModel(model); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((ret != SUCCESS), GE_CHK_RT(rtDeviceReset(0)); return FAILED, "load model failed"); - - ret = davinci_model->Assign(model_helper.GetGeModel()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Parse davinci model data failed"); - - ret = davinci_model->Init(); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Davinci model init failed"); - - vector input_list; - vector output_list; - - ret = davinci_model->GetInputOutputDescInfo(input_list, output_list); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Davinci model GetInputOutputDescInfo failed"); - - for (const auto &desc : input_list) { - model_info.input_desc.push_back(desc.shape_info); - } - for (const auto &desc : output_list) { - model_info.output_desc.push_back(desc.shape_info); - } - - model_info.name = davinci_model->Name(); - } catch (...) 
{ - DOMI_LOGE("OM model parser failed, some exceptions occur !"); - GE_CHK_RT(rtDeviceReset(0)); - return FAILED; - } - - GE_CHK_RT(rtDeviceReset(0)); - - return SUCCESS; -} - DavinciModelParser::DavinciModelParser() {} DavinciModelParser::~DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 01075255..22fddf86 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -460,8 +460,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d Status ModelManager::GetCurDynamicDims(const vector> &user_real_input_dims, const vector>> &user_input_dims, - vector &cur_dynamic_dims) { - GELOGD(" Start get cur dynamic dims."); + vector &cur_dynamic_dims) { + GELOGD("Start get cur dynamic dims."); if (user_real_input_dims.size() != user_input_dims.size()) { GELOGE(INTERNAL_ERROR, "The input count of user: %zu should be equal to the data count of graph: %zu", @@ -478,7 +478,7 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ } for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) { if (user_input_dims.at(i).second.at(j) < 0) { - cur_dynamic_dims.emplace_back(user_real_input_dims[i][j]); + cur_dynamic_dims.emplace_back(static_cast(user_real_input_dims[i][j])); } } } @@ -523,7 +523,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector cur_dynamic_dims; + std::vector cur_dynamic_dims; if (!GetLocalOmgContext().user_real_input_dims.empty()) { if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims, cur_dynamic_dims) != SUCCESS) { @@ -531,9 +531,9 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector(cur_dynamic_dims.size() * sizeof(int64_t)); + uint32_t length = static_cast(cur_dynamic_dims.size() * sizeof(int32_t)); GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR, "Failed to memcpy data."); data.length = length; @@ -995,16 +995,6 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynami return SUCCESS; } -Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector &input_desc, - vector &output_desc, - std::vector &inputFormats, - std::vector &outputFormats) { - std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); - return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); -} - /// /// @ingroup ge /// @brief Get AIPP info @@ -1013,11 +1003,11 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, /// @param [out] aipp_info /// @return execute result /// -Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { +Status ModelManager::GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", model_id); - return davinci_model->GetAIPPInfo(index, aipp_info); + return davinci_model->GetAippInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { @@ -1568,6 
+1558,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op GE_CHK_RT(rtFree(mem)); } }; + GE_MAKE_GUARD(release, callback); // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { @@ -1580,7 +1571,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_res_op_list); @@ -1589,7 +1579,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_ret_code_list); @@ -1601,7 +1590,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1619,7 +1607,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1648,7 +1635,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); @@ -1664,7 +1650,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(status); } @@ -1679,7 +1664,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op if (op_check_info_res.isWithoutJson) { GELOGI("No need to check aicpu in this scenoria."); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return SUCCESS; } @@ -1698,7 +1682,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } @@ -1722,12 +1705,10 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op } fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. 
details: %s", fail_reason.c_str()); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); GELOGI("Cpu kernel launch check optype task success."); return SUCCESS; diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 088ea5fd..418bae62 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -126,14 +126,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// /// @ingroup domi_ome /// @brief Get cur_dynamic_dims for all input. - /// @param [in] vector> &user_real_input_dims: dims info of all user_inputs. + /// @param [in] vector> &user_real_input_dims: dims info of all user_inputs. /// @param [in] vector>> &user_input_dims: key:name. value:dynamic dims from option. - /// @param [out] vector &cur_dynamic_dims: real dims gather, where the index of -1. + /// @param [out] vector &cur_dynamic_dims: real dims gather, where the index of -1. /// @return 0: SUCCESS / others: INTERNAL_ERROR /// Status GetCurDynamicDims(const vector> &user_real_input_dims, const vector>> &user_input_dims, - vector &cur_dynamic_dims); + vector &cur_dynamic_dims); /// /// @ingroup domi_ome @@ -239,24 +239,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @param [out] aipp_info /// @return execute result /// - ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); - /// - /// @ingroup domi_ome - /// @brief set model input and output size zero copy - /// @param [in] model_id model id - /// @param [out] input_shape input tensor - /// @param [out] output_shape output tensor - /// @return SUCCESS success - /// @return PARAM_INVALID parameter invalid - /// - ge::Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc, - std::vector &inputFormats, - std::vector &outputFormats); - ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index df43fd5b..8033c93e 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -145,7 +145,9 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM } else { GELOGI("need to reuse follow stream and create new follow stream."); size_t created_stream_num = follow_stream_usage.size(); - hccl_stream_list_ = follow_stream_usage; + for (const auto &stream : follow_stream_usage) { + hccl_stream_list_.emplace_back(stream); + } ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); if (ret != SUCCESS) { GELOGE(RT_FAILED, "Create hccl stream failed."); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index beb7cd42..c4f91036 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -101,6 +101,7 @@ #include "graph/common/local_context.h" #include "graph/common/omg_util.h" 
#include "common/formats/utils/formats_trans_utils.h" +#include "register/custom_pass_helper.h" namespace { const char *const kSummary = "Summary"; @@ -686,7 +687,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph); GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, + GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node, inputs, compute_graph, session_id); GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph); @@ -731,6 +732,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); + + CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); + GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, @@ -765,10 +769,24 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint return SUCCESS; } +Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { + ConstGraphPtr const_graph = graph_node->GetGraph(); + auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); + GE_DUMP(comp_graph, "RunCustomPassBegin"); + + GE_TIMESTAMP_START(RunCustomPass); + GraphPtr graph = std::const_pointer_cast(const_graph); + GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.", + comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); + return SUCCESS; +} + Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(graph_node->GetGraph()); + GE_CHK_STATUS_RET_NOLOG(RunCustomPass(graph_node)); auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); compute_graph->SetSessionID(session_id); @@ -1172,7 +1190,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node->GetGraph(), inputs, + GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node, inputs, compute_graph, session_id); for (auto &node : compute_graph->GetAllNodes()) { @@ -2762,8 +2780,10 @@ Status GraphManager::ParseInputsDims(const std::vector &input_t if (!GetLocalOmgContext().dynamic_node_type.empty()) { vector data_nodes; vector getnext_nosink_nodes; - data_nodes = compute_graph_->TryGetExtAttr(kExtAttrDataNodes, data_nodes); - getnext_nosink_nodes = compute_graph_->TryGetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes); + data_nodes = GetLocalOmgContext().data_nodes; + getnext_nosink_nodes = GetLocalOmgContext().getnext_nosink_nodes; + GELOGD("Data nodes 
count is %zu, getnext nosink nodes count is %zu.", data_nodes.size(), + getnext_nosink_nodes.size()); if (GetLocalOmgContext().dynamic_node_type == DATA) { if (getnext_nosink_nodes.empty()) { // just data or data+getnext_sink diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index d2887c4c..32de7eac 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -226,6 +226,7 @@ class GraphManager { void ParseInputsDimsForData(const std::vector &input_tensor); Status ParseInputsDimsForGetNexNosinkAndData(const vector &dynamic_nodes, const std::vector &input_tensor); + Status RunCustomPass(const GraphNodePtr &graph_node); Status PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index c94408de..cd80a956 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -336,4 +336,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { } return SUCCESS; } +Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) { + if (compute_graph == nullptr) { + GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr."); + return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; + } + + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed."); + return GE_CLI_GE_NOT_INITIALIZED; + } + + auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); + GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", graph_optimizer.size()); + Status ret = SUCCESS; + string exclude_core_type = (core_type_ == kVectorCore) ? 
kAicoreEngine : kVectorEngine; + GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str()); + if (!graph_optimizer.empty()) { + for (auto &iter : graph_optimizer) { + if (iter.first == exclude_core_type || iter.second == nullptr) { + continue; + } + GELOGI("Begin to optimize whole graph by engine %s", iter.first.c_str()); + ret = iter.second->OptimizeWholeGraph(*compute_graph); + GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first); + if (ret != SUCCESS) { + GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret); + return ret; + } + } + } + return ret; +} } // namespace ge diff --git a/ge/graph/optimize/graph_optimize.h b/ge/graph/optimize/graph_optimize.h index 78d580b7..3a1960f7 100755 --- a/ge/graph/optimize/graph_optimize.h +++ b/ge/graph/optimize/graph_optimize.h @@ -52,6 +52,9 @@ class GraphOptimize { // for fe prepare optimize in quantize scene Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph); + // for engine to optimize merged whole graph before ge Optimize2 + Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph); + // for rts optimize before build to add attr and insert memcpy op Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index a4662d5d..7d9724fc 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -26,6 +26,10 @@ namespace ge { namespace { +std::set un_compute_attrs = { + {ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES}, +}; + std::string GetCseKey(const NodePtr &node) { std::stringstream ss; ss << node->GetType() << "-data-inputs-"; @@ -49,7 +53,7 @@ std::string GetCseKey(const NodePtr &node) { ss << name << "-"; } - ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); + ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs); return ss.str(); } diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index f8451ace..b7efa070 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -25,31 +25,65 @@ #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "register/op_registry.h" +#include "graph/common/omg_util.h" namespace ge { namespace { constexpr uint8_t kDataInIndex = 0; constexpr uint8_t kDataOutIndex = 0; constexpr uint8_t kCaseArgIndex = 1; +const int kDivisionConst = 2; +const size_t kNumOfGetnextNode = 1; const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case"; const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data"; +const std::string kMultiBatchGetDynamicDimsNode = "ascend_mbatch_get_dynamic_dims_node"; const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; +const char *const kGetNextName = "IteratorV2"; } // namespace +inline bool IsGetNextType(const NodePtr &node) { + std::string original_type; + GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, + GELOGW("Get original type failed."); return false); + return (original_type == kGetNextName); +} + Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { + GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(FAILED, "Original graph is nullptr"); return FAILED); if 
(graph->GetParentGraph() != nullptr) { GELOGD("Subgraph %s skip the MultiBatchClonePass", graph->GetName().c_str()); return SUCCESS; } - + if (!GetLocalOmgContext().need_multi_batch) { + GELOGI("No need to process multi-batch for non-train graph."); + return SUCCESS; + } + std::vector data_nodes; + std::vector getnext_nosink_nodes; + std::vector getnext_sink_nodes; + if (multibatch::CheckSequenceOfOptions(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] CheckSequenceOfOptions failed."); + return PARAM_INVALID; + } + if (multibatch::UpdateNameOfInputShape(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] UpdateNameOfInputShape failed."); + return PARAM_INVALID; + } + if (multibatch::DeleteIdentityInsertByAdapter(graph) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] DeleteIdentityInsertByAdapter failed."); + return PARAM_INVALID; + } if (!multibatch::InitDynamicParams(batch_shapes_)) { GELOGD("There is no multi-batch options, no need clone multi-batch graph"); return SUCCESS; } - + if (multibatch::CheckNegativeCountOfOptions(batch_shapes_) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] Input_shape and dynamic_dims should be set with correct params."); + return PARAM_INVALID; + } GELOGD("Begin to run Multi-batch clone on graph: %s", graph->GetName().c_str()); GE_CHK_STATUS_RET(multibatch::CheckDynamicParams(batch_shapes_), "Invalid multi-batch param"); if (CollectIoNodes(graph) != SUCCESS) { @@ -66,21 +100,14 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { (void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); ComputeGraphPtr branch = MakeShared(graph->GetName()); - if (branch == nullptr) { - GELOGE(OUT_OF_MEMORY, "Create multi-batch graph failed"); - return OUT_OF_MEMORY; - } + GE_IF_BOOL_EXEC(branch == nullptr, GELOGE(OUT_OF_MEMORY, "Create multi-batch graph failed"); return OUT_OF_MEMORY); (void)AttrUtils::SetStr(branch, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); graph->InValid(); // Will modify, need topological again.
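// Note on the swap below: `branch` shares the original graph's name and session
// graph id but is otherwise empty; Swap moves the whole user graph body into
// `branch` and leaves `graph` hollowed out, so CreateRootGraph can rebuild it
// around the ascend_mbatch_shape_case node while CreateOriGraph/CreateSubgraphs
// repopulate the per-gear bodies from `branch`.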
graph->Swap(*branch); - if (CreateRootGraph(graph) != SUCCESS) { - return FAILED; - } - - if (CreateSubgraphs(graph, branch) != SUCCESS) { - return FAILED; - } + GE_CHK_STATUS_RET(CreateRootGraph(graph), "Construct root graph failed."); + GE_CHK_STATUS_RET(CreateOriGraph(branch), "Construct original graph failed.") + GE_CHK_STATUS_RET(CreateSubgraphs(graph, branch), "Construct subgraph failed."); GE_CHK_STATUS_RET(PruneDirectOutput(graph), "Prune direct output failed"); GELOGD("MultiBatchClonePass Leave"); @@ -95,9 +122,13 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { /// Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { for (const auto &node : graph->GetDirectNode()) { + if (!GetLocalOmgContext().dynamic_node_type.empty() && IsGetNextType(node)) { + all_data_nodes_.emplace_back(node); + GE_CHK_STATUS_RET(InitParamsOfGetNext(node), "Init params of %s failed.", node->GetName().c_str()); + } if (node->GetType() == DATA) { all_data_nodes_.emplace_back(node); - } else if (node->GetType() == CONSTANT) { + } else if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) { all_const_nodes_.emplace_back(node); } else if (node->GetType() == NETOUTPUT) { all_output_nodes_.emplace_back(node); @@ -114,10 +145,16 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { } int64_t data_index = 0; + size_t getnext_node_count = 0; for (size_t i = 0; i < all_data_nodes_.size(); ++i) { + if (IsGetNextType(all_data_nodes_[i])) { + // just one getnext node in graph + getnext_node_count++; + continue; + } const auto &op_desc = all_data_nodes_[i]->GetOpDesc(); if (!AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { - (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i); + (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i - getnext_node_count); } } @@ -133,7 +170,43 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { "Remove edge failed"); } } + GELOGD("Data count is %zu, const count is %zu, getnext count is %zu, output count is %zu, direct out count is %zu.", + all_data_nodes_.size(), all_const_nodes_.size(), getnext_node_count, all_output_nodes_.size(), + direct_output_.size()); + + return SUCCESS; +} +Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { + data_count_from_getnext_ = 0; + getnext_sink_dynamic_dims_ = false; + GE_CHECK_NOTNULL(node->GetOpDesc()); + data_count_from_getnext_ = node->GetOpDesc()->GetOutputsSize(); + if (GetLocalOmgContext().dynamic_node_type == GETNEXT) { + data_count_from_getnext_ = data_count_from_getnext_ / kDivisionConst; + for (size_t i = 0; i < data_count_from_getnext_; ++i) { + GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(i); + GELOGD("The %zu data shape from getnext sink is %s.", i, + formats::JoinToString(output_desc.GetShape().GetDims()).c_str()); + const auto &dims = output_desc.GetShape().GetDims(); + if (std::all_of(dims.begin(), dims.end(), [](int64_t val) {return val >= 0; })) { + GELOGD("The %zu data from %s is static.", i, node->GetName().c_str()); + } else { + getnext_sink_dynamic_dims_ = true; + GELOGD("Dynamic dims in the pattern of getnext sink."); + } + } + } + if (node->GetOutControlAnchor() != nullptr) { + for (const auto &peer_in_control_anchor : node->GetOutControlAnchor()->GetPeerInControlAnchors()) { + NodePtr next_node = peer_in_control_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(next_node); + if (next_node->GetType() == CONSTANTOP) { + out_control_nodes_.insert(next_node); + GELOGD("Control edge: %s connect with %s.", 
node->GetName().c_str(), next_node->GetName().c_str()); + } + } + } return SUCCESS; } @@ -144,7 +217,11 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { + GELOGD("Start create root graph of %s.", graph->GetName().c_str()); uint32_t input_num = all_data_nodes_.size() + all_const_nodes_.size(); + if (data_count_from_getnext_ != 0) { + input_num = input_num + data_count_from_getnext_ - kNumOfGetnextNode; + } uint32_t output_num = all_output_nodes_[0]->GetAllInDataAnchorsSize(); OpDescBuilder op_builder(kMultiBatchCaseNode, CASE); @@ -185,6 +262,10 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { op_desc->GetName().c_str()); return FAILED; } + if (!AttrUtils::SetBool(op_desc, ATTR_INSERT_BY_MBATCH, true)) { + GELOGE(INTERNAL_ERROR, "Failed to add insert attr on case node %s", op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed"); GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed"); @@ -202,7 +283,7 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { /// @param [in] NodePtr node: index data node. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node) { +Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { const OpDescPtr data_desc = MakeShared(kMultiBatchDataNode, DATA); if (data_desc == nullptr) { GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); @@ -220,11 +301,12 @@ Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, No } size_t data_index = all_data_nodes_.size(); + data_index = data_count_from_getnext_ != 0 ? 
data_index - kNumOfGetnextNode : data_index; (void)AttrUtils::SetInt(data_desc, ATTR_NAME_INDEX, data_index); (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); - node = graph->AddNode(data_desc); - if (node == nullptr) { + shape_node = graph->AddNode(data_desc); + if (shape_node == nullptr) { GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); return OUT_OF_MEMORY; } @@ -286,15 +368,19 @@ Status MultiBatchClonePass::CreateIndexConstNode(const ComputeGraphPtr &graph, N /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { - // Data --> MapIndex --> Case - NodePtr data_node; - GE_CHK_STATUS_RET(CreateIndexDataNode(graph, data_node), "Create data node failed"); + // Data/GetDynamicDims --> MapIndex --> Case + if (!getnext_sink_dynamic_dims_) { + GE_CHK_STATUS_RET(CreateIndexDataNode(graph, shape_node_), "Create data node failed"); + } else { + GE_CHK_STATUS_RET(CreateGetDynamicDimsNode(graph, shape_node_), "Create get dynamic dims node failed"); + } NodePtr const_node; GE_CHK_STATUS_RET(CreateIndexConstNode(graph, const_node), "Create const node failed"); - + GELOGD("Shape node name is %s, type is %s, const node name is %s.", shape_node_->GetName().c_str(), + shape_node_->GetType().c_str(), const_node->GetName().c_str()); OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex"); - op_builder.AddInput("x", data_node->GetOpDesc()->GetOutputDesc(0)) + op_builder.AddInput("x", shape_node_->GetOpDesc()->GetOutputDesc(0)) .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); @@ -309,8 +395,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { return OUT_OF_MEMORY; } - if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", data_node->GetName().c_str(), + GE_CHK_STATUS_RET(AddAttrForGetDynamicDims(shape_node_), "Failed to add attr for %s.", + shape_node_->GetName().c_str()); + if (GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", shape_node_->GetName().c_str(), index_node->GetName().c_str()); return FAILED; } @@ -328,6 +416,120 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { return SUCCESS; } +Status MultiBatchClonePass::CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { + const OpDescPtr data_desc = MakeShared(kMultiBatchGetDynamicDimsNode, GETDYNAMICDIMS); + if (data_desc == nullptr) { + GELOGE(OUT_OF_MEMORY, "Create multi-batch get dynamic dims node failed"); + return OUT_OF_MEMORY; + } + + // input of GetDynamicDims is shape_of_each_data, output is gear_info + for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { + size_t input_shape_dims = GetLocalOmgContext().user_input_dims.at(i).second.size(); + // add input desc without GeShape for const input, value of input_shape is 1 transferred by adapter + if (input_shape_dims == 1 && GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { + GeTensorDesc tensor_desc; + tensor_desc.SetFormat(FORMAT_ND); + tensor_desc.SetDataType(DT_INT32); + auto ret = data_desc->AddInputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return FAILED); + continue; + } 
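// The branch above covered const placeholders (dims recorded as {0}): they get a
// shape-less INT32 desc. The general case below adds a 1-D INT32 desc per input,
// sized by that input's rank; the single INT32 output added after the loop is the
// gear vector, one value per dynamic dimension from batch_shapes_.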
+ GeTensorDesc tensor_desc(GeShape({static_cast(input_shape_dims)}), FORMAT_ND, DT_INT32); + auto ret = data_desc->AddInputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return FAILED); + } + GeTensorDesc tensor_desc(GeShape({static_cast(batch_shapes_.at(0).size())}), FORMAT_ND, DT_INT32); + auto ret = data_desc->AddOutputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data"); + return FAILED); + + (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); + + shape_node = graph->AddNode(data_desc); + if (shape_node == nullptr) { + GELOGE(OUT_OF_MEMORY, "Create multi-batch dynamic dims node failed"); + return OUT_OF_MEMORY; + } + return SUCCESS; +} + +Status MultiBatchClonePass::AddAttrForGetDynamicDims(const NodePtr &shape_node) { + if (!getnext_sink_dynamic_dims_) { + GELOGD("No need to add attr when not insert get dynamic dims node."); + return SUCCESS; + } + GELOGD("Add attr for :%s, type is %s:", shape_node->GetName().c_str(), shape_node->GetType().c_str()); + if (!AttrUtils::SetInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_DATA_COUNT, data_count_from_getnext_)) { + GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_DATA_COUNT failed"); + return INTERNAL_ERROR; + } + vector shape_info; + for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { + if (GetLocalOmgContext().user_input_dims.at(i).second.size() == 1 && + GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { + shape_info.emplace_back(0); + continue; + } + shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.size()); + for (size_t j = 0; j < GetLocalOmgContext().user_input_dims.at(i).second.size(); ++j) { + shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.at(j)); + } + } + if (!AttrUtils::SetListInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_SHAPE_INFO, shape_info)) { + GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_SHAPE_INFO failed"); + return INTERNAL_ERROR; + } + return SUCCESS; +} + +Status MultiBatchClonePass::LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node) { + GELOGD("Start relink shape anchor of %s to %s.", getnext_node->GetName().c_str(), shape_node->GetName().c_str()); + size_t input_index = 0; + size_t data_count = getnext_node->GetAllOutDataAnchors().size() / kDivisionConst; + for (size_t out_index = data_count; out_index < getnext_node->GetAllOutDataAnchors().size(); ++out_index, + ++input_index) { + GELOGD("Start add %s of %zu out_anchor to %s of %zu in_anchor.", getnext_node->GetName().c_str(), out_index, + shape_node->GetName().c_str(), input_index); + auto out_data_anchor = getnext_node->GetOutDataAnchor(out_index); + auto ret = GraphUtils::AddEdge(out_data_anchor, shape_node->GetInDataAnchor(input_index)); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link getnext %s to getdynamicdims %s", + getnext_node->GetName().c_str(), shape_node->GetName().c_str()); + return INTERNAL_ERROR); + } + return SUCCESS; +} + +Status MultiBatchClonePass::LinkGetDynamicDimsToNetOutput(const NodePtr &output_node) { + if (!GetLocalOmgContext().dynamic_node_type.empty()) { + if (!AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, GetLocalOmgContext().dynamic_dims)) { + GELOGE(INTERNAL_ERROR, "Failed to set all gears info attr on netoutput %s.", output_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } + if 
(getnext_sink_dynamic_dims_) {
+    GELOGD("Start link %s to %s.", shape_node_->GetName().c_str(), output_node->GetName().c_str());
+    size_t input_index = output_node->GetAllInDataAnchors().size();
+    if (NodeUtils::AppendInputAnchor(output_node, input_index + 1) != GRAPH_SUCCESS) {
+      GELOGE(INTERNAL_ERROR, "Failed to append input anchor to %s at index %zu.", output_node->GetName().c_str(),
+             input_index);
+      return INTERNAL_ERROR;
+    }
+    auto ret = GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(kDataOutIndex),
+                                   output_node->GetInDataAnchor(input_index));
+    GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link netoutput %s to getdynamicdims %s",
+                                                 output_node->GetName().c_str(), shape_node_->GetName().c_str());
+                    return INTERNAL_ERROR);
+    if (!AttrUtils::SetBool(output_node->GetOpDesc(), ATTR_GETNEXT_SINK_DYNMAIC, true)) {
+      GELOGE(INTERNAL_ERROR, "Failed to set getnext sink dynamic attr on netoutput %s.",
+             output_node->GetName().c_str());
+      return INTERNAL_ERROR;
+    }
+  }
+  return SUCCESS;
+}
+
 ///
 /// @ingroup ge
 /// @brief Create input node for root graph.
@@ -337,8 +539,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) {
 Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
   // Data --> Case
   std::vector<NodePtr> all_data_nodes;
-  const size_t arg_index = kCaseArgIndex;
-  for (size_t i = 0; i < all_data_nodes_.size(); ++i) {
+  size_t case_input_index = kCaseArgIndex;
+  NodePtr getnext_node = nullptr;
+  size_t input_index_of_getnext = 0;
+  for (size_t i = 0; i < all_data_nodes_.size(); ++i, ++case_input_index) {
     const auto &node = all_data_nodes_[i];
     const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc());
     if (op_desc == nullptr) {
@@ -353,22 +557,60 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
     op_desc->SetName(node->GetName());
     const NodePtr &data = graph->AddNode(op_desc);
     GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str());
-    if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) {
-      GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s",
-             data->GetName().c_str(), case_node_->GetName().c_str());
-      return FAILED;
+    if (IsGetNextType(node)) {
+      getnext_node = data;
+      input_index_of_getnext = case_input_index;
+      case_input_index = case_input_index + data_count_from_getnext_;
+      continue;
+    } else {
+      if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(case_input_index)) !=
+          GRAPH_SUCCESS) {
+        GELOGE(FAILED, "Failed to add edge from Data:%s to Case:%s", data->GetName().c_str(),
+               case_node_->GetName().c_str());
+        return FAILED;
+      }
     }
-    if (SetMaxShapeToData(data) != SUCCESS) {
+    if (SetMaxShape(data) != SUCCESS) {
+      GELOGE(FAILED, "Set max shape of %s failed.", data->GetName().c_str());
       return FAILED;
     }
     all_data_nodes.emplace_back(data);
   }
+  if (getnext_node != nullptr) {
+    if (LinkEdgeForGetNext(getnext_node, input_index_of_getnext) != SUCCESS) {
+      GELOGE(FAILED, "Failed to link edge for %s.", getnext_node->GetName().c_str());
+      return FAILED;
+    }
+    if (SetMaxShape(getnext_node) != SUCCESS) {
+      GELOGE(FAILED, "Set max shape of %s failed.", getnext_node->GetName().c_str());
+      return FAILED;
+    }
+    all_data_nodes.emplace_back(getnext_node);
+  }
   all_data_nodes_.swap(all_data_nodes);
   return SUCCESS;
 }
 
+Status MultiBatchClonePass::LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index) {
+  GELOGD("Start link edge for %s, which feeds input %zu of %s.", getnext_node->GetName().c_str(),
+         case_input_index, case_node_->GetName().c_str());
+  for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++case_input_index) {
+    if (GraphUtils::AddEdge(getnext_node->GetOutDataAnchor(out_index),
+                            case_node_->GetInDataAnchor(case_input_index)) != GRAPH_SUCCESS) {
+      GELOGE(FAILED, "Failed to add edge from output %zu of %s to input %zu of Case %s", out_index,
+             getnext_node->GetName().c_str(), case_input_index, case_node_->GetName().c_str());
+      return FAILED;
+    }
+  }
+  if (getnext_sink_dynamic_dims_) {
+    GE_CHK_STATUS_RET(LinkGetNextToGetDynamicDims(getnext_node, shape_node_), "Failed to add link for %s.",
+                      shape_node_->GetName().c_str());
+  }
+  return SUCCESS;
+}
+
 ///
 /// @ingroup ge
 /// @brief Create Const node for root graph.
@@ -378,7 +620,11 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
 Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
   // Const --> Case
   std::vector<NodePtr> all_const_nodes;
-  const size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
+  size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
+  if (data_count_from_getnext_ != 0) {
+    arg_index = arg_index + data_count_from_getnext_ - kNumOfGetnextNode;
+  }
+
   for (size_t i = 0; i < all_const_nodes_.size(); ++i) {
     const auto &node = all_const_nodes_[i];
     const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc());
@@ -395,15 +641,33 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
     const NodePtr &data = graph->AddNode(op_desc);
     GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str());
     if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) {
-      GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s",
-             data->GetName().c_str(), case_node_->GetName().c_str());
+      GELOGE(FAILED, "Failed to add edge from Const:%s to Case:%s", data->GetName().c_str(),
+             case_node_->GetName().c_str());
       return FAILED;
     }
     all_const_nodes.emplace_back(data);
   }
+  ChangeConstToData();
+  all_const_nodes_.swap(all_const_nodes);
+  return SUCCESS;
+}
 
+void MultiBatchClonePass::ChangeConstToData() {
   size_t data_index = all_data_nodes_.size();
+  if (data_count_from_getnext_ != 0) {
+    data_index = data_index + data_count_from_getnext_ - kNumOfGetnextNode;
+  }
   for (size_t i = 0; i < all_const_nodes_.size(); ++i, ++data_index) {  // Trans subgraph Const to Data.
+    auto &const_node = all_const_nodes_[i];
+    if (out_control_nodes_.find(const_node) != out_control_nodes_.end()) {
+      GELOGD("No need to change %s to data type.", const_node->GetName().c_str());
+      continue;
+    }
     const OpDescPtr &op_desc = all_const_nodes_[i]->GetOpDesc();
     op_desc->SetType(DATA);
     (void)op_desc->DelAttr(ATTR_NAME_WEIGHTS);  // Delete weight.
@@ -413,9 +677,6 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
     (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index);
     (void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1);
   }
-
-  all_const_nodes_.swap(all_const_nodes);
-  return SUCCESS;
 }
 
 ///
@@ -461,7 +722,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) {
       }
     }
   }
-
+  GE_CHK_STATUS_RET(LinkGetDynamicDimsToNetOutput(node), "Failed to add edge from %s to netoutput %s.",
+                    shape_node_->GetName().c_str(), output->GetName().c_str());
   all_output_nodes_.clear();
   all_output_nodes_.emplace_back(node);
   return SUCCESS;
@@ -473,34 +735,69 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) {
 /// @param [in] const NodePtr &data: data in Root/Case graph.
 /// @return 0: SUCCESS / others: FAILED
 ///
-Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {
-  auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
-  auto data_name = data->GetName();
+Status MultiBatchClonePass::SetMaxShape(const NodePtr &data) {
+  GELOGD("Start set max shape for %s.", data->GetName().c_str());
+  if (!IsGetNextType(data)) {
+    if (SetMaxShapeToData(data, kDataOutIndex) != SUCCESS) {
+      GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str());
+      return PARAM_INVALID;
+    }
+  } else {
+    for (size_t out_anchor_index = 0; out_anchor_index < data_count_from_getnext_; ++out_anchor_index) {
+      if (SetMaxShapeToData(data, out_anchor_index) != SUCCESS) {
+        GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str());
+        return PARAM_INVALID;
+      }
+    }
+  }
+  return SUCCESS;
+}
+
+Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index) {
+  GELOGD("Start update max shape of %s, output %zu.", node->GetName().c_str(), out_anchor_index);
+  auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape();
+  string data_name = node->GetName();
+  if (IsGetNextType(node)) {
+    data_name.append("_").append(std::to_string(out_anchor_index));
+  }
+  GELOGD("Update max shape of %s, shape dims is %s.", data_name.c_str(),
+         formats::JoinToString(data_shape.GetDims()).c_str());
   const auto &dims = data_shape.GetDims();
-  if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
-    return SUCCESS;
+  if (!IsGetNextType(node)) {
+    if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
+      GELOGD("No need to do anything for static data.");
+      return SUCCESS;
+    }
+  } else {
+    if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
+      if (getnext_sink_dynamic_dims_) {
+        // need to update shape of Shape_node when getnext node has dynamic data
+        GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(node, out_anchor_index), "Failed to update shape of shape node");
+      }
+      return SUCCESS;
+    }
   }
-  (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims());
+  (void)AttrUtils::SetListInt(node->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims());
 
-  GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex));
+  GeTensorDesc tensor(NodeUtils::GetOutputDesc(*node, kDataOutIndex));
   std::vector<std::string> input_dims_str;
   for (size_t i = 0; i < batch_shapes_.size(); ++i) {
     auto shape = data_shape;
     auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape);
     if (ret != SUCCESS) {
-      GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str());
+      GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", node->GetName().c_str());
       return ret;
     }
     tensor.SetShape(shape);
     int64_t tensor_size = 0;
     (void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size);
     string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" +
-                       TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" +
+                       TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + node->GetName() + ":" +
                        std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" +
                        formats::JoinToString(tensor.GetShape().GetDims());
     input_dims_str.emplace_back(input_str);
   }
-  (void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str);
+  (void)AttrUtils::SetListStr(node->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str);
 
   size_t max_shape_index = 0;
   int64_t max_size = 0;
@@ -519,18 +816,72 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {
       max_shape_index = i;
     }
   }
+  return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), node, data_shape, out_anchor_index);
+}
 
-  return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), data, data_shape);
+///
+/// @ingroup ge
+/// @brief Set max shape to Data/GetNext node in root graph.
+/// @param [in] const std::vector<int64_t> &shapes: dims of shape.
+/// @param [in] const NodePtr &data: data in Root/Case graph.
+/// @param [in] GeShape &data_shape: dims of data node.
+/// @param [in] size_t out_anchor_index: out anchor index of data node.
+/// @return 0: SUCCESS / others: FAILED
+///
+Status MultiBatchClonePass::SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data,
+                                           GeShape &data_shape, size_t out_anchor_index) {
+  GELOGD("Start set shape to output %zu of %s.", out_anchor_index, data->GetName().c_str());
+  if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Failed to calculate the batched shape for data node %s, the shapes may not match",
+           data->GetName().c_str());
+    return INTERNAL_ERROR;
+  }
+
+  if (NodeUtils::UpdateOutputShape(*data, out_anchor_index, data_shape) != GRAPH_SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str());
+    return INTERNAL_ERROR;
+  }
+  if (!IsGetNextType(data)) {
+    if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) {
+      GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str());
+      return INTERNAL_ERROR;
+    }
+  } else {
+    if (getnext_sink_dynamic_dims_) {
+      // need to update shape of Shape_node when getnext_sink_dynamic
+      GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(data, out_anchor_index), "Failed to update shape of shape node");
+    }
+  }
+
+  GELOGI("Update the data %s input/output shape to the max %s", data->GetName().c_str(),
+         formats::ShapeToString(data_shape).c_str());
+  return SUCCESS;
+}
+
+Status MultiBatchClonePass::UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index) {
+  GELOGD("Start update output shape of shape node inserted by adapter, which pairs with output %zu of %s.",
+         out_anchor_index, node->GetName().c_str());
+  auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape();
+  size_t shape_index = out_anchor_index + (node->GetAllOutDataAnchors().size() / kDivisionConst);
+  GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(shape_index);
+  std::vector<int64_t> output_dims = {static_cast<int64_t>(data_shape.GetDims().size())};
+  GeShape output_shape(output_dims);
+  output_desc.SetShape(output_shape);
+  if (node->GetOpDesc()->UpdateOutputDesc(shape_index, output_desc) != SUCCESS) {
+    GELOGE(FAILED, "Update output desc fail.");
+    return FAILED;
+  }
+  return SUCCESS;
+}
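// Note (worked example, assuming kDivisionConst == 2): the adapter lays out the GetNext node as
// N data outputs followed by their N shape outputs, which is what the index arithmetic above
// relies on. A standalone restatement of the pairing rule:
#include <cstddef>
constexpr size_t PairedShapeIndex(size_t out_anchor_index, size_t total_out_anchors) {
  return out_anchor_index + total_out_anchors / 2;  // 2 == kDivisionConst (assumed)
}
// A GetNext with 3 queues has 6 out data anchors: data outputs 0..2, shape outputs 3..5.
static_assert(PairedShapeIndex(1, 6) == 4, "data output 1 pairs with shape output 4");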
 
 ///
 /// @ingroup ge
 /// @brief Update Data node in Subgraph.
 /// @param [in] const NodePtr &data: data in Subgraph.
-/// @param [in] size_t index: The batch index.
+/// @param [in] size_t batch_index: The batch index.
 /// @return 0: SUCCESS / others: FAILED
 ///
-Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) {
+Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t batch_index) {
   int node_index = -1;
   if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) {
     GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str());
@@ -545,6 +896,8 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t batch
 
   auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
   const auto &dims = data_shape.GetDims();
+  GELOGD("Start update shape of %s, batch index is %zu, dims is %s.", data->GetName().c_str(), batch_index,
+         formats::JoinToString(dims).c_str());
   if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
     return SUCCESS;
   }
@@ -559,35 +912,77 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t batch
   }
 
   auto parent_name = data_name.substr(0, pos);
-  return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(index), data, data_shape);
+  return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(batch_index), data, data_shape, kDataOutIndex);
 }
 
-///
-/// @ingroup ge
-/// @brief Set max shape to Data node in root graph.
-/// @param [in] const std::vector<int64_t> &shapes: dims of shape.
-/// @param [in] const NodePtr &data: data in Root/Case graph.
-/// @param [in] GeShape &data_shape: dims of data node.
-/// @return 0: SUCCESS / others: FAILED
-///
-Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape) {
-  // must not be error, the calc result has been checked in function InsertSwitchNForData
-  if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) {
-    return INTERNAL_ERROR;
+Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) {
+  if (data_count_from_getnext_ == 0) {
+    GELOGD("No need to change original graph without getnext node.");
+    return SUCCESS;
   }
-
-  if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str());
-    return INTERNAL_ERROR;
+  GELOGD("Start changing original graph %s, which has a getnext node.", graph->GetName().c_str());
+  size_t data_index = all_data_nodes_.size() - kNumOfGetnextNode;
+  for (const auto &node : graph->GetDirectNode()) {
+    if (IsGetNextType(node)) {
+      for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++data_index) {
+        auto out_data_anchor = node->GetOutDataAnchor(out_index);
+        GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue);
+        NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index);
+        GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create data node for output %d failed.",
+                                                     out_data_anchor->GetIdx());
+                        return INTERNAL_ERROR);
+        for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
+          GE_IF_BOOL_EXEC(in_anchor == nullptr, continue);
+          NodePtr dst_node = in_anchor->GetOwnerNode();
+          if (GraphUtils::RemoveEdge(out_data_anchor, in_anchor) != GRAPH_SUCCESS) {
+            GELOGE(INTERNAL_ERROR, "Failed to remove edge from %s to %s", node->GetName().c_str(),
+                   dst_node->GetName().c_str());
+            return INTERNAL_ERROR;
+          }
+          if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), dst_node->GetInDataAnchor(in_anchor->GetIdx())) !=
+              GRAPH_SUCCESS) {
+            GELOGE(INTERNAL_ERROR, "Failed to add edge from %s to %s", data_node->GetName().c_str(),
+                   dst_node->GetName().c_str());
+            return INTERNAL_ERROR;
+          }
+        }
+      }
+      if (graph->RemoveNode(node) != GRAPH_SUCCESS) {
+        GELOGE(GRAPH_FAILED, "Remove node %s failed!", node->GetName().c_str());
+        return GRAPH_FAILED;
+      }
+      break;
+    }
   }
+  return SUCCESS;
+}
 
-  if (NodeUtils::UpdateOutputShape(*data, kDataOutIndex, data_shape) != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str());
-    return INTERNAL_ERROR;
+NodePtr MultiBatchClonePass::CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor,
+                                            size_t data_index) {
+  size_t out_anchor_index = out_data_anchor->GetIdx();
+  std::string node_name = out_data_anchor->GetOwnerNode()->GetName() + "_" + std::to_string(out_anchor_index);
+  OpDescPtr op_desc = MakeShared<OpDesc>(node_name, DATA);
+  if (op_desc == nullptr) {
+    GELOGE(OUT_OF_MEMORY, "Create data node failed.");
+    return nullptr;
   }
+  (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index);
 
-  GELOGI("Update %s input/output shape to %s", data->GetName().c_str(), formats::ShapeToString(data_shape).c_str());
-  return SUCCESS;
+  OpDescPtr getnext_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc();
+  if (getnext_op_desc == nullptr) {
+    GELOGE(OUT_OF_MEMORY, "Op desc of %s is nullptr.", out_data_anchor->GetOwnerNode()->GetName().c_str());
+    return nullptr;
+  }
+  if (op_desc->AddInputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Add %s input desc failed.", op_desc->GetName().c_str());
+    return nullptr;
+  }
+  if (op_desc->AddOutputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Add %s output desc failed.", op_desc->GetName().c_str());
+    return nullptr;
+  }
+  NodePtr data_node = graph->AddNode(op_desc);
+  GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Add node %s to graph failed.", node_name.c_str());
+                  return nullptr);
+  GELOGD("Success to create node %s.", data_node->GetName().c_str());
+  return data_node;
 }
 
 ///
@@ -598,17 +993,14 @@ Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const
 /// @return 0: SUCCESS / others: FAILED
 ///
 Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) {
+  GELOGD("Start create subgraphs for %s.", graph->GetName().c_str());
   const auto &op_desc = case_node_->GetOpDesc();
   for (size_t i = 0; i < batch_shapes_.size(); ++i) {
     std::vector<NodePtr> input_nodes;
     std::vector<NodePtr> output_nodes;
     const std::string postfix = kMultiBatchNodePostfix + std::to_string(i);
     ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes);
-    if (subgraph == nullptr) {
-      GELOGE(FAILED, "Create multi-batch case node failed");
-      return FAILED;
-    }
-
+    GE_IF_BOOL_EXEC(subgraph == nullptr, GELOGE(FAILED, "Create multi-batch case node failed"); return FAILED);
     subgraph->SetName("Batch_" + std::to_string(i));
     subgraph->SetParentNode(case_node_);
     subgraph->SetParentGraph(graph);
@@ -621,6 +1013,7 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
 
     op_desc->AddSubgraphName(key_name);
     op_desc->SetSubgraphInstanceName(i, subgraph->GetName());
+    GELOGD("The %s has %zu inputs, %zu outputs.", subgraph->GetName().c_str(), input_nodes.size(),
+           output_nodes.size());
     for (const auto &data : input_nodes) {
       GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str());
     }
@@ -666,6 +1059,7 @@ Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) {
 /// @return 0: SUCCESS / others: FAILED
 ///
 Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) {
+  GELOGD("Start prune direct output.");
   const auto &func_desc = case_node_->GetOpDesc();
   uint32_t unused_num = 0;
   uint32_t output_num = func_desc->GetOutputsSize();
@@ -710,6 +1104,7 @@ Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) {
 ///
 Status MultiBatchClonePass::UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num) {
   if (unused_num == 0) {
+    GELOGD("No need to update output tensor.");
     return SUCCESS;
   }
 
diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h
index ee137b5a..66e92892 100755
--- a/ge/graph/passes/multi_batch_clone_pass.h
+++ b/ge/graph/passes/multi_batch_clone_pass.h
@@ -36,6 +36,7 @@ class MultiBatchClonePass : public GraphPass {
   /// @return 0: SUCCESS / others: FAILED
   ///
   Status CollectIoNodes(const ComputeGraphPtr &graph);
+  Status InitParamsOfGetNext(const NodePtr &node);
 
   ///
   /// @ingroup ge
@@ -49,10 +50,12 @@ class MultiBatchClonePass : public GraphPass {
   /// @ingroup ge
   /// @brief Create index data node for root graph.
   /// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
-  /// @param [in] NodePtr node: index data node.
+  /// @param [in] NodePtr shape_node: index data node, DATA or GETDYNAMICDIMS type.
  /// @return 0: SUCCESS / others: FAILED
   ///
-  Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node);
+  Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node);
+
+  Status CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node);
 
   ///
   /// @ingroup ge
@@ -70,6 +73,9 @@ class MultiBatchClonePass : public GraphPass {
   /// @return 0: SUCCESS / others: FAILED
   ///
   Status CreateIndexNode(const ComputeGraphPtr &graph);
+  Status AddAttrForGetDynamicDims(const NodePtr &shape_node);
+  Status LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node);
+  Status LinkGetDynamicDimsToNetOutput(const NodePtr &output_node);
 
   ///
   /// @ingroup ge
@@ -78,39 +84,54 @@ class MultiBatchClonePass : public GraphPass {
   /// @return 0: SUCCESS / others: FAILED
   ///
   Status CreateInputNode(const ComputeGraphPtr &graph);
+  Status LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index);
 
   ///
   /// @ingroup ge
-  /// @brief Create Const node for root graph.
-  /// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
+  /// @brief Set max shape to Data node in root graph.
+  /// @param [in] const NodePtr &data: data in Root/Case graph.
   /// @return 0: SUCCESS / others: FAILED
   ///
-  Status CreateConstNode(const ComputeGraphPtr &graph);
+  Status SetMaxShape(const NodePtr &data);
+  Status SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index);
+  ///
+  /// @ingroup ge
+  /// @brief Set max shape to Data/GetNext node in root graph.
+  /// @param [in] const std::vector<int64_t> &shapes: dims of shape.
+  /// @param [in] const NodePtr &data: data in Root/Case graph.
+  /// @param [in] GeShape &data_shape: dims of data node.
+  /// @param [in] size_t out_anchor_index: out anchor index of data node.
+  /// @return 0: SUCCESS / others: FAILED
+  ///
+  Status SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape,
+                        size_t out_anchor_index);
+  Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index);
 
   ///
   /// @ingroup ge
-  /// @brief Create output node for root graph.
+  /// @brief Create Const node for root graph.
   /// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
   /// @return 0: SUCCESS / others: FAILED
   ///
-  Status CreateOutputNode(const ComputeGraphPtr &graph);
+  Status CreateConstNode(const ComputeGraphPtr &graph);
+  void ChangeConstToData();
 
   ///
   /// @ingroup ge
-  /// @brief Set max shape to Data node in root graph.
-  /// @param [in] const NodePtr &data: data in Root/Case graph.
+  /// @brief Create output node for root graph.
+  /// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
   /// @return 0: SUCCESS / others: FAILED
   ///
-  Status SetMaxShapeToData(const NodePtr &data);
+  Status CreateOutputNode(const ComputeGraphPtr &graph);
 
   ///
   /// @ingroup ge
   /// @brief Update Data node in Subgraph.
   /// @param [in] const NodePtr &data: data in Subgraph.
-  /// @param [in] size_t index: The batch index.
+  /// @param [in] size_t batch_index: The batch index.
   /// @return 0: SUCCESS / others: FAILED
   ///
-  Status UpdateSubgraphData(const NodePtr &data, size_t index);
+  Status UpdateSubgraphData(const NodePtr &data, size_t batch_index);
 
   ///
   /// @ingroup ge
@@ -122,13 +143,12 @@ class MultiBatchClonePass : public GraphPass {
 
   ///
   /// @ingroup ge
-  /// @brief Set max shape to Data node in root graph.
-  /// @param [in] const std::vector<int64_t> &shapes: dims of shape.
-  /// @param [in] const NodePtr &data: data in Root/Case graph.
-  /// @param [in] GeShape &data_shape: dims of data node.
+  /// @brief Create nodes for root graph.
+  /// @param [in] const ComputeGraphPtr &graph: Original graph.
   /// @return 0: SUCCESS / others: FAILED
   ///
-  Status SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape);
+  Status CreateOriGraph(const ComputeGraphPtr &graph);
+  NodePtr CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, size_t data_index);
 
   ///
   /// @ingroup ge
@@ -168,6 +188,10 @@ class MultiBatchClonePass : public GraphPass {
   std::map<std::string, std::vector<std::vector<int64_t>>> data_to_dynamic_info_;
 
   NodePtr case_node_;
+  size_t data_count_from_getnext_ = 0;
+  bool getnext_sink_dynamic_dims_ = false;
+  NodePtr shape_node_;
+  std::set<NodePtr> out_control_nodes_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_PASSES_MULTI_BATCH_CLONE_PASS_H_
diff --git a/ge/graph/passes/unused_args_clean_pass.cc b/ge/graph/passes/unused_args_clean_pass.cc
index 83fd0438..ec66b129 100755
--- a/ge/graph/passes/unused_args_clean_pass.cc
+++ b/ge/graph/passes/unused_args_clean_pass.cc
@@ -204,6 +204,10 @@ Status UnusedArgsCleanPass::RemoveInputTensor(const map<
              out_node->GetName().c_str(), func_node->GetName().c_str());
+  if (out_node->GetInDataNodes().size() == 0 && out_node->GetOutAllNodes().size() == 0) {
+    GE_CHK_GRAPH_STATUS_RET(out_node->GetOwnerComputeGraph()->RemoveNode(out_node), "Remove node failed: %s",
+                            out_node->GetName().c_str());
+  }
   return SUCCESS;
 }
 }  // namespace ge
\ No newline at end of file
diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc
index 6bb3105c..f94633a1 100644
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -37,6 +37,7 @@
 #include "graph/passes/addn_pass.h"
 #include "graph/passes/aicpu_constant_folding_pass.h"
 #include "graph/passes/assert_pass.h"
+#include "ge/ge_api_types.h"
 #ifdef ONLY_COMPILE_OPEN_SRC
 #include "graph/passes/assign_remove_pass.h"
 #endif
@@ -899,6 +900,160 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) {
   }
   return SUCCESS;
 }
+long StringToLongNoThrow(const string &str) {
+  try {
+    return std::stol(str);
+  } catch (const std::invalid_argument &) {
+    GELOGE(PARAM_INVALID,
+           "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
+           "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+           str.c_str());
+    return PARAM_INVALID;
+  } catch (const std::out_of_range &) {
+    GELOGE(PARAM_INVALID,
+           "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
+           "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+           str.c_str());
+    return PARAM_INVALID;
+  }
+}
+/**
+ * Parse shape_range from string to vector
+ * shape_range from option normally is "[1~20,3,3~6,-1],[1~20,3,3~6,-1]"
+ * @param shape_range
+ */
+Status ParseDynamicInputShapeRange(const std::string &shape_range,
+                                   std::vector<std::vector<std::pair<int64_t, int64_t>>> &range) {
+  if (shape_range.size() < 2) {
+    GELOGE(PARAM_INVALID, "Shape range %s is invalid.", shape_range.c_str());
+    return PARAM_INVALID;
+  }
+  // different shape_range of single input are split by ']'
+  vector<string> shape_range_set = ge::StringUtils::Split(shape_range, ']');
+  if (shape_range_set.empty()) {
+    GELOGE(PARAM_INVALID, "Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+           shape_range.c_str());
+    return PARAM_INVALID;
+  }
+  for (auto &shape_range_str : shape_range_set) {
+    if (shape_range_str.empty()) {
+      continue;
+    }
+    // trim start bytes; after that, a single input should look like "1~20,3,3~6,-1"
+    if (ge::StringUtils::StartWith(shape_range_str, "[")) {
+      shape_range_str = shape_range_str.substr(1, shape_range_str.size());
+    }
+    if (ge::StringUtils::StartWith(shape_range_str, ",")) {
+      shape_range_str = shape_range_str.substr(2, shape_range_str.size());
+    }
+
+    // parse shape_range of single input, e.g. "1~20,3,3~6,-1"
+    std::vector<std::pair<int64_t, int64_t>> range_of_single_input;
+    vector<string> dim_range_set = ge::StringUtils::Split(shape_range_str, ',');
+    for (const auto &range_pair_str : dim_range_set) {
+      vector<string> range_pair_set = ge::StringUtils::Split(range_pair_str, '~');
+      pair<int64_t, int64_t> range_pair;
+      if (range_pair_set.size() == 1) {
+        // fixed dim
+        auto range_value = StringToLongNoThrow(range_pair_set.at(0).c_str());
+        if (range_value < 0) {
+          range_pair = std::make_pair(0, range_value);
+        } else {
+          range_pair = std::make_pair(range_value, range_value);
+        }
+      } else if (range_pair_set.size() == 2) {
+        // unknown dim, should get range
+        auto range_left = StringToLongNoThrow(range_pair_set.at(0).c_str());
+        auto range_right = StringToLongNoThrow(range_pair_set.at(1).c_str());
+        range_pair = std::make_pair(range_left, range_right);
+      } else {
+        GELOGE(PARAM_INVALID,
+               "Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+               shape_range.c_str());
+        return PARAM_INVALID;
+      }
+      range_of_single_input.emplace_back(range_pair);
+    }
+    range.emplace_back(range_of_single_input);
+  }
+  return SUCCESS;
+}
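// Note (worked example): for the option value "[1~20,3,3~6,-1],[1~20,3,3~6,-1]" the parser above
// yields one range vector per input; for a single input the four dims become:
//   "1~20" -> {1, 20}   bounded dynamic dim
//   "3"    -> {3, 3}    fixed dim (a fixed value k is stored as {k, k})
//   "3~6"  -> {3, 6}    bounded dynamic dim
//   "-1"   -> {0, -1}   unbounded dim (a negative value keeps -1 as the upper bound)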
Not match.", range_vec.size(), + user_input.size()); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, + const vector>> &range_vec, OpDescPtr &op, + GeTensorDesc &desc) { + auto origin_shape = desc.GetShape(); + auto current_shape_range_vec = range_vec.at(index); + if (current_shape_range_vec.size() != origin_shape.GetDimNum()) { + GELOGE(PARAM_INVALID, "Given shape_range dim num is %zu, current dim num is %zu, not match.Pleace Check.", + current_shape_range_vec.size(), origin_shape.GetDimNum()); + return PARAM_INVALID; + } + for (size_t i = 0; i < origin_shape.GetDimNum(); ++i) { + if (current_shape_range_vec.at(i).first == current_shape_range_vec.at(i).second) { + // given shape_range is known dim, check is same as origin or not + if (origin_shape.GetDim(i) != current_shape_range_vec.at(i).first) { + GELOGE(PARAM_INVALID, "Given shape range is %ld, current dim shape is %ld, not match.Pleace Check.", + current_shape_range_vec.at(i).first, origin_shape.GetDim(i)); + return PARAM_INVALID; + } + origin_shape.SetDim(i, current_shape_range_vec.at(i).first); + } else { + origin_shape.SetDim(i, -1); + } + } + desc.SetShape(origin_shape); + desc.SetShapeRange(current_shape_range_vec); + + int64_t dynamic_shape_size = 1; + for (const auto range_pair : range_vec.at(index)) { + FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); + dynamic_shape_size *= range_pair.second; + } + auto data_type_size = GetSizeByDataType(desc.GetDataType()); + if (data_type_size < 0) { + GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.", + TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); + return PARAM_INVALID; + } + FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); + dynamic_shape_size *= data_type_size; + GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); + ge::TensorUtils::SetSize(desc, dynamic_shape_size); + graphStatus graph_ret = op->UpdateInputDesc(0, desc); + GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); + graph_ret = op->UpdateOutputDesc(0, desc); + GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); + return SUCCESS; +} } // namespace GraphPrepare::GraphPrepare() : compute_graph_(nullptr) {} @@ -1103,7 +1258,11 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { return SUCCESS; } -Status GraphPrepare::UpdateInput(const std::vector &user_input) { +Status GraphPrepare::UpdateInput(const std::vector &user_input, const std::map &graph_option) { + // Get shape range of input in dynamic_execute mode + vector>> dynamic_shape_range_vec; + auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); + GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); for (NodePtr &input_node : compute_graph_->GetDirectNode()) { GE_CHECK_NOTNULL(input_node); @@ -1186,6 +1345,12 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { return graph_ret; } + if (!dynamic_shape_range_vec.empty()) { + ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc); + GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str()); + continue; + } + if (!options_.train_graph_flag) { Status ret = AdjustDataOpOutput(input_node); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "AdjustDataOpOutput 
fail, ret:%u", ret); return ret); @@ -1359,17 +1524,17 @@ Status GraphPrepare::SaveOriginalGraphToOmModel() { GELOGI("Prepare %s on graph %s success.", name, compute_graph->GetName().c_str()); \ } while (0) -Status GraphPrepare::PrepareDynShape(ConstGraphPtr graph, const std::vector &user_input, +Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { - GE_CHECK_NOTNULL(graph); + GE_CHECK_NOTNULL(graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); GetLocalOmgContext().type = static_cast(options_.framework_type); - const Graph &const_graph = *graph; + const Graph &const_graph = *graph_node->GetGraph(); PP_RUN("Init", Init, const_graph, session_id); PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE); - PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input); + PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetOptions()); PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation); PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput); PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_); @@ -1834,7 +1999,7 @@ Status GraphPrepare::ProcessNetOutput() { return SUCCESS; } -Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input) { +Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input,const std::map &graph_option) { compute_graph_->SetInputSize(user_input.size()); if (user_input.empty()) { return SUCCESS; @@ -1846,7 +2011,7 @@ Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input return ret; } - ret = UpdateInput(user_input); + ret = UpdateInput(user_input, graph_option); if (ret != SUCCESS) { GELOGE(ret, "UpdateInput fail, ret:%u", ret); return ret; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index a3bbf433..de755418 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -45,7 +45,7 @@ class GraphPrepare { virtual ~GraphPrepare(); GraphPrepare(const GraphPrepare &in) = delete; GraphPrepare &operator=(const GraphPrepare &in) = delete; - Status PrepareDynShape(ConstGraphPtr graph, + Status PrepareDynShape(const GraphNodePtr &graph_node, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, uint64_t session_id = 0); @@ -63,8 +63,8 @@ class GraphPrepare { Status CheckRefOp(); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); Status AdjustDataOpOutput(const NodePtr &node); - Status UpdateInput(const std::vector &user_input); - Status CheckAndUpdateInput(const std::vector &user_input); + Status UpdateInput(const std::vector &user_input, const std::map &graph_option); + Status CheckAndUpdateInput(const std::vector &user_input, const std::map &graph_option); Status CheckConstOp(); Status VerifyConstOp(const NodePtr &node); Status CheckUserInput(const std::vector &user_input); diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c8880b2e..5506435e 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1692,13 +1692,11 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { - if (GetLocalOmgContext().dynamic_node_type.empty()) { - const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); - if 
-    if (multi_batch_with_switchn == nullptr) {
-      PassManager pass_manager;
-      GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
-      return pass_manager.Run(graph);
-    }
+  const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
+  if (multi_batch_with_switchn == nullptr) {
+    PassManager pass_manager;
+    GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
+    return pass_manager.Run(graph);
   }
   if (!GetLocalOmgContext().need_multi_batch) {
     GELOGI("No need to process_multi for no_train graph.");
diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc
index c26b08bc..aba2b88d 100644
--- a/ge/graph/preprocess/multi_batch_options.cc
+++ b/ge/graph/preprocess/multi_batch_options.cc
@@ -99,9 +99,8 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector<NodePtr> &data_n
   }
   GELOGI("Data count is %zu, getnext nosink count is %zu, getnext sink count is %zu.", data_nodes.size(),
          getnext_nosink_nodes.size(), getnext_sink_nodes.size());
-  GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrDataNodes, data_nodes), GELOGW("Set data nodes attr failed.");)
-  GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes),
-                  GELOGW("Set getnext nosink nodes attr failed.");)
+  GetLocalOmgContext().data_nodes = data_nodes;
+  GetLocalOmgContext().getnext_nosink_nodes = getnext_nosink_nodes;
   return SUCCESS;
 }
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc
index ba717a2d..4d23cd55 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -98,10 +98,10 @@ Status HybridModelAsyncExecutor::Init() {
   return SUCCESS;
 }
 
-Status HybridModelAsyncExecutor::PreRun(InputData &current_data) {
+Status HybridModelAsyncExecutor::PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
   GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data");
   RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End");
-  GE_CHK_STATUS_RET(CopyInputData(current_data), "Failed to copy input data to model");
+  GE_CHK_STATUS_RET(PrepareInputs(current_data, args), "Failed to copy input data to model");
   RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End");
   return SUCCESS;
 }
@@ -126,14 +126,9 @@ Status HybridModelAsyncExecutor::RunInternal() {
     InputData current_data = data_wrapper->GetInput();
     GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id_, current_data.index);
 
-    HybridModelExecutor::ExecuteArgs args;
-    args.inputs.resize(input_tensors_.size());
-    for (auto &it : input_tensors_) {
-      args.inputs[it.first] = it.second;
-    }
-
     RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_);
-    ret = PreRun(current_data);
+    HybridModelExecutor::ExecuteArgs args;
+    ret = PreRun(current_data, args);
     GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
         ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
         CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);
@@ -202,7 +197,9 @@ Status HybridModelAsyncExecutor::SyncVarData() {
   return SUCCESS;
 }
 
-Status HybridModelAsyncExecutor::CopyInputData(const InputData &current_data) {
+Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
+  args.inputs.resize(input_tensors_.size());
+  args.input_desc.resize(input_tensor_desc_.size());
   const std::vector<DataBuffer> &blobs = current_data.blobs;
   for (const auto &it : input_tensors_) {
     auto input_index = it.first;
@@ -230,6 +227,13 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData &current_data) {
                           data_buf.data, data_buf.length, RT_MEMCPY_HOST_TO_DEVICE));
+    args.inputs[input_index] = input_tensor;
+    if (is_input_dynamic_[input_index]) {
+      auto &tensor_desc = input_tensor_desc_[input_index];
+      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
+      args.input_desc[input_index] = tensor_desc;
+      GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+    }
   }
 
   return SUCCESS;
@@ -240,7 +244,10 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
   GE_CHECK_NOTNULL(allocator);
   int input_index = 0;
   for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
-    GELOGD("Init input[%u], node = %s", input_index, input_node->NodeName().c_str());
+    GELOGD("Init input[%u], node = %s, is_dynamic = %d",
+           input_index,
+           input_node->NodeName().c_str(),
+           input_node->is_dynamic);
     auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
     GE_CHECK_NOTNULL(output_desc);
     int64_t tensor_size = 0;
@@ -258,6 +265,8 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
     TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release()));
     tensor.SetName("Input_" + input_node->NodeName());
     input_tensors_.emplace(input_index, tensor);
+    input_tensor_desc_.emplace(input_index, output_desc);
+    is_input_dynamic_.push_back(input_node->is_dynamic);
     input_index += 1;
   }
 
@@ -402,18 +411,12 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<
     buffer.data = const_cast<uint8_t *>(tensor.GetData().GetData());
     buffer.length = tensor.GetData().size();
     input_data.blobs.emplace_back(buffer);
+    input_data.shapes.emplace_back(tensor.GetTensorDesc().GetShape().GetDims());
   }
-  GE_CHK_STATUS_RET(CopyInputData(input_data), "Failed to copy input data to model");
-  GELOGD("Done copying input data successfully.");
 
   HybridModelExecutor::ExecuteArgs args;
-  args.inputs.resize(input_tensors_.size());
-  args.input_desc.resize(input_tensors_.size());
-  for (auto &it : input_tensors_) {
-    args.inputs[it.first] = it.second;
-    args.input_desc[it.first] = MakeShared<GeTensorDesc>(inputs[it.first].GetTensorDesc());
-  }
-
+  GE_CHK_STATUS_RET(PrepareInputs(input_data, args), "Failed to copy input data to model");
+  GELOGD("Done copying input data successfully.");
   GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
 
   std::vector<ge::OutputTensorInfo> output_tensor_info_list;
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index 21833b0b..ad39cac5 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -70,9 +70,9 @@ class HybridModelAsyncExecutor {
 
   Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::OutputTensorInfo> &outputs);
 
-  Status PreRun(InputData &current_data);
+  Status PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args);
 
-  Status CopyInputData(const InputData &current_data);
+  Status PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args);
 
   std::mutex mu_;
   HybridModel *model_;
@@ -86,6 +86,8 @@ class HybridModelAsyncExecutor {
   rtStream_t stream_ = nullptr;
   std::map<uint32_t, TensorValue> input_tensors_;
+  std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
+  std::vector<bool> is_input_dynamic_;
   std::shared_ptr<ModelListener> listener_;
 };
 }  // namespace hybrid
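// Note (usage sketch; the caller-side names host_ptr/host_len are hypothetical): with the new
// InputData::shapes field, Execute()/PrepareInputs() expect one dim vector per blob, and dynamic
// inputs get that runtime shape pushed into args.input_desc before execution.
InputData MakeInputData(void *host_ptr, uint64_t host_len) {
  InputData input_data;
  DataBuffer buffer;
  buffer.data = host_ptr;  // data/length fields taken from the DataBuffer usage in this patch
  buffer.length = host_len;
  input_data.blobs.emplace_back(buffer);
  input_data.shapes.emplace_back(std::vector<int64_t>{8, 3, 224, 224});  // runtime shape of input 0
  return input_data;
}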
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index 21dd8e4b..e9c6ef29 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -221,6 +221,8 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
       tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
       tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
     }
+    tmp_compute_graph_info.task_id = context_->GetTaskId();
+    tmp_compute_graph_info.stream_id = context_->GetStreamId();
     compute_graph_info.emplace_back(tmp_compute_graph_info);
     GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str());
   }
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 46c9c39b..32fc495a 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -35,11 +35,22 @@
 
 namespace ge {
 namespace hybrid {
+using domi::LogTimeStampDef;
+using domi::TaskDef;
 namespace {
 const uint32_t kSubgraphIndex = 0U;
 const uint32_t kVarOutputIndex = 0U;
+const uint64_t kProfilingFpStartLogid = 1U;
+const uint64_t kProfilingBpEndLogid = 2U;
+const uint64_t kProfilingIterEndLogid = 65535U;
 const int kBytes = 8;
 const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown";
+const char *const kProfilingGraph = "ProfilingGraph";
+const char *const kProfilingFpNode = "ProfilingFpNode";
+const char *const kProfilingBpNode = "ProfilingBpNode";
+const char *const kProfilingEndNode = "ProfilingEndNode";
+const char *const kProfilingArNode = "ProfilingAllReduceNode";
+const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE";
 
 Status SetOutputNameAttr(ComputeGraph &graph) {
   vector<string> output_names;
@@ -1531,6 +1542,188 @@ Status HybridModelBuilder::RecoverGraphUnknownFlag() {
   return SUCCESS;
 }
 
+Status HybridModelBuilder::GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  uint64_t jobid_log_id = ge::GetContext().TraceId();
+  GELOGD("The first FP operator is %s, job_id is %lu", op_desc->GetName().c_str(), jobid_log_id);
+
+  TaskDef job_task_def;
+  job_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  job_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *job_log_def = job_task_def.mutable_log_timestamp();
+  if (job_log_def != nullptr) {
+    job_log_def->set_logid(jobid_log_id);
+    job_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(job_task_def);
+  TaskDef fp_task_def;
+  fp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  fp_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *fp_log_def = fp_task_def.mutable_log_timestamp();
+  if (fp_log_def != nullptr) {
+    fp_log_def->set_logid(kProfilingFpStartLogid);
+    fp_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(fp_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id,
+                                                   vector<TaskDef> &task_def_list) {
+  TaskDef ar_task_def;
+  ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  ar_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
+  if (ar_log_def != nullptr) {
+    ar_log_def->set_logid(log_id);
+    ar_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(ar_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  TaskDef bp_task_def;
+  bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  bp_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *bp_log_def = bp_task_def.mutable_log_timestamp();
+  GE_CHECK_NOTNULL(bp_log_def);
+  bp_log_def->set_logid(kProfilingBpEndLogid);
+  bp_log_def->set_notify(false);
+  task_def_list.emplace_back(bp_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  TaskDef end_task_def;
+  end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  end_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *end_log_def = end_task_def.mutable_log_timestamp();
+  GE_CHECK_NOTNULL(end_log_def);
+  end_log_def->set_logid(kProfilingIterEndLogid);
+  end_log_def->set_notify(true);
+  task_def_list.emplace_back(end_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node) {
+  GE_CHECK_NOTNULL(node);
+  const OpDescPtr &op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
+  GE_CHECK_NOTNULL(compute_graph);
+
+  NodePtr node_ptr = nullptr;
+  vector<TaskDef> task_def_list;
+  // create fp node
+  bool is_insert_fp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
+  if (is_insert_fp_profiling_task) {
+    (void)GenerateFpProfilingTask(op_desc, task_def_list);
+    auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(fp_desc);
+    fp_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(fp_desc);
+    GELOGD("Create fp profiling node success before.");
+  }
+  // create all reduce start node
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    int64_t log_id = 0;
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
+    GELOGD("All reduce node profiling task log id: %ld before", log_id);
+    (void)GenerateArProfilingTask(op_desc, log_id, task_def_list);
+    string op_name = string(kProfilingArNode) + std::to_string(log_id);
+    auto ar_desc_start = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(ar_desc_start);
+    ar_desc_start->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(ar_desc_start);
+    GELOGD("Create all reduce start profiling node success before.");
+  }
+
+  if (node_ptr != nullptr) {
+    for (const auto &task_def : task_def_list) {
+      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+    }
+    NodeItem *node_item = nullptr;
+    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
+    node_item->input_start = 0;
+    node_item->output_start = 0;
+    graph_item.node_items_.emplace_back(node_item);
+  } else {
+    GELOGD("No need to create profiling node before.");
+  }
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node) {
+  GE_CHECK_NOTNULL(node);
+  const OpDescPtr &op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
+  GE_CHECK_NOTNULL(compute_graph);
+
+  NodePtr node_ptr = nullptr;
+  vector<TaskDef> task_def_list;
+  // create all reduce end node
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    int64_t log_id = 0;
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
+    GELOGD("All reduce node profiling task log id: %ld after", log_id);
+    (void)GenerateArProfilingTask(op_desc, log_id + 1, task_def_list);
+    string op_name = string(kProfilingArNode) + std::to_string(log_id + 1);
+    auto ar_desc_end = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(ar_desc_end);
+    ar_desc_end->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(ar_desc_end);
+    GELOGD("Create all reduce end profiling node success after.");
+  }
+  // create bp node
+  if (!is_all_reduce && is_insert_bp_profiling_task) {
+    (void)GenerateBpProfilingTask(op_desc, task_def_list);
+    auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(bp_op_desc);
+    bp_op_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(bp_op_desc);
+    GELOGD("Create bp profiling node success after.");
+  }
+  // create end node
+  bool is_insert_end_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
+  if (is_insert_end_profiling_task) {
+    (void)GenerateEndProfilingTask(op_desc, task_def_list);
+    auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(end_desc);
+    end_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(end_desc);
+    GELOGD("Create end profiling node success after.");
+  }
+
+  if (node_ptr != nullptr) {
+    for (const auto &task_def : task_def_list) {
+      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+    }
+    NodeItem *node_item = nullptr;
+    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
+    node_item->input_start = 0;
+    node_item->output_start = 0;
+    graph_item.node_items_.emplace_back(node_item);
+  } else {
+    GELOGD("No need to create profiling node after.");
+  }
+
+  return SUCCESS;
+}
+
 Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) {
   GELOGD("Start to load subgraph [%s]", graph.GetName().c_str());
   // for known partitioned call, load all nodes
@@ -1567,8 +1760,9 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root
       graph_item->output_node_ = node_item;
       GE_CHK_STATUS_RET_NOLOG(BuildOutputMapping(*graph_item, *node_item, is_root_graph));
     }
-
+    GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeBefore(*graph_item, node));
     graph_item->node_items_.emplace_back(node_item);
+    GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeAfter(*graph_item, node));
     // parse var outputs
     GE_CHK_STATUS_RET_NOLOG(ParseVarOutputs(*node_item));
     GELOGD("NodeItem created: %s", node_item->DebugString().c_str());
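// Note (summary, values taken from the constants added above): the hybrid builder emits
// rtProfilerTrace log ids per iteration as follows:
//   GetContext().TraceId()          job id, sent once before the FP task
//   1   (kProfilingFpStartLogid)    before the first forward op
//   2   (kProfilingBpEndLogid)      after the last backward op (non all-reduce)
//   log_id / log_id + 1             around each all-reduce op, log_id read from
//                                   ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID
//   65535 (kProfilingIterEndLogid)  iteration end, the only task with notify=true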
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index a11faae2..55a19b6c 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -79,6 +79,12 @@ class HybridModelBuilder {
   Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
   Status RecoverGraphUnknownFlag();
   Status CheckAicpuOpList();
+  Status CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node);
+  Status CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node);
+  Status GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list);
+  Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list);
+  Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list);
+  Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<TaskDef> &task_def_list);
 
   const char* GetGraphName() const {
     return hybrid_model_.model_name_.c_str();
diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.cc b/ge/hybrid/node_executor/rts/rts_node_executor.cc
index 18b875fd..90b623e0 100644
--- a/ge/hybrid/node_executor/rts/rts_node_executor.cc
+++ b/ge/hybrid/node_executor/rts/rts_node_executor.cc
@@ -18,6 +18,7 @@
 #include "common/debug/log.h"
 #include "common/ge/ge_util.h"
 #include "graph/utils/tensor_utils.h"
+#include "hybrid/model/hybrid_model.h"
 #include "runtime/rt.h"
 
 namespace ge {
@@ -79,12 +80,44 @@ Status IdentityNNodeTask::ExecuteAsync(TaskContext &context, std::function<void
+Status ProfilingTraceNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
+  for (const auto &task_def : task_defs_) {
+    auto log_time_stamp_def = task_def.log_timestamp();
+    uint64_t log_id = log_time_stamp_def.logid();
+    bool notify = log_time_stamp_def.notify();
+    uint32_t flat = log_time_stamp_def.flat();
+
+    GELOGD("ProfilingTraceTask execute async start. logid = %lu, notify = %d.", log_id, notify);
+    rtError_t rt_ret = rtProfilerTrace(log_id, notify, flat, context.GetStream());
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+    GELOGD("[%s] ProfilingTraceTask[%lu] execute success.", context.GetNodeName(), log_id);
+  }
+
+  return SUCCESS;
+}
+
 Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
+  GE_CHECK_NOTNULL(node);
+  auto op_type = node->GetType();
   if (op_type == IDENTITY) {
     task = MakeShared<IdentityNodeTask>();
   } else if (op_type == IDENTITYN) {
     task = MakeShared<IdentityNNodeTask>();
+  } else if (op_type == PROFILINGTRAININGTRACE) {
+    auto *task_defs = model.GetTaskDefs(node);
+    if (task_defs == nullptr || task_defs->empty()) {
+      GELOGE(INTERNAL_ERROR, "Profiling node has no task to execute.");
+      return INTERNAL_ERROR;
+    }
+    task = MakeShared<ProfilingTraceNodeTask>(*task_defs);
   } else {
     GELOGE(INTERNAL_ERROR, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), op_type.c_str());
     return INTERNAL_ERROR;
diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.h b/ge/hybrid/node_executor/rts/rts_node_executor.h
index 2576b73b..df487d6c 100644
--- a/ge/hybrid/node_executor/rts/rts_node_executor.h
+++ b/ge/hybrid/node_executor/rts/rts_node_executor.h
@@ -18,6 +18,7 @@
 #define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_
 
 #include "hybrid/node_executor/node_executor.h"
+#include "proto/task.pb.h"
 
 namespace ge {
 namespace hybrid {
@@ -35,6 +36,18 @@ class IdentityNNodeTask : public IdentityNodeTask {
   Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
 };
 
+class ProfilingTraceNodeTask : public NodeTask {
+ public:
+  explicit ProfilingTraceNodeTask(const std::vector<TaskDef> &task_defs) : task_defs_(task_defs) {}
+  ~ProfilingTraceNodeTask() override = default;
+
+  Status UpdateArgs(TaskContext &context) override;
+  Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
+
+ private:
+  std::vector<TaskDef> task_defs_;
+};
+
 class RtsNodeExecutor : public NodeExecutor {
  public:
  Status LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const override;
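// Note (design remark, partly assumed): ProfilingTraceNodeTask carries no tensor I/O, so its
// UpdateArgs is presumably a no-op; ExecuteAsync only replays the TaskDefs that the builder
// stashed per profiling node (hybrid_model_.task_defs_[node]) as
// rtProfilerTrace(logid, notify, flat, stream) calls on the node's stream, which is why
// LoadTask fails fast when GetTaskDefs(node) returns nothing.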
index 0e85a8e3..8ba4fb90 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -123,7 +123,7 @@ class TaskContext { Status status_ = SUCCESS; std::vector workspaces_; uint64_t iteration_ = 0; - uint32_t task_id_= 0; + uint32_t task_id_ = 0; uint32_t stream_id_ = 0; }; } // namespace hybrid diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index c7ef6c1a..78a69392 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -36,6 +36,9 @@ #include "model/ge_model.h" #include "graph/shape_refiner.h" #include "graph/opsproto_manager.h" +#include "inc/pass_manager.h" +#include "graph/passes/net_output_pass.h" +#include "graph/passes/data_pass.h" using std::string; using namespace std; @@ -233,6 +236,7 @@ class Impl { ModelBufferData &ge_models); graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, bool is_dynamic_input); + static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph); void SetRtSocVersion(); void UpdateThreadContext(); void LoadOpsProto(); @@ -243,6 +247,22 @@ class Impl { OmgContext omg_context_; }; +graphStatus Impl::InferShapePrepare(const ComputeGraphPtr &compute_graph) { + GE_CHECK_NOTNULL(compute_graph); + + PassManager prepare_infershape; + prepare_infershape.AddPass("PrepareNetoutput", new(std::nothrow) NetOutputPass); + prepare_infershape.AddPass("PrepareSubGraphReflection", new (std::nothrow) DataPass); + + auto ret = prepare_infershape.Run(compute_graph); + if ((ret != SUCCESS) && (ret != NOT_CHANGED)) { + GELOGE(ret, "Prepair for infershape failed, ret:%d", ret); + return ret; + } + GELOGD("Prepair for infershape success!"); + return GRAPH_SUCCESS; +} + graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { GELOGD("Enter Update Data Attr Process!"); if (options_.find(kInputShape) == options_.end()) { @@ -591,7 +611,12 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { return GRAPH_PARAM_INVALID; } - auto ret = compute_graph->TopologicalSorting(); + auto ret = Impl::InferShapePrepare(compute_graph); + if (ret != GRAPH_SUCCESS) { + return ret; + } + + ret = compute_graph->TopologicalSorting(); if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Acl topo logical sort failed."); return ret; diff --git a/ge/offline/keep_dtype_option.cc b/ge/offline/keep_dtype_option.cc index 348a6068..5624f21c 100644 --- a/ge/offline/keep_dtype_option.cc +++ b/ge/offline/keep_dtype_option.cc @@ -42,21 +42,29 @@ bool IsOriginalOpFind(OpDescPtr &op_desc, const std::string &op_name) { } void KeepDtypeReportError(const std::vector &invalid_list) { - std::stringstream error_ops; - for (size_t i = 0; i < invalid_list.size(); i++) { + std::stringstream err_msg; + size_t list_size = invalid_list.size(); + err_msg << "config file contains " << list_size; + if (list_size == 1) { + err_msg << " operator not in the graph, op name:"; + } else { + err_msg << " operators not in the graph, op names:"; + } + + for (size_t i = 0; i < list_size; i++) { if (i == kMaxOpsNum) { - error_ops << "..."; + err_msg << ".."; break; } - error_ops << invalid_list[i] << " "; + err_msg << invalid_list[i]; + if (i != list_size - 1) { + err_msg << " "; + } } - std::string err_msg = "config file contains "; - err_msg = err_msg.append(std::to_string(invalid_list.size())) - .append(" operators not in the graph, op names:") - .append(error_ops.str()); + ErrorManager::GetInstance().ATCReportErrMessage( - "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.c_str()}); - 
diff --git a/ge/offline/main.cc b/ge/offline/main.cc
index ed67b913..14f7ae89 100755
--- a/ge/offline/main.cc
+++ b/ge/offline/main.cc
@@ -994,6 +994,8 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
 
   Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype);
   if (ret != SUCCESS) {
+    (void)ge_generator.Finalize();
+    (void)ge::GELib::GetInstance()->Finalize();
     return ret;
   }
diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h
index d0f2105f..250252f9 100644
--- a/inc/external/ge/ge_api_types.h
+++ b/inc/external/ge/ge_api_types.h
@@ -61,6 +61,11 @@ const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag";
 const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic";
 const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory";
 const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization";
+// Dynamic input flag. ge.exec.dynamicInput=1 means dynamic input is enabled;
+// ge.exec.dynamicGraphExecuteMode selects the mode, dynamic_execute[default]
+const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput";
+const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode";
+const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
 
 // Option key: memory init
 const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 4267aec4..d845654e 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -73,14 +73,15 @@ struct DataBuffer {
 /// @brief External input data
 ///
 struct InputData {
-  uint32_t index;  // Index of input data
-  uint32_t timestamp;  // Data creation time
-  uint32_t timeout;  // Processing timeout
-  uint32_t model_id;  // Model ID required for data processing
-  uint64_t request_id = 0;  // Request ID
-  std::vector<DataBuffer> blobs;  // Actual input data, currently only supports one input
-  bool is_dynamic_batch = false;  // Whether is dynamic batch size scene, default:false
-  std::string batch_label;  // Gear used for current inference in dynamic batch scene
+  uint32_t index;                             // Index of input data
+  uint32_t timestamp;                         // Data creation time
+  uint32_t timeout;                           // Processing timeout
+  uint32_t model_id;                          // Model ID required for data processing
+  uint64_t request_id = 0;                    // Request ID
+  std::vector<DataBuffer> blobs;              // Actual input data, currently only supports one input
+  bool is_dynamic_batch = false;              // Whether is dynamic batch size scene, default:false
+  std::string batch_label;                    // Gear used for current inference in dynamic batch scene
+  std::vector<std::vector<int64_t>> shapes;   // Input shapes
 };
 
 /// Output result structure definition
@@ -263,6 +264,8 @@ struct ComputeGraphDescInfo {
   std::vector<Format> output_format;
   std::vector<std::vector<int64_t>> output_shape;
   std::vector<DataType> output_data_type;
+  uint32_t task_id;
+  uint32_t stream_id;
 };
 
 struct OpDescInfo {
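
The three new ge.exec.* keys are ordinary option-map entries. A hedged usage sketch follows (not part of the patch; the shape-range string format and the choice of passing the same map at both init and session scope are assumptions for illustration):

#include <map>
#include <string>
#include "ge/ge_api.h"        // ge::GEInitialize / ge::GEFinalize / ge::Session
#include "ge/ge_api_types.h"  // the OPTION_EXEC_* keys added above

int main() {
  std::map<std::string, std::string> options;
  options[ge::OPTION_EXEC_DYNAMIC_INPUT] = "1";                          // enable dynamic input
  options[ge::OPTION_EXEC_DYNAMIC_EXECUTE_MODE] = "dynamic_execute";     // default per the header comment
  options[ge::OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE] = "[1~8,3,224,224]";  // assumed range syntax
  if (ge::GEInitialize(options) != ge::SUCCESS) {
    return -1;
  }
  ge::Session session(options);
  // ... build and run graphs ...
  (void)ge::GEFinalize();
  return 0;
}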
diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h
index 99c2ea03..e3baa816 100644
--- a/inc/framework/common/types.h
+++ b/inc/framework/common/types.h
@@ -529,6 +529,9 @@ REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait");
 // aicpu op for online_infer dynamic_dims
 REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims");
 
+// profiling training trace node
+REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace");
+
 enum InputMode { INPUT = 0, CONST_INPUT };
 
 // Definition of the processing status enum of the process module
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index 1b78860d..3136e172 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -157,9 +157,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
 
   ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
 
-  ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
-                                         std::vector<ge::TensorDesc> &output_desc);
-
   ge::Status CommandHandle(const ge::Command &command);
 
   ge::Status SetDump(const DumpConfig &dump_config);
diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h
index dab79053..1049b6b5 100644
--- a/inc/framework/omg/omg_inner_types.h
+++ b/inc/framework/omg/omg_inner_types.h
@@ -26,6 +26,7 @@
 #include <vector>
 #include "framework/common/fmk_error_codes.h"
 #include "register/register_fmk_types.h"
+#include "graph/node.h"
 
 using domi::DOMI_TENSOR_ND;
 using domi::DOMI_TENSOR_RESERVED;
@@ -120,6 +121,8 @@ struct OmgContext {
   std::vector<std::vector<int64_t>> user_real_input_dims;
   std::vector<int64_t> cur_dynamic_dims;
   bool need_multi_batch = false;
+  std::vector<NodePtr> data_nodes;
+  std::vector<NodePtr> getnext_nosink_nodes;
 };
 }  // namespace ge
diff --git a/metadef b/metadef
index 11c6cf29..fe37bc34 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 11c6cf2921b6a385616a3ebc601b4431b55b07db
+Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900
diff --git a/parser b/parser
index 99437c39..336cd310 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit 99437c39d26624a14060307366a96b79b1d439c3
+Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 1f6c6837..db725dfb 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -121,6 +121,7 @@ set(COMMON_SRC_FILES
     "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc"
     "${GE_CODE_DIR}/metadef/ops/op_imp.cpp"
     "${GE_CODE_DIR}/metadef/register/register.cpp"
+    "${GE_CODE_DIR}/metadef/register/register_pass.cpp"
     "${GE_CODE_DIR}/metadef/register/op_kernel_registry.cpp"
     "${GE_CODE_DIR}/metadef/register/auto_mapping_util.cpp"
     "${GE_CODE_DIR}/metadef/register/tensor_assign.cpp"
@@ -626,6 +627,7 @@ set(PASS_TEST_FILES
     "graph/passes/net_output_pass_unittest.cc"
     "graph/passes/no_use_reshape_remove_pass_unittest.cc"
    "graph/passes/infershape_pass_unittest.cc"
+    "graph/passes/multi_batch_clone_pass_unittest.cc"
 )
 
 set(KERNEL_TEST_FILES
diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc
index a9efab3d..fe7c70c9 100644
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -32,6 +32,18 @@ class UtestDavinciModel : public testing::Test {
   void SetUp() {}
 
   void TearDown() {}
+
+ public:
+  NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) {
+    GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
+    auto op_desc = std::make_shared<OpDesc>(name, type);
+    for (auto i = 0; i < in_num; ++i) {
+      op_desc->AddInputDesc(test_desc);
+    }
+    for (auto i = 0; i < out_num; ++i) {
+      op_desc->AddOutputDesc(test_desc);
+    }
+    return graph->AddNode(op_desc);
+  }
 };
 
 TEST_F(UtestDavinciModel, init_success) {
@@ -127,13 +139,14 @@ TEST_F(UtestDavinciModel, init_data_op) {
   model.runtime_param_.mem_size = 5120000;
 
   ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
-  OpDescPtr op_input = CreateOpDesc("data", DATA);
   GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
   TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_input = CreateOpDesc("data", DATA);
   op_input->AddInputDesc(tensor);
   op_input->AddOutputDesc(tensor);
   op_input->SetInputOffset({1024});
-  op_input->SetOutputOffset({5120});
+  op_input->SetOutputOffset({1024});
   NodePtr node_input = graph->AddNode(op_input);
 
   OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
@@ -156,12 +169,14 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) {
   model.runtime_param_.mem_size = 5120000;
 
   ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
-  OpDescPtr op_input = CreateOpDesc("data", DATA);
   GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_input = CreateOpDesc("data", DATA);
   op_input->AddInputDesc(tensor);
   op_input->AddOutputDesc(tensor);
   op_input->SetInputOffset({1024});
-  op_input->SetOutputOffset({5120});
+  op_input->SetOutputOffset({1024});
   NodePtr node = graph->AddNode(op_input);
 
   uint32_t data_op_index = 0;
@@ -180,8 +195,10 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
   model.runtime_param_.mem_size = 5120000;
 
   ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
-  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
   GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
   op_output->AddInputDesc(tensor);
   op_output->SetInputOffset({1024});
   op_output->SetSrcName( { "data" } );
@@ -324,5 +341,422 @@ TEST_F(UtestDavinciModel, SyncVarData_test) {
   EXPECT_NE(model.SyncVarData(), SUCCESS);
 }
 
+TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  OpDescPtr op_output = CreateOpDesc("output_ascend_mbatch_batch_1", NETOUTPUT);
+  op_output->AddInputDesc(tensor);
+  op_output->SetInputOffset({1024});
+  NodePtr node_output = graph->AddNode(op_output);
+  EXPECT_EQ(model.InitRealSizeAndShapeInfo(graph, node_output), SUCCESS);
+}
+
+TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ2) {
+  DavinciModel model(0, nullptr);
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+
+  OpDescPtr data1 = CreateOpDesc("data1", DATA);
+  GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+  data1->AddInputDesc(shape_desc);
+  data1->AddOutputDesc(shape_desc);
+  NodePtr data1_node = graph->AddNode(data1);
+
+  OpDescPtr case_node = CreateOpDesc("case1", CASE);
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  case_node->AddInputDesc(tensor);
+  case_node->AddOutputDesc(tensor);
+  NodePtr case1_node = graph->AddNode(case_node);
+
+  OpDescPtr output = CreateOpDesc("output1", NETOUTPUT);
+  output->AddInputDesc(tensor);
+  output->SetSrcName( { "case1" } );
+  output->SetSrcIndex( { 0 } );
+  NodePtr output_node = graph->AddNode(output);
+
+  GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0));
+  GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
+
+  (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8");
+  (void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true);
+
+  model.is_getnext_sink_dynamic_ = false;
+  model.is_online_infer_dynamic_ = true;
+  auto ret = model.InitRealSizeAndShapeInfo(graph, output_node);
+  // GetGearAndRealOutShapeInfo without ATTR_NAME_DYNAMIC_OUTPUT_DIMS
+  EXPECT_EQ(ret, SUCCESS);
+  vector<string> dynamic_output_dims = {"0,0,1,1,0,2,2,0,4,3,0,8"};
+  (void)AttrUtils::SetListStr(output_node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims);
+  ret = model.InitRealSizeAndShapeInfo(graph, output_node);
+  EXPECT_EQ(ret, SUCCESS);
+}
+
+TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ3) {
+  DavinciModel model(0, nullptr);
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+
+  OpDescPtr data1 = CreateOpDesc("data1", DATA);
+  GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+  data1->AddInputDesc(shape_desc);
+  data1->AddOutputDesc(shape_desc);
+  NodePtr data1_node = graph->AddNode(data1);
+
+  OpDescPtr shape_node = CreateOpDesc("ascend_mbatch_get_dynamic_dims_node", GETDYNAMICDIMS);
+  GeTensorDesc in_tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  GeTensorDesc out_tensor(GeShape({4,3}), FORMAT_NCHW, DT_FLOAT);
+  shape_node->AddInputDesc(in_tensor);
+  shape_node->AddOutputDesc(out_tensor);
+  NodePtr get_dynamic_dims_node = graph->AddNode(shape_node);
+
+  OpDescPtr output = CreateOpDesc("output1", NETOUTPUT);
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  output->AddInputDesc(tensor);
+  output->SetSrcName( { "data1", "ascend_mbatch_get_dynamic_dims_node" } );
+  output->SetSrcIndex( { 0, 1 } );
+  NodePtr output_node = graph->AddNode(output);
+  GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
+  GraphUtils::AddEdge(get_dynamic_dims_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(1));
+
+  (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1,3;;4,3;,3");
+
+  model.is_getnext_sink_dynamic_ = true;
+  model.is_online_infer_dynamic_ = false;
+  auto ret = model.InitRealSizeAndShapeInfo(graph, output_node);
+  EXPECT_EQ(ret, SUCCESS);
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 4;
+  ret = model.InitRealSizeAndShapeInfo(graph, output_node);
+  EXPECT_EQ(ret, SUCCESS);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_info) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);
+
+  GeAttrValue::NAMED_ATTRS aipp_attr;
+  aipp_attr.SetAttr("aipp_mode", GeAttrValue::CreateFrom<GeAttrValue::INT>(domi::AippOpParams::dynamic));
+  aipp_attr.SetAttr("related_input_rank", GeAttrValue::CreateFrom<GeAttrValue::INT>(0));
+  aipp_attr.SetAttr("max_src_image_size", GeAttrValue::CreateFrom<GeAttrValue::INT>(2048));
+  aipp_attr.SetAttr("support_rotation", GeAttrValue::CreateFrom<GeAttrValue::INT>(1));
+  EXPECT_TRUE(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr));
+
+  AippConfigInfo aipp_info;
+  EXPECT_EQ(model.GetAippInfo(0, aipp_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetAippInfo(0, aipp_info), SUCCESS);
+  EXPECT_EQ(aipp_info.aipp_mode, domi::AippOpParams::dynamic);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_static) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);
+
+  AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp");
+
+  InputAippType aipp_type;
+  size_t aipp_index = 0;
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(aipp_type, DATA_WITH_STATIC_AIPP);
+  EXPECT_EQ(aipp_index, 0xFFFFFFFFu);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_dynamic) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);  // op_index 0
+  AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp");
+  AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp");
+
+  InputAippType aipp_type;
+  size_t aipp_index = 0;
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_releated) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  {
+    OpDescPtr op_desc = CreateOpDesc("data", DATA);
+    op_desc->AddInputDesc(tensor);
+    op_desc->AddOutputDesc(tensor);
+    op_desc->SetInputOffset({1024});
+    op_desc->SetOutputOffset({1024});
+    NodePtr node = graph->AddNode(op_desc);  // op_index 0
+    AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp");
+    AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp");
+  }
+  {
+    OpDescPtr op_desc = CreateOpDesc("releated_aipp", DATA);
+    op_desc->AddInputDesc(tensor);
+    op_desc->AddOutputDesc(tensor);
+    op_desc->SetInputOffset({1024});
+    op_desc->SetOutputOffset({1024});
+    NodePtr node = graph->AddNode(op_desc);  // op_index 1
+  }
+
+  InputAippType aipp_type;
+  size_t aipp_index = 0;
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(aipp_type, DATA_WITH_DYNAMIC_AIPP);
+  EXPECT_EQ(aipp_index, 1);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 2);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 2);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_dynamic_conf) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);  // op_index 0
+  AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf");
+
+  InputAippType aipp_type;
+  size_t aipp_index = 0;
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(aipp_type, DYNAMIC_AIPP_NODE);
+  EXPECT_EQ(aipp_index, 0xFFFFFFFFU);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_dynamic_invalid) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);  // op_index 0
+  AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_invalid");
+
+  InputAippType aipp_type;
+  size_t aipp_index = 0;
+  EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
+  EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_input_info_empty) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);  // op_index 0
+
+  vector<string> inputs = {};
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
+  vector<string> outputs = {};
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);
+
+  OriginInputInfo orig_input_info;
+  EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_input_info_normal) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);  // op_index 0
+
+  vector<string> inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
+  vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);
+
+  OriginInputInfo orig_input_info;
+  EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_input_info_invalid) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);  // op_index 0
+
+  vector<string> inputs = { "NCHW:DT_FLOAT:TensorName" };  // Invalid
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
+  vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);
+
+  OriginInputInfo orig_input_info;
+  EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
+  EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID);
+  EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
+
+TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) {
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();  // for CustAICPUKernelStore::GetCustAICPUKernelStore()
+  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
+  model.runtime_param_.mem_size = 5120000;
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+
+  OpDescPtr op_desc = CreateOpDesc("data", DATA);
+  op_desc->AddInputDesc(tensor);
+  op_desc->AddOutputDesc(tensor);
+  op_desc->SetInputOffset({1024});
+  op_desc->SetOutputOffset({1024});
+  NodePtr node = graph->AddNode(op_desc);  // op_index 0
+
+  vector<string> inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
+  vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
+  AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);
+
+  vector<InputOutputDims> input_dims;
+  vector<InputOutputDims> output_dims;
+  EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), ACL_ERROR_GE_AIPP_NOT_EXIST);
+  EXPECT_EQ(model.InitNodes(graph), SUCCESS);
+  EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), SUCCESS);
+  EXPECT_EQ(input_dims.size(), 1);
+  EXPECT_EQ(output_dims.size(), 1);
+
+  EXPECT_EQ(model.input_addrs_list_.size(), 1);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.op_list_.size(), 1);
+}
 }  // namespace ge
diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc
index 43abc54b..fe886b49 100644
--- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc
+++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc
@@ -1120,7 +1120,6 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_init_success) {
   op_desc->AddOutputDesc(descout);
   op_desc->SetId(0);
 
-  model.data_op_list_.push_back(op_desc);
   model.op_list_[0] = op_desc;
 
   domi::TaskDef task_def;
diff --git a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc
new file mode 100644
index 00000000..b1cd6d4d
--- /dev/null
+++ b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc
@@ -0,0 +1,247 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph/passes/multi_batch_clone_pass.h"
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+
+#include "inc/pass_manager.h"
+#include "graph/utils/tensor_utils.h"
+#include "graph/common/local_context.h"
+#include "graph/passes/multi_batch_pass.h"
+#include "graph/preprocess/multi_batch_copy_graph.h"
+#include "graph/preprocess/insert_op/util_insert_aipp_op.h"
+#include "framework/omg/omg_inner_types.h"
+#include "register/op_registry.h"
+
+namespace ge {
+class UtestMultiBatchClonePass : public testing::Test {
+ protected:
+  void SetUp() {
+    SetLocalOmgContext(domi::GetContext());
+    GetLocalOmgContext().dynamic_image_size.clear();
+    GetLocalOmgContext().dynamic_batch_size.clear();
+  }
+  void TearDown() {
+    GetLocalOmgContext().dynamic_image_size.clear();
+    GetLocalOmgContext().dynamic_batch_size.clear();
+    GetLocalOmgContext().dynamic_node_type.clear();
+  }
+
+ public:
+  NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) {
+    GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
+    auto op_desc = std::make_shared<OpDesc>(name, type);
+    for (auto i = 0; i < in_num; ++i) {
+      op_desc->AddInputDesc(test_desc);
+    }
+    for (auto i = 0; i < out_num; ++i) {
+      op_desc->AddOutputDesc(test_desc);
+    }
+    return graph->AddNode(op_desc);
+  }
+
+  NodePtr MakeConstNode(const ComputeGraphPtr &graph) {
+    static uint32_t index = 0;
+    GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
+    auto op_desc = std::make_shared<OpDesc>("dynamic_const_" + std::to_string(index++), "Const");
+    op_desc->AddOutputDesc(test_desc);
+    return graph->AddNode(op_desc);
+  }
+
+  void make_original_graph(const ComputeGraphPtr &graph) {
+    auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
+    {
+      auto data1 = MakeNode(graph, 1, 1, "data", "Data");
+      GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+      data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
+      data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
+      AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
+      GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};
+
+      GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
+      auto const1 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
+      auto const2 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
+    }
+
+    auto bn_conv1 = MakeNode(graph, 4, 1, "bn_conv1", "BNInference");
+    {
+      GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(0));
+      auto const1 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const1->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(1));
+      auto const2 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const2->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(2));
+      auto const3 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const3->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(3));
+    }
+
+    auto scale_conv1 = MakeNode(graph, 4, 1, "scale1", "Scale");
+    {
+      GraphUtils::AddEdge(bn_conv1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(0));
+      auto const1 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(1));
+      auto const2 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const2->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(2));
+    }
+
+    auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
+    GraphUtils::AddEdge(scale_conv1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
+  }
+
+  void GraphWithJustData(const ComputeGraphPtr &graph) {
+    auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
+    {
+      auto data1 = MakeNode(graph, 1, 1, "data", "Data");
+      GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+      data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
+      data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
+      AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
+      GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};
+
+      GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
+      auto const1 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
+      auto const2 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
+    }
+
+    auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
+    GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
+  }
+
+  void GraphWithGetNextNosink(const ComputeGraphPtr &graph) {
+    auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
+    {
+      auto data1 = MakeNode(graph, 1, 1, "IteratorGetNext_data", "Data");
+      GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+      data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
+      data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
+      AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
+      GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};
+
+      GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
+      auto const1 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
+      auto const2 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
+    }
+
+    auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
+    GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
+  }
+
+  // getnext sink node has one data output and one shape output
+  void GraphWithGetNextSink(const ComputeGraphPtr &graph) {
+    auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
+    {
+      auto data1 = MakeNode(graph, 1, 2, "data", "IteratorV2");
+      GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+      GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+      data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
+      data1->GetOpDesc()->UpdateOutputDesc(1, shape_desc);
+      AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
+      GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};
+
+      GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
+      auto identity = MakeNode(graph, 1, 0, "identity", "Identity");
+      GraphUtils::AddEdge(data1->GetOutDataAnchor(1), identity->GetInDataAnchor(0));
+      auto const1 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
+      auto const2 = MakeConstNode(graph);
+      GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
+    }
+
+    auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
+    GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
+  }
+};
+
+// graph is nullptr
+TEST_F(UtestMultiBatchClonePass, graph_nullptr) {
+  PassManager pass_manager;
+  pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
+  ComputeGraphPtr graph;
+  EXPECT_EQ(pass_manager.Run(graph), PARAM_INVALID);
+}
+
+// graph with subgraph
+TEST_F(UtestMultiBatchClonePass, graph_with_subgraph) {
+  PassManager pass_manager;
+  pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  make_original_graph(graph);
+  EXPECT_EQ(pass_manager.Run(graph), SUCCESS);
+
+  ComputeGraphPtr owner = std::make_shared<ComputeGraph>("test_owner");
+  auto func_node = MakeNode(owner, 3, 1, "test_if", "If");
+  graph->SetParentNode(func_node);
+  graph->SetParentGraph(owner);
+  EXPECT_EQ(pass_manager.Run(graph), SUCCESS);
+}
+
+// graph does not need multi batch, so the pass leaves it unchanged
+TEST_F(UtestMultiBatchClonePass, uncompute_graph) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  make_original_graph(graph);
+  GetLocalOmgContext().need_multi_batch = false;
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+}
+
+// compute graph with data from DATA
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_data) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithJustData(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  GetLocalOmgContext().dynamic_node_type = DATA;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().data_nodes.size(), 1);
+}
+
+// compute graph with data from GetNext_nosink
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_nosink) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithGetNextNosink(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  GetLocalOmgContext().dynamic_node_type = GETNEXT;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 1);
+}
+
+// compute graph with data from GetNext_sink
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_sink) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithGetNextSink(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  GetLocalOmgContext().dynamic_node_type = GETNEXT;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 0);
+}
+
+}  // namespace ge
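
Closing note: the new unit tests hand PassManager raw new (std::nothrow) pointers without a matching delete, which works because the manager takes ownership of added passes. A minimal stand-in illustrating that ownership pattern (this is not GE's real PassManager; Graph, Pass and the status codes are placeholders):

#include <memory>
#include <new>
#include <string>
#include <utility>
#include <vector>

struct Graph {};  // stand-in for ge::ComputeGraph
using Status = int;
constexpr Status SUCCESS = 0;
constexpr Status PARAM_INVALID = 1;

class Pass {
 public:
  virtual ~Pass() = default;
  virtual Status Run(Graph &graph) = 0;
};

class SimplePassManager {
 public:
  Status AddPass(const std::string &name, Pass *pass) {
    if (pass == nullptr) {
      return PARAM_INVALID;  // new (std::nothrow) may have returned nullptr
    }
    passes_.emplace_back(name, std::unique_ptr<Pass>(pass));  // take ownership
    return SUCCESS;
  }
  Status Run(Graph &graph) {
    for (auto &named_pass : passes_) {
      Status ret = named_pass.second->Run(graph);
      if (ret != SUCCESS) {
        return ret;  // stop at the first failing pass
      }
    }
    return SUCCESS;
  }

 private:
  std::vector<std::pair<std::string, std::unique_ptr<Pass>>> passes_;
};

struct NoopPass : Pass {
  Status Run(Graph &graph) override { return SUCCESS; }
};

int main() {
  SimplePassManager pm;
  pm.AddPass("Noop", new (std::nothrow) NoopPass);  // manager now owns the pointer
  Graph graph;
  return pm.Run(graph);
}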