
Merge branch 'development' of https://gitee.com/chen-hua-baker/graphengine into development

pull/873/head
baker committed 5 years ago
commit 2bf7448321
52 changed files with 2350 additions and 728 deletions
  1. +6 -6 CMakeLists.txt
  2. +47 -17 ge/CMakeLists.txt
  3. +2 -0 ge/common/profiling/profiling_manager.cc
  4. +3 -0 ge/common/types.cc
  5. +1 -38 ge/executor/ge_executor.cc
  6. +52 -0 ge/graph/build/graph_builder.cc
  7. +1 -0 ge/graph/build/graph_builder.h
  8. +0 -45 ge/graph/build/memory/CMakeLists.txt
  9. +87 -26 ge/graph/build/task_generator.cc
  10. +5 -2 ge/graph/build/task_generator.h
  11. +2 -26 ge/graph/execute/graph_execute.cc
  12. +1 -5 ge/graph/execute/graph_execute.h
  13. +290 -227 ge/graph/load/new_model_manager/davinci_model.cc
  14. +34 -39 ge/graph/load/new_model_manager/davinci_model.h
  15. +0 -75 ge/graph/load/new_model_manager/davinci_model_parser.cc
  16. +9 -28 ge/graph/load/new_model_manager/model_manager.cc
  17. +4 -18 ge/graph/load/new_model_manager/model_manager.h
  18. +3 -1 ge/graph/load/new_model_manager/task_info/hccl_task_info.cc
  19. +24 -4 ge/graph/manager/graph_manager.cc
  20. +1 -0 ge/graph/manager/graph_manager.h
  21. +33 -0 ge/graph/optimize/graph_optimize.cc
  22. +3 -0 ge/graph/optimize/graph_optimize.h
  23. +5 -1 ge/graph/passes/common_subexpression_elimination_pass.cc
  24. +474 -79 ge/graph/passes/multi_batch_clone_pass.cc
  25. +41 -17 ge/graph/passes/multi_batch_clone_pass.h
  26. +4 -0 ge/graph/passes/unused_args_clean_pass.cc
  27. +172 -7 ge/graph/preprocess/graph_preprocess.cc
  28. +3 -3 ge/graph/preprocess/graph_preprocess.h
  29. +5 -7 ge/graph/preprocess/multi_batch_copy_graph.cc
  30. +2 -3 ge/graph/preprocess/multi_batch_options.cc
  31. +23 -20 ge/hybrid/executor/hybrid_model_async_executor.cc
  32. +4 -2 ge/hybrid/executor/hybrid_model_async_executor.h
  33. +2 -0 ge/hybrid/executor/worker/execution_engine.cc
  34. +195 -1 ge/hybrid/model/hybrid_model_builder.cc
  35. +6 -0 ge/hybrid/model/hybrid_model_builder.h
  36. +33 -0 ge/hybrid/node_executor/rts/rts_node_executor.cc
  37. +13 -0 ge/hybrid/node_executor/rts/rts_node_executor.h
  38. +1 -1 ge/hybrid/node_executor/task_context.h
  39. +26 -1 ge/ir_build/ge_ir_build.cc
  40. +19 -10 ge/offline/keep_dtype_option.cc
  41. +2 -0 ge/offline/main.cc
  42. +5 -0 inc/external/ge/ge_api_types.h
  43. +11 -8 inc/framework/common/ge_types.h
  44. +3 -0 inc/framework/common/types.h
  45. +0 -3 inc/framework/executor/ge_executor.h
  46. +3 -0 inc/framework/omg/omg_inner_types.h
  47. +1 -1 metadef
  48. +1 -1 parser
  49. +2 -0 tests/ut/ge/CMakeLists.txt
  50. +439 -5 tests/ut/ge/graph/load/davinci_model_unittest.cc
  51. +0 -1 tests/ut/ge/graph/load/kernel_task_info_unittest.cc
  52. +247 -0 tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc

+6 -6 CMakeLists.txt

@@ -74,7 +74,7 @@ if (ENABLE_OPEN_SRC)
set(STATIC_ACL_LIB ${GE_LIB_PATH})
find_module(slog libslog.so ${GE_LIB_PATH})
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH})
find_module(hccl libhccl.so ${GE_LIB_PATH})
find_module(adump_server libadump_server.a ${GE_LIB_PATH})
find_module(runtime libruntime.so ${GE_LIB_PATH})
@@ -83,7 +83,7 @@ if (ENABLE_OPEN_SRC)
find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
@@ -97,7 +97,7 @@ if (ENABLE_OPEN_SRC)
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
if(PRODUCT STREQUAL "flr3")
message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
@@ -109,7 +109,7 @@ if (ENABLE_OPEN_SRC)
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
elseif(PRODUCT STREQUAL "flr1")
@@ -120,7 +120,7 @@ if (ENABLE_OPEN_SRC)
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
endif()
elseif(PLATFORM STREQUAL "all")
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
@@ -128,7 +128,7 @@ if (ENABLE_OPEN_SRC)
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
else()


+47 -17 ge/CMakeLists.txt

@@ -1,7 +1,6 @@
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
add_subdirectory(common)
add_subdirectory(plugin/engine)
add_subdirectory(graph/build/memory)
add_subdirectory(ge_local_engine)
add_subdirectory(host_cpu_engine)
add_subdirectory(executor)
@@ -342,6 +341,13 @@ set(TRAIN_SRC_LIST
"analyzer/analyzer.cc"
"ir_build/ge_ir_build.cc"
"ir_build/atc_ir_common.cc"
"graph/build/memory/memory_assigner.cc"
"graph/build/memory/graph_mem_assigner.cc"
"graph/build/memory/binary_block_mem_assigner.cc"
"graph/build/memory/block_mem_assigner.cc"
"graph/build/memory/hybrid_mem_assigner.cc"
"graph/build/memory/max_block_mem_assigner.cc"
"graph/build/memory/var_mem_assign_util.cc"
)

set(INFER_SRC_LIST
@@ -611,11 +617,35 @@ set(INFER_SRC_LIST
"graph/label/while_label_maker.cc"
"graph/label/partitioned_call_label_maker.cc"
"analyzer/analyzer.cc"
"graph/build/memory/memory_assigner.cc"
"graph/build/memory/graph_mem_assigner.cc"
"graph/build/memory/binary_block_mem_assigner.cc"
"graph/build/memory/block_mem_assigner.cc"
"graph/build/memory/hybrid_mem_assigner.cc"
"graph/build/memory/max_block_mem_assigner.cc"
"graph/build/memory/var_mem_assign_util.cc"
)

if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_runner.so ############
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})
add_library(ge_runner SHARED
${TRAIN_SRC_LIST}
${PROTO_SRCS}
${PROTO_CLIENT_SRCS}
$<TARGET_OBJECTS:$<IF:$<TARGET_EXISTS:msprofiler_fwk>,msprofiler_fwk,msprofiler_fwk_object>>
)

add_library(msprofiler_fwk_object OBJECT IMPORTED GLOBAL)

if (msprofiler_fwk_ext_LIBRARY_DIR)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object)
execute_process(
COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object
)
file(GLOB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o)
set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST})
endif()

target_compile_definitions(ge_runner PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
@@ -660,12 +690,8 @@ target_include_directories(ge_runner PRIVATE

target_link_libraries(ge_runner PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
adump_server
static_mmpa
-Wl,--whole-archive
msprofiler_fwk
-Wl,--no-whole-archive
-Wl,--no-as-needed
graph
ge_common
@@ -728,7 +754,6 @@ target_include_directories(ge_compiler PRIVATE

target_link_libraries(ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
static_mmpa
-Wl,--no-as-needed
graph
@@ -755,7 +780,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object)
if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a)
execute_process(
COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object
)
file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o)
else()
@@ -764,8 +789,21 @@ endif()

add_library(opensrc_ascendcl SHARED
${OBJECT_LIST}
$<TARGET_OBJECTS:$<IF:$<TARGET_EXISTS:msprofiler>,msprofiler,msprofiler_object>>
)

add_library(msprofiler_object OBJECT IMPORTED GLOBAL)

if (msprofiler_ext_LIBRARY_DIR)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object)
execute_process(
COMMAND ar x ${msprofiler_ext_LIBRARY_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object
)
file(GLOB MSPROFILER_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o)
set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_OBJECT_LIST})
endif()

target_compile_definitions(opensrc_ascendcl PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
@@ -780,14 +818,7 @@ target_link_options(opensrc_ascendcl PRIVATE
-Wl,--allow-multiple-definition
-Wl,-z,muldefs
-Wl,-Bsymbolic
-Wl,--exclude-libs,libascend_protobuf.a
-Wl,--exclude-libs,libge_executor.a
-Wl,--exclude-libs,libge_common.a
-Wl,--exclude-libs,libgraph.a
-Wl,--exclude-libs,libmmpa.a
-Wl,--exclude-libs,libregister.a
-Wl,--exclude-libs,liberror_manager.a
-Wl,--exclude-libs,libadump_server.a
-Wl,--exclude-libs,ALL
)
target_link_libraries(opensrc_ascendcl PRIVATE
-Wl,--whole-archive
@@ -799,7 +830,6 @@ target_link_libraries(opensrc_ascendcl PRIVATE
register_static
error_manager_static
adump_server
msprofiler
-Wl,--no-whole-archive
-Wl,--no-as-needed
c_sec


+2 -0 ge/common/profiling/profiling_manager.cc

@@ -302,6 +302,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
}

data.append(" model_id:").append(std::to_string(model_id));
data.append(" task_id:").append(std::to_string(graph.task_id));
data.append(" stream_id:").append(std::to_string(graph.stream_id));
data.append("\n");

GraphDescReport(device_id, data);


+3 -0 ge/common/types.cc

@@ -480,6 +480,9 @@ REGISTER_OPTYPE_DEFINE(HVDWAIT, "HorovodWait");
// aicpu op for online_infer dynamic_dims
REGISTER_OPTYPE_DEFINE(GETDYNAMICDIMS, "GetDynamicDims");

// profiling training trace node
REGISTER_OPTYPE_DEFINE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace");

const std::string MODEL_ATTR_TASKS = "tasks";
const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR = "task_gen_base_addr";
const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr";


+1 -38 ge/executor/ge_executor.cc

@@ -676,7 +676,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}
Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info);
Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info);
if (ret != SUCCESS) {
GELOGW("GetAIPPInfo is not success.");
return ret;
@@ -713,43 +713,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn
return SUCCESS;
}

Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<TensorDesc> &output_desc) {
GELOGI("get model desc info for zero copy begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

std::vector<InputOutputDescInfo> input_desc_infos;
std::vector<InputOutputDescInfo> output_desc_infos;
std::vector<uint32_t> input_formats;
std::vector<uint32_t> output_formats;

Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos,
input_formats, output_formats);
if (ret != domi::SUCCESS) {
GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret);
return ACL_ERROR_GE_GET_TENSOR_INFO;
}

if (input_formats.size() != input_desc_infos.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "input_formats.size() != input_desc_infos.size().");
return ACL_ERROR_GE_PARAM_INVALID;
}

if (output_formats.size() != output_desc_infos.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats.size() != output_desc_infos.size().");
return ACL_ERROR_GE_PARAM_INVALID;
}

GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats);
GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats);

GELOGI("get model desc info from zero copy end.");
return ge::SUCCESS;
}

Status GeExecutor::CommandHandle(const Command &command) {
Status ret = GraphLoader::CommandHandle(command);
if (ret != SUCCESS) {


+52 -0 ge/graph/build/graph_builder.cc

@@ -421,6 +421,52 @@ static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph
return SUCCESS;
}

Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
com_graph->SetGraphUnknownFlag(false);

GELOGD("Start to mark profiling task attr for fp and bp.");
TaskGenerator task_generator;
ProfilingPoint profiling_point;
std::vector<uint32_t> all_reduce_node_index;
Status ret = task_generator.FindProfilingNodeIndex(com_graph, profiling_point, all_reduce_node_index);
com_graph->SetGraphUnknownFlag(original_unknown_shape_flag);
if (ret != SUCCESS) {
GELOGW("Find profiling node index failed.");
}
if (profiling_point.fp_index == 0 || profiling_point.bp_index == 0 || profiling_point.end_index.empty()) {
GELOGD("No need to mark fp bp profiling task attr.");
return SUCCESS;
}
// mark profiling task attr for node
uint32_t node_index = 0;
for (const auto &node : com_graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(node->GetOpDesc());
node_index++;
if (profiling_point.fp_index == node_index) {
GELOGI("The first fp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true);
}
if (profiling_point.bp_index == node_index) {
GELOGI("The bp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
}
for (size_t i = 0; i < all_reduce_node_index.size(); i++) {
if (all_reduce_node_index[i] == node_index) {
GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
continue;
}
}
if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {
GELOGI("The end node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true);
}
}
return SUCCESS;
}

Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
@@ -437,6 +483,12 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
}
}

// Set fp bp profiling task attr for graph
if (MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) {
GELOGE(FAILED, "Set fp bp profiling task attr for graph.");
return FAILED;
}

auto all_graphs = comp_graph->GetAllSubgraphs();
if (all_graphs.empty()) {
all_graphs.push_back(comp_graph);
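
Note on the design above: FindProfilingNodeIndex runs once on the root graph (with the unknown-shape flag temporarily cleared) and the resulting FP/BP/AllReduce/end points are persisted as boolean node attributes, so later stages can recover them without recomputing node indices. A minimal consumer-side sketch, assuming the GE headers this file already includes (the attribute names are exactly the ones set above; the surrounding control flow is simplified, not the real GE code):

// Read back the markers written by MarkFpBpProfilingTaskAttr when tasks are
// generated for the partitioned graph; both flags default to false if unset.
bool insert_fp_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, insert_fp_task);
bool insert_bp_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, insert_bp_task);
if (insert_fp_task) {
  // emit an RT_MODEL_TASK_PROFILER_TRACE task ahead of this node's tasks,
  // as InsertProfilingTaskBefore in task_generator.cc does in this commit
}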


+1 -0 ge/graph/build/graph_builder.h

@@ -60,6 +60,7 @@ class GraphBuilder {
Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr);
Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc);
Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list);
Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph);
Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
uint64_t session_id = INVALID_SESSION_ID);


+0 -45 ge/graph/build/memory/CMakeLists.txt

@@ -1,45 +0,0 @@
set(SRC_LIST
"memory_assigner.cc"
"graph_mem_assigner.cc"
"binary_block_mem_assigner.cc"
"block_mem_assigner.cc"
"hybrid_mem_assigner.cc"
"max_block_mem_assigner.cc"
"var_mem_assign_util.cc"
)

############ libge_memory.a ############
add_library(ge_memory STATIC ${SRC_LIST})

target_compile_options(ge_memory PRIVATE
-Werror
-O2
-fno-common
)

target_compile_definitions(ge_memory PRIVATE
google=ascend_private
LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ge_memory PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
c_sec
)

target_include_directories(ge_memory PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${GE_CODE_DIR}/inc/framework
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

+87 -26 ge/graph/build/task_generator.cc

@@ -274,6 +274,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
};
GE_MAKE_GUARD(release, callback);

uint64_t all_reduce_node_idx = 0;
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
@@ -292,7 +293,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
// Part2: Call
auto fusion_task_info =
FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib,
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes};
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx};
GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen),
"Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str());
// continue directly
@@ -316,7 +317,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
type.c_str());
// Profiling task
size_t task_list_size_before = task_def_list.size();
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_node_idx));
int64_t op_id = op_desc->GetId();
// Compatible with dynamic shape scenes, the default is 0
int64_t stream_id = 0;
@@ -336,8 +338,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
return ret;
}
// Profiling task
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_node_idx));
size_t task_list_size_after = task_def_list.size();
// If tasks is reduced
if (task_list_size_after < task_list_size_before) {
@@ -380,6 +382,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
auto &op_name_map = fusion_task_info.op_name_map;
auto &profiling_point = fusion_task_info.profiling_point;
auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes;
auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx;
// If op_desc have this attr, call nodes with same group key in a stream together
if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) &&
(fusion_nodes_seen.count(node.get()) == 0)) {
@@ -426,7 +429,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
return INTERNAL_ERROR;
}
// profiling task
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_idx);
run_context.stream = run_context.graphStreamList[stream_id];
GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.",
op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id);
@@ -439,7 +443,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
return ret;
}
// profiling task
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_idx);
size_t task_list_size_after = task_def_list.size();
// if tasks is reduced
if (task_list_size_after < task_list_size_before) {
@@ -830,6 +835,11 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint
return SUCCESS;
}

Status TaskGenerator::FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes) {
return FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes);
}

Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes) const {
GE_CHECK_NOTNULL(graph);
@@ -840,7 +850,6 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
GELOGD("Profiling is not open.");
return SUCCESS;
}

GELOGI("Start get FP/BP index.");
std::string fp_point_str;
std::string bp_point_str;
@@ -878,18 +887,27 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
return SUCCESS;
}


Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list) {
vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx) {
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) {
bool is_insert_fp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) &&
(!(is_insert_fp_profiling_task || is_insert_bp_profiling_task));
if (!is_profiling || no_insert_profiling_task) {
return SUCCESS;
}
if (profiling_point.fp_index == node_index) {
GELOGD("Insert fp profiling task: %d, insert bp profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
is_insert_fp_profiling_task, is_insert_bp_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
profiling_point.end_index.size());

if ((profiling_point.fp_index == node_index) || is_insert_fp_profiling_task) {
uint64_t jobid_log_id = ge::GetContext().TraceId();
GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id);

@@ -913,22 +931,40 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
task_def_list.emplace_back(fp_task_def);
}

for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] != node_index) {
continue;
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
uint64_t all_reduce_task_idx = 0;
bool is_insert_all_reduce_task = false;
if (is_all_reduce && is_insert_bp_profiling_task) {
all_reduce_task_idx = all_reduce_node_idx;
is_insert_all_reduce_task = true;
}
if (is_all_reduce) {
all_reduce_node_idx++;
}
if (!is_insert_all_reduce_task) {
for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] == node_index) {
all_reduce_task_idx = i;
is_insert_all_reduce_task = true;
break;
}
}
}

if (is_insert_all_reduce_task) {
GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
if (ar_log_def != nullptr) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
auto log_id = i * kProfilingArStep + kProfilingArStartLogid;
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid;
ar_log_def->set_logid(log_id);
ar_log_def->set_notify(false);
(void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
}
task_def_list.push_back(ar_task_def);
}
@@ -937,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const

Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list) {
vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) {
GE_CHECK_NOTNULL(op_desc);
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) {
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool is_insert_end_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) &&
(!(is_insert_bp_profiling_task || is_insert_end_profiling_task));
if (!is_profiling || no_insert_profiling_task) {
return SUCCESS;
}
if (profiling_point.bp_index == node_index) {
GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
profiling_point.end_index.size() );

bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {
GELOGI("The last BP operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef bp_task_def;
bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -957,7 +1004,9 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
bp_log_def->set_notify(false);
task_def_list.emplace_back(bp_task_def);
}
if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {

if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end() ||
is_insert_end_profiling_task) {
GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef end_task_def;
end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -969,20 +1018,32 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
task_def_list.emplace_back(end_task_def);
}

uint32_t all_reduce_task_idx = 0;
bool is_insert_all_reduce_task = false;
if (is_all_reduce && is_insert_bp_profiling_task) {
all_reduce_task_idx = all_reduce_node_idx;
is_insert_all_reduce_task = true;
}

for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] != node_index) {
continue;
if (all_reduce_nodes[i] == node_index) {
all_reduce_task_idx = i;
is_insert_all_reduce_task = true;
break;
}
}

if (is_insert_all_reduce_task) {
GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
GE_CHECK_NOTNULL(ar_log_def);
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
auto log_id = i * kProfilingArStep + kProfilingArEndLogid;
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid;
ar_log_def->set_logid(log_id);
ar_log_def->set_notify(false);
task_def_list.emplace_back(ar_task_def);
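
For reference, the AllReduce log ids above are derived from the node's ordinal among AllReduce nodes, with the multiplication guarded against uint64_t overflow before the step/offset arithmetic. A self-contained sketch of that guard (kProfilingArStep and the start log-id constant are defined elsewhere in task_generator.cc; the values below are placeholders, not the real ones):

#include <cstdint>
#include <limits>

// Equivalent of the TypeUtils::CheckUint64MulOverflow guard used above.
static bool Uint64MulOverflows(uint64_t a, uint64_t b) {
  return (a != 0) && (b > std::numeric_limits<uint64_t>::max() / a);
}

// log_id = idx * kProfilingArStep + kProfilingArStartLogid, or UINT64_MAX on overflow.
static uint64_t ArStartLogId(uint64_t idx) {
  const uint64_t kProfilingArStep = 2;        // placeholder value
  const uint64_t kProfilingArStartLogid = 3;  // placeholder value
  if (Uint64MulOverflows(idx, kProfilingArStep)) {
    return UINT64_MAX;  // caller maps this to FAILED
  }
  return idx * kProfilingArStep + kProfilingArStartLogid;
}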


+5 -2 ge/graph/build/task_generator.h

@@ -51,6 +51,7 @@ struct FusionTaskInfo {
std::map<uint32_t, string> &op_name_map;
ProfilingPoint &profiling_point;
vector<uint32_t> all_reduce_nodes;
uint64_t all_reduce_node_idx;
};

class TaskGenerator {
@@ -76,6 +77,8 @@ class TaskGenerator {
///
Status GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context);

Status FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes);
private:
Status UpdateAnchorStatus(const NodePtr &node);

@@ -126,10 +129,10 @@ class TaskGenerator {
std::vector<uint32_t> &all_reduce_nodes) const;
Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
std::vector<domi::TaskDef> &task_def_list);
std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx);
Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
std::vector<domi::TaskDef> &task_def_list);
std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx);

static bool IsProfPoint(const OpDescPtr &op, const std::string &name);



+2 -26 ge/graph/execute/graph_execute.cc

@@ -560,34 +560,10 @@ Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<string> &dynam
return SUCCESS;
}

Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &out_formats) {
try {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret =
model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats);
if (ret != SUCCESS) {
GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed.");
return ret;
}
} catch (std::bad_alloc &) {
GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfoForZeroCopy failed, bad memory allocation occur !");
return MEMALLOC_FAILED;
} catch (...) {
GELOGE(FAILED, "GetInputOutputDescInfoForZeroCopy failed, some exceptions occur !");
return FAILED;
}

return SUCCESS;
}

Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
Status GraphExecutor::GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info);
Status ret = model_manager->GetAippInfo(model_id, index, aipp_info);
if (ret != SUCCESS) {
GELOGW("GetAIPPInfo is not success.");
return ret;


+1 -5 ge/graph/execute/graph_execute.h

@@ -73,7 +73,7 @@ class GraphExecutor {
vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats, bool new_model_desc = false);

static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
static Status GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);

static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);

@@ -110,10 +110,6 @@ class GraphExecutor {

static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);

static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);
static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);


+290 -227 ge/graph/load/new_model_manager/davinci_model.cc

@@ -75,7 +75,6 @@
namespace ge {
namespace {
const uint32_t kDataIndex = 0;
const uint32_t kOutputNum = 1;
const uint32_t kTrueBranchStreamNum = 1;
const uint32_t kGetDynamicDimsCount = 1;
const uint32_t kThreadNum = 16;
@@ -87,6 +86,7 @@ const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024
const uint32_t kDumpFlagOfL1Fusion = 0;
const char *const kDefaultBatchLable = "Batch_default";
const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node";
const char *const kMultiBatchNodePostfix = "_ascend_mbatch_batch_";
const int32_t kInvalidStream = -1;
const uint32_t kEndOfSequence = 0x0704000a;
const uint32_t kEndOfSequenceNew = 507005;
@@ -155,7 +155,6 @@ DavinciModel::~DavinciModel() {
GE_CHK_STATUS(ModelRunStop());

op_list_.clear();
data_op_list_.clear();
tensor_name_to_fixed_addr_size_.clear();
tensor_name_to_peer_output_index_.clear();
GE_DELETE_NEW_SINGLE(data_inputer_);
@@ -867,13 +866,17 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
if (InitRealSizeAndShapeInfo(compute_graph, node) != SUCCESS) {
GELOGE(PARAM_INVALID, "Init real size and shape failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
continue;
}

auto it = op_desc_handle.find(op_desc->GetType());
if (it != op_desc_handle.end()) {
if ((this->*it->second)(op_desc) != SUCCESS) {
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
GELOGE(PARAM_INVALID, "Node init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
continue;
@@ -926,7 +929,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {

GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc.");
GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle.");
return OptInputOutputInfo(data_by_index, output_op_list);
return GenInputOutputInfo(data_by_index, output_op_list);
}

void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
@@ -969,9 +972,6 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
}

data_by_index[data_index] = op_desc;
auto data_op = AttrUtils::CopyOpDesc(op_desc);
GE_CHECK_NOTNULL(data_op);
data_op_list_.push_back(data_op);
if (known_node_) {
return SUCCESS;
}
@@ -1017,23 +1017,18 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
/// @param [in] output_op_list: list of NetOutput op.
/// @return Status
///
Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index,
Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index,
const vector<OpDescPtr> &output_op_list) {
GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size());
if (data_by_index.size() != data_op_list_.size()) {
GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size());
return INTERNAL_ERROR;
}

data_op_list_.clear();
GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size());
for (auto &item : data_by_index) {
auto data_op = AttrUtils::CopyOpDesc(item.second);
GE_CHECK_NOTNULL(data_op);
data_op_list_.emplace_back(data_op);
auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
input_addrs_list_.emplace_back(output_addrs);

GE_CHK_STATUS_RET(InitAippInfo(item.first, item.second), "Init AIPP Info failed");
GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed");
GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed");
GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed");
if (item.second->GetType() == AIPP_DATA_TYPE) {
GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str());
is_dynamic_aipp_ = true;
@@ -1061,7 +1056,8 @@ Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_
}
}

return InitOutputDescInfo(output_op_list, output_descs_, output_formats_);
GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed");
return InitOutputDescInfo(output_op_list);
}

bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
@@ -1143,16 +1139,24 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
real_virtual_addrs_.insert(real_addr);
}
}
return SUCCESS;
}

Status DavinciModel::InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node) {
if (node->GetName().find(kMultiBatchNodePostfix) != string::npos) {
GELOGD("No need to get size and shape of netoutput in subgraph.");
return SUCCESS;
}
GELOGD("Start init real size and shape info of %s.", node->GetName().c_str());
GetAllGearsInfo(node);
if (is_getnext_sink_dynamic_) {
GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS,
GELOGE(PARAM_INVALID, "Failed to get info of getdynamicdims node."); return PARAM_INVALID;);
}
if (is_online_infer_dynamic_) {
GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(input_count, node) != SUCCESS,
GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(compute_graph, node) != SUCCESS,
GELOGE(PARAM_INVALID, "Failed to get gear and real out size info."); return PARAM_INVALID;);
GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(compute_graph, node) != SUCCESS,
GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;);
}

@@ -1171,7 +1175,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) {
if (shape_str.empty()) {
continue;
}
std::vector<int64_t> gear_info;
std::vector<int32_t> gear_info;
std::vector<std::string> dims = ge::StringUtils::Split(shape_str, ',');
for (const auto &dim : dims) {
if (dim.empty()) {
@@ -1187,6 +1191,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) {
}
}
}

Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) {
GE_CHECK_NOTNULL(node->GetOpDesc());
size_t input_count = node->GetAllInDataAnchors().size();
@@ -1224,11 +1229,11 @@ Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) {
return SUCCESS;
}

Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node) {
GELOGD("Start get gear and real output size info of %s, input count is %zu.", node->GetName().c_str(), input_count);
Status DavinciModel::GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node) {
GELOGD("Start get gear and real output size info of %s.", node->GetName().c_str());
merge_nodes_gear_and_real_out_size_info_.clear();
for (size_t idx = 0; idx < input_count; ++idx) {
auto in_anchor = node->GetAllInDataAnchors().at(idx);
size_t idx = 0;
for (const auto &in_anchor : node->GetAllInDataAnchors()) {
auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
if (peer_out_anchor == nullptr) {
continue;
@@ -1236,89 +1241,106 @@ Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr
auto peer_node = peer_out_anchor->GetOwnerNode();
auto op_desc = peer_node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
if ((peer_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) {
if (GetRealOutputSizeOfMerge(idx, peer_node) != SUCCESS) {
if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) {
if (GetRealOutputSizeOfCase(graph, idx, peer_node) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get real output size of %s failed.", peer_node->GetName().c_str());
return PARAM_INVALID;
}
}
idx++;
}
return SUCCESS;
}

Status DavinciModel::GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node) {
GELOGD("Start get output size of %s, which is %zu input to netoutput.", merge_node->GetName().c_str(), input_index);
std::map<vector<int64_t>, int64_t> gear_and_real_out_size_info;
for (auto &in_anchor : merge_node->GetAllInDataAnchors()) {
auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
if (peer_out_anchor == nullptr) {
continue;
}
auto in_node = peer_out_anchor->GetOwnerNode();
GELOGD("Input node of merge is %s.", in_node->GetName().c_str());
auto op_desc = in_node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
string batch_label;
if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
size_t batch_index = static_cast<size_t>(stoi(batch_label.substr(batch_label.rfind('_') + 1)));
GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index);
if (batch_index > all_gears_info_.size()) {
GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid.");
return PARAM_INVALID;
}

const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc);
int output_index = ge::AnchorUtils::GetIdx(peer_out_anchor);
auto tensor_desc = op_desc->GetOutputDescPtr(output_index);
GE_CHECK_NOTNULL(tensor_desc);
int64_t data_size = 0;
if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Get tensor size in bytes failed.");
return FAILED;
Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index,
const NodePtr &case_node) {
GELOGD("Start get output size of %s, which is %zu input to netoutput.", case_node->GetName().c_str(), input_index);
const auto &func_desc = case_node->GetOpDesc();
GE_CHECK_NOTNULL(func_desc);
std::map<vector<int32_t>, int64_t> gear_and_real_out_size_info;
for (const auto &name : func_desc->GetSubgraphInstanceNames()) {
const auto &subgraph = graph->GetSubgraph(name);
if (subgraph == nullptr) {
GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s.", name.c_str());
return GE_GRAPH_EMPTY_SUBGRAPH;
}
for (auto &node : subgraph->GetDirectNode()) {
if (node->GetType() == NETOUTPUT) {
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
string batch_label;
if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
size_t batch_index = static_cast<size_t>(stoi(batch_label.substr(batch_label.rfind('_') + 1)));
GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index);
if (batch_index > all_gears_info_.size()) {
GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid.");
return PARAM_INVALID;
}

const vector<int64_t> input_size_list = ModelUtils::GetInputSize(op_desc);
auto tensor_desc = op_desc->GetInputDescPtr(input_index);
GE_CHECK_NOTNULL(tensor_desc);
int64_t data_size = 0;
if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Get tensor size in bytes failed.");
return FAILED;
}
gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size;
GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld",
batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(),
input_size_list[input_index], data_size);
}
break;
}
gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size;
GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld",
batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(),
output_size_list[output_index], data_size);
}
}
merge_nodes_gear_and_real_out_size_info_[input_index] = gear_and_real_out_size_info;
return SUCCESS;
}

Status DavinciModel::GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc) {
GELOGD("Start to get dynamic output dims of %s.", op_desc->GetName().c_str());
Status DavinciModel::GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node) {
GELOGD("Start to get dynamic output dims of %s.", node->GetName().c_str());
merge_nodes_gear_and_real_out_shape_info_.clear();
std::vector<std::string> dynamic_output_shape_info;
if (!AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) {
GELOGD("Can not get dynamic output dims attr");
return SUCCESS;
}
GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str());
std::vector<vector<int64_t>> dynamic_output_shape;
ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape);
// idx: input_index to netoutput
for (size_t idx = 0; idx < input_count; ++idx) {
std::map<vector<int64_t>, vector<int64_t>> gear_and_real_out_shape_info;
for (auto &it : dynamic_output_shape) {
auto gear_index = static_cast<size_t>(it[0]);
if (gear_index > all_gears_info_.size()) {
GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast<size_t>(it[0]));
return PARAM_INVALID;
size_t idx = 0;
for (const auto &in_anchor : node->GetAllInDataAnchors()) {
auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
if (peer_out_anchor == nullptr) {
continue;
}
auto peer_node = peer_out_anchor->GetOwnerNode();
auto op_desc = peer_node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) {
std::vector<std::string> dynamic_output_shape_info;
if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) {
GELOGD("Can not get dynamic output dims attr from %s.", node->GetName().c_str());
return SUCCESS;
}
GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str());
std::vector<vector<int64_t>> dynamic_output_shape;
ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape);
std::map<vector<int32_t>, vector<int64_t>> gear_and_real_out_shape_info;
for (auto &it : dynamic_output_shape) {
auto gear_index = static_cast<size_t>(it[0]);
if (gear_index > all_gears_info_.size()) {
GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast<size_t>(it[0]));
return PARAM_INVALID;
}

if (static_cast<size_t>(it[1]) == idx) {
vector<int64_t> output_shape;
for (size_t i = 2; i < it.size(); ++i) {
output_shape.emplace_back(it[i]);
if (static_cast<size_t>(it[1]) == idx) {
vector<int64_t> output_shape;
for (size_t i = 2; i < it.size(); ++i) {
output_shape.emplace_back(it[i]);
}
gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape;
GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.",
gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(),
formats::JoinToString(output_shape).c_str());
}
gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape;
GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.",
gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(),
formats::JoinToString(output_shape).c_str());
}
merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info;
}
merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info;
idx++;
}
return SUCCESS;
}
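
The record layout the loop above decodes: each entry of ATTR_NAME_DYNAMIC_OUTPUT_DIMS is a vector where it[0] is the gear index, it[1] is the input index on the NetOutput, and the remaining elements are the real output shape for that gear. A minimal standalone decoder for that layout (simplified types, not the GE code itself):

#include <cstddef>
#include <cstdint>
#include <vector>

struct DynamicDimsRecord {
  size_t gear_index;           // it[0]: which gear of all_gears_info_
  size_t input_index;          // it[1]: which NetOutput input this shape belongs to
  std::vector<int64_t> shape;  // it[2..]: the real output shape for that gear
};

static DynamicDimsRecord Decode(const std::vector<int64_t> &it) {
  DynamicDimsRecord rec{static_cast<size_t>(it[0]), static_cast<size_t>(it[1]), {}};
  for (size_t i = 2; i < it.size(); ++i) {
    rec.shape.emplace_back(it[i]);
  }
  return rec;
}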
@@ -1760,73 +1782,101 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_inp
/// @ingroup ge
/// @brief Get AIPP input info
/// @param [in] index
/// @param [out] aipp_info
/// @param [int] OpDescPtr
/// @return execute result
///
Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
OpDescPtr data_op = data_op_list_[index];
if (!data_op->HasAttr(ATTR_NAME_AIPP)) {
GELOGW("GetAIPPInfo: there is not AIPP related with index %u.", index);
return ACL_ERROR_GE_AIPP_NOT_EXIST;
Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
if (!op_desc->HasAttr(ATTR_NAME_AIPP)) {
GELOGW("there is not AIPP related with index %u.", index);
return SUCCESS;
}

std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams());
GE_CHECK_NOTNULL(aipp_params);

ge::GeAttrValue::NAMED_ATTRS aipp_attr;
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
domi::AippOpParams aipp_params;
GeAttrValue::NAMED_ATTRS aipp_attr;
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
"Data node do not contain param aipp!");
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u",
data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank());
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed");
GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank());

GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info),
AippConfigInfo aipp_info;
GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(&aipp_params, aipp_info),
"convert aipp params to aipp config info failed");

aipp_info_list_[index] = aipp_info;
return SUCCESS;
}

Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) {
GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
// Set default value
type = DATA_WITHOUT_AIPP;
aipp_index = 0xFFFFFFFF; // default invalid value
OpDescPtr data_op = data_op_list_[index];
GE_CHECK_NOTNULL(data_op);
if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
///
/// @ingroup ge
/// @brief Get AIPP input info
/// @param [in] index
/// @param [out] aipp_info
/// @return execute result
///
Status DavinciModel::GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const {
const auto it = aipp_info_list_.find(index);
if (it == aipp_info_list_.end()) {
GELOGW("there is not AIPP related with index %u.", index);
return ACL_ERROR_GE_AIPP_NOT_EXIST;
}

aipp_info = it->second;
return SUCCESS;
}

Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list) {
if (!op_desc->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
GELOGW("There is no aipp releated info with index %u.", index);
return SUCCESS;
}
std::string data_mode;
(void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode);

// Set default value
InputAippType aipp_type = DATA_WITHOUT_AIPP;
string data_mode;
(void)AttrUtils::GetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
if (data_mode == "static_aipp") {
type = DATA_WITH_STATIC_AIPP;
aipp_type = DATA_WITH_STATIC_AIPP;
} else if (data_mode == "dynamic_aipp") {
type = DATA_WITH_DYNAMIC_AIPP;
aipp_type = DATA_WITH_DYNAMIC_AIPP;
} else if (data_mode == "dynamic_aipp_conf") {
type = DYNAMIC_AIPP_NODE;
aipp_type = DYNAMIC_AIPP_NODE;
} else {
GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID,
"The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index);
return ACL_ERROR_GE_AIPP_MODE_INVALID;
}

if (type == DATA_WITH_DYNAMIC_AIPP) {
size_t aipp_index = 0xFFFFFFFF; // default invalid value
if (aipp_type == DATA_WITH_DYNAMIC_AIPP) {
string releated_name;
(void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
for (size_t i = 0; i < data_op_list_.size(); ++i) {
GE_CHECK_NOTNULL(data_op_list_[i]);
if (data_op_list_[i]->GetName() == releated_name) {
GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index);
aipp_index = i;
(void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
for (const auto item : data_list) {
if (item.second->GetName() == releated_name) {
GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index);
aipp_index = item.first;
}
}

if (aipp_index == 0xFFFFFFFF) {
GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "Can not find aipp data node from index %u", index);
return ACL_ERROR_GE_AIPP_NOT_EXIST;
GELOGW("Can not find aipp data node from index %u", index);
return SUCCESS;
}
}

aipp_type_list_[index] = { aipp_type, aipp_index };
return SUCCESS;
}

Status DavinciModel::GetAippType(uint32_t index, InputAippType &aipp_type, size_t &aipp_index) const {
const auto it = aipp_type_list_.find(index);
if (it == aipp_type_list_.end()) {
GELOGW("There is no aipp releated info with index %u.", index);
return SUCCESS;
}

aipp_type = it->second.first;
aipp_index = it->second.second;
return SUCCESS;
}
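
The shape of the refactor above: AIPP info and AIPP type are now parsed once per Data node at model-load time (InitAippInfo / InitAippType) and cached in maps keyed by input index, so GetAippInfo / GetAippType become const lookups instead of re-walking data_op_list_. A reduced sketch of that cache-at-init pattern (placeholder types, not the actual DavinciModel members):

#include <cstdint>
#include <map>
#include <utility>

struct AippConfigInfo { /* fields elided */ };

class AippCacheSketch {
 public:
  // Load time: called once per Data node that carries ATTR_NAME_AIPP.
  void Init(uint32_t index, AippConfigInfo info) { aipp_info_list_[index] = std::move(info); }

  // Run time: cheap const lookup; a miss maps to ACL_ERROR_GE_AIPP_NOT_EXIST above.
  bool Get(uint32_t index, AippConfigInfo &out) const {
    const auto it = aipp_info_list_.find(index);
    if (it == aipp_info_list_.end()) {
      return false;
    }
    out = it->second;
    return true;
  }

 private:
  std::map<uint32_t, AippConfigInfo> aipp_info_list_;
};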

@@ -1842,7 +1892,7 @@ void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_
dynamic_type_ = dynamic_type;
}

void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type) const {
if (batch_size_.empty()) {
GELOGD("User does not set dynamic size");
}
@@ -1854,38 +1904,10 @@ void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynami
dynamic_type = dynamic_type_;
}

void DavinciModel::GetModelAttr(vector<string> &out_shape_info) {
void DavinciModel::GetModelAttr(vector<string> &out_shape_info) const {
out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end());
}

Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) {
GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!");
return FAILED;
}

GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");

GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed");

GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR,
"output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(),
output_memory_size_list_.size());

/// For function zero copy,the momery should be aligned by 512 bytes.
/// And, because of the cce op limit, size should be lager than the real shape size. The memory should be padded by 32
/// bytes.
/// *size equals to ((tensorDesc->dataSize + 2 * 32 - 1) / 32) * 32;
for (size_t i = 0; i < output_memory_size_list_.size(); i++) {
output_desc[i].size = output_memory_size_list_[i];
}

return SUCCESS;
}

void DavinciModel::SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format,
InputOutputDescInfo &input) {
uint32_t n, c, h, w;
@@ -1935,24 +1957,30 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format,
}
}

Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
for (size_t index = 0; index < data_op_list_.size(); ++index) {
InputOutputDescInfo input;
GE_CHECK_NOTNULL(data_op_list_[index]);
GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0));
Status DavinciModel::InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index) {
for (const auto &item : data_by_index) {
const auto op_desc = item.second;
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0));

Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat();
CreateInputDimsInfo(data_op_list_[index], format, input);
InputOutputDescInfo input;
Format format = op_desc->GetInputDescPtr(0)->GetFormat();
CreateInputDimsInfo(op_desc, format, input);

input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType();
input.name = data_op_list_[index]->GetName();
input.data_type = op_desc->GetInputDescPtr(0)->GetDataType();
input.name = op_desc->GetName();
int64_t input_size = 0;
GE_CHK_STATUS_RET(TensorUtils::GetSize(*data_op_list_[index]->GetInputDescPtr(0), input_size),
"get input size failed.");
GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed.");
input.size = input_size;
formats.push_back(format);
input_desc.push_back(input);
input_formats_.push_back(format);
input_descs_.push_back(input);
}
return SUCCESS;
}

Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_descs, vector<uint32_t> &input_formats) {
input_descs.insert(input_descs.end(), input_descs_.begin(), input_descs_.end());
input_formats.insert(input_formats.end(), input_formats_.begin(), input_formats_.end());

// Because GetInputDescInfo is called more than once, set is_new_model_desc_ to false after calculating the model input dims
is_new_model_desc_ = false;
return SUCCESS;
@@ -1962,7 +1990,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO
uint32_t &format_result) {
/// netoutput input tensor desc
GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr");
return );
return);
Format format = op_desc->GetInputDescPtr(index)->GetFormat();
GeShape shape = op_desc->GetInputDescPtr(index)->GetShape();
DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType();
@@ -2011,8 +2039,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO
output.data_type = op_desc->GetInputDescPtr(index)->GetDataType();
}

Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list,
vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) {
Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) {
GELOGD("Output node size: %zu", output_op_list.size());
for (const auto &op_desc : output_op_list) {
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
@@ -2037,28 +2064,20 @@ Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list,
std::to_string(src_index[index]);
}
output.name = output_name;
output_descs.push_back(output);
output_formats.push_back(format_result);
output_descs_.push_back(output);
output_formats_.push_back(format_result);
}
}
return SUCCESS;
}

Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) {
Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs,
vector<uint32_t> &output_formats) const {
output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end());
output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end());
return SUCCESS;
}

ge::Format DavinciModel::GetFormat() {
if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) {
GELOGW("OP List Pointer is null or input_desc size is not 1!");
return FORMAT_NCHW;
}

return data_op_list_[0]->GetInputDescPtr(0)->GetFormat();
}

Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) {
rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE;
const std::vector<DataBuffer> &blobs = input_data.blobs;
@@ -2567,7 +2586,7 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b
GELOGD("Reinit cur dynamic dims when getnext sink dynamic.");
cur_dynamic_dims_.clear();
cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_);
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t),
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int32_t),
netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST);
GE_CHK_RT_RET(ret);
}
@@ -2668,11 +2687,11 @@ void *DavinciModel::Run(DavinciModel *model) {
GE_IF_BOOL_EXEC(current_data.blobs.empty(), break);
auto shape_data_buffer_data = current_data.blobs.back().data;
auto shape_data_buffer_length = current_data.blobs.back().length;
model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data),
reinterpret_cast<int64_t *>(shape_data_buffer_data) +
shape_data_buffer_length / sizeof(int64_t));
model->cur_dynamic_dims_.assign(reinterpret_cast<int32_t *>(shape_data_buffer_data),
reinterpret_cast<int32_t *>(shape_data_buffer_data) +
shape_data_buffer_length / sizeof(int32_t));
GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str());
delete[] reinterpret_cast<int64_t *>(current_data.blobs.back().data);
delete[] reinterpret_cast<int32_t *>(current_data.blobs.back().data);
current_data.blobs.pop_back();
}
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END));
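This hunk works only because both ends of the dynamic-dims blob now agree on int32_t: ModelManager::DataInputTensor writes the trailing blob as int32_t and Run decodes it the same way. A standalone illustration of the decode step, using plain buffers rather than GE's DataBuffer:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Producer side (DataInputTensor): dims serialized as int32_t.
  const std::vector<int32_t> dims = {8, 224, 224};
  auto *blob = new int32_t[dims.size()];
  std::copy(dims.begin(), dims.end(), blob);
  const uint32_t length = static_cast<uint32_t>(dims.size() * sizeof(int32_t));

  // Consumer side (Run): the element count must use the same element size;
  // dividing by sizeof(int64_t) here would silently halve the dim count.
  std::vector<int32_t> cur_dynamic_dims(blob, blob + length / sizeof(int32_t));
  for (const int32_t d : cur_dynamic_dims) std::printf("%d ", d);  // 8 224 224
  std::printf("\n");
  delete[] blob;
  return 0;
}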
@@ -3082,6 +3101,8 @@ Status DavinciModel::DistributeTask() {
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
profiler_report_op_info_[task_desc_info.op_name] =
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {
@@ -3089,6 +3110,8 @@ Status DavinciModel::DistributeTask() {
string op_name = "super_kernel_" + to_string(task_index);
task_desc_info.op_name = op_name;
task_desc_info.task_id = task->GetSktTaskID();
profiler_report_op_info_[task_desc_info.op_name] =
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
task_desc_info_.emplace_back(task_desc_info);
}
}
@@ -3960,7 +3983,15 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des
compute_graph_info.output_format = op_desc.output_format;
compute_graph_info.output_shape = op_desc.output_shape;
compute_graph_info.output_data_type = op_desc.output_data_type;

uint32_t task_id = 0;
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find(op_desc.op_name);
if (iter != profiler_report_op_info_.end()) {
task_id = iter->second.first;
stream_id = iter->second.second;
}
compute_graph_info.task_id = task_id;
compute_graph_info.stream_id = stream_id;
graph_desc_info.emplace_back(compute_graph_info);
}
return SUCCESS;
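Together with the DistributeTask hunks above, this gives profiling a per-op (task_id, stream_id) record: the map is filled at task-distribution time and consulted when building graph info, with 0/0 as the fallback for ops that never produced a task. The lookup-with-default idiom in isolation (op names invented):

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <utility>

int main() {
  std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info;
  profiler_report_op_info["conv1"] = {42u, 3u};  // recorded during task distribution

  for (const char *op_name : {"conv1", "never_launched"}) {
    uint32_t task_id = 0, stream_id = 0;  // defaults survive a failed lookup
    auto iter = profiler_report_op_info.find(op_name);
    if (iter != profiler_report_op_info.end()) {
      task_id = iter->second.first;
      stream_id = iter->second.second;
    }
    std::printf("%s: task=%u stream=%u\n", op_name, task_id, stream_id);
  }
  return 0;
}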
@@ -3973,25 +4004,45 @@ void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_s
}
}

Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) {
GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
OpDescPtr data_op = data_op_list_[index];
if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index);
return ACL_ERROR_GE_AIPP_NOT_EXIST;
Status DavinciModel::InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc) {
if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
GELOGI("there is not AIPP related with index %u, node: %s.", index, op_desc->GetName().c_str());
return SUCCESS;
}

vector<std::string> inputs;
if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
vector<string> inputs;
if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
std::string input = inputs[kAippOriginInputIndex];
GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str());
GELOGI("origin input str: %s", input.c_str());
std::vector<std::string> infos = ge::StringUtils::Split(input, ':');
if (infos.size() != kAippInfoNum) {
GELOGW("origin input str is invalid.");
GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum);
return ACL_ERROR_GE_AIPP_MODE_INVALID;
}
orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]);
orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]);
orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal);

OriginInputInfo input_info;
input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]);
input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]);
input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal);
orig_input_info_[index] = input_info;
} else {
OriginInputInfo input_info = { FORMAT_RESERVED, DT_UNDEFINED, 0 };
orig_input_info_[index] = input_info;
}

return SUCCESS;
}

Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const {
const auto it = orig_input_info_.find(index);
if (it == orig_input_info_.end()) {
GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index);
return ACL_ERROR_GE_AIPP_NOT_EXIST;
}

const OriginInputInfo &input_info = it->second;
if (input_info.format != FORMAT_RESERVED || input_info.data_type != DT_UNDEFINED) {
orig_input_info = input_info;
}

return SUCCESS;
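InitOrigInputInfo depends on the colon-separated layout of ATTR_NAME_AIPP_INPUTS entries. A hedged sketch of the split-and-convert step using only the standard library in place of ge::StringUtils::Split; the field order and count below are illustrative assumptions, the real ones come from GE constants such as kAippInfoNum:

#include <cstdio>
#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

// Stand-in for ge::StringUtils::Split.
std::vector<std::string> Split(const std::string &s, char delim) {
  std::vector<std::string> parts;
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, delim)) parts.push_back(item);
  return parts;
}

int main() {
  const int kDecimal = 10;
  // Hypothetical serialized entry: name:size:format:data_type:dim_num
  const std::string input = "data:602112:NCHW:DT_FLOAT:4";
  const auto infos = Split(input, ':');
  if (infos.size() != 5) {  // the real code compares against kAippInfoNum
    std::fprintf(stderr, "origin input str is invalid\n");
    return 1;
  }
  const long dim_num = std::strtol(infos[4].c_str(), nullptr, kDecimal);
  std::printf("tensor=%s dim_num=%ld\n", infos[0].c_str(), dim_num);
  return 0;
}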
@@ -4001,7 +4052,8 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_
GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str());
std::vector<std::string> infos = ge::StringUtils::Split(in_out_info, ':');
if (infos.size() != kAippInfoNum) {
GELOGW("origin input str is invalid.");
GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum);
return;
}
dims_info.name = infos[kAippInfoTensorName];
dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal);
@@ -4016,47 +4068,58 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_
}
}

Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims) {
GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
OpDescPtr data_op = data_op_list_[index];
if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index);
return ACL_ERROR_GE_AIPP_NOT_EXIST;
Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc) {
if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
GELOGI("there is not AIPP related with index %u.", index);
return SUCCESS;
}

vector<std::string> inputs;
if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size());
vector<string> inputs;
vector<InputOutputDims> input_dims;
if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
GELOGI("Data: %s has %zu related aippInfo.", op_desc->GetName().c_str(), inputs.size());
for (auto it : inputs) {
InputOutputDims input_info;
ParseAIPPInfo(it, input_info);
input_dims.emplace_back(input_info);
GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str());
GELOGD("Aipp origin input dims info: %s", it.c_str());

ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex);
ConstGeTensorDescPtr data_input_desc = op_desc->GetInputDescPtr(kDataIndex);
int64_t data_input_size;
(void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size);
GELOGD(
"GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: "
"%s, data_type: %s, shape: %s .",
index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
(void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size);
GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s",
index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
}
}

vector<std::string> outputs;
if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) {
vector<string> outputs;
vector<InputOutputDims> output_dims;
if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) {
for (auto it : outputs) {
InputOutputDims output_info;
ParseAIPPInfo(it, output_info);
output_dims.emplace_back(output_info);
GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str());
GELOGD("Aipp output dims info: %s", it.c_str());
}
}

aipp_dims_info_[index] = { input_dims, output_dims };
return SUCCESS;
}

Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
vector<InputOutputDims> &output_dims) const {
const auto it = aipp_dims_info_.find(index);
if (it == aipp_dims_info_.end()) {
GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index);
return ACL_ERROR_GE_AIPP_NOT_EXIST;
}

input_dims = it->second.first;
output_dims = it->second.second;
return SUCCESS;
}



+34 -39 ge/graph/load/new_model_manager/davinci_model.h

@@ -286,13 +286,6 @@ class DavinciModel {
// Modified from KernelTaskInfo.
SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }

///
/// @ingroup ge
/// @brief get model input and output format
/// @return ccTensorFormat_t current model input and output format
///
Format GetFormat();

rtModel_t GetRtModelHandle() const { return rt_model_handle_; }

rtStream_t GetRtModelStream() const { return rt_model_stream_; }
@@ -326,7 +319,7 @@ class DavinciModel {
Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc);

Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc,
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats);
vector<uint32_t> &input_formats, vector<uint32_t> &output_formats);

///
/// @ingroup ge
@@ -347,9 +340,9 @@ class DavinciModel {

void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const;

void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type);
void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type) const;

void GetModelAttr(vector<string> &dynamic_output_shape_info);
void GetModelAttr(vector<string> &dynamic_output_shape_info) const;

///
/// @ingroup ge
@@ -358,9 +351,9 @@ class DavinciModel {
/// @param [out] aipp_info
/// @return execute result
///
Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);
Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const;

Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index);
Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const;

///
/// @ingroup ge
@@ -378,17 +371,6 @@ class DavinciModel {
///
void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification);

///
/// @ingroup ge
/// @brief get model input and output desc for zero copy
/// @param [out] input_shape model input size
/// @param [out] output_shape model output size
/// @return execute result
///
Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats);

Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);

Status ReturnNoOutput(uint32_t data_id);
@@ -538,9 +520,9 @@ class DavinciModel {
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
vector<InputOutputDims> &output_dims);
vector<InputOutputDims> &output_dims) const;
void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; }
// om file name
void SetOmName(string om_name) { om_name_ = om_name; }
@@ -626,7 +608,7 @@ class DavinciModel {
void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input);

Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats);
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats);
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats) const;

Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo);

@@ -688,7 +670,7 @@ class DavinciModel {
/// @param [in] output_op_list: list of NetOutput op.
/// @return Status
///
Status OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list);
Status GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list);

///
/// @ingroup ge
@@ -856,19 +838,26 @@ class DavinciModel {
Status InitOutputTensorInfo(const OpDescPtr &op_desc);
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs);

Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list,
vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &formats);
Status InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index);
Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list);

Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc);
Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc);
Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list);
Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc);

void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info);
void SetLabelForDynamic(const NodePtr &node);

void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info);
bool IsGetNextSinkDynamic(const OpDescPtr &op_desc);

Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node);
void GetAllGearsInfo(const NodePtr &node);
Status GetGetDynamicDimsNodeInfo(const NodePtr &node);
Status GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node);
Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);
Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);
Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node);
Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node);
Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node);

bool is_weight_mem_has_inited_;
bool is_feature_map_mem_has_inited_;
@@ -888,9 +877,6 @@ class DavinciModel {

map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init

// data op_desc
vector<OpDescPtr> data_op_list_;

vector<OpDescPtr> variable_op_list_;

map<uint32_t, ZeroCopyOffset> new_input_data_info_;
@@ -976,6 +962,8 @@ class DavinciModel {
// for profiling task and graph info
vector<TaskDescInfo> task_desc_info_;

std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;

int64_t maxDumpOpNum_;
// for data dump
DataDumper data_dumper_;
@@ -1021,15 +1009,15 @@ class DavinciModel {
bool is_new_model_desc_{false};
bool is_online_infer_dynamic_ = false;
bool is_getnext_sink_dynamic_ = false;
vector<int64_t> cur_dynamic_dims_;
vector<int32_t> cur_dynamic_dims_;
void *netoutput_last_input_addr_ = nullptr;
int64_t netoutput_last_input_size_ = 0;
size_t shape_of_cur_dynamic_dims_ = 0;
// key: input_index: input is merge node; value: each gear info and each output size
map<size_t, map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
map<size_t, map<vector<int32_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
// key: input_index: input is merge node; value: each gear info and each output shape
map<size_t, map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
vector<vector<int64_t>> all_gears_info_;
map<size_t, map<vector<int32_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
vector<vector<int32_t>> all_gears_info_;

multimap<uint32_t, uint32_t> op_id_map_;
vector<ProfileInfo> profile_list_;
@@ -1046,6 +1034,13 @@ class DavinciModel {
vector<int64_t> output_buffer_size_;
vector<GeShape> output_shape_info_;

map<uint32_t, OriginInputInfo> orig_input_info_;
map<uint32_t, AippConfigInfo> aipp_info_list_;
map<uint32_t, pair<InputAippType, size_t>> aipp_type_list_;
map<uint32_t, pair<vector<InputOutputDims>, vector<InputOutputDims>>> aipp_dims_info_;

vector<InputOutputDescInfo> input_descs_;
vector<uint32_t> input_formats_;
vector<InputOutputDescInfo> output_descs_;
vector<uint32_t> output_formats_;
};


+0 -75 ge/graph/load/new_model_manager/davinci_model_parser.cc

@@ -16,82 +16,7 @@

#include "graph/load/new_model_manager/davinci_model_parser.h"

#include <fstream>
#include <memory>
#include <vector>
#include "securec.h"

#include "common/debug/log.h"
#include "graph/load/new_model_manager/davinci_model.h"

namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelInfoParser(const ModelData &model, ModelInfo &model_info) {
GE_CHK_RT_RET(rtSetDevice(0));
try {
uint32_t model_len = 0;
uint8_t *model_data = nullptr;

Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len);

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); return ret, "Parse model failed");

auto *file_header = reinterpret_cast<ModelFileHeader *>(model.model_data);

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_header == nullptr, GE_CHK_RT(rtDeviceReset(0));
return PARAM_INVALID, "file_header is null.");

model_info.version = file_header->version;
model_info.is_encrypt = false;
GE_IF_BOOL_EXEC(ENCRYPTED == file_header->is_encrypt, model_info.is_encrypt = true);

std::shared_ptr<DavinciModel> davinci_model =
std::shared_ptr<DavinciModel>(new (std::nothrow) DavinciModel(model.priority, nullptr));

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(davinci_model == nullptr, GE_CHK_RT(rtDeviceReset(0));
return PARAM_INVALID, "davinci_model is null.");

GE_MAKE_GUARD(davinci_model, [&] { davinci_model = nullptr; });

ModelHelper model_helper;
ret = model_helper.LoadModel(model);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((ret != SUCCESS), GE_CHK_RT(rtDeviceReset(0)); return FAILED, "load model failed");

ret = davinci_model->Assign(model_helper.GetGeModel());
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0));
return ret, "Parse davinci model data failed");

ret = davinci_model->Init();

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0));
return ret, "Davinci model init failed");

vector<InputOutputDescInfo> input_list;
vector<InputOutputDescInfo> output_list;

ret = davinci_model->GetInputOutputDescInfo(input_list, output_list);

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0));
return ret, "Davinci model GetInputOutputDescInfo failed");

for (const auto &desc : input_list) {
model_info.input_desc.push_back(desc.shape_info);
}
for (const auto &desc : output_list) {
model_info.output_desc.push_back(desc.shape_info);
}

model_info.name = davinci_model->Name();
} catch (...) {
DOMI_LOGE("OM model parser failed, some exceptions occur !");
GE_CHK_RT(rtDeviceReset(0));
return FAILED;
}

GE_CHK_RT(rtDeviceReset(0));

return SUCCESS;
}

DavinciModelParser::DavinciModelParser() {}

DavinciModelParser::~DavinciModelParser() {}


+9 -28 ge/graph/load/new_model_manager/model_manager.cc

@@ -460,8 +460,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d

Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_input_dims,
const vector<pair<string, vector<int64_t>>> &user_input_dims,
vector<int64_t> &cur_dynamic_dims) {
GELOGD(" Start get cur dynamic dims.");
vector<int32_t> &cur_dynamic_dims) {
GELOGD("Start get cur dynamic dims.");
if (user_real_input_dims.size() != user_input_dims.size()) {
GELOGE(INTERNAL_ERROR,
"The input count of user: %zu should be equal to the data count of graph: %zu",
@@ -478,7 +478,7 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_
}
for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) {
if (user_input_dims.at(i).second.at(j) < 0) {
cur_dynamic_dims.emplace_back(user_real_input_dims[i][j]);
cur_dynamic_dims.emplace_back(static_cast<int32_t>(user_real_input_dims[i][j]));
}
}
}
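GetCurDynamicDims gathers, for every dim the user declared as -1, the runtime value from the matching real input. The same logic as a self-contained program with invented shapes:

#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
  using std::vector; using std::pair; using std::string;
  // Configured shapes: -1 marks a dynamic dim (from the dynamic-dims options).
  vector<pair<string, vector<int64_t>>> user_input_dims = {
      {"data0", {-1, 3, -1, 224}}};
  // Real shapes observed for this batch.
  vector<vector<int64_t>> user_real_input_dims = {{8, 3, 224, 224}};

  vector<int32_t> cur_dynamic_dims;
  for (size_t i = 0; i < user_input_dims.size(); ++i) {
    for (size_t j = 0; j < user_input_dims[i].second.size(); ++j) {
      if (user_input_dims[i].second[j] < 0) {
        cur_dynamic_dims.emplace_back(
            static_cast<int32_t>(user_real_input_dims[i][j]));
      }
    }
  }
  for (const int32_t d : cur_dynamic_dims) std::printf("%d ", d);  // prints: 8 224
  std::printf("\n");
  return 0;
}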
@@ -523,7 +523,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
input_data.blobs.push_back(data);
}
if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) {
std::vector<int64_t> cur_dynamic_dims;
std::vector<int32_t> cur_dynamic_dims;
if (!GetLocalOmgContext().user_real_input_dims.empty()) {
if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims,
cur_dynamic_dims) != SUCCESS) {
@@ -531,9 +531,9 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
return INTERNAL_ERROR;
}
DataBuffer data;
data.data = new(std::nothrow) int64_t[cur_dynamic_dims.size()];
data.data = new(std::nothrow) int32_t[cur_dynamic_dims.size()];
GE_CHECK_NOTNULL(data.data);
uint64_t length = static_cast<uint64_t>(cur_dynamic_dims.size() * sizeof(int64_t));
uint32_t length = static_cast<uint32_t>(cur_dynamic_dims.size() * sizeof(int32_t));
GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR,
"Failed to memcpy data.");
data.length = length;
@@ -995,16 +995,6 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

///
/// @ingroup ge
/// @brief Get AIPP info
@@ -1013,11 +1003,11 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
/// @param [out] aipp_info
/// @return execute result
///
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
Status ModelManager::GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
return davinci_model->GetAIPPInfo(index, aipp_info);
return davinci_model->GetAippInfo(index, aipp_info);
}

Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
@@ -1568,6 +1558,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
GE_CHK_RT(rtFree(mem));
}
};
GE_MAKE_GUARD(release, callback);
// malloc sysOpInfoList in SysOpCheckInfo
status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
@@ -1580,7 +1571,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
GE_MAKE_GUARD(release, callback);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_res_op_list);
@@ -1589,7 +1579,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
GE_MAKE_GUARD(release, callback);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_ret_code_list);
@@ -1601,7 +1590,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
GE_MAKE_GUARD(release, callback);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_op_type_name);
@@ -1619,7 +1607,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
GE_MAKE_GUARD(release, callback);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_op_type_name);
@@ -1648,7 +1635,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
GE_MAKE_GUARD(release, callback);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(args);
@@ -1664,7 +1650,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
status = rtStreamSynchronize(stream);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
GE_MAKE_GUARD(release, callback);
GE_CHK_RT(rtStreamDestroy(stream));
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1679,7 +1664,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op

if (op_check_info_res.isWithoutJson) {
GELOGI("No need to check aicpu in this scenoria.");
GE_MAKE_GUARD(release, callback);
GE_CHK_RT(rtStreamDestroy(stream));
return SUCCESS;
}
@@ -1698,7 +1682,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
GE_MAKE_GUARD(release, callback);
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
}
@@ -1722,12 +1705,10 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
}
fail_reason += "not support.";
GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
GE_MAKE_GUARD(release, callback);
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
}

GE_MAKE_GUARD(release, callback);
GE_CHK_RT(rtStreamDestroy(stream));
GELOGI("Cpu kernel launch check optype task success.");
return SUCCESS;
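The recurring change in this file is cleanup consolidation: a single GE_MAKE_GUARD(release, callback) placed right after the callback definition now covers every early return, so the per-error-path guard calls could all be deleted. A minimal RAII scope guard showing why one guard at the top is enough (GE's actual macro may expand differently):

#include <cstdio>
#include <functional>
#include <utility>

// Minimal stand-in for GE_MAKE_GUARD: runs its callback on scope exit.
class ScopeGuard {
 public:
  explicit ScopeGuard(std::function<void()> fn) : fn_(std::move(fn)) {}
  ~ScopeGuard() { if (fn_) fn_(); }  // fires on every exit path, early or late
  ScopeGuard(const ScopeGuard &) = delete;
  ScopeGuard &operator=(const ScopeGuard &) = delete;
 private:
  std::function<void()> fn_;
};

int AllocateAndWork(bool fail_early) {
  ScopeGuard release([] { std::puts("freeing allocated device memory"); });
  if (fail_early) {
    return -1;  // the guard still fires; no duplicated cleanup call needed
  }
  std::puts("work done");
  return 0;
}

int main() {
  AllocateAndWork(true);
  AllocateAndWork(false);
  return 0;
}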


+4 -18 ge/graph/load/new_model_manager/model_manager.h

@@ -126,14 +126,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
///
/// @ingroup domi_ome
/// @brief Get cur_dynamic_dims for all input.
/// @param [in] vector<vector<uint64_t>> &user_real_input_dims: dims info of all user_inputs.
/// @param [in] vector<vector<int64_t>> &user_real_input_dims: dims info of all user_inputs.
/// @param [in] vector<pair<string, vector<int64_t>>> &user_input_dims: key:name. value:dynamic dims from option.
/// @param [out] vector<uint64_t> &cur_dynamic_dims: real dims gather, where the index of -1.
/// @param [out] vector<int32_t> &cur_dynamic_dims: real dims gather, where the index of -1.
/// @return 0: SUCCESS / others: INTERNAL_ERROR
///
Status GetCurDynamicDims(const vector<vector<int64_t>> &user_real_input_dims,
const vector<pair<string, vector<int64_t>>> &user_input_dims,
vector<int64_t> &cur_dynamic_dims);
vector<int32_t> &cur_dynamic_dims);

///
/// @ingroup domi_ome
@@ -239,24 +239,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
/// @param [out] aipp_info
/// @return execute result
///
ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
ge::Status GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);

ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);

///
/// @ingroup domi_ome
/// @brief set model input and output size zero copy
/// @param [in] model_id model id
/// @param [out] input_shape input tensor
/// @param [out] output_shape output tensor
/// @return SUCCESS success
/// @return PARAM_INVALID parameter invalid
///
ge::Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats);

ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);

ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);


+3 -1 ge/graph/load/new_model_manager/task_info/hccl_task_info.cc

@@ -145,7 +145,9 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM
} else {
GELOGI("need to reuse follow stream and create new follow stream.");
size_t created_stream_num = follow_stream_usage.size();
hccl_stream_list_ = follow_stream_usage;
for (const auto &stream : follow_stream_usage) {
hccl_stream_list_.emplace_back(stream);
}
ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id);
if (ret != SUCCESS) {
GELOGE(RT_FAILED, "Create hccl stream failed.");
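The one-line fix presumably matters because plain assignment would discard any streams already held in hccl_stream_list_, while appending preserves them before CreateStream adds the remainder. The difference in isolation:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> hccl_stream_list = {1, 2};     // streams already held
  std::vector<int> follow_stream_usage = {3, 4};  // reusable follow streams

  // Old: hccl_stream_list = follow_stream_usage;  // would drop 1 and 2
  for (const auto &stream : follow_stream_usage) {
    hccl_stream_list.emplace_back(stream);        // new: keeps 1 and 2
  }
  for (const int s : hccl_stream_list) std::printf("%d ", s);  // prints: 1 2 3 4
  std::printf("\n");
  return 0;
}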


+24 -4 ge/graph/manager/graph_manager.cc

@@ -101,6 +101,7 @@
#include "graph/common/local_context.h"
#include "graph/common/omg_util.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "register/custom_pass_helper.h"

namespace {
const char *const kSummary = "Summary";
@@ -686,7 +687,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node,
CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId());
GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph);
GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph);
GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph,
GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node, inputs, compute_graph,
session_id);
GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph);

@@ -731,6 +732,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
GeRootModelPtr &ge_root_model, uint64_t session_id) {
GE_CHECK_NOTNULL(graph_node);
GE_CHECK_NOTNULL(compute_graph);

CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId());
GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph);
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph);
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts",
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts,
@@ -765,10 +769,24 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint
return SUCCESS;
}

Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) {
ConstGraphPtr const_graph = graph_node->GetGraph();
auto comp_graph = GraphUtils::GetComputeGraph(*const_graph);
GE_DUMP(comp_graph, "RunCustomPassBegin");

GE_TIMESTAMP_START(RunCustomPass);
GraphPtr graph = std::const_pointer_cast<Graph>(const_graph);
GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass failed.",
comp_graph->GetName().c_str());
GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass");
return SUCCESS;
}

Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model, uint64_t session_id) {
GE_CHECK_NOTNULL(graph_node);
GE_CHECK_NOTNULL(graph_node->GetGraph());
GE_CHK_STATUS_RET_NOLOG(RunCustomPass(graph_node));
auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph());
GE_CHECK_NOTNULL(compute_graph);
compute_graph->SetSessionID(session_id);
@@ -1172,7 +1190,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const
auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph());
GE_CHECK_NOTNULL(compute_graph);

GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node->GetGraph(), inputs,
GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node, inputs,
compute_graph, session_id);

for (auto &node : compute_graph->GetAllNodes()) {
@@ -2762,8 +2780,10 @@ Status GraphManager::ParseInputsDims(const std::vector<InputTensorInfo> &input_t
if (!GetLocalOmgContext().dynamic_node_type.empty()) {
vector<NodePtr> data_nodes;
vector<NodePtr> getnext_nosink_nodes;
data_nodes = compute_graph_->TryGetExtAttr(kExtAttrDataNodes, data_nodes);
getnext_nosink_nodes = compute_graph_->TryGetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes);
data_nodes = GetLocalOmgContext().data_nodes;
getnext_nosink_nodes = GetLocalOmgContext().getnext_nosink_nodes;
GELOGD("Data nodes count is %zu, getnext nosink nodes count is %zu.", data_nodes.size(),
getnext_nosink_nodes.size());
if (GetLocalOmgContext().dynamic_node_type == DATA) {
if (getnext_nosink_nodes.empty()) {
// just data or data+getnext_sink


+1 -0 ge/graph/manager/graph_manager.h

@@ -226,6 +226,7 @@ class GraphManager {
void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor);
Status ParseInputsDimsForGetNexNosinkAndData(const vector<NodePtr> &dynamic_nodes,
const std::vector<InputTensorInfo> &input_tensor);
Status RunCustomPass(const GraphNodePtr &graph_node);
Status PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, GeRootModelPtr &ge_root_model,
uint64_t session_id = INVALID_SESSION_ID);



+33 -0 ge/graph/optimize/graph_optimize.cc

@@ -336,4 +336,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) {
}
return SUCCESS;
}
Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) {
if (compute_graph == nullptr) {
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr.");
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;
}

std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}

auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority();
GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", graph_optimizer.size());
Status ret = SUCCESS;
string exclude_core_type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine;
GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str());
if (!graph_optimizer.empty()) {
for (auto &iter : graph_optimizer) {
if (iter.first == exclude_core_type || iter.second == nullptr) {
continue;
}
GELOGI("Begin to optimize whole graph by engine %s", iter.first.c_str());
ret = iter.second->OptimizeWholeGraph(*compute_graph);
GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first);
if (ret != SUCCESS) {
GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret);
return ret;
}
}
}
return ret;
}
} // namespace ge

+3 -0 ge/graph/optimize/graph_optimize.h

@@ -52,6 +52,9 @@ class GraphOptimize {
// for fe prepare optimize in quantize scene
Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph);

// for engine to optimize merged whole graph before ge Optimize2
Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph);

// for rts optimize before build to add attr and insert memcpy op
Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph);



+5 -1 ge/graph/passes/common_subexpression_elimination_pass.cc

@@ -26,6 +26,10 @@

namespace ge {
namespace {
std::set<std::string> un_compute_attrs = {
{ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES},
};

std::string GetCseKey(const NodePtr &node) {
std::stringstream ss;
ss << node->GetType() << "-data-inputs-";
@@ -49,7 +53,7 @@ std::string GetCseKey(const NodePtr &node) {
ss << name << "-";
}

ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc());
ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs);

return ss.str();
}
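Excluding ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES from the CSE key means two nodes that differ only in that bookkeeping attribute still produce identical keys and can be folded. A sketch of key construction with an exclusion set, where plain string maps stand in for OpDesc attributes and the attr key is invented:

#include <cstdio>
#include <map>
#include <set>
#include <sstream>
#include <string>

std::string GetCseKey(const std::string &type,
                      const std::map<std::string, std::string> &attrs,
                      const std::set<std::string> &un_compute_attrs) {
  std::stringstream ss;
  ss << type << "-attrs-";
  for (const auto &kv : attrs) {
    if (un_compute_attrs.count(kv.first) != 0) continue;  // skip bookkeeping attrs
    ss << kv.first << "=" << kv.second << ";";
  }
  return ss.str();
}

int main() {
  const std::set<std::string> un_compute_attrs = {"data_dump_origin_op_names"};
  const std::map<std::string, std::string> a = {{"T", "float"}, {"data_dump_origin_op_names", "x"}};
  const std::map<std::string, std::string> b = {{"T", "float"}, {"data_dump_origin_op_names", "y"}};
  // Same key despite the differing dump attribute, so CSE can merge the nodes.
  std::printf("%s\n%s\n", GetCseKey("Add", a, un_compute_attrs).c_str(),
              GetCseKey("Add", b, un_compute_attrs).c_str());
  return 0;
}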


+474 -79 ge/graph/passes/multi_batch_clone_pass.cc

@@ -25,31 +25,65 @@
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "register/op_registry.h"
#include "graph/common/omg_util.h"

namespace ge {
namespace {
constexpr uint8_t kDataInIndex = 0;
constexpr uint8_t kDataOutIndex = 0;
constexpr uint8_t kCaseArgIndex = 1;
const int kDivisionConst = 2;
const size_t kNumOfGetnextNode = 1;

const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case";
const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data";
const std::string kMultiBatchGetDynamicDimsNode = "ascend_mbatch_get_dynamic_dims_node";
const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const";
const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex";
const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_";
const char *const kGetNextName = "IteratorV2";
} // namespace

inline bool IsGetNextType(const NodePtr &node) {
std::string original_type;
GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS,
GELOGW("Get original type failed."); return false);
return (original_type == kGetNextName);
}

Status MultiBatchClonePass::Run(ComputeGraphPtr graph) {
GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(FAILED, "Original graph is nullptr"); return FAILED);
if (graph->GetParentGraph() != nullptr) {
GELOGD("Subgraph %s skip the MultiBatchClonePass", graph->GetName().c_str());
return SUCCESS;
}

if (!GetLocalOmgContext().need_multi_batch) {
GELOGI("No need to process_multi for no_train graph.");
return SUCCESS;
}
std::vector<NodePtr> data_nodes;
std::vector<NodePtr> getnext_nosink_nodes;
std::vector<NodePtr> getnext_sink_nodes;
if (multibatch::CheckSequenceOfOptions(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) {
GELOGE(PARAM_INVALID, "[Train_Dynamic] CheckSequenceOfOptions failed.");
return PARAM_INVALID;
}
if (multibatch::UpdateNameOfInputShape(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) {
GELOGE(PARAM_INVALID, "[Train_Dynamic] UpdateNameForInputShapeOfOption failed.");
return PARAM_INVALID;
}
if (multibatch::DeleteIdentityInsertByAdapter(graph) != SUCCESS) {
GELOGE(PARAM_INVALID, "[Train_Dynamic] DeleteIdentityInsertByAdapter failed.");
return PARAM_INVALID;
}
if (!multibatch::InitDynamicParams(batch_shapes_)) {
GELOGD("There is no multi-batch options, no need clone multi-batch graph");
return SUCCESS;
}

if (multibatch::CheckNegativeCountOfOptions(batch_shapes_) != SUCCESS) {
GELOGE(PARAM_INVALID, "[Train_Dynamic] Input_shape and dynamic_dims should set correct params.");
return PARAM_INVALID;
}
GELOGD("Begin to run Multi-batch clone on graph: %s", graph->GetName().c_str());
GE_CHK_STATUS_RET(multibatch::CheckDynamicParams(batch_shapes_), "Invalid multi-batch param");
if (CollectIoNodes(graph) != SUCCESS) {
@@ -66,21 +100,14 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) {

(void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_);
ComputeGraphPtr branch = MakeShared<ComputeGraph>(graph->GetName());
if (branch == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch graph failed");
return OUT_OF_MEMORY;
}
GE_IF_BOOL_EXEC(branch == nullptr, GELOGE(OUT_OF_MEMORY, "Create multi batch graph failed"); return OUT_OF_MEMORY);
(void)AttrUtils::SetStr(branch, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_);

graph->InValid(); // Will modify, need topological again.
graph->Swap(*branch);
if (CreateRootGraph(graph) != SUCCESS) {
return FAILED;
}

if (CreateSubgraphs(graph, branch) != SUCCESS) {
return FAILED;
}
GE_CHK_STATUS_RET(CreateRootGraph(graph), "Construct root graph failed.");
GE_CHK_STATUS_RET(CreateOriGraph(branch), "Construct original graph failed.");
GE_CHK_STATUS_RET(CreateSubgraphs(graph, branch), "Construct subgraph failed.");

GE_CHK_STATUS_RET(PruneDirectOutput(graph), "Prune direct output failed");
GELOGD("MultiBatchClonePass Leave");
@@ -95,9 +122,13 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) {
///
Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) {
for (const auto &node : graph->GetDirectNode()) {
if (!GetLocalOmgContext().dynamic_node_type.empty() && IsGetNextType(node)) {
all_data_nodes_.emplace_back(node);
GE_CHK_STATUS_RET(InitParamsOfGetNext(node), "Init params of %s failed.", node->GetName().c_str());
}
if (node->GetType() == DATA) {
all_data_nodes_.emplace_back(node);
} else if (node->GetType() == CONSTANT) {
} else if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) {
all_const_nodes_.emplace_back(node);
} else if (node->GetType() == NETOUTPUT) {
all_output_nodes_.emplace_back(node);
@@ -114,10 +145,16 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) {
}

int64_t data_index = 0;
size_t getnext_node_count = 0;
for (size_t i = 0; i < all_data_nodes_.size(); ++i) {
if (IsGetNextType(all_data_nodes_[i])) {
// just one getnext node in graph
getnext_node_count++;
continue;
}
const auto &op_desc = all_data_nodes_[i]->GetOpDesc();
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
(void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i);
(void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i - getnext_node_count);
}
}

@@ -133,7 +170,43 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) {
"Remove edge failed");
}
}
GELOGD("Data count is %zu, const count is %zu, getnext count is %zu, output count is %zu, direct out count is %zu.",
all_data_nodes_.size(), all_const_nodes_.size(), getnext_node_count, all_output_nodes_.size(),
direct_output_.size());

return SUCCESS;
}

Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) {
data_count_from_getnext_ = 0;
getnext_sink_dynamic_dims_ = false;
GE_CHECK_NOTNULL(node->GetOpDesc());
data_count_from_getnext_ = node->GetOpDesc()->GetOutputsSize();
if (GetLocalOmgContext().dynamic_node_type == GETNEXT) {
data_count_from_getnext_ = data_count_from_getnext_ / kDivisionConst;
for (size_t i = 0; i < data_count_from_getnext_; ++i) {
GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(i);
GELOGD("The %zu data shape from getnext sink is %s.", i,
formats::JoinToString(output_desc.GetShape().GetDims()).c_str());
const auto &dims = output_desc.GetShape().GetDims();
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) {return val >= 0; })) {
GELOGD("The %zu data from %s is static.", i, node->GetName().c_str());
} else {
getnext_sink_dynamic_dims_ = true;
GELOGD("Dynamic dims in the pattern of getnext sink.");
}
}
}
if (node->GetOutControlAnchor() != nullptr) {
for (const auto &peer_in_control_anchor : node->GetOutControlAnchor()->GetPeerInControlAnchors()) {
NodePtr next_node = peer_in_control_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(next_node);
if (next_node->GetType() == CONSTANTOP) {
out_control_nodes_.insert(next_node);
GELOGD("Control edge: %s connect with %s.", node->GetName().c_str(), next_node->GetName().c_str());
}
}
}
return SUCCESS;
}

@@ -144,7 +217,11 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) {
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) {
GELOGD("Start create root graph of %s.", graph->GetName().c_str());
uint32_t input_num = all_data_nodes_.size() + all_const_nodes_.size();
if (data_count_from_getnext_ != 0) {
input_num = input_num + data_count_from_getnext_ - kNumOfGetnextNode;
}
uint32_t output_num = all_output_nodes_[0]->GetAllInDataAnchorsSize();

OpDescBuilder op_builder(kMultiBatchCaseNode, CASE);
@@ -185,6 +262,10 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) {
op_desc->GetName().c_str());
return FAILED;
}
if (!AttrUtils::SetBool(op_desc, ATTR_INSERT_BY_MBATCH, true)) {
GELOGE(INTERNAL_ERROR, "Failed to add insert attr on case node %s", op_desc->GetName().c_str());
return INTERNAL_ERROR;
}
GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed");

GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed");
@@ -202,7 +283,7 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) {
/// @param [in] NodePtr node: index data node.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node) {
Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node) {
const OpDescPtr data_desc = MakeShared<OpDesc>(kMultiBatchDataNode, DATA);
if (data_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed");
@@ -220,11 +301,12 @@ Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, No
}

size_t data_index = all_data_nodes_.size();
data_index = data_count_from_getnext_ != 0 ? data_index - kNumOfGetnextNode : data_index;
(void)AttrUtils::SetInt(data_desc, ATTR_NAME_INDEX, data_index);
(void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true);

node = graph->AddNode(data_desc);
if (node == nullptr) {
shape_node = graph->AddNode(data_desc);
if (shape_node == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed");
return OUT_OF_MEMORY;
}
@@ -286,15 +368,19 @@ Status MultiBatchClonePass::CreateIndexConstNode(const ComputeGraphPtr &graph, N
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) {
// Data --> MapIndex --> Case
NodePtr data_node;
GE_CHK_STATUS_RET(CreateIndexDataNode(graph, data_node), "Create data node failed");
// Data/GetDynamicDims --> MapIndex --> Case
if (!getnext_sink_dynamic_dims_) {
GE_CHK_STATUS_RET(CreateIndexDataNode(graph, shape_node_), "Create data node failed");
} else {
GE_CHK_STATUS_RET(CreateGetDynamicDimsNode(graph, shape_node_), "Create get dynamic dims node failed");
}

NodePtr const_node;
GE_CHK_STATUS_RET(CreateIndexConstNode(graph, const_node), "Create const node failed");

GELOGD("Shape node name is %s, type is %s, const node name is %s.", shape_node_->GetName().c_str(),
shape_node_->GetType().c_str(), const_node->GetName().c_str());
OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex");
op_builder.AddInput("x", data_node->GetOpDesc()->GetOutputDesc(0))
op_builder.AddInput("x", shape_node_->GetOpDesc()->GetOutputDesc(0))
.AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0))
.AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32));

@@ -309,8 +395,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) {
return OUT_OF_MEMORY;
}

if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", data_node->GetName().c_str(),
GE_CHK_STATUS_RET(AddAttrForGetDynamicDims(shape_node_), "Failed to add attr for %s.",
shape_node_->GetName().c_str());
if (GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", shape_node_->GetName().c_str(),
index_node->GetName().c_str());
return FAILED;
}
@@ -328,6 +416,120 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) {
return SUCCESS;
}

Status MultiBatchClonePass::CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node) {
const OpDescPtr data_desc = MakeShared<OpDesc>(kMultiBatchGetDynamicDimsNode, GETDYNAMICDIMS);
if (data_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch get dynamic dims node failed");
return OUT_OF_MEMORY;
}

// input of GetDynamicDims is shape_of_each_data, output is gear_info
for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) {
size_t input_shape_dims = GetLocalOmgContext().user_input_dims.at(i).second.size();
// add input desc without GeShape for const input; an input_shape value of 1 is transferred by the adapter
if (input_shape_dims == 1 && GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) {
GeTensorDesc tensor_desc;
tensor_desc.SetFormat(FORMAT_ND);
tensor_desc.SetDataType(DT_INT32);
auto ret = data_desc->AddInputDesc(tensor_desc);
GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data");
return FAILED);
continue;
}
GeTensorDesc tensor_desc(GeShape({static_cast<int32_t>(input_shape_dims)}), FORMAT_ND, DT_INT32);
auto ret = data_desc->AddInputDesc(tensor_desc);
GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data");
return FAILED);
}
GeTensorDesc tensor_desc(GeShape({static_cast<int32_t>(batch_shapes_.at(0).size())}), FORMAT_ND, DT_INT32);
auto ret = data_desc->AddOutputDesc(tensor_desc);
GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data");
return FAILED);

(void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true);

shape_node = graph->AddNode(data_desc);
if (shape_node == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch dynamic dims node failed");
return OUT_OF_MEMORY;
}
return SUCCESS;
}

Status MultiBatchClonePass::AddAttrForGetDynamicDims(const NodePtr &shape_node) {
if (!getnext_sink_dynamic_dims_) {
GELOGD("No need to add attr when not insert get dynamic dims node.");
return SUCCESS;
}
GELOGD("Add attr for :%s, type is %s:", shape_node->GetName().c_str(), shape_node->GetType().c_str());
if (!AttrUtils::SetInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_DATA_COUNT, data_count_from_getnext_)) {
GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_DATA_COUNT failed");
return INTERNAL_ERROR;
}
vector<int64_t> shape_info;
for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) {
if (GetLocalOmgContext().user_input_dims.at(i).second.size() == 1 &&
GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) {
shape_info.emplace_back(0);
continue;
}
shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.size());
for (size_t j = 0; j < GetLocalOmgContext().user_input_dims.at(i).second.size(); ++j) {
shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.at(j));
}
}
if (!AttrUtils::SetListInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_SHAPE_INFO, shape_info)) {
GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_SHAPE_INFO failed");
return INTERNAL_ERROR;
}
return SUCCESS;
}
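ATTR_GETNEXT_SINK_SHAPE_INFO flattens every user shape into one integer list: each input contributes its dim count followed by its dims, and the scalar placeholder case contributes a bare 0. Worked through with invented shapes:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // user_input_dims stand-in: {0} is the adapter's placeholder for "no shape".
  const std::vector<std::vector<int64_t>> user_dims = {{-1, 3}, {0}, {2, -1}};
  std::vector<int64_t> shape_info;
  for (const auto &dims : user_dims) {
    if (dims.size() == 1 && dims[0] == 0) {
      shape_info.emplace_back(0);  // placeholder input contributes a single 0
      continue;
    }
    shape_info.emplace_back(static_cast<int64_t>(dims.size()));  // dim count first
    for (const int64_t d : dims) shape_info.emplace_back(d);
  }
  // Result: 2 -1 3 0 2 2 -1
  for (const int64_t v : shape_info) std::printf("%lld ", static_cast<long long>(v));
  std::printf("\n");
  return 0;
}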

Status MultiBatchClonePass::LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node) {
GELOGD("Start relink shape anchor of %s to %s.", getnext_node->GetName().c_str(), shape_node->GetName().c_str());
size_t input_index = 0;
size_t data_count = getnext_node->GetAllOutDataAnchors().size() / kDivisionConst;
for (size_t out_index = data_count; out_index < getnext_node->GetAllOutDataAnchors().size(); ++out_index,
++input_index) {
GELOGD("Start add %s of %zu out_anchor to %s of %zu in_anchor.", getnext_node->GetName().c_str(), out_index,
shape_node->GetName().c_str(), input_index);
auto out_data_anchor = getnext_node->GetOutDataAnchor(out_index);
auto ret = GraphUtils::AddEdge(out_data_anchor, shape_node->GetInDataAnchor(input_index));
GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link getnext %s to getdynamicdims %s",
getnext_node->GetName().c_str(), shape_node->GetName().c_str());
return INTERNAL_ERROR);
}
return SUCCESS;
}
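
Note: the halving above assumes a sink-mode GetNext lays out N data anchors followed by their N shape anchors (kDivisionConst == 2 in this source). A standalone sketch of the resulting anchor pairing, with a hypothetical 3-output GetNext:

#include <cstddef>
#include <iostream>

int main() {
  const size_t kDivisionConst = 2;     // data anchors first, then shape anchors
  const size_t total_out_anchors = 6;  // hypothetical GetNext with 3 outputs
  const size_t data_count = total_out_anchors / kDivisionConst;
  for (size_t out = data_count, in = 0; out < total_out_anchors; ++out, ++in) {
    std::cout << "GetNext out anchor " << out
              << " -> GetDynamicDims in anchor " << in << '\n';
  }
  return 0;
}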

Status MultiBatchClonePass::LinkGetDynamicDimsToNetOutput(const NodePtr &output_node) {
if (!GetLocalOmgContext().dynamic_node_type.empty()) {
if (!AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, GetLocalOmgContext().dynamic_dims)) {
GELOGE(INTERNAL_ERROR, "Failed to set all gears info attr on netoutput %s.", output_node->GetName().c_str());
return INTERNAL_ERROR;
}
}
if (getnext_sink_dynamic_dims_) {
GELOGD("Start link %s to %s.", shape_node_->GetName().c_str(), output_node->GetName().c_str());
size_t input_index = output_node->GetAllInDataAnchors().size();
if (NodeUtils::AppendInputAnchor(output_node, input_index + 1) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Append input anchor of %s of %zu failed.", output_node->GetName().c_str(), input_index);
return INTERNAL_ERROR;
}
auto ret = GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(kDataOutIndex),
output_node->GetInDataAnchor(input_index));
GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link netoutput %s to getdynamicdims %s",
output_node->GetName().c_str(), shape_node_->GetName().c_str());
return INTERNAL_ERROR);
if (!AttrUtils::SetBool(output_node->GetOpDesc(), ATTR_GETNEXT_SINK_DYNMAIC, true)) {
GELOGE(INTERNAL_ERROR, "Failed to set getnext sink dynamic attr on netoutput %s.",
output_node->GetName().c_str());
return INTERNAL_ERROR;
}
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Create input node for root graph.
@@ -337,8 +539,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) {
Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
// Data --> Case
std::vector<NodePtr> all_data_nodes;
const size_t arg_index = kCaseArgIndex;
for (size_t i = 0; i < all_data_nodes_.size(); ++i) {
size_t case_input_index = kCaseArgIndex;
NodePtr getnext_node = nullptr;
size_t input_index_of_getnext = 0;
for (size_t i = 0; i < all_data_nodes_.size(); ++i, ++case_input_index) {
const auto &node = all_data_nodes_[i];
const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc());
if (op_desc == nullptr) {
@@ -353,22 +557,60 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
op_desc->SetName(node->GetName());
const NodePtr &data = graph->AddNode(op_desc);
GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str());
if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s",
data->GetName().c_str(), case_node_->GetName().c_str());
return FAILED;
if (IsGetNextType(node)) {
getnext_node = data;
input_index_of_getnext = case_input_index;
case_input_index = case_input_index + data_count_from_getnext_;
continue;
} else {
if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(case_input_index)) !=
GRAPH_SUCCESS) {
GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", data->GetName().c_str(),
case_node_->GetName().c_str());
return FAILED;
}
}

if (SetMaxShapeToData(data) != SUCCESS) {
if (SetMaxShape(data) != SUCCESS) {
GELOGE(FAILED, "Set max shape of %s failed.", data->GetName().c_str());
return FAILED;
}
all_data_nodes.emplace_back(data);
}
if (getnext_node != nullptr) {
if (LinkEdgeForGetNext(getnext_node, input_index_of_getnext) != SUCCESS) {
GELOGE(FAILED, "Failed to link edge for %s.", getnext_node->GetName().c_str());
return FAILED;
}
if (SetMaxShape(getnext_node) != SUCCESS) {
GELOGE(FAILED, "Set max shape of %s failed.", getnext_node->GetName().c_str());
return FAILED;
}
all_data_nodes.emplace_back(getnext_node);
}

all_data_nodes_.swap(all_data_nodes);
return SUCCESS;
}

Status MultiBatchClonePass::LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index) {
GELOGD("Start link edge for %s, which is the %zu input of %s.", getnext_node->GetName().c_str(),
case_input_index, case_node_->GetName().c_str());
for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++case_input_index) {
if (GraphUtils::AddEdge(getnext_node->GetOutDataAnchor(out_index),
case_node_->GetInDataAnchor(case_input_index)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Failed to add data edge between %zu Data:%s to %zu Case:%s", out_index,
getnext_node->GetName().c_str(), case_input_index, case_node_->GetName().c_str());
return FAILED;
}
}
if (getnext_sink_dynamic_dims_) {
GE_CHK_STATUS_RET(LinkGetNextToGetDynamicDims(getnext_node, shape_node_), "Failed to add link for %s.",
shape_node_->GetName().c_str());
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Create Const node for root graph.
@@ -378,7 +620,11 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
// Const --> Case
std::vector<NodePtr> all_const_nodes;
const size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
if (data_count_from_getnext_ != 0) {
arg_index = arg_index + data_count_from_getnext_ - kNumOfGetnextNode;
}

for (size_t i = 0; i < all_const_nodes_.size(); ++i) {
const auto &node = all_const_nodes_[i];
const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc());
@@ -395,15 +641,33 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
const NodePtr &data = graph->AddNode(op_desc);
GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str());
if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s",
data->GetName().c_str(), case_node_->GetName().c_str());
GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s", data->GetName().c_str(),
case_node_->GetName().c_str());
return FAILED;
}
all_const_nodes.emplace_back(data);
}
ChangeConstToData();
all_const_nodes_.swap(all_const_nodes);
return SUCCESS;
}

void MultiBatchClonePass::ChangeConstToData() {
size_t data_index = all_data_nodes_.size();
if (data_count_from_getnext_ != 0) {
data_index = data_index + data_count_from_getnext_ - kNumOfGetnextNode;
}
for (size_t i = 0; i < all_const_nodes_.size(); ++i, ++data_index) { // Trans subgraph Const to Data.
auto &const_node = all_const_nodes_[i];
if (out_control_nodes_.find(const_node) != out_control_nodes_.end()) {
GELOGD("No need to change %s to Data type.", const_node->GetName().c_str());
continue;  // keep Const nodes that carry control edges out of the subgraph
}
const OpDescPtr &op_desc = all_const_nodes_[i]->GetOpDesc();
op_desc->SetType(DATA);
(void)op_desc->DelAttr(ATTR_NAME_WEIGHTS); // Delete weight.
@@ -413,9 +677,6 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
(void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index);
(void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1);
}

all_const_nodes_.swap(all_const_nodes);
return SUCCESS;
}
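
Note: the index offsets in CreateConstNode/ChangeConstToData follow from counting the GetNext node once in all_data_nodes_ while it occupies data_count_from_getnext_ Case inputs. A standalone arithmetic sketch, assuming kCaseArgIndex == 1 (arg 0 being the batch-index tensor) and kNumOfGetnextNode == 1:

#include <cstddef>
#include <iostream>

int main() {
  const std::size_t kCaseArgIndex = 1;            // assumed: arg 0 is the batch index
  const std::size_t kNumOfGetnextNode = 1;        // assumed: at most one GetNext
  const std::size_t data_nodes = 3;               // 2 Data nodes + 1 GetNext node
  const std::size_t data_count_from_getnext = 3;  // the GetNext yields 3 tensors
  std::size_t const_arg =
      kCaseArgIndex + data_nodes + data_count_from_getnext - kNumOfGetnextNode;
  std::size_t const_data_index = data_nodes + data_count_from_getnext - kNumOfGetnextNode;
  std::cout << "first Const feeds Case arg " << const_arg                         // 6
            << " and becomes subgraph Data index " << const_data_index << '\n';  // 5
  return 0;
}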

///
@@ -461,7 +722,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) {
}
}
}

GE_CHK_STATUS_RET(LinkGetDynamicDimsToNetOutput(node), "Failed to add edge between %s to netoutput: %s.",
shape_node_->GetName().c_str(), output->GetName().c_str());
all_output_nodes_.clear();
all_output_nodes_.emplace_back(node);
return SUCCESS;
@@ -473,34 +735,69 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) {
/// @param [in] const NodePtr &data: data in Root/Case graph.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
auto data_name = data->GetName();
Status MultiBatchClonePass::SetMaxShape(const NodePtr &data) {
GELOGD("Start set max shape for %s.", data->GetName().c_str());
if (!IsGetNextType(data)) {
if (SetMaxShapeToData(data, kDataOutIndex) != SUCCESS) {
GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str());
return PARAM_INVALID;
}
} else {
for (size_t out_anchor_index = 0; out_anchor_index < data_count_from_getnext_; ++out_anchor_index) {
if (SetMaxShapeToData(data, out_anchor_index) != SUCCESS) {
GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str());
return PARAM_INVALID;
}
}
}
return SUCCESS;
}

Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index) {
GELOGD("Start update max shape of %s, %zu output.", node->GetName().c_str(), out_anchor_index);
auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape();
string data_name = node->GetName();
if (IsGetNextType(node)) {
data_name.append("_").append(std::to_string(out_anchor_index));
}
GELOGD("Update max shape of %s, shape dims is %s.", data_name.c_str(),
formats::JoinToString(data_shape.GetDims()).c_str());
const auto &dims = data_shape.GetDims();
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
return SUCCESS;
if (!IsGetNextType(node)) {
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
GELOGD("No need to do anything for static data.");
return SUCCESS;
}
} else {
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
if (getnext_sink_dynamic_dims_) {
// update the paired shape output of getnext when dynamic dims are sunk with the data
GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(node, out_anchor_index), "Failed to update shape of shape node");
}
return SUCCESS;
}
}
(void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims());
(void)AttrUtils::SetListInt(node->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims());

GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex));
GeTensorDesc tensor(NodeUtils::GetOutputDesc(*node, kDataOutIndex));
std::vector<std::string> input_dims_str;
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
auto shape = data_shape;
auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str());
GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", node->GetName().c_str());
return ret;
}
tensor.SetShape(shape);
int64_t tensor_size = 0;
(void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size);
string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" +
TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" +
TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + node->GetName() + ":" +
std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" +
formats::JoinToString(tensor.GetShape().GetDims());
input_dims_str.emplace_back(input_str);
}
(void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str);
(void)AttrUtils::SetListStr(node->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str);

size_t max_shape_index = 0;
int64_t max_size = 0;
@@ -519,18 +816,72 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {
max_shape_index = i;
}
}
return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), node, data_shape, out_anchor_index);
}

return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), data, data_shape);
///
/// @ingroup ge
/// @brief Set max shape to Data/GetNext node in root graph.
/// @param [in] const std::vector<int64_t> &shapes: dims of shape.
/// @param [in] const NodePtr &data: data in Root/Case graph.
/// @param [in] GeShape &data_shape: dims of data node.
/// @param [in] size_t out_anchor_index: out anchor index of data node.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape,
size_t out_anchor_index) {
GELOGD("Start set shape to %zu out of %s.", out_anchor_index, data->GetName().c_str());
if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to calculate the batched shape for data node %s, the shapes may not match",
data->GetName().c_str());
return INTERNAL_ERROR;
}

if (NodeUtils::UpdateOutputShape(*data, out_anchor_index, data_shape) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str());
return INTERNAL_ERROR;
}
if (!IsGetNextType(data)) {
if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str());
return INTERNAL_ERROR;
}
} else {
if (getnext_sink_dynamic_dims_) {
// need to update shape of Shape_node when getnext_sink_dynamic
GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(data, out_anchor_index), "Failed to update shape of shape node");
}
}

GELOGI("Update the data %s input/output shape to the max %s", data->GetName().c_str(),
formats::ShapeToString(data_shape).c_str());
return SUCCESS;
}

Status MultiBatchClonePass::UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index) {
GELOGD("Start update output shape of shape node insert by adapter, which is the %zu out of %s.", out_anchor_index,
node->GetName().c_str());
auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape();
size_t shape_index = out_anchor_index + (node->GetAllOutDataAnchors().size() / kDivisionConst);
GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(shape_index);
std::vector<int64_t> output_dims = {static_cast<int64_t>(data_shape.GetDims().size())};
GeShape output_shape(output_dims);
output_desc.SetShape(output_shape);
if (node->GetOpDesc()->UpdateOutputDesc(shape_index, output_desc) != SUCCESS) {
GELOGE(FAILED, "Update output desc fail.");
return FAILED;
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @param [in] size_t batch_index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) {
Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t batch_index) {
int node_index = -1;
if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) {
GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str());
@@ -545,6 +896,8 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index

auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
const auto &dims = data_shape.GetDims();
GELOGD("Start update shape of %s , batch index is %zu, dims is %s.", data->GetName().c_str(), batch_index,
formats::JoinToString(dims).c_str());
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
return SUCCESS;
}
@@ -559,35 +912,77 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index
}

auto parent_name = data_name.substr(0, pos);
return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(index), data, data_shape);
return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(batch_index), data, data_shape, kDataOutIndex);
}

///
/// @ingroup ge
/// @brief Set max shape to Data node in root graph.
/// @param [in] const std::vector<int64_t> &shapes: dims of shape.
/// @param [in] const NodePtr &data: data in Root/Case graph.
/// @param [in] GeShape &data_shape: dims of data node.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape) {
// must not be error, the calc result has been checked in function InsertSwitchNForData
if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) {
return INTERNAL_ERROR;
Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) {
if (data_count_from_getnext_ == 0) {
GELOGD("No need to change original graph without getnext node.");
return SUCCESS;
}

if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str());
return INTERNAL_ERROR;
GELOGD("Start change original graph: %s when exit getnext node.", graph->GetName().c_str());
size_t data_index = all_data_nodes_.size() - kNumOfGetnextNode;
for (const auto &node : graph->GetDirectNode()) {
if (IsGetNextType(node)) {
for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++data_index) {
auto out_data_anchor = node->GetOutDataAnchor(out_index);
GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue);
NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index);
GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %zu data node failed.",
out_data_anchor->GetIdx()); return INTERNAL_ERROR);
for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
GE_IF_BOOL_EXEC(in_anchor == nullptr, continue);
NodePtr dst_node = in_anchor->GetOwnerNode();
if (GraphUtils::RemoveEdge(out_data_anchor, in_anchor) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to remove edge between %s to %s", node->GetName().c_str(),
dst_node->GetName().c_str());
return INTERNAL_ERROR;
}
if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), dst_node->GetInDataAnchor(in_anchor->GetIdx())) !=
GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to add edge between %s to %s", data_node->GetName().c_str(),
dst_node->GetName().c_str());
return INTERNAL_ERROR;
}
}
}
if (graph->RemoveNode(node) != GRAPH_SUCCESS) {
GELOGE(GRAPH_FAILED, "Remove node %s failed!", node->GetName().c_str());
return GRAPH_FAILED;
}
break;
}
}
return SUCCESS;
}

if (NodeUtils::UpdateOutputShape(*data, kDataOutIndex, data_shape) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str());
return INTERNAL_ERROR;
NodePtr MultiBatchClonePass::CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor,
size_t data_index) {
size_t out_anchor_index = out_data_anchor->GetIdx();
std::string node_name = out_data_anchor->GetOwnerNode()->GetName() + "_" + std::to_string(out_anchor_index);
OpDescPtr op_desc = MakeShared<OpDesc>(node_name, DATA);
if (op_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create data node failed.");
return nullptr;
}
(void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index);

GELOGI("Update %s input/output shape to %s", data->GetName().c_str(), formats::ShapeToString(data_shape).c_str());
return SUCCESS;
OpDescPtr getnext_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc();
if (getnext_op_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Op desc of %s is nullptr.", out_data_anchor->GetOwnerNode()->GetName().c_str());
return nullptr;
}
if (op_desc->AddInputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add %s input desc failed.", op_desc->GetName().c_str());
return nullptr;
}
if (op_desc->AddOutputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add %s output desc failed.", op_desc->GetName().c_str());
return nullptr;
}
NodePtr data_node = graph->AddNode(op_desc);
GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(OUT_OF_MEMORY, "Add node %s to graph failed.", node_name.c_str());
return nullptr);
GELOGD("Success to create %s node.", data_node->GetName().c_str());
return data_node;
}

///
@@ -598,17 +993,14 @@ Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) {
GELOGD("Start create subgraphs for %s.", graph->GetName().c_str());
const auto &op_desc = case_node_->GetOpDesc();
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
std::vector<NodePtr> input_nodes;
std::vector<NodePtr> output_nodes;
const std::string postfix = kMultiBatchNodePostfix + std::to_string(i);
ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes);
if (subgraph == nullptr) {
GELOGE(FAILED, "Create multi-batch case node failed");
return FAILED;
}

GE_IF_BOOL_EXEC(subgraph == nullptr, GELOGE(FAILED, "Create multi-batch case node failed"); return FAILED);
subgraph->SetName("Batch_" + std::to_string(i));
subgraph->SetParentNode(case_node_);
subgraph->SetParentGraph(graph);
@@ -621,6 +1013,7 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
op_desc->AddSubgraphName(key_name);
op_desc->SetSubgraphInstanceName(i, subgraph->GetName());

GELOGD("The %s has %zu input, %zu output.", subgraph->GetName().c_str(), input_nodes.size(), output_nodes.size());
for (const auto &data : input_nodes) {
GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str());
}
@@ -666,6 +1059,7 @@ Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) {
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) {
GELOGD("Start prune direct output.");
const auto &func_desc = case_node_->GetOpDesc();
uint32_t unused_num = 0;
uint32_t output_num = func_desc->GetOutputsSize();
@@ -710,6 +1104,7 @@ Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) {
///
Status MultiBatchClonePass::UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num) {
if (unused_num == 0) {
GELOGD("No need to update output tensor.");
return SUCCESS;
}



+ 41
- 17
ge/graph/passes/multi_batch_clone_pass.h

@@ -36,6 +36,7 @@ class MultiBatchClonePass : public GraphPass {
/// @return 0: SUCCESS / others: FAILED
///
Status CollectIoNodes(const ComputeGraphPtr &graph);
Status InitParamsOfGetNext(const NodePtr &node);

///
/// @ingroup ge
@@ -49,10 +50,12 @@ class MultiBatchClonePass : public GraphPass {
/// @ingroup ge
/// @brief Create index data node for root graph.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @param [in] NodePtr node: index data node.
/// @param [in] NodePtr shape_node: index data node, DATA or GETDYNAMICDIMS type.
/// @return 0: SUCCESS / others: FAILED
///
Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node);
Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node);

Status CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node);

///
/// @ingroup ge
@@ -70,6 +73,9 @@ class MultiBatchClonePass : public GraphPass {
/// @return 0: SUCCESS / others: FAILED
///
Status CreateIndexNode(const ComputeGraphPtr &graph);
Status AddAttrForGetDynamicDims(const NodePtr &shape_node);
Status LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node);
Status LinkGetDynamicDimsToNetOutput(const NodePtr &output_node);

///
/// @ingroup ge
@@ -78,39 +84,54 @@ class MultiBatchClonePass : public GraphPass {
/// @return 0: SUCCESS / others: FAILED
///
Status CreateInputNode(const ComputeGraphPtr &graph);
Status LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index);

///
/// @ingroup ge
/// @brief Create Const node for root graph.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @brief Set max shape to Data node in root graph.
/// @param [in] const NodePtr &data: data in Root/Case graph.
/// @return 0: SUCCESS / others: FAILED
///
Status CreateConstNode(const ComputeGraphPtr &graph);
Status SetMaxShape(const NodePtr &data);
Status SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index);
///
/// @ingroup ge
/// @brief Set max shape to Data/GetNext node in root graph.
/// @param [in] const std::vector<int64_t> &shapes: dims of shape.
/// @param [in] const NodePtr &data: data in Root/Case graph.
/// @param [in] GeShape &data_shape: dims of data node.
/// @param [in] size_t out_anchor_index: out anchor index of data node.
/// @return 0: SUCCESS / others: FAILED
///
Status SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape,
size_t out_anchor_index);
Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index);

///
/// @ingroup ge
/// @brief Create output node for root graph.
/// @brief Create Const node for root graph.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @return 0: SUCCESS / others: FAILED
///
Status CreateOutputNode(const ComputeGraphPtr &graph);
Status CreateConstNode(const ComputeGraphPtr &graph);
void ChangeConstToData();

///
/// @ingroup ge
/// @brief Set max shape to Data node in root graph.
/// @param [in] const NodePtr &data: data in Root/Case graph.
/// @brief Create output node for root graph.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @return 0: SUCCESS / others: FAILED
///
Status SetMaxShapeToData(const NodePtr &data);
Status CreateOutputNode(const ComputeGraphPtr &graph);

///
/// @ingroup ge
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @param [in] size_t batch_index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateSubgraphData(const NodePtr &data, size_t index);
Status UpdateSubgraphData(const NodePtr &data, size_t batch_index);

///
/// @ingroup ge
@@ -122,13 +143,12 @@ class MultiBatchClonePass : public GraphPass {

///
/// @ingroup ge
/// @brief Set max shape to Data node in root graph.
/// @param [in] const std::vector<int64_t> &shapes: dims of shape.
/// @param [in] const NodePtr &data: data in Root/Case graph.
/// @param [in] GeShape &data_shape: dims of data node.
/// @brief Create nodes for root graph.
/// @param [in] const ComputeGraphPtr &graph: Original graph.
/// @return 0: SUCCESS / others: FAILED
///
Status SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape);
Status CreateOriGraph(const ComputeGraphPtr &graph);
NodePtr CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, size_t data_index);

///
/// @ingroup ge
@@ -168,6 +188,10 @@ class MultiBatchClonePass : public GraphPass {
std::map<string, vector<vector<int64_t>>> data_to_dynamic_info_;

NodePtr case_node_;
size_t data_count_from_getnext_ = 0;
bool getnext_sink_dynamic_dims_ = false;
NodePtr shape_node_;
std::set<NodePtr> out_control_nodes_;
};
} // namespace ge
#endif // GE_GRAPH_PASSES_MULTI_BATCH_CLONE_PASS_H_

+ 4
- 0
ge/graph/passes/unused_args_clean_pass.cc

@@ -204,6 +204,10 @@ Status UnusedArgsCleanPass::RemoveInputTensor(const map<ComputeGraphPtr, map<uin
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, old_anchor), "Remove edge failed");
GELOGI("Remove edge: %s %s", out_node->GetName().c_str(), func_node->GetName().c_str());

if (out_node->GetInDataNodes().empty() && out_node->GetOutAllNodes().empty()) {
GE_CHK_GRAPH_STATUS_RET(out_node->GetOwnerComputeGraph()->RemoveNode(out_node), "Remove node failed: %s",
out_node->GetName().c_str());
}
return SUCCESS;
}
} // namespace ge

+ 172
- 7
ge/graph/preprocess/graph_preprocess.cc

@@ -37,6 +37,7 @@
#include "graph/passes/addn_pass.h"
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "ge/ge_api_types.h"
#ifdef ONLY_COMPILE_OPEN_SRC
#include "graph/passes/assign_remove_pass.h"
#endif
@@ -899,6 +900,160 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) {
}
return SUCCESS;
}
long StringToLongNoThrow(const string &str) {
try {
return std::stol(str);
} catch (const std::invalid_argument &) {
GELOGE(PARAM_INVALID,
"Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
"\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
str.c_str());
return PARAM_INVALID;
} catch (const std::out_of_range &) {
GELOGE(PARAM_INVALID,
"Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
"\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
str.c_str());
return PARAM_INVALID;
}
}
/**
* Parse shape_range from string to vector.
* shape_range from option is normally like "[1~20,3,3~6,-1],[1~20,3,3~6,-1]"
* @param shape_range
*/
Status ParseDynamicInputShapeRange(const std::string &shape_range,
std::vector<std::vector<std::pair<int64_t, int64_t>>> &range) {
if (shape_range.size() < 2) {
GELOGE(PARAM_INVALID, "Shape range %s is invalid.", shape_range.c_str());
return PARAM_INVALID;
}
// different shape_range of single input are split by ']'
vector<string> shape_range_set = ge::StringUtils::Split(shape_range, ']');
if (shape_range_set.empty()) {
GELOGE(PARAM_INVALID, "Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
shape_range.c_str());
return PARAM_INVALID;
}
for (auto &shape_range_str : shape_range_set) {
if (shape_range_str.empty()) {
continue;
}
// trim start bytes, after that, single input should be "1~20,3,3~6,-1"
if (ge::StringUtils::StartWith(shape_range_str, "[")) {
shape_range_str = shape_range_str.substr(1, shape_range_str.size());
}
if (ge::StringUtils::StartWith(shape_range_str, ",")) {
shape_range_str = shape_range_str.substr(2, shape_range_str.size());
}

// parse shape_range of single input. eg. "1~20,3,3~6,-1"
std::vector<std::pair<int64_t, int64_t>> range_of_single_input;
vector<string> dim_range_set = ge::StringUtils::Split(shape_range_str, ',');
for (const auto &range_pair_str : dim_range_set) {
vector<string> range_pair_set = ge::StringUtils::Split(range_pair_str, '~');
pair<int64_t, int64_t> range_pair;
if (range_pair_set.size() == 1) {
// fix dim
auto range_value = StringToLongNoThrow(range_pair_set.at(0).c_str());
if (range_value < 0) {
range_pair = std::make_pair(0, range_value);
} else {
range_pair = std::make_pair(range_value, range_value);
}
} else if (range_pair_set.size() == 2) {
// unknown dim, should get range.
auto range_left = StringToLongNoThrow(range_pair_set.at(0).c_str());
auto range_right = StringToLongNoThrow(range_pair_set.at(1).c_str());
range_pair = std::make_pair(range_left, range_right);
} else {
GELOGE(PARAM_INVALID,
"Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
shape_range.c_str());
return PARAM_INVALID;
}
range_of_single_input.emplace_back(range_pair);
}
range.emplace_back(range_of_single_input);
}
return SUCCESS;
}
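
Note: the branch logic above condenses to a few string operations. A standalone re-implementation sketch (plain std::string, no ge::StringUtils) parsing one hypothetical bracket group:

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::string group = "1~20,3,3~6,-1";  // one bracket group, brackets already stripped
  std::vector<std::pair<int64_t, int64_t>> range;
  std::stringstream ss(group);
  std::string dim;
  while (std::getline(ss, dim, ',')) {
    size_t tilde = dim.find('~');
    if (tilde == std::string::npos) {
      int64_t v = std::stol(dim);
      range.emplace_back(v < 0 ? int64_t{0} : v, v);  // fixed dim (d,d); negative dim (0,d)
    } else {
      range.emplace_back(std::stol(dim.substr(0, tilde)), std::stol(dim.substr(tilde + 1)));
    }
  }
  for (const auto &p : range) std::cout << '(' << p.first << ',' << p.second << ") ";
  std::cout << '\n';  // prints: (1,20) (3,3) (3,6) (0,-1)
  return 0;
}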

Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option,
vector<vector<std::pair<int64_t, int64_t>>> &range_vec) {
auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE);
if (mode_iter == graph_option.end()) {
GELOGD("Graph Option: Can not find %s option in graph options.", OPTION_EXEC_DYNAMIC_EXECUTE_MODE);
return SUCCESS;
}
GELOGD("Graph Option: dynamic_input_mode value is %s.", mode_iter->second.c_str());
if (mode_iter->second != "dynamic_execute") {
return SUCCESS;
}
auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE);
if (iter == graph_option.end()) {
GELOGE(PARAM_INVALID, "Graph option %s is required when %s is dynamic_execute", OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE,
OPTION_EXEC_DYNAMIC_EXECUTE_MODE);
return PARAM_INVALID;
}
GELOGD("GraphOption: dynamic_inputs_shape_range value is %s.", iter->second.c_str());
auto ret = ParseDynamicInputShapeRange(iter->second, range_vec);
GE_CHK_STATUS_RET(ret, "Parse dynamic input shape range failed.");
if (range_vec.size() != user_input.size()) {
GELOGE(PARAM_INVALID, "Dynamic input shape range size is %zu, inputs size is %zu. Not match.", range_vec.size(),
user_input.size());
return PARAM_INVALID;
}
return SUCCESS;
}

Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index,
const vector<vector<std::pair<int64_t, int64_t>>> &range_vec, OpDescPtr &op,
GeTensorDesc &desc) {
auto origin_shape = desc.GetShape();
auto current_shape_range_vec = range_vec.at(index);
if (current_shape_range_vec.size() != origin_shape.GetDimNum()) {
GELOGE(PARAM_INVALID, "Given shape_range dim num is %zu, current dim num is %zu, not match.Pleace Check.",
current_shape_range_vec.size(), origin_shape.GetDimNum());
return PARAM_INVALID;
}
for (size_t i = 0; i < origin_shape.GetDimNum(); ++i) {
if (current_shape_range_vec.at(i).first == current_shape_range_vec.at(i).second) {
// given shape_range is known dim, check is same as origin or not
if (origin_shape.GetDim(i) != current_shape_range_vec.at(i).first) {
GELOGE(PARAM_INVALID, "Given shape range is %ld, current dim shape is %ld, not match.Pleace Check.",
current_shape_range_vec.at(i).first, origin_shape.GetDim(i));
return PARAM_INVALID;
}
origin_shape.SetDim(i, current_shape_range_vec.at(i).first);
} else {
origin_shape.SetDim(i, -1);
}
}
desc.SetShape(origin_shape);
desc.SetShapeRange(current_shape_range_vec);

int64_t dynamic_shape_size = 1;
for (const auto &range_pair : range_vec.at(index)) {
FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second);
dynamic_shape_size *= range_pair.second;
}
auto data_type_size = GetSizeByDataType(desc.GetDataType());
if (data_type_size < 0) {
GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.",
TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str());
return PARAM_INVALID;
}
FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size);
dynamic_shape_size *= data_type_size;
GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size);
ge::TensorUtils::SetSize(desc, dynamic_shape_size);
graphStatus graph_ret = op->UpdateInputDesc(0, desc);
GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret);
graph_ret = op->UpdateOutputDesc(0, desc);
GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret);
return SUCCESS;
}
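
Note: the size reserved above is the product of every range upper bound times the element size (the real code guards each multiply with FMK_INT64_MULCHECK). A standalone arithmetic sketch with a hypothetical bounded range and float32 data:

#include <cstdint>
#include <iostream>

int main() {
  int64_t upper_bounds[] = {20, 3, 6};  // seconds of range [(1,20),(3,3),(3,6)]
  int64_t data_type_size = 4;           // float32
  int64_t size = 1;
  for (int64_t b : upper_bounds) size *= b;  // at most 360 elements
  size *= data_type_size;                    // 1440 bytes
  std::cout << "reserved size = " << size << " bytes\n";
  return 0;
}
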
} // namespace

GraphPrepare::GraphPrepare() : compute_graph_(nullptr) {}
@@ -1103,7 +1258,11 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) {
return SUCCESS;
}

Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option) {
// Get shape range of input in dynamic_execute mode
vector<vector<std::pair<int64_t, int64_t>>> dynamic_shape_range_vec;
auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec);
GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode.");
compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format));
for (NodePtr &input_node : compute_graph_->GetDirectNode()) {
GE_CHECK_NOTNULL(input_node);
@@ -1186,6 +1345,12 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
return graph_ret;
}

if (!dynamic_shape_range_vec.empty()) {
ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc);
GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str());
continue;
}

if (!options_.train_graph_flag) {
Status ret = AdjustDataOpOutput(input_node);
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "AdjustDataOpOutput fail, ret:%u", ret); return ret);
@@ -1359,17 +1524,17 @@ Status GraphPrepare::SaveOriginalGraphToOmModel() {
GELOGI("Prepare %s on graph %s success.", name, compute_graph->GetName().c_str()); \
} while (0)

Status GraphPrepare::PrepareDynShape(ConstGraphPtr graph, const std::vector<GeTensor> &user_input,
Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std::vector<GeTensor> &user_input,
ge::ComputeGraphPtr &compute_graph, uint64_t session_id) {
GE_CHECK_NOTNULL(graph);
GE_CHECK_NOTNULL(graph_node->GetGraph());
GE_CHECK_NOTNULL(compute_graph);

GetLocalOmgContext().type = static_cast<domi::FrameworkType>(options_.framework_type);
const Graph &const_graph = *graph;
const Graph &const_graph = *graph_node->GetGraph();

PP_RUN("Init", Init, const_graph, session_id);
PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE);
PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input);
PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetOptions());
PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation);
PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput);
PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_);
@@ -1834,7 +1999,7 @@ Status GraphPrepare::ProcessNetOutput() {
return SUCCESS;
}

Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input) {
Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option) {
compute_graph_->SetInputSize(user_input.size());
if (user_input.empty()) {
return SUCCESS;
@@ -1846,7 +2011,7 @@ Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input
return ret;
}

ret = UpdateInput(user_input);
ret = UpdateInput(user_input, graph_option);
if (ret != SUCCESS) {
GELOGE(ret, "UpdateInput fail, ret:%u", ret);
return ret;


+ 3
- 3
ge/graph/preprocess/graph_preprocess.h

@@ -45,7 +45,7 @@ class GraphPrepare {
virtual ~GraphPrepare();
GraphPrepare(const GraphPrepare &in) = delete;
GraphPrepare &operator=(const GraphPrepare &in) = delete;
Status PrepareDynShape(ConstGraphPtr graph,
Status PrepareDynShape(const GraphNodePtr &graph_node,
const std::vector<GeTensor> &user_input,
ge::ComputeGraphPtr &compute_graph,
uint64_t session_id = 0);
@@ -63,8 +63,8 @@ class GraphPrepare {
Status CheckRefOp();
Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode);
Status AdjustDataOpOutput(const NodePtr &node);
Status UpdateInput(const std::vector<GeTensor> &user_input);
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input);
Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
Status CheckConstOp();
Status VerifyConstOp(const NodePtr &node);
Status CheckUserInput(const std::vector<GeTensor> &user_input);


+ 5
- 7
ge/graph/preprocess/multi_batch_copy_graph.cc

@@ -1692,13 +1692,11 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) {
}

Status ProcessMultiBatch(ComputeGraphPtr &graph) {
if (GetLocalOmgContext().dynamic_node_type.empty()) {
const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
if (multi_batch_with_switchn == nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}
const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
if (multi_batch_with_switchn == nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}
if (!GetLocalOmgContext().need_multi_batch) {
GELOGI("No need to process_multi for no_train graph.");

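Note: after this change the pass selection hinges only on the MULTI_BATCH_WITH_SWITCHN environment variable, no longer on dynamic_node_type. A standalone sketch of the gate:

#include <cstdlib>
#include <iostream>

int main() {
  // Unset (the default) selects the MultiBatchClonePass path above.
  const char *flag = std::getenv("MULTI_BATCH_WITH_SWITCHN");
  std::cout << (flag == nullptr ? "MultiBatchClonePass path\n"
                                : "SwitchN copy-graph path\n");
  return 0;
}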

+ 2
- 3
ge/graph/preprocess/multi_batch_options.cc

@@ -99,9 +99,8 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector<NodePtr> &data_n
}
GELOGI("Data count is %zu, getnext nosink count is %zu, getnext sink count is %zu.", data_nodes.size(),
getnext_nosink_nodes.size(), getnext_sink_nodes.size());
GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrDataNodes, data_nodes), GELOGW("Set data nodes attr failed.");)
GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes),
GELOGW("Set getnext nosink nodes attr failed.");)
GetLocalOmgContext().data_nodes = data_nodes;
GetLocalOmgContext().getnext_nosink_nodes = getnext_nosink_nodes;
return SUCCESS;
}



+ 23
- 20
ge/hybrid/executor/hybrid_model_async_executor.cc

@@ -98,10 +98,10 @@ Status HybridModelAsyncExecutor::Init() {
return SUCCESS;
}

Status HybridModelAsyncExecutor::PreRun(InputData &current_data) {
Status HybridModelAsyncExecutor::PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data");
RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End");
GE_CHK_STATUS_RET(CopyInputData(current_data), "Failed to copy input data to model");
GE_CHK_STATUS_RET(PrepareInputs(current_data, args), "Failed to copy input data to model");
RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End");
return SUCCESS;
}
@@ -126,14 +126,9 @@ Status HybridModelAsyncExecutor::RunInternal() {
InputData current_data = data_wrapper->GetInput();
GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id_, current_data.index);

HybridModelExecutor::ExecuteArgs args;
args.inputs.resize(input_tensors_.size());
for (auto &it : input_tensors_) {
args.inputs[it.first] = it.second;
}

RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_);
ret = PreRun(current_data);
HybridModelExecutor::ExecuteArgs args;
ret = PreRun(current_data, args);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);
@@ -202,7 +197,9 @@ Status HybridModelAsyncExecutor::SyncVarData() {
return SUCCESS;
}

Status HybridModelAsyncExecutor::CopyInputData(const InputData &current_data) {
Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
args.inputs.resize(input_tensors_.size());
args.input_desc.resize(input_tensor_desc_.size());
const std::vector<DataBuffer> &blobs = current_data.blobs;
for (const auto &it : input_tensors_) {
auto input_index = it.first;
@@ -230,6 +227,13 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData &current_data) {
data_buf.data,
data_buf.length,
RT_MEMCPY_HOST_TO_DEVICE));
args.inputs[input_index] = input_tensor;
if (is_input_dynamic_[input_index]) {
auto &tensor_desc = input_tensor_desc_[input_index];
tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
args.input_desc[input_index] = tensor_desc;
GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
}
}

return SUCCESS;
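
Note: PrepareInputs now both copies data and refreshes tensor descs, but only for inputs recorded as dynamic at init time. A standalone sketch of that gate (all shapes hypothetical):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical model with one static and one dynamic input.
  std::vector<bool> is_input_dynamic = {false, true};
  std::vector<std::vector<int64_t>> current_shapes = {{4, 3, 224, 224}, {4, 1000}};
  std::vector<std::vector<int64_t>> input_desc_shape = {{4, 3, 224, 224}, {-1, 1000}};
  for (std::size_t i = 0; i < is_input_dynamic.size(); ++i) {
    if (is_input_dynamic[i]) {
      input_desc_shape[i] = current_shapes[i];  // refresh desc only when dynamic
      std::cout << "input[" << i << "] desc refreshed to rank "
                << input_desc_shape[i].size() << '\n';
    }
  }
  return 0;
}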
@@ -240,7 +244,10 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
GE_CHECK_NOTNULL(allocator);
int input_index = 0;
for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
GELOGD("Init input[%u], node = %s", input_index, input_node->NodeName().c_str());
GELOGD("Init input[%u], node = %s, is_dynamic = %d",
input_index,
input_node->NodeName().c_str(),
input_node->is_dynamic);
auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
GE_CHECK_NOTNULL(output_desc);
int64_t tensor_size = 0;
@@ -258,6 +265,8 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release()));
tensor.SetName("Input_" + input_node->NodeName());
input_tensors_.emplace(input_index, tensor);
input_tensor_desc_.emplace(input_index, output_desc);
is_input_dynamic_.push_back(input_node->is_dynamic);
input_index += 1;
}

@@ -402,18 +411,12 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<
buffer.data = const_cast<uint8_t *>(tensor.GetData().GetData());
buffer.length = tensor.GetData().size();
input_data.blobs.emplace_back(buffer);
input_data.shapes.emplace_back(tensor.GetTensorDesc().GetShape().GetDims());
}
GE_CHK_STATUS_RET(CopyInputData(input_data), "Failed to copy input data to model");
GELOGD("Done copying input data successfully.");

HybridModelExecutor::ExecuteArgs args;
args.inputs.resize(input_tensors_.size());
args.input_desc.resize(input_tensors_.size());
for (auto &it : input_tensors_) {
args.inputs[it.first] = it.second;
args.input_desc[it.first] = MakeShared<GeTensorDesc>(inputs[it.first].GetTensorDesc());
}

GE_CHK_STATUS_RET(PrepareInputs(input_data, args), "Failed to copy input data to model");
GELOGD("Done copying input data successfully.");
GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");

std::vector<ge::OutputTensorInfo> output_tensor_info_list;


+ 4
- 2
ge/hybrid/executor/hybrid_model_async_executor.h

@@ -70,9 +70,9 @@ class HybridModelAsyncExecutor {

Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::OutputTensorInfo> &outputs);

Status PreRun(InputData &current_data);
Status PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args);

Status CopyInputData(const InputData &current_data);
Status PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args);

std::mutex mu_;
HybridModel *model_;
@@ -86,6 +86,8 @@ class HybridModelAsyncExecutor {

rtStream_t stream_ = nullptr;
std::map<uint32_t, TensorValue> input_tensors_;
std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
std::vector<bool> is_input_dynamic_;
std::shared_ptr<ModelListener> listener_;
};
} // namespace hybrid


+ 2
- 0
ge/hybrid/executor/worker/execution_engine.cc

@@ -221,6 +221,8 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
}
tmp_compute_graph_info.task_id = context_->GetTaskId();
tmp_compute_graph_info.stream_id = context_->GetStreamId();
compute_graph_info.emplace_back(tmp_compute_graph_info);
GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str());
}


+ 195
- 1
ge/hybrid/model/hybrid_model_builder.cc

@@ -35,11 +35,22 @@

namespace ge {
namespace hybrid {
using domi::LogTimeStampDef;
using domi::TaskDef;
namespace {
const uint32_t kSubgraphIndex = 0U;
const uint32_t kVarOutputIndex = 0U;
const uint64_t kProfilingFpStartLogid = 1U;
const uint64_t kProfilingBpEndLogid = 2U;
const uint64_t kProfilingIterEndLogid = 65535U;
const int kBytes = 8;
const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown";
const char *const kProfilingGraph = "ProfilingGraph";
const char *const kProfilingFpNode = "ProfilingFpNode";
const char *const kProfilingBpNode = "ProfilingBpNode";
const char *const kProfilingEndNode = "ProfilingEndNode";
const char *const kProfilingArNode = "ProfilingAllReduceNode";
const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE";

Status SetOutputNameAttr(ComputeGraph &graph) {
vector<string> output_names;
@@ -1531,6 +1542,188 @@ Status HybridModelBuilder::RecoverGraphUnknownFlag() {
return SUCCESS;
}

Status HybridModelBuilder::GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list) {
uint64_t jobid_log_id = ge::GetContext().TraceId();
GELOGD("The first FP operator is %s,, job_id %lu", op_desc->GetName().c_str(), jobid_log_id);

TaskDef job_task_def;
job_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
job_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *job_log_def = job_task_def.mutable_log_timestamp();
if (job_log_def != nullptr) {
job_log_def->set_logid(jobid_log_id);
job_log_def->set_notify(false);
}
task_def_list.emplace_back(job_task_def);
TaskDef fp_task_def;
fp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
fp_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *fp_log_def = fp_task_def.mutable_log_timestamp();
if (fp_log_def != nullptr) {
fp_log_def->set_logid(kProfilingFpStartLogid);
fp_log_def->set_notify(false);
}
task_def_list.emplace_back(fp_task_def);

return SUCCESS;
}

Status HybridModelBuilder::GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id,
vector<domi::TaskDef> &task_def_list) {
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
if (ar_log_def != nullptr) {
ar_log_def->set_logid(log_id);
ar_log_def->set_notify(false);
}
task_def_list.emplace_back(ar_task_def);

return SUCCESS;
}

Status HybridModelBuilder::GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list) {
TaskDef bp_task_def;
bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
bp_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *bp_log_def = bp_task_def.mutable_log_timestamp();
GE_CHECK_NOTNULL(bp_log_def);
bp_log_def->set_logid(kProfilingBpEndLogid);
bp_log_def->set_notify(false);
task_def_list.emplace_back(bp_task_def);

return SUCCESS;
}

Status HybridModelBuilder::GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list) {
TaskDef end_task_def;
end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
end_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *end_log_def = end_task_def.mutable_log_timestamp();
GE_CHECK_NOTNULL(end_log_def);
end_log_def->set_logid(kProfilingIterEndLogid);
end_log_def->set_notify(true);
task_def_list.emplace_back(end_task_def);

return SUCCESS;
}
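
Note: the four generators above share one trace-id convention, taken from the constants at the top of the file: FP start = 1, BP end = 2, iteration end = 65535 (the only notifying task), and each AllReduce bracketed by log_id / log_id + 1. A standalone sketch:

#include <cstdint>
#include <iostream>

int main() {
  const uint64_t kFpStart = 1, kBpEnd = 2, kIterEnd = 65535;
  int64_t ar_log_id = 10;  // hypothetical id carried by the node attribute
  std::cout << "FP start=" << kFpStart << ", BP end=" << kBpEnd
            << ", iter end=" << kIterEnd
            << ", AR window=[" << ar_log_id << ',' << ar_log_id + 1 << "]\n";
  return 0;
}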

Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node) {
GE_CHECK_NOTNULL(node);
const OpDescPtr &op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
GE_CHECK_NOTNULL(compute_graph);

NodePtr node_ptr = nullptr;
vector<domi::TaskDef> task_def_list;
// create fp node
bool is_insert_fp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
if (is_insert_fp_profiling_task) {
(void)GenerateFpProfilingTask(op_desc, task_def_list);
auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(fp_desc);
fp_desc->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(fp_desc);
GELOGD("Create fp profiling node success before.");
}
// create all reduce start node
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if (is_all_reduce && is_insert_bp_profiling_task) {
int64_t log_id = 0;
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
GELOGD("All reduce node profiling task log id: %ld before", log_id);
(void) GenerateArProfilingTask(op_desc, log_id, task_def_list);
string op_name = string(kProfilingArNode) + std::to_string(log_id);
auto ar_desc_start = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(ar_desc_start);
ar_desc_start->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(ar_desc_start);
GELOGD("Create all reduce start profiling node success before.");
}

if (node_ptr != nullptr) {
for (const auto &task_def : task_def_list) {
hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
}
NodeItem *node_item = nullptr;
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
node_item->input_start = 0;
node_item->output_start = 0;
graph_item.node_items_.emplace_back(node_item);
} else {
GELOGD("No need to create profiling node before.");
}

return SUCCESS;
}

Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node) {
GE_CHECK_NOTNULL(node);
const OpDescPtr &op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
GE_CHECK_NOTNULL(compute_graph);

NodePtr node_ptr = nullptr;
vector<domi::TaskDef> task_def_list;
// Create all reduce end node
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if (is_all_reduce && is_insert_bp_profiling_task) {
int64_t log_id = 0;
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
GELOGD("All reduce node profiling task log id: %ld after", log_id);
(void) GenerateArProfilingTask(op_desc, log_id + 1, task_def_list);
string op_name = string(kProfilingArNode) + std::to_string(log_id + 1);
auto ar_desc_end = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(ar_desc_end);
ar_desc_end->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(ar_desc_end);
GELOGD("Create all reduce end profiling node success after.");
}
// create bp node
if (!is_all_reduce && is_insert_bp_profiling_task) {
(void) GenerateBpProfilingTask(op_desc, task_def_list);
auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(bp_op_desc);
bp_op_desc->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(bp_op_desc);
GELOGD("Create bp profiling node success after.");
}
// create end node
bool is_insert_end_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
if (is_insert_end_profiling_task) {
(void)GenerateEndProfilingTask(op_desc, task_def_list);
auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(end_desc);
end_desc->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(end_desc);
GELOGD("Create end profiling node success after.");
}

if (node_ptr != nullptr) {
for (const auto &task_def : task_def_list) {
hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
}
NodeItem *node_item = nullptr;
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
node_item->input_start = 0;
node_item->output_start = 0;
graph_item.node_items_.emplace_back(node_item);
} else {
GELOGD("No need to create profiling node after.");
}

return SUCCESS;
}

Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) {
GELOGD("Start to load subgraph [%s]", graph.GetName().c_str());
// for known partitioned call, load all nodes
@@ -1567,8 +1760,9 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root
graph_item->output_node_ = node_item;
GE_CHK_STATUS_RET_NOLOG(BuildOutputMapping(*graph_item, *node_item, is_root_graph));
}
GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeBefore(*graph_item, node));
graph_item->node_items_.emplace_back(node_item);
GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeAfter(*graph_item, node));
// parse var outputs
GE_CHK_STATUS_RET_NOLOG(ParseVarOutputs(*node_item));
GELOGD("NodeItem created: %s", node_item->DebugString().c_str());


+ 6
- 0
ge/hybrid/model/hybrid_model_builder.h

@@ -79,6 +79,12 @@ class HybridModelBuilder {
Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
Status RecoverGraphUnknownFlag();
Status CheckAicpuOpList();
Status CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node);
Status CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node);
Status GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list);

const char* GetGraphName() const {
return hybrid_model_.model_name_.c_str();


+ 33
- 0
ge/hybrid/node_executor/rts/rts_node_executor.cc

@@ -18,6 +18,7 @@
#include "common/debug/log.h"
#include "common/ge/ge_util.h"
#include "graph/utils/tensor_utils.h"
#include "hybrid/model/hybrid_model.h"
#include "runtime/rt.h"

namespace ge {
@@ -79,12 +80,44 @@ Status IdentityNNodeTask::ExecuteAsync(TaskContext &context, std::function<void(
return SUCCESS;
}

Status ProfilingTraceNodeTask::UpdateArgs(TaskContext &context) {
return SUCCESS;
}

Status ProfilingTraceNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
for (const auto &task_def : task_defs_) {
auto log_time_stamp_def = task_def.log_timestamp();
uint64_t log_id = log_time_stamp_def.logid();
bool notify = log_time_stamp_def.notify();
uint32_t flat = log_time_stamp_def.flat();

GELOGD("ProfilingTraceTask execute async start. logid = %lu, notify = %d.", log_id, notify);
rtError_t rt_ret = rtProfilerTrace(log_id, notify, flat, context.GetStream());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("[%s] ProfilingTraceTask[%lu] execute success.", context.GetNodeName(), log_id);
}

return SUCCESS;
}
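
For context, a hedged sketch of the generation side that would produce a TaskDef this task consumes; the protobuf setter names are assumptions inferred from the logid()/notify()/flat() getters read above:

domi::TaskDef task_def;
// Fill the log_timestamp payload that ExecuteAsync() reads back.
task_def.mutable_log_timestamp()->set_logid(1U);     // sample log id, an assumption
task_def.mutable_log_timestamp()->set_notify(false); // no notify semantics assumed
task_def.mutable_log_timestamp()->set_flat(0U);      // sample flat value, an assumption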

Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
GE_CHECK_NOTNULL(node);

auto op_type = node->GetType();
if (op_type == IDENTITY) {
task = MakeShared<IdentityNodeTask>();
} else if (op_type == IDENTITYN) {
task = MakeShared<IdentityNNodeTask>();
} else if (op_type == PROFILINGTRAININGTRACE) {
auto *task_defs = model.GetTaskDefs(node);
if (task_defs == nullptr || task_defs->empty()) {
GELOGE(INTERNAL_ERROR, "Profiling node has no task to execute.");
return INTERNAL_ERROR;
}
task = MakeShared<ProfilingTraceNodeTask>(*task_defs);
} else {
GELOGE(INTERNAL_ERROR, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), op_type.c_str());
return INTERNAL_ERROR;


+13 -0 ge/hybrid/node_executor/rts/rts_node_executor.h

@@ -18,6 +18,7 @@
#define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_

#include "hybrid/node_executor/node_executor.h"
#include "proto/task.pb.h"

namespace ge {
namespace hybrid {
@@ -35,6 +36,18 @@ class IdentityNNodeTask : public IdentityNodeTask {
Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
};

class ProfilingTraceNodeTask : public NodeTask {
public:
explicit ProfilingTraceNodeTask(const std::vector<domi::TaskDef> &task_defs) : task_defs_(task_defs) {}
~ProfilingTraceNodeTask() override = default;

Status UpdateArgs(TaskContext &context) override;
Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;

private:
std::vector<domi::TaskDef> task_defs_;
};

class RtsNodeExecutor : public NodeExecutor {
public:
Status LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const override;


+1 -1 ge/hybrid/node_executor/task_context.h

@@ -123,7 +123,7 @@ class TaskContext {
Status status_ = SUCCESS;
std::vector<void *> workspaces_;
uint64_t iteration_ = 0;
- uint32_t task_id_= 0;
+ uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
};
} // namespace hybrid


+26 -1 ge/ir_build/ge_ir_build.cc

@@ -36,6 +36,9 @@
#include "model/ge_model.h"
#include "graph/shape_refiner.h"
#include "graph/opsproto_manager.h"
#include "inc/pass_manager.h"
#include "graph/passes/net_output_pass.h"
#include "graph/passes/data_pass.h"

using std::string;
using namespace std;
@@ -233,6 +236,7 @@ class Impl {
ModelBufferData &ge_models);
graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format,
bool is_dynamic_input);
static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph);
void SetRtSocVersion();
void UpdateThreadContext();
void LoadOpsProto();
@@ -243,6 +247,22 @@ class Impl {
OmgContext omg_context_;
};

graphStatus Impl::InferShapePrepare(const ComputeGraphPtr &compute_graph) {
GE_CHECK_NOTNULL(compute_graph);

PassManager prepare_infershape;
prepare_infershape.AddPass("PrepareNetoutput", new (std::nothrow) NetOutputPass);
prepare_infershape.AddPass("PrepareSubGraphReflection", new (std::nothrow) DataPass);

auto ret = prepare_infershape.Run(compute_graph);
if ((ret != SUCCESS) && (ret != NOT_CHANGED)) {
GELOGE(ret, "Prepare for infershape failed, ret:%d", ret);
return ret;
}
GELOGD("Prepare for infershape success!");
return GRAPH_SUCCESS;
}

graphStatus Impl::UpdateDataOpAttr(const Graph &graph) {
GELOGD("Enter Update Data Attr Process!");
if (options_.find(kInputShape) == options_.end()) {
@@ -591,7 +611,12 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) {
return GRAPH_PARAM_INVALID;
}

- auto ret = compute_graph->TopologicalSorting();
+ auto ret = Impl::InferShapePrepare(compute_graph);
+ if (ret != GRAPH_SUCCESS) {
+   return ret;
+ }
+
+ ret = compute_graph->TopologicalSorting();
if (ret != GRAPH_SUCCESS) {
GELOGE(ret, "Acl topo logical sort failed.");
return ret;
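
With the prepare step wired in ahead of sorting, a minimal hedged usage sketch (graph construction elided; the graph name is an assumption):

ge::Graph graph("example_graph");
// ... build the graph via operator construction APIs ...
if (ge::aclgrphInferShapeAndType(graph) != ge::GRAPH_SUCCESS) {
  // prepare, topological sort, or infershape failed
}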


+19 -10 ge/offline/keep_dtype_option.cc

@@ -42,21 +42,29 @@ bool IsOriginalOpFind(OpDescPtr &op_desc, const std::string &op_name) {
}
void KeepDtypeReportError(const std::vector<std::string> &invalid_list) {
- std::stringstream error_ops;
- for (size_t i = 0; i < invalid_list.size(); i++) {
+ std::stringstream err_msg;
+ size_t list_size = invalid_list.size();
+ err_msg << "config file contains " << list_size;
+ if (list_size == 1) {
+   err_msg << " operator not in the graph, op name:";
+ } else {
+   err_msg << " operators not in the graph, op names:";
+ }
+ for (size_t i = 0; i < list_size; i++) {
    if (i == kMaxOpsNum) {
-     error_ops << "...";
+     err_msg << "..";
      break;
    }
-   error_ops << invalid_list[i] << " ";
+   err_msg << invalid_list[i];
+   if (i != list_size - 1) {
+     err_msg << " ";
+   }
  }
- std::string err_msg = "config file contains ";
- err_msg = err_msg.append(std::to_string(invalid_list.size()))
-     .append(" operators not in the graph, op names:")
-     .append(error_ops.str());
  ErrorManager::GetInstance().ATCReportErrMessage(
-     "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.c_str()});
- GELOGE(FAILED, "%s", err_msg.c_str());
+     "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.str().c_str()});
+ GELOGE(FAILED, "%s", err_msg.str().c_str());
}
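
As a worked example of the new formatting: for invalid_list = {"conv1", "conv2"} the message reads "config file contains 2 operators not in the graph, op names:conv1 conv2", while a single entry switches to the singular "operator ... op name:" wording.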
Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype) {
@@ -96,6 +104,7 @@ Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep
invalid_list.push_back(op_name);
}
}
ifs.close();
if (!invalid_list.empty()) {
KeepDtypeReportError(invalid_list);


+2 -0 ge/offline/main.cc

@@ -994,6 +994,8 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output

Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype);
if (ret != SUCCESS) {
(void)ge_generator.Finalize();
(void)ge::GELib::GetInstance()->Finalize();
return ret;
}



+5 -0 inc/external/ge/ge_api_types.h

@@ -61,6 +61,11 @@ const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag";
const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic";
const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory";
const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization";
// Dynamic input flag: ge.exec.dynamicInput=1 enables dynamic input.
// ge.exec.dynamicGraphExecuteMode: execution mode, dynamic_execute [default]
const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput";
const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode";
const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
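
A hedged usage sketch for the new options; the ge::Session overload taking a string map and the sample values are assumptions, not part of this change:

std::map<std::string, std::string> options;
options[ge::OPTION_EXEC_DYNAMIC_INPUT] = "1";                         // enable dynamic input
options[ge::OPTION_EXEC_DYNAMIC_EXECUTE_MODE] = "dynamic_execute";    // default mode per the comment above
options[ge::OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE] = "[1~8,3,224,224]"; // sample shape range, an assumption
ge::Session session(options);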

// Option key: memory init
const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";


+11 -8 inc/framework/common/ge_types.h

@@ -73,14 +73,15 @@ struct DataBuffer {
/// @brief External input data
///
struct InputData {
- uint32_t index;                // Index of input data
- uint32_t timestamp;            // Data creation time
- uint32_t timeout;              // Processing timeout
- uint32_t model_id;             // Model ID required for data processing
- uint64_t request_id = 0;       // Request ID
- std::vector<DataBuffer> blobs; // Actual input data, currently only supports one input
- bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false
- std::string batch_label;       // Gear used for current inference in dynamic batch scene
+ uint32_t index;                           // Index of input data
+ uint32_t timestamp;                       // Data creation time
+ uint32_t timeout;                         // Processing timeout
+ uint32_t model_id;                        // Model ID required for data processing
+ uint64_t request_id = 0;                  // Request ID
+ std::vector<DataBuffer> blobs;            // Actual input data, currently only supports one input
+ bool is_dynamic_batch = false;            // Whether is dynamic batch size scene, default:false
+ std::string batch_label;                  // Gear used for current inference in dynamic batch scene
+ std::vector<std::vector<int64_t>> shapes; // Input shapes
};
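
A minimal hedged sketch of populating the new shapes member next to the existing fields; the buffer payload and model id are assumptions:

ge::InputData input;
input.index = 0;
input.model_id = 1U;                        // assumed model id
input.blobs.emplace_back(ge::DataBuffer()); // payload elided
input.shapes = {{1, 3, 224, 224}};          // one shape vector per input blob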

/// Output result structure definition
@@ -263,6 +264,8 @@ struct ComputeGraphDescInfo {
std::vector<Format> output_format;
std::vector<std::vector<int64_t>> output_shape;
std::vector<DataType> output_data_type;
uint32_t task_id;
uint32_t stream_id;
};

struct OpDescInfo {


+3 -0 inc/framework/common/types.h

@@ -529,6 +529,9 @@ REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait");
// aicpu op for online_infer dynamic_dims
REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims");

// profiling training trace node
REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace");

enum InputMode { INPUT = 0, CONST_INPUT };

// Definition of the processing status enum of the process module


+0 -3 inc/framework/executor/ge_executor.h

@@ -157,9 +157,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {

ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);

- ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
-                                        std::vector<ge::TensorDesc> &output_desc);

ge::Status CommandHandle(const ge::Command &command);

ge::Status SetDump(const DumpConfig &dump_config);


+3 -0 inc/framework/omg/omg_inner_types.h

@@ -26,6 +26,7 @@
#include <vector>
#include "framework/common/fmk_error_codes.h"
#include "register/register_fmk_types.h"
#include "graph/node.h"

using domi::DOMI_TENSOR_ND;
using domi::DOMI_TENSOR_RESERVED;
@@ -120,6 +121,8 @@ struct OmgContext {
std::vector<std::vector<int64_t>> user_real_input_dims;
std::vector<int64_t> cur_dynamic_dims;
bool need_multi_batch = false;
std::vector<NodePtr> data_nodes;
std::vector<NodePtr> getnext_nosink_nodes;
};
} // namespace ge



+1 -1 metadef

@@ -1 +1 @@
- Subproject commit 11c6cf2921b6a385616a3ebc601b4431b55b07db
+ Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900

+1 -1 parser

@@ -1 +1 @@
- Subproject commit 99437c39d26624a14060307366a96b79b1d439c3
+ Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b

+2 -0 tests/ut/ge/CMakeLists.txt

@@ -121,6 +121,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc"
"${GE_CODE_DIR}/metadef/ops/op_imp.cpp"
"${GE_CODE_DIR}/metadef/register/register.cpp"
"${GE_CODE_DIR}/metadef/register/register_pass.cpp"
"${GE_CODE_DIR}/metadef/register/op_kernel_registry.cpp"
"${GE_CODE_DIR}/metadef/register/auto_mapping_util.cpp"
"${GE_CODE_DIR}/metadef/register/tensor_assign.cpp"
@@ -626,6 +627,7 @@ set(PASS_TEST_FILES
"graph/passes/net_output_pass_unittest.cc"
"graph/passes/no_use_reshape_remove_pass_unittest.cc"
"graph/passes/infershape_pass_unittest.cc"
"graph/passes/multi_batch_clone_pass_unittest.cc"
)

set(KERNEL_TEST_FILES


+439 -5 tests/ut/ge/graph/load/davinci_model_unittest.cc

@@ -32,6 +32,18 @@ class UtestDavinciModel : public testing::Test {
void SetUp() {}

void TearDown() {}
public:
NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) {
GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
auto op_desc = std::make_shared<OpDesc>(name, type);
for (uint32_t i = 0; i < in_num; ++i) {
  op_desc->AddInputDesc(test_desc);
}
for (uint32_t i = 0; i < out_num; ++i) {
  op_desc->AddOutputDesc(test_desc);
}
return graph->AddNode(op_desc);
}
};

TEST_F(UtestDavinciModel, init_success) {
@@ -127,13 +139,14 @@ TEST_F(UtestDavinciModel, init_data_op) {
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

- OpDescPtr op_input = CreateOpDesc("data", DATA);
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

+ OpDescPtr op_input = CreateOpDesc("data", DATA);
op_input->AddInputDesc(tensor);
op_input->AddOutputDesc(tensor);
op_input->SetInputOffset({1024});
- op_input->SetOutputOffset({5120});
+ op_input->SetOutputOffset({1024});
NodePtr node_input = graph->AddNode(op_input);

OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
@@ -156,12 +169,14 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) {
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

- OpDescPtr op_input = CreateOpDesc("data", DATA);
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

+ OpDescPtr op_input = CreateOpDesc("data", DATA);
op_input->AddInputDesc(tensor);
op_input->AddOutputDesc(tensor);
op_input->SetInputOffset({1024});
- op_input->SetOutputOffset({5120});
+ op_input->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_input);

uint32_t data_op_index = 0;
@@ -180,8 +195,10 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

- OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

+ OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
op_output->AddInputDesc(tensor);
op_output->SetInputOffset({1024});
op_output->SetSrcName( { "data" } );
@@ -324,5 +341,422 @@ TEST_F(UtestDavinciModel, SyncVarData_test) {
EXPECT_NE(model.SyncVarData(), SUCCESS);
}

TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>();
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
OpDescPtr op_output = CreateOpDesc("output_ascend_mbatch_batch_1", NETOUTPUT);
op_output->AddInputDesc(tensor);
op_output->SetInputOffset({1024});
NodePtr node_output = graph->AddNode(op_output);
EXPECT_EQ(model.InitRealSizeAndShapeInfo(graph, node_output), SUCCESS);
}

TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ2) {
DavinciModel model(0, nullptr);
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");

OpDescPtr data1 = CreateOpDesc("data1", DATA);
GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT);
data1->AddInputDesc(shape_desc);
data1->AddOutputDesc(shape_desc);
NodePtr data1_node = graph->AddNode(data1);

OpDescPtr case_node = CreateOpDesc("case1", CASE);
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
case_node->AddInputDesc(tensor);
case_node->AddOutputDesc(tensor);
NodePtr case1_node = graph->AddNode(case_node);

OpDescPtr output = CreateOpDesc("output1", NETOUTPUT);
output->AddInputDesc(tensor);
output->SetSrcName( { "case1" } );
output->SetSrcIndex( { 0 } );
NodePtr output_node = graph->AddNode(output);

GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0));
GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
(void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8");
(void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true);

model.is_getnext_sink_dynamic_ = false;
model.is_online_infer_dynamic_ = true;
auto ret = model.InitRealSizeAndShapeInfo(graph, output_node);
// GetGearAndRealOutShapeInfo without ATTR_NAME_DYNAMIC_OUTPUT_DIMS
EXPECT_EQ(ret, SUCCESS);
vector<string> dynamic_output_dims = {"0,0,1,1,0,2,2,0,4,3,0,8"};
(void)AttrUtils::SetListStr(output_node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims);
ret = model.InitRealSizeAndShapeInfo(graph, output_node);
EXPECT_EQ(ret, SUCCESS);
}

TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ3) {
DavinciModel model(0, nullptr);
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");

OpDescPtr data1 = CreateOpDesc("data1", DATA);
GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT);
data1->AddInputDesc(shape_desc);
data1->AddOutputDesc(shape_desc);
NodePtr data1_node = graph->AddNode(data1);

OpDescPtr shape_node = CreateOpDesc("ascend_mbatch_get_dynamic_dims_node", GETDYNAMICDIMS);
GeTensorDesc in_tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
GeTensorDesc out_tensor(GeShape({4,3}), FORMAT_NCHW, DT_FLOAT);
shape_node->AddInputDesc(in_tensor);
shape_node->AddOutputDesc(out_tensor);
NodePtr get_dynamic_dims_node = graph->AddNode(shape_node);

OpDescPtr output = CreateOpDesc("output1", NETOUTPUT);
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
output->AddInputDesc(tensor);
output->SetSrcName( { "data1", "ascend_mbatch_get_dynamic_dims_node" } );
output->SetSrcIndex( { 0, 1 } );
NodePtr output_node = graph->AddNode(output);
GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
GraphUtils::AddEdge(get_dynamic_dims_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(1));

(void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1,3;;4,3;,3");

model.is_getnext_sink_dynamic_ = true;
model.is_online_infer_dynamic_ = false;
auto ret = model.InitRealSizeAndShapeInfo(graph, output_node);
EXPECT_EQ(ret, SUCCESS);
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 4;
ret = model.InitRealSizeAndShapeInfo(graph, output_node);
EXPECT_EQ(ret, SUCCESS);
}

TEST_F(UtestDavinciModel, init_data_aipp_info) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc);

GeAttrValue::NAMED_ATTRS aipp_attr;
aipp_attr.SetAttr("aipp_mode", GeAttrValue::CreateFrom<GeAttrValue::INT>(domi::AippOpParams::dynamic));
aipp_attr.SetAttr("related_input_rank", GeAttrValue::CreateFrom<GeAttrValue::INT>(0));
aipp_attr.SetAttr("max_src_image_size", GeAttrValue::CreateFrom<GeAttrValue::INT>(2048));
aipp_attr.SetAttr("support_rotation", GeAttrValue::CreateFrom<GeAttrValue::INT>(1));
EXPECT_TRUE(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr));

AippConfigInfo aipp_info;
EXPECT_EQ(model.GetAippInfo(0, aipp_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetAippInfo(0, aipp_info), SUCCESS);
EXPECT_EQ(aipp_info.aipp_mode, domi::AippOpParams::dynamic);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_static) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc);

AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp");

InputAippType aipp_type;
size_t aipp_index = 0;
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(aipp_type, DATA_WITH_STATIC_AIPP);
EXPECT_EQ(aipp_index, 0xFFFFFFFFu);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_dynamic) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0
AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp");
AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp");

InputAippType aipp_type;
size_t aipp_index = 0;
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_releated) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

{
OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0
AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp");
AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp");
}
{
OpDescPtr op_desc = CreateOpDesc("releated_aipp", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 1
}

InputAippType aipp_type;
size_t aipp_index = 0;
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(aipp_type, DATA_WITH_DYNAMIC_AIPP);
EXPECT_EQ(aipp_index, 1);

EXPECT_EQ(model.input_addrs_list_.size(), 2);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 2);
}

TEST_F(UtestDavinciModel, init_data_aipp_dynamic_conf) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0
AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf");

InputAippType aipp_type;
size_t aipp_index = 0;
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(aipp_type, DYNAMIC_AIPP_NODE);
EXPECT_EQ(aipp_index, 0xFFFFFFFFU);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_dynamic_invalid) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0
AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_invalid");

InputAippType aipp_type;
size_t aipp_index = 0;
EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS);
EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_input_info_empty) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0

vector<string> inputs = {};
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
vector<string> outputs = {};
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);

OriginInputInfo orig_input_info;
EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_input_info_normal) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0

vector<string> inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);

OriginInputInfo orig_input_info;
EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_input_info_invalid) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0

vector<string> inputs = { "NCHW:DT_FLOAT:TensorName" }; // Invalid
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);

OriginInputInfo orig_input_info;
EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);
EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID);
EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}

TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore()
model.runtime_param_.mem_base = (uint8_t *)0x08000000;
model.runtime_param_.mem_size = 5120000;
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor, 512);

OpDescPtr op_desc = CreateOpDesc("data", DATA);
op_desc->AddInputDesc(tensor);
op_desc->AddOutputDesc(tensor);
op_desc->SetInputOffset({1024});
op_desc->SetOutputOffset({1024});
NodePtr node = graph->AddNode(op_desc); // op_index 0

vector<string> inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs);
vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" };
AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs);

vector<InputOutputDims> input_dims;
vector<InputOutputDims> output_dims;
EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), ACL_ERROR_GE_AIPP_NOT_EXIST);
EXPECT_EQ(model.InitNodes(graph), SUCCESS);
EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), SUCCESS);
EXPECT_EQ(input_dims.size(), 1);
EXPECT_EQ(output_dims.size(), 1);

EXPECT_EQ(model.input_addrs_list_.size(), 1);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.op_list_.size(), 1);
}
} // namespace ge

+0 -1 tests/ut/ge/graph/load/kernel_task_info_unittest.cc

@@ -1120,7 +1120,6 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_init_success) {
op_desc->AddOutputDesc(descout);
op_desc->SetId(0);

- model.data_op_list_.push_back(op_desc);
model.op_list_[0] = op_desc;

domi::TaskDef task_def;


+247 -0 tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc

@@ -0,0 +1,247 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/multi_batch_clone_pass.h"

#include <gtest/gtest.h>
#include <set>
#include <string>

#include "inc/pass_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/common/local_context.h"
#include "graph/passes/multi_batch_pass.h"
#include "graph/preprocess/multi_batch_copy_graph.h"
#include "graph/preprocess/insert_op/util_insert_aipp_op.h"
#include "framework/omg/omg_inner_types.h"
#include "register/op_registry.h"


namespace ge {
class UtestMultiBatchClonePass : public testing::Test {
protected:
void SetUp() {
SetLocalOmgContext(domi::GetContext());
GetLocalOmgContext().dynamic_image_size.clear();
GetLocalOmgContext().dynamic_batch_size.clear();
}
void TearDown() {
GetLocalOmgContext().dynamic_image_size.clear();
GetLocalOmgContext().dynamic_batch_size.clear();
GetLocalOmgContext().dynamic_node_type.clear();
}

public:
NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) {
GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
auto op_desc = std::make_shared<OpDesc>(name, type);
for (uint32_t i = 0; i < in_num; ++i) {
  op_desc->AddInputDesc(test_desc);
}
for (uint32_t i = 0; i < out_num; ++i) {
  op_desc->AddOutputDesc(test_desc);
}
return graph->AddNode(op_desc);
}

NodePtr MakeConstNode(const ComputeGraphPtr &graph) {
static uint32_t index = 0;
GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
auto op_desc = std::make_shared<OpDesc>("dynamic_const_" + std::to_string(index++), "Const");
op_desc->AddOutputDesc(test_desc);
return graph->AddNode(op_desc);
}

void make_original_graph(const ComputeGraphPtr &graph) {
auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
{
auto data1 = MakeNode(graph, 1, 1, "data", "Data");
GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};

GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
auto const1 = MakeConstNode(graph);
GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
auto const2 = MakeConstNode(graph);
GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
}

auto bn_conv1 = MakeNode(graph, 4, 1, "bn_conv1", "BNInference");
{
GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(0));
auto const1 = MakeConstNode(graph);
GraphUtils::AddEdge(const1->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(1));
auto const2 = MakeConstNode(graph);
GraphUtils::AddEdge(const2->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(2));
auto const3 = MakeConstNode(graph);
GraphUtils::AddEdge(const3->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(3));
}

auto scale_conv1 = MakeNode(graph, 4, 1, "scale1", "Scale");
{
GraphUtils::AddEdge(bn_conv1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(0));
auto const1 = MakeConstNode(graph);
GraphUtils::AddEdge(const1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(1));
auto const2 = MakeConstNode(graph);
GraphUtils::AddEdge(const2->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(2));
}

auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
GraphUtils::AddEdge(scale_conv1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
}

void GraphWithJustData(const ComputeGraphPtr &graph) {
auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
{
auto data1 = MakeNode(graph, 1, 1, "data", "Data");
GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};

GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
auto const1 = MakeConstNode(graph);
GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
auto const2 = MakeConstNode(graph);
GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
}

auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
}

void GraphWithGetNextNosink(const ComputeGraphPtr &graph) {
auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
{
auto data1 = MakeNode(graph, 1, 1, "IteratorGetNext_data", "Data");
GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};

GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
auto const1 = MakeConstNode(graph);
GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
auto const2 = MakeConstNode(graph);
GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
}

auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
}

// GetNext has one data output and one shape output
void GraphWithGetNextSink(const ComputeGraphPtr &graph) {
auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D");
{
auto data1 = MakeNode(graph, 1, 2, "data", "IteratorV2");
GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT);
data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
data1->GetOpDesc()->UpdateOutputDesc(1, shape_desc);
AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0);
GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})};

GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
auto identity = MakeNode(graph, 1, 0, "identity", "Identity");
GraphUtils::AddEdge(data1->GetOutDataAnchor(1), identity->GetInDataAnchor(0));
auto const1 = MakeConstNode(graph);
GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
auto const2 = MakeConstNode(graph);
GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2));
}

auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
}
};

// graph is nullptr
TEST_F(UtestMultiBatchClonePass, graph_nullptr) {
PassManager pass_manager;
pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
ComputeGraphPtr graph;
EXPECT_EQ(pass_manager.Run(graph), PARAM_INVALID);
}

// graph with subgraph
TEST_F(UtestMultiBatchClonePass, graph_with_subgraph) {
PassManager pass_manager;
pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
make_original_graph(graph);
EXPECT_EQ(pass_manager.Run(graph), SUCCESS);

ComputeGraphPtr owner = std::make_shared<ComputeGraph>("test_owner");
auto func_node = MakeNode(owner, 3, 1, "test_if", "If");
graph->SetParentNode(func_node);
graph->SetParentGraph(owner);
EXPECT_EQ(pass_manager.Run(graph), SUCCESS);
}

// graph does not need multi-batch processing, so the pass is skipped
TEST_F(UtestMultiBatchClonePass, uncompute_graph) {
MultiBatchClonePass multi_batch_clone;
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
make_original_graph(graph);
GetLocalOmgContext().need_multi_batch = false;
EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
}


// compute_graph with data from DATA
TEST_F(UtestMultiBatchClonePass, compute_graph_with_data) {
MultiBatchClonePass multi_batch_clone;
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
GraphWithJustData(graph);
GetLocalOmgContext().need_multi_batch = true;
EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
GetLocalOmgContext().dynamic_node_type = DATA;
GetLocalOmgContext().dynamic_dims = "1;2;4;8";
EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
EXPECT_EQ(GetLocalOmgContext().data_nodes.size(), 1);
}

// compute_graph with data from GetNext_nosink
TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_nosink) {
MultiBatchClonePass multi_batch_clone;
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
GraphWithGetNextNosink(graph);
GetLocalOmgContext().need_multi_batch = true;
GetLocalOmgContext().dynamic_node_type = GETNEXT;
GetLocalOmgContext().dynamic_dims = "1;2;4;8";
EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 1);
}

// compute_graph with data from GetNext_sink
TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_sink) {
MultiBatchClonePass multi_batch_clone;
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
GraphWithGetNextSink(graph);
GetLocalOmgContext().need_multi_batch = true;
GetLocalOmgContext().dynamic_node_type = GETNEXT;
GetLocalOmgContext().dynamic_dims = "1;2;4;8";
EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 0);
}

}  // namespace ge
