diff --git a/build.sh b/build.sh
index 5222ab5c..f2fafd48 100644
--- a/build.sh
+++ b/build.sh
@@ -235,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
 #   fi
 #
   if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
-    echo "Generating coverage statistics, please wait..."
-    cd ${BASEPATH}
-    rm -rf ${BASEPATH}/cov
-    mkdir ${BASEPATH}/cov
-    lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-    lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
-    cd ${BASEPATH}/cov
-    genhtml coverage.info
+    echo "Generating coverage statistics, please wait..."
+    cd ${BASEPATH}
+    rm -rf ${BASEPATH}/cov
+    mkdir ${BASEPATH}/cov
+    lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
+    lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
+    cd ${BASEPATH}/cov
+    genhtml coverage.info
  fi

  # generate output package in tar form, including ut/st libraries/executables
"graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/hccl_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" "graph/manager/graph_context.cc" "graph/manager/graph_manager.cc" "graph/manager/graph_manager_utils.cc" @@ -375,6 +375,7 @@ set(TRAIN_SRC_LIST "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "hybrid/node_executor/controlop/control_op_executor.cc" "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "hybrid/node_executor/hccl/hccl_node_executor.cc" @@ -605,37 +606,37 @@ set(INFER_SRC_LIST "graph/manager/util/rt_context_util.cc" "graph/manager/util/variable_accelerate_ctrl.cc" "graph/manager/util/debug.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - "graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - 
"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "single_op/task/op_task.cc" "single_op/task/build_task_utils.cc" "single_op/task/tbe_task_builder.cc" @@ -706,7 +707,7 @@ target_compile_options(ge_runner PRIVATE -O2 -fno-common $<$:-Werror=unused-variable> - $<$:-Werror=unused-const-variable> + $<$:-Werror=unused-const-variable -Werror=format> ) target_include_directories(ge_runner SYSTEM PRIVATE @@ -775,7 +776,7 @@ target_compile_options(ge_compiler PRIVATE -O2 -fno-common $<$:-Werror=unused-variable> - $<$:-Werror=unused-const-variable> + $<$:-Werror=unused-const-variable -Werror=format> ) target_include_directories(ge_compiler SYSTEM PRIVATE diff --git a/ge/client/proto/task.proto b/ge/client/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/client/proto/task.proto +++ b/ge/client/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; 
   LabelSwitchByIndexDef label_switch_by_index = 39;
+  KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
   uint32 kernel_ext_info_size = 19;
 }

+message KernelDefWithHandle {
+  KernelContext context = 1;
+
+  uint64 handle = 10;
+  string dev_func = 11;
+  uint32 block_dim = 12;
+  uint32 args_size = 13;
+  bytes args = 14;
+  bytes sm_desc = 15;
+  string original_kernel_key = 16;
+  string node_info = 17;
+}
+
 message KernelContext {
   uint32 kernel_type = 1;
   uint32 op_id = 2;  // OP type in CCE
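Note: the same KernelDefWithHandle message and TaskDef field 40 are added to both copies of task.proto (client here, common later in the patch). A hypothetical sketch of how the generated C++ API for such a message is typically driven; the accessor names follow standard protobuf codegen, and the domi package/namespace is an assumption not shown in the hunk above:

```cpp
// Assumes task.pb.h generated from the task.proto shown above; "domi" is an
// assumed package name for illustration only.
#include "task.pb.h"

domi::TaskDef MakeHandleTask(uint64_t handle, const std::string &dev_func) {
  domi::TaskDef task_def;
  // Standard protobuf codegen: mutable_<field>() for message-typed fields,
  // set_<field>() for scalars and strings.
  domi::KernelDefWithHandle *kernel = task_def.mutable_kernel_with_handle();
  kernel->set_handle(handle);      // uint64 handle = 10;
  kernel->set_dev_func(dev_func);  // string dev_func = 11;
  kernel->set_block_dim(1);        // uint32 block_dim = 12;
  kernel->mutable_context()->set_kernel_type(0);  // KernelContext context = 1;
  return task_def;
}
```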
diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc
index e708653a..12999e54 100755
--- a/ge/common/auth/file_saver.cc
+++ b/ge/common/auth/file_saver.cc
@@ -62,7 +62,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {
   while (size > size_1g) {
     write_count = mmWrite(fd, reinterpret_cast(seek), size_1g);
     if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
-      GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
+      GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
       return FAILED;
     }
     size -= size_1g;
@@ -75,7 +75,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {

   // -1: Failed to write to file; - 2: Illegal parameter
   if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
-    GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
+    GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
     return FAILED;
   }

@@ -133,7 +133,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
       WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break);
   // Write partition data
   for (const auto &partitionData : partition_datas) {
-    GELOGI("GC:size[%zu]", partitionData.size);
+    GELOGI("GC:size[%u]", partitionData.size);
     GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
         WriteData(static_cast(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; break);
@@ -305,7 +305,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
     // Write partition data
     auto &cur_partition_datas = all_partition_datas[index];
     for (const auto &partition_data : cur_partition_datas) {
-      GELOGI("GC:size[%zu]", partition_data.size);
+      GELOGI("GC:size[%u]", partition_data.size);
       GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
           WriteData(static_cast(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; break);
diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc
index 0b9e9dcc..5c768e22 100755
--- a/ge/common/dump/dump_op.cc
+++ b/ge/common/dump/dump_op.cc
@@ -99,8 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
     }
     int64_t output_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Get output size filed");
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size failed");
+      return ACL_ERROR_GE_INTERNAL_ERROR;
     }
     GELOGD("Get output size in lanch dump op is %ld", output_size);
     output.set_size(output_size);
@@ -126,8 +126,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
     }
     int64_t input_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Get output size filed");
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get input size failed");
+      return ACL_ERROR_GE_INTERNAL_ERROR;
     }
     GELOGD("Get input size in lanch dump op is %ld", input_size);
     input.set_size(input_size);
@@ -151,31 +151,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
   size_t proto_size = op_mapping_info.ByteSizeLong();
   bool ret = op_mapping_info.SerializeToString(&proto_msg);
   if (!ret || proto_size == 0) {
-    GELOGE(FAILED, "Protobuf serialize failed,proto_size is %zu", proto_size);
-    return FAILED;
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size);
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }

   rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }

   rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }

   constexpr int32_t io_addr_num = 2;
@@ -193,8 +193,8 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
                            nullptr,  // no need smDesc
                            stream_);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret);
-    return rt_ret;
+    GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   GELOGI("Kernel launch dump op success");
   return SUCCESS;
@@ -204,9 +204,15 @@ Status DumpOp::LaunchDumpOp() {
   GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str());
   int32_t device_id = 0;
   rtError_t rt_ret = rtGetDevice(&device_id);
-  if (rt_ret != RT_ERROR_NONE || device_id < 0) {
-    GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
-    return RT_FAILED;
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  if (device_id < 0) {
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
+           "Check device_id failed, device_id = %d, which should be not less than 0.",
+           device_id);
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }
   aicpu::dump::OpMappingInfo op_mapping_info;
   auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/";
@@ -232,29 +238,31 @@ Status DumpOp::LaunchDumpOp() {
   task.mutable_op()->set_op_name(op_desc_->GetName());
   task.mutable_op()->set_op_type(op_desc_->GetType());
   if (dump_properties_.GetDumpMode() == kDumpOutput) {
-    if (DumpOutput(task) != SUCCESS) {
-      GELOGE(FAILED, "Dump output failed");
-      return FAILED;
+    auto ret = DumpOutput(task);
+    if (ret != SUCCESS) {
+      GELOGE(ret, "Dump output failed");
+      return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
   if (dump_properties_.GetDumpMode() == kDumpInput) {
-    if (DumpInput(task) != SUCCESS) {
-      GELOGE(FAILED, "Dump input failed");
-      return FAILED;
+    auto ret = DumpInput(task);
+    if (ret != SUCCESS) {
+      GELOGE(ret, "Dump input failed");
+      return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
   if (dump_properties_.GetDumpMode() == kDumpAll) {
     auto ret = DumpOutput(task);
     if (ret != SUCCESS) {
-      GELOGE(FAILED, "Dump output failed when in dumping all");
-      return FAILED;
+      GELOGE(ret, "Dump output failed when in dumping all");
+      return ret;
     }
     ret = DumpInput(task);
     if (ret != SUCCESS) {
-      GELOGE(FAILED, "Dump input failed when in dumping all");
-      return FAILED;
+      GELOGE(ret, "Dump input failed when in dumping all");
+      return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
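Note: the dump_op.cc changes above stop collapsing every runtime failure into a flat RT_FAILED: the raw rtError_t is logged and then mapped into the GE status space with RT_ERROR_TO_GE_STATUS, and the real status from DumpOutput/DumpInput is propagated instead of FAILED. A condensed sketch of the pattern, assuming only names that appear in the hunks above (rtMalloc and rtError_t are Ascend runtime API; GELOGE and RT_ERROR_TO_GE_STATUS are the repo's own macros):

```cpp
// Sketch of the error-propagation pattern introduced above; compiles only
// against the repo's runtime and logging headers.
Status AllocDeviceBuffer(void **dev_mem, size_t size) {
  rtError_t rt_ret = rtMalloc(dev_mem, size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    // Log the precise runtime code rather than a generic RT_FAILED ...
    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    // ... and return it mapped into the GE status space, so callers can
    // tell runtime failures apart instead of seeing a flat FAILED.
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
```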
diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc
index 85f4038e..0cb581d7 100644
--- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc
+++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc
@@ -162,7 +162,7 @@ Status FormatTransferC1hwncoc0Hwcn::TransFormat(const TransArgs &args, TransResu
 Status FormatTransferC1hwncoc0Hwcn::TransShape(Format src_format, const std::vector &src_shape, DataType data_type,
                                                Format dst_format, std::vector &dst_shape) {
   GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferC1hwncoc0Hwcn, FORMAT_C1HWNCoC0, FORMAT_HWCN)
diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc
index 79af84f7..eaa19d7d 100644
--- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc
+++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc
@@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
                       std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto c1 = Ceil(c, c0);
@@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
 Status TransShapeDhwckToFz3D(const std::vector &src_shape, DataType data_type,
                              std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   auto d = src_shape.at(kDhwcnD);
   auto h = src_shape.at(kDhwcnH);
@@ -163,14 +163,14 @@ Status FormatTransferDhwcnFractalZ3D::TransShape(Format src_format, const std::v
                                                  DataType data_type, Format dst_format,
                                                  std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_DHWCN && dst_format == FORMAT_FRACTAL_Z_3D) {
     return TransShapeDhwckToFz3D(src_shape, data_type, dst_shape);
   }

-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferDhwcnFractalZ3D, FORMAT_DHWCN, FORMAT_FRACTAL_Z_3D)
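Note: this and the following format-transfer diffs all apply one scheme: the old UNSUPPORTED/PARAM_INVALID returns from TransShape are split into ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID (element type not supported), ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID (wrong rank or dimension values), and ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID (no transfer registered for the src/dst pair). A condensed sketch of the resulting validation order, using only names from the hunks above:

```cpp
// Condensed from the pattern above; Status, the check helpers, and the
// ACL_ERROR_GE_TRANSSHAPE_* codes come from the repo's headers.
Status TransShapeSketch(Format src, Format dst, DataType dt,
                        const std::vector<int64_t> &src_shape,
                        std::vector<int64_t> &dst_shape) {
  if (CheckDataTypeSupport(dt) != SUCCESS) {
    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;  // wrong element type
  }
  if (src == FORMAT_DHWCN && dst == FORMAT_FRACTAL_Z_3D) {
    if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) {
      return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;   // wrong rank/dims
    }
    return TransShapeDhwckToFz3D(src_shape, dt, dst_shape);
  }
  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;      // pair not registered
}
```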
diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc
index cd1e0607..3a18312a 100644
--- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc
+++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc
@@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
                       std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto c1 = Ceil(c, c0);
@@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat
 Status TransShapeDhwncToFz3DTranspose(const std::vector &src_shape, DataType data_type,
                                       std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kDhwncDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   auto d = src_shape.at(kDhwncD);
   auto h = src_shape.at(kDhwncH);
@@ -164,14 +164,14 @@ Status FormatTransferDhwncFractalZ3DTranspose::TransShape(Format src_format, con
                                                           DataType data_type, Format dst_format,
                                                           std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_DHWNC && dst_format == FORMAT_FRACTAL_Z_3D_TRANSPOSE) {
     return TransShapeDhwncToFz3DTranspose(src_shape, data_type, dst_shape);
   }

-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferDhwncFractalZ3DTranspose, FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE)
diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc
index cb528453..c3b288c1 100755
--- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc
+++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc
@@ -87,8 +87,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap
       hw_shape.push_back(DIM_DEFAULT_VALUE);
       hw_shape.push_back(src_shape[kNdDimIndexN]);
       if (!IsShapeValid(dst_shape)) {
-        GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-        return PARAM_INVALID;
+        GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
+        return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
       }
       return SUCCESS;
     default:
@@ -106,8 +106,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap
       hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]);
       hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]);
       if (!IsShapeValid(dst_shape)) {
-        GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-        return PARAM_INVALID;
+        GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
+        return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
       }
       return SUCCESS;
   }
@@ -299,11 +299,19 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &
 Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type,
                                            Format dst_format, ShapeVector &dst_shape) {
-  if (!IsDataTypeSupport(data_type) || !CheckShape(src_format, src_shape)) {
-    GELOGE(PARAM_INVALID, "Trans format from %s to %s, src shape %s, data type %s is not supported",
+  if (!IsDataTypeSupport(data_type)) {
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, data type %s is not supported",
           TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(),
           ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str());
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
+  }
+  if (!CheckShape(src_format, src_shape)) {
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, data type %s is not supported",
+           TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(),
+           ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   ShapeVector hw_shape;
   return TransShapeToFracNz(src_shape, data_type, dst_shape, hw_shape);
@@ -334,7 +342,7 @@ Status FormatTransferFractalNzND::TransShape(Format src_format, const ShapeVecto
                                              Format dst_format, ShapeVector &dst_shape) {
   GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported",
          TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str());
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferFractalNz, FORMAT_ND, FORMAT_FRACTAL_NZ)
diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc
index dbceb911..45c6d157 100644
--- a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc
+++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc
@@ -42,7 +42,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_
 Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto c1 = Ceil(c, c0);
@@ -54,15 +54,16 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_
   dst_shape.push_back(kNiSize);
   dst_shape.push_back(c0);
   if (!IsShapeValid(dst_shape)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }

 Status TransShapeNchwToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kNchwDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto n = src_shape.at(kNchwN);
@@ -74,7 +75,7 @@ Status TransShapeNchwToFz(const std::vector &src_shape, DataType data_t
 Status TransShapeHwcnToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kHwcnDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto h = src_shape.at(kHwcnH);
@@ -87,7 +88,7 @@ Status TransShapeHwcnToFz(const std::vector &src_shape, DataType data_t
 Status TransShapeNhwcToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kNhwcDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto n = src_shape.at(kNhwcN);
@@ -369,7 +370,7 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r
 Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector &src_shape,
                                           DataType data_type, Format dst_format, std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_NHWC && dst_format == FORMAT_FRACTAL_Z) {
@@ -382,7 +383,7 @@ Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector
                                           &src_shape, DataType data_type, Format dst_format,
                                           std::vector &dst_shape) {
   GELOGD("The shape derivation from FracZ to HWCN is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferFracZHwcn, FORMAT_FRACTAL_Z, FORMAT_HWCN)
diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc
index 915d0d76..90bf8fcb 100755
--- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc
+++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc
@@ -160,7 +160,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult &
 Status FormatTransferFracZNchw::TransShape(Format src_format, const std::vector &src_shape, DataType data_type,
                                            Format dst_format, std::vector &dst_shape) {
   GELOGD("The shape derivation from FracZ to NCHW is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferFracZNchw, FORMAT_FRACTAL_Z, FORMAT_NCHW)
diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc
index 7840b556..1e29baf2 100755
--- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc
+++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc
@@ -43,8 +43,9 @@ Status TransShapeHwcnToC1hwncoc0(const DataType &data_type, const std::vector
                                  std::vector &dst_shape) {
   if (src_format == FORMAT_HWCN && CheckDataTypeSupported(data_type)) {
     if (!CheckShapeValid(src_shape, kHwcnDimsNum)) {
-      GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str());
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s",
+             ShapeToString(src_shape).c_str());
+      return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
     }
     return TransShapeHwcnToC1hwncoc0(data_type, src_shape, dst_shape);
+  } else if (src_format != FORMAT_HWCN) {
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   } else {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
 }
diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc
index a37ba2b5..fd09b34c 100755
--- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc
@@ -157,7 +157,7 @@ Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult
 Status FormatTransferNc1hwc0Nhwc::TransShape(Format src_format, const std::vector &src_shape, DataType data_type,
                                              Format dst_format, std::vector &dst_shape) {
   GELOGD("The shape derivation from NC1HWC0 to NHWC is not unique. Trans shape in this direction is not supported");
-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nhwc, FORMAT_NC1HWC0, FORMAT_NHWC)
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
index 49b19f46..dd8721c0 100644
--- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
@@ -45,7 +45,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_
 Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector &dst_shape) {
   auto c0 = GetCubeSizeByDataType(data_type);
   if (c0 < 0) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   auto chw = c * h * w;
@@ -59,8 +59,9 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type
   dst_shape.push_back(c0);

   if (!IsShapeValid(dst_shape)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }
@@ -68,7 +69,7 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type
 Status TransShapeNchwToFzC04(const std::vector &src_shape, DataType data_type,
                              std::vector &dst_shape) {
   if (!CheckShapeValid(src_shape, kNchwDimsNum)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }

   auto n = src_shape.at(kNchwN);
@@ -293,13 +294,13 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult
 Status FormatTransferNchwToFZC04::TransShape(Format src_format, const std::vector &src_shape,
                                              DataType data_type, Format dst_format, std::vector &dst_shape) {
   if (CheckDataTypeSupport(data_type) != SUCCESS) {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }

   if (src_format == FORMAT_NCHW && dst_format == FORMAT_FRACTAL_Z_C04) {
     return TransShapeNchwToFzC04(src_shape, data_type, dst_shape);
   }

-  return UNSUPPORTED;
+  return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
 }

 REGISTER_FORMAT_TRANSFER(FormatTransferNchwToFZC04, FORMAT_NCHW, FORMAT_FRACTAL_Z_C04)
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc
index 98af1efa..752a4d64 100755
--- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc
@@ -32,12 +32,13 @@ Status TransShapeNchwToNc1hwc0(const std::vector &src_shape, DataType d
                                std::vector &dst_shape) {
   int64_t c0 = GetCubeSizeByDataType(data_type);
   if (c0 <= 0) {
-    GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid");
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid");
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
   if (!CheckShapeValid(src_shape, kNchwDimsNum)) {
-    GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s",
+           ShapeToString(src_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   dst_shape.clear();
   dst_shape.push_back(src_shape.at(kNchwN));
@@ -46,8 +47,9 @@ Status TransShapeNchwToNc1hwc0(const std::vector &src_shape, DataType d
   dst_shape.push_back(src_shape.at(kNchwW));
   dst_shape.push_back(c0);
   if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }
@@ -193,7 +195,7 @@ Status FormatTransferNchwNc1hwc0::TransShape(Format src_format, const std::vecto
   if (src_format == FORMAT_NCHW) {
     return TransShapeNchwToNc1hwc0(src_shape, data_type, dst_shape);
   } else {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   }
 }
diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc
index 8faaf4e7..2c6b392d 100755
--- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc
+++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc
@@ -34,8 +34,8 @@ Status TransShapeNhwcToNc1hwc0(const std::vector &src_shape, DataType d
                                std::vector &dst_shape) {
   int64_t c0 = GetCubeSizeByDataType(data_type);
   if (c0 <= 0) {
-    GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid");
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid");
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
   dst_shape.clear();
   dst_shape.push_back(src_shape.at(kNhwcN));
@@ -44,8 +44,9 @@ Status TransShapeNhwcToNc1hwc0(const std::vector &src_shape, DataType d
   dst_shape.push_back(src_shape.at(kNhwcW));
   dst_shape.push_back(c0);
   if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) {
-    GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s",
+           ShapeToString(dst_shape).c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   return SUCCESS;
 }
@@ -189,12 +190,15 @@ Status FormatTransferNhwcNc1hwc0::TransShape(Format src_format, const std::vecto
                                              DataType data_type, Format dst_format,
                                              std::vector &dst_shape) {
   if (src_format == FORMAT_NHWC && CheckDataTypeSupported(data_type)) {
     if (!CheckShapeValid(src_shape, kNhwcDimsNum)) {
-      GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str());
-      return PARAM_INVALID;
+      GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s",
+             ShapeToString(src_shape).c_str());
+      return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
     }
     return TransShapeNhwcToNc1hwc0(src_shape, data_type, dst_shape);
+  } else if (src_format != FORMAT_NHWC) {
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   } else {
-    return UNSUPPORTED;
+    return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID;
   }
 }
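Note: for reference, the NC1HWC0 shape derived above from NCHW/NHWC is (N, ceil(C/C0), H, W, C0), where C0 is the cube size returned by GetCubeSizeByDataType for the element type (16 for fp16). A self-contained arithmetic sketch with hypothetical stand-ins for the repo's helpers:

```cpp
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the repo's Ceil helper.
static int64_t CeilDiv(int64_t a, int64_t b) { return (a + b - 1) / b; }

// NCHW (n, c, h, w) -> NC1HWC0 (n, c1, h, w, c0) with c1 = ceil(c / c0).
std::vector<int64_t> NchwToNc1hwc0(const std::vector<int64_t> &nchw, int64_t c0) {
  return {nchw[0], CeilDiv(nchw[1], c0), nchw[2], nchw[3], c0};
}

// Example, fp16 (c0 = 16): (2, 35, 7, 7) -> (2, 3, 7, 7, 16).
```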
diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc
index 9be74b1f..de0b456c 100755
--- a/ge/common/formats/format_transfers/format_transfer_transpose.cc
+++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc
@@ -211,16 +211,16 @@ Status GetPermByForamt(Format src_format, Format dst_format, std::vector
   auto iter = dst_iter->second.find(dst_format);
   if (iter == dst_iter->second.end()) {
     std::string error = "Failed to trans shape, do not support transpose from format " +
                         FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " +
                         FmtToStr(TypeUtils::FormatToSerialString(dst_format));
-    GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str());
-    return UNSUPPORTED;
+    GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   }
   perm = iter->second;
   return SUCCESS;
@@ -244,7 +244,7 @@ Status FormatTransferTranspose::TransShape(Format src_format, const std::vector<
   std::vector perm_arg;
   GE_CHK_STATUS_RET_NOLOG(GetPermByForamt(src_format, dst_format, perm_arg));
   if (!IsShapeArgValid(src_shape, perm_arg)) {
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
   }
   dst_shape = TransShapeByPerm(src_shape, perm_arg);
   return SUCCESS;
diff --git a/ge/common/formats/formats.cc b/ge/common/formats/formats.cc
index 0b21a884..2b979e9a 100755
--- a/ge/common/formats/formats.cc
+++ b/ge/common/formats/formats.cc
@@ -64,8 +64,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form
     std::string error = "Failed to trans data from format " +
                         FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " +
                         FmtToStr(TypeUtils::FormatToSerialString(args.dst_format));
-    GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str());
-    return UNSUPPORTED;
+    GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str());
+    return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID;
   }

   return transfer->TransShape(src_format, src_shape, data_type, dst_format, dst_shape);
diff --git a/ge/common/formats/utils/formats_trans_utils.cc b/ge/common/formats/utils/formats_trans_utils.cc
index 18f2d70f..052951ce 100755
--- a/ge/common/formats/utils/formats_trans_utils.cc
+++ b/ge/common/formats/utils/formats_trans_utils.cc
@@ -32,7 +32,7 @@ int64_t GetCubeSizeByDataType(DataType data_type) {
   if (size <= 0) {
     std::string error = "Failed to get cube size, the data type " +
                         FmtToStr(TypeUtils::DataTypeToSerialString(data_type)) + " is invalid";
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return -1;
   } else if (size == 1) {
     return kCubeSize * 2;  // 32 bytes cube size
@@ -61,7 +61,7 @@ bool CheckShapeValid(const std::vector &shape, const int64_t expect_dim
   if (expect_dims <= 0 || shape.size() != static_cast(expect_dims)) {
     std::string error = "Invalid shape, dims num " + FmtToStr(shape.size()) +
                         ", expect " + FmtToStr(expect_dims);
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return false;
   }
   return IsShapeValid(shape);
@@ -75,12 +75,12 @@ bool IsShapeValid(const std::vector &shape) {
   for (auto dim : shape) {
     if (dim < 0) {
       std::string error = "Invalid negative dims in the shape " + FmtToStr(ShapeToString(shape));
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+      GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
       return false;
     }
     if (dim != 0 && kShapeItemNumMAX / dim < num) {
       std::string error = "Shape overflow, the total count should be less than " + FmtToStr(kShapeItemNumMAX);
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+      GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
       return false;
     }
     num *= dim;
@@ -108,7 +108,7 @@ bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector &expect_
                         FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) +
                         ", invalid relationship between src shape " +
                         FmtToStr(ShapeToString(args.src_shape)) + " and dst " +
                         FmtToStr(ShapeToString(args.dst_shape));
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return false;
   }
   return true;
@@ -121,7 +121,7 @@ bool IsTransShapeDstCorrect(const TransArgs &args, std::vector &expect_
                         FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", the dst shape" +
                         FmtToStr(ShapeToString(args.dst_shape)) + " is invalid, expect" +
                         FmtToStr(ShapeToString(expect_shape));
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+    GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
     return false;
   }
   return true;
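Note: formats_trans_utils.cc above downgrades the logging inside the bool-returning validators from GE_ERRORLOG_AND_ERRORMSG to GE_WARNINGLOG_AND_ERRORMSG. Since the caller now raises the granular TRANSSHAPE status itself, logging an error at both layers would double-report. A schematic of the resulting division of responsibility, using only names from the diffs above (Status and the error codes come from the repo's headers):

```cpp
// Helper: warns and returns false; it no longer owns the error code.
bool CheckShapeValidSketch(const std::vector<int64_t> &shape, int64_t expect_dims) {
  if (expect_dims <= 0 || shape.size() != static_cast<size_t>(expect_dims)) {
    // GE_WARNINGLOG_AND_ERRORMSG(...): warn here, let the caller pick the code.
    return false;
  }
  return true;
}

// Caller: decides the user-visible status exactly once.
Status TransShapeCaller(const std::vector<int64_t> &src_shape) {
  if (!CheckShapeValidSketch(src_shape, 4)) {
    return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID;
  }
  return SUCCESS;
}
```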
diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc
index 75a36d99..38de251e 100644
--- a/ge/common/ge/plugin_manager.cc
+++ b/ge/common/ge/plugin_manager.cc
@@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec
   std::vector path_vec;
   SplitPath(path, path_vec);
   for (const auto &single_path : path_vec) {
-    GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID,
+    GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID,
                     "The shared library file path is too long!");
                     continue);
     // load break when number of loaded so reach maximum
@@ -125,7 +125,8 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec
      GE_IF_BOOL_EXEC(error == nullptr, error = "");
      ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"},
          {"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)});
-      GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. Errormessage[%s]!",
+      GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID,
+             "Failed to dlopen the shared library path[%s]. Errormessage[%s]!",
             file_path_dlopen.c_str(), error);
       continue;
     }
@@ -138,8 +139,8 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec
         ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"},
             {"mmDlsym", FmtToStr(func_name) + " is skipped since function" +
             FmtToStr(func_name) + " is not existed!"});
-        GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(),
-               func_name.c_str());
+        GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!",
+               func_name.c_str(), func_name.c_str());
         is_valid = false;
         break;
       }
diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc
index 0b592e11..bf8c3ce0 100755
--- a/ge/common/helper/model_cache_helper.cc
+++ b/ge/common/helper/model_cache_helper.cc
@@ -28,7 +28,7 @@
 #include "framework/common/util.h"
 #include "graph/detail/attributes_holder.h"
 #include "graph/detail/model_serialize_imp.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/model.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/tensor_utils.h"
@@ -1000,8 +1000,8 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
     auto offset = (tensor_addr_mgr.offset);
     // Check logic address and offset
     if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
-      GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address,
-             offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
+      GELOGW("Check logic_address[%lu] and offset [%lu] of %s failed, var mem logic base is %lu, abandon",
+             logic_address, offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
       return PARAM_INVALID;
     }
     // Offset is needed by SaveVarVddr instead of logic address
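Note: plugin_manager.cc above swaps GE_PLGMGR_PATH_INVALID for the ACL-visible ACL_ERROR_GE_PLGMGR_PATH_INVALID when a shared-library path is over-long, fails to open, or lacks a required symbol. The checks follow the usual dynamic-loading pattern; the sketch below uses plain POSIX dlopen/dlsym as stand-ins for the repo's mmDlopen/mmDlsym wrappers:

```cpp
#include <dlfcn.h>
#include <cstdio>

// Returns the handle only if every required symbol resolves, mirroring the
// "is_valid" loop in LoadSo above. POSIX stand-in for mmDlopen/mmDlsym.
void *LoadPluginChecked(const char *path, const char *const *symbols, int n) {
  void *handle = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
  if (handle == nullptr) {
    fprintf(stderr, "dlopen(%s) failed: %s\n", path, dlerror());
    return nullptr;
  }
  for (int i = 0; i < n; ++i) {
    if (dlsym(handle, symbols[i]) == nullptr) {
      fprintf(stderr, "symbol %s missing, skipping %s\n", symbols[i], path);
      dlclose(handle);
      return nullptr;
    }
  }
  return handle;
}
```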
diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc
index 1d5a4a9b..37cb53bc 100644
--- a/ge/common/helper/model_helper.cc
+++ b/ge/common/helper/model_helper.cc
@@ -23,7 +23,7 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/omg/version.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/graph_utils.h"

@@ -479,8 +479,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
   Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
   if (status != SUCCESS) {
-    GELOGE(status, "Parse model content failed!");
-    return status;
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
+    return ACL_ERROR_GE_PARAM_INVALID;
   }

   file_header_ = reinterpret_cast(model_data.model_data);
@@ -517,8 +517,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
   }

   if (is_assign_model_) {
-    GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
-    return GE_EXEC_LOAD_MODEL_REPEATED;
+    GELOGE(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
+    return ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED;
   }

   if (ReleaseLocalModelData() != SUCCESS) {
@@ -528,8 +528,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
   Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
   if (status != SUCCESS) {
-    GELOGE(status, "Parse model content failed!");
-    return status;
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
+    return ACL_ERROR_GE_PARAM_INVALID;
   }

   file_header_ = reinterpret_cast(model_data.model_data);
@@ -537,7 +537,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
   //model verison 1.0 file header does not have model_num member
   is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION &&
                             file_header_->model_num > kStatiOmFileModelNum;
-  GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version);
+  GELOGD("cur om model is ge root model or no %d, model version %u", is_unknown_shape_model_, file_header_->version);

   OmFileLoadHelper om_load_helper;
   if (is_unknown_shape_model_) {
@@ -609,7 +609,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {
     GeModelPtr cur_model = ge::MakeShared();
     Status ret = LoadModelData(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
     }

     if (is_first_model) {
@@ -622,22 +622,22 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {

     ret = LoadWeights(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
     }

     ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
     }

     ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
     }

     ret = LoadTask(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_TASK_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED;
     }
     root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model);
   }
@@ -746,7 +746,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om
       GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed.");
       return INTERNAL_ERROR;
     }
-    GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num());
+    GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num());
   }
   cur_model->SetModelTaskDef(task);
   return SUCCESS;
diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc
index d1c52b13..b42aa759 100644
--- a/ge/common/helper/om_file_helper.cc
+++ b/ge/common/helper/om_file_helper.cc
@@ -203,7 +203,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
     auto partition_table = reinterpret_cast(model_data + cur_offset);
     size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
     cur_offset += partition_table_size;
-    GELOGD("Cur model index %zu: ModelPartitionTable num :%u, "
+    GELOGD("Cur model index %u: ModelPartitionTable num :%u, "
            "ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
            index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
     if (model_data_size <= cur_offset) {
@@ -219,7 +219,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
       partition.type = partition_table->partition[i].type;
       if (index >= model_contexts_.size()) {
         if (index != model_contexts_.size()) {
-          GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index);
+          GELOGE(FAILED, "cur index is %u make model_contexts_ overflow", index);
           return FAILED;
         }

@@ -231,16 +231,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
       }

      if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) {
-        GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
+        GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %u is greater than the model data size %u.",
                partition.size + cur_offset, model_data_size);
         return GE_EXEC_MODEL_DATA_SIZE_INVALID;
       }
       cur_offset += partition.size;
-      GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast(partition.type), partition.size, index);
+      GELOGD("Partition, type:%d, size:%u, model_index:%u", static_cast(partition.type), partition.size, index);
     }
   }
   if (cur_offset != model_data_size) {
-    GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size);
+    GELOGE(FAILED, "do not get the complete model, read end offset:%u, all size:%u", cur_offset, model_data_size);
     return FAILED;
   }
   return SUCCESS;
diff --git a/ge/common/kernel_store.cc b/ge/common/kernel_store.cc
index 0fad096a..d746fd10 100755
--- a/ge/common/kernel_store.cc
+++ b/ge/common/kernel_store.cc
@@ -51,7 +51,7 @@ bool KernelStore::Build() {
     kernel_head.name_len = static_cast(kernel->GetName().length());
     kernel_head.bin_len = static_cast(kernel->GetBinDataSize());

-    GELOGD("get kernel bin name %s, addr %p, size %u",
+    GELOGD("get kernel bin name %s, addr %p, size %zu",
            kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize());
     mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head));
     GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false);
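Note: the om_file_helper.cc hunks above are mostly %zu/%u specifier fixes for uint32_t offsets and sizes, but the bounds check they log for deserves spelling out: `partition.size > model_data_size || cur_offset > model_data_size - partition.size` rejects a partition that would run past the buffer without ever computing the overflow-prone sum `cur_offset + partition.size`. A standalone sketch of the same test:

```cpp
#include <cstdint>

// Overflow-safe "does [cur_offset, cur_offset + part_size) fit in total_size?"
// Written as in LoadModelPartitionTable above: subtract instead of add, so a
// uint32_t sum can never wrap around and defeat the check.
bool PartitionFits(uint32_t cur_offset, uint32_t part_size, uint32_t total_size) {
  if (part_size > total_size) {
    return false;
  }
  return cur_offset <= total_size - part_size;
}
```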
diff --git a/ge/common/math/math_util.h b/ge/common/math/math_util.h
index 3255e3c1..e077f4b5 100755
--- a/ge/common/math/math_util.h
+++ b/ge/common/math/math_util.h
@@ -878,11 +878,11 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) {
     return INTERNAL_ERROR;                                                                        \
   }

-#define FMK_INT64_UINT32_MULCHECK(a, b)                                                           \
-  if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) {                                     \
-    GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast(a),     \
-           static_cast(b));                                                                      \
-    return INTERNAL_ERROR;                                                                        \
+#define FMK_INT64_UINT32_MULCHECK(a, b)                                                           \
+  if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) {                                     \
+    GELOGW("Int64 %ld and Uint32 %u multiplication can result in overflow!", static_cast(a),     \
+           static_cast(b));                                                                      \
+    return INTERNAL_ERROR;                                                                       \
   }

 #define FMK_FP16_ZEROCHECK(a)                                                                     \
diff --git a/ge/common/model_parser/base.cc b/ge/common/model_parser/base.cc
index 64277199..22837be6 100644
--- a/ge/common/model_parser/base.cc
+++ b/ge/common/model_parser/base.cc
@@ -34,7 +34,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro
                                                                       ge::ModelData &model_data) {
   std::string real_path = RealPath(model_path);
   if (real_path.empty()) {
-    GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path);
+    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path);
     return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
   }
diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc
index 43ed6434..9060f82b 100644
--- a/ge/common/profiling/ge_profiling.cc
+++ b/ge/common/profiling/ge_profiling.cc
@@ -181,7 +181,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
   if (type != kProfCommandhandleFinalize) {
     command.module_index = prof_config_param->profSwitch;
   }
-  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
+  GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(),
          command.module_index);
   if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
     GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
@@ -192,7 +192,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
     return ge::FAILED;
   }

-  GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index);
+  GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index);
   return ge::SUCCESS;
 }
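Note: ge_profiling.cc above (and profiling_manager.cc just below) change %llx to %lx for the uint64_t module masks: on LP64 Linux targets uint64_t is `unsigned long`, so %lx is the exactly matching specifier, and with -Werror=format from the CMake hunks a leftover %llx or %d mismatch now fails the build. A compact self-contained reference, assuming an LP64 target:

```cpp
#include <cinttypes>
#include <cstdio>

int main() {
  uint64_t module = 0xdeadbeefULL;
  size_t n = 3;
  // On LP64 (Linux x86_64/aarch64) uint64_t is unsigned long -> %lx.
  printf("module: 0x%lx, num: %zu\n", module, n);
  // The portable alternative works on any ABI:
  printf("module: 0x%" PRIx64 "\n", module);
  return 0;
}
```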
diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 1fc4dba6..86b1b2c5 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -21,7 +21,7 @@
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "runtime/base.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"

 namespace {
 const char *const kTrainingTrace = "training_trace";
@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
     uint32_t stream_id = task.stream_id;
     std::string shape_type = task.shape_type;
     int64_t cur_iter_num = task.cur_iter_num;
+    uint32_t task_type = task.task_type;
     data = model_name.append(" ")
                      .append(op_name).append(" ")
                      .append(std::to_string(block_dim)).append(" ")
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
                      .append(std::to_string(stream_id)).append(" ")
                      .append(std::to_string(model_id)).append(" ")
                      .append(shape_type).append(" ")
-                     .append(std::to_string(cur_iter_num)).append("\n");
+                     .append(std::to_string(cur_iter_num)).append(" ")
+                     .append(std::to_string(task_type)).append("\n");

     ReporterData reporter_data{};
     reporter_data.deviceId = device_id;
@@ -538,7 +540,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi
   for (auto device_id_module : device_id_module_map_) {
     if (device_id_module.second != 0) {
       uint32_t device_id = static_cast(device_id_module.first);
-      GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second);
+      GELOGI("Prof finalize: device_id: %u, module: 0x%lx.", device_id, device_id_module.second);
       rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id);
       if (rt_ret != RT_ERROR_NONE) {
         GELOGE(FAILED, "Runtime profiler stop failed.");
@@ -627,7 +629,7 @@ Status ProfilingManager::ProfParseParam(const std::map
   }

   if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast(device_list.size())) {
-    GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size());
+    GELOGE(FAILED, "Config para device num: %d not equal to device list size: %zu.", device_num, device_list.size());
     return FAILED;
   }
 #endif
@@ -657,7 +659,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
   for (int32_t i = 0; i < device_num; i++) {
     device_id_ptr[i] = static_cast(device_list[i]);
   }
-  GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);
+  GELOGI("Runtime config param: 0x%lx, device num: %d.", module, device_num);

   rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get());
   if (rt_ret != RT_ERROR_NONE) {
@@ -699,7 +701,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
   for (int32_t i = 0; i < device_num; i++) {
     device_id_ptr[i] = static_cast(device_list[i]);
   }
-  GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num);
+  GELOGI("Prof stop: runtime config param: 0x%lx, device num: %d", module, device_num);
   rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(FAILED, "Prof stop: runtime profiler config proc failed.");
diff --git a/ge/common/proto/task.proto b/ge/common/proto/task.proto
index d0c09840..0da5631e 100644
--- a/ge/common/proto/task.proto
+++ b/ge/common/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
   LabelSetDef label_set = 37;
   LabelGotoExDef label_goto_ex = 38;
   LabelSwitchByIndexDef label_switch_by_index = 39;
+  KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
   uint32 kernel_ext_info_size = 19;
 }

+message KernelDefWithHandle {
+  KernelContext context = 1;
+
+  uint64 handle = 10;
+  string dev_func = 11;
+  uint32 block_dim = 12;
+  uint32 args_size = 13;
+  bytes args = 14;
+  bytes sm_desc = 15;
+  string original_kernel_key = 16;
+  string node_info = 17;
+}
+
 message KernelContext {
   uint32 kernel_type = 1;
   uint32 op_id = 2;  // OP type in CCE
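Note: with the profiling_manager.cc hunks above, each reported task record gains a trailing task_type column; the record stays a space-separated line terminated by "\n" for the profiling reporter. A hypothetical helper (not the repo's API) showing just the appended column:

```cpp
#include <cstdint>
#include <string>

// Illustrative only: the record is a space-separated line; the patch above
// appends task_type as the final column before the newline.
std::string AppendTaskType(std::string record_without_newline, uint32_t task_type) {
  return record_without_newline.append(" ")
                               .append(std::to_string(task_type))
                               .append("\n");
}
```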
+REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 755bdf97..26e53c7b 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -32,37 +32,37 @@ set(SRC_LIST "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" - "../graph/load/new_model_manager/davinci_model.cc" - "../graph/load/new_model_manager/davinci_model_parser.cc" - "../graph/load/new_model_manager/model_manager.cc" - "../graph/load/new_model_manager/tbe_handle_store.cc" - "../graph/load/new_model_manager/cpu_queue_schedule.cc" - "../graph/load/new_model_manager/model_utils.cc" - "../graph/load/new_model_manager/aipp_utils.cc" - "../graph/load/new_model_manager/data_inputer.cc" - "../graph/load/new_model_manager/data_dumper.cc" - "../graph/load/new_model_manager/zero_copy_task.cc" - "../graph/load/new_model_manager/zero_copy_offset.cc" - "../graph/load/new_model_manager/task_info/task_info.cc" - "../graph/load/new_model_manager/task_info/event_record_task_info.cc" - "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_task_info.cc" - "../graph/load/new_model_manager/task_info/label_set_task_info.cc" - "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "../graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "../graph/load/model_manager/davinci_model.cc" + "../graph/load/model_manager/davinci_model_parser.cc" + "../graph/load/model_manager/model_manager.cc" + "../graph/load/model_manager/tbe_handle_store.cc" + "../graph/load/model_manager/cpu_queue_schedule.cc" + "../graph/load/model_manager/model_utils.cc" + "../graph/load/model_manager/aipp_utils.cc" + "../graph/load/model_manager/data_inputer.cc" + "../graph/load/model_manager/data_dumper.cc" + "../graph/load/model_manager/zero_copy_task.cc" + "../graph/load/model_manager/zero_copy_offset.cc" + "../graph/load/model_manager/task_info/task_info.cc" + "../graph/load/model_manager/task_info/event_record_task_info.cc" + "../graph/load/model_manager/task_info/event_wait_task_info.cc" + "../graph/load/model_manager/task_info/fusion_start_task_info.cc" + "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "../graph/load/model_manager/task_info/kernel_task_info.cc" + 
"../graph/load/model_manager/task_info/label_set_task_info.cc" + "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "../graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "../graph/load/model_manager/task_info/stream_active_task_info.cc" + "../graph/load/model_manager/task_info/stream_switch_task_info.cc" + "../graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "../graph/load/model_manager/task_info/end_graph_task_info.cc" + "../graph/load/model_manager/task_info/model_exit_task_info.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "../graph/common/local_context.cc" "../opskernel_manager/ops_kernel_builder_manager.cc" "../single_op/single_op_manager.cc" @@ -104,6 +104,7 @@ set(SRC_LIST "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "../hybrid/node_executor/controlop/control_op_executor.cc" "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "../hybrid/node_executor/rts/rts_node_executor.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 0ea0e66d..af8237e0 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -29,15 +29,15 @@ #include "framework/common/util.h" #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "mmpa/mmpa_api.h" #include "single_op/single_op_manager.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" using std::string; @@ -226,7 +226,7 @@ Status GeExecutor::Initialize() { } GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), - "Failed to initialize OpsKernelBuilders"); + "Failed to initialize OpsKernelBuilders."); // Start profiling Options profiling_options; @@ -454,7 +454,7 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector & if (all_data_dims[i] < 0) { cur_dynamic_dims.push_back(dynamic_dims[i]); } else if (static_cast(all_data_dims[i]) != dynamic_dims[i]) { - GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %d should be %d", + GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", i, dynamic_dims[i], all_data_dims[i]); return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; } diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 87abdade..4966eeb5 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -22,37 +22,37 @@ local_ge_executor_src_files := \ ../graph/manager/util/debug.cc \ ../model/ge_model.cc \ ../model/ge_root_model.cc \ - 
../graph/load/new_model_manager/davinci_model.cc \ - ../graph/load/new_model_manager/davinci_model_parser.cc \ - ../graph/load/new_model_manager/model_manager.cc \ - ../graph/load/new_model_manager/tbe_handle_store.cc \ - ../graph/load/new_model_manager/cpu_queue_schedule.cc \ - ../graph/load/new_model_manager/model_utils.cc \ - ../graph/load/new_model_manager/aipp_utils.cc \ - ../graph/load/new_model_manager/data_inputer.cc \ - ../graph/load/new_model_manager/data_dumper.cc \ - ../graph/load/new_model_manager/zero_copy_task.cc \ - ../graph/load/new_model_manager/zero_copy_offset.cc \ - ../graph/load/new_model_manager/task_info/task_info.cc \ - ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ - ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + ../graph/load/model_manager/davinci_model.cc \ + ../graph/load/model_manager/davinci_model_parser.cc \ + ../graph/load/model_manager/model_manager.cc \ + ../graph/load/model_manager/tbe_handle_store.cc \ + ../graph/load/model_manager/cpu_queue_schedule.cc \ + ../graph/load/model_manager/model_utils.cc \ + ../graph/load/model_manager/aipp_utils.cc \ + ../graph/load/model_manager/data_inputer.cc \ + ../graph/load/model_manager/data_dumper.cc \ + ../graph/load/model_manager/zero_copy_task.cc \ + ../graph/load/model_manager/zero_copy_offset.cc \ + ../graph/load/model_manager/task_info/task_info.cc \ + ../graph/load/model_manager/task_info/event_record_task_info.cc \ + ../graph/load/model_manager/task_info/event_wait_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_start_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_task_info.cc \ + ../graph/load/model_manager/task_info/label_set_task_info.cc \ + ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + ../graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + ../graph/load/model_manager/task_info/stream_active_task_info.cc \ + 
../graph/load/model_manager/task_info/stream_switch_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + ../graph/load/model_manager/task_info/end_graph_task_info.cc \ + ../graph/load/model_manager/task_info/model_exit_task_info.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ ../opskernel_manager/ops_kernel_builder_manager.cc \ ../single_op/single_op_manager.cc \ ../single_op/single_op_model.cc \ @@ -95,6 +95,7 @@ local_ge_executor_src_files := \ ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ ../hybrid/node_executor/controlop/control_op_executor.cc \ ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ ../hybrid/node_executor/rts/rts_node_executor.cc \ diff --git a/ge/executor/proto/task.proto b/ge/executor/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/executor/proto/task.proto +++ b/ge/executor/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 6f9e60db..a20ff437 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -228,37 +228,37 @@ OME_HOST_SRC_FILES := \ graph/manager/util/rt_context_util.cc \ graph/manager/util/variable_accelerate_ctrl.cc \ graph/manager/util/debug.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - 
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ single_op/task/op_task.cc \ single_op/task/build_task_utils.cc \ single_op/task/tbe_task_builder.cc \ @@ -270,7 +270,7 @@ OME_HOST_SRC_FILES := \ single_op/single_op_manager.cc \ hybrid/hybrid_davinci_model_stub.cc \ hybrid/node_executor/aicpu/aicpu_ext_info.cc \ - # graph/load/new_model_manager/task_info/hccl_task_info.cc + # graph/load/model_manager/task_info/hccl_task_info.cc OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 06dc2b96..4aebffb4 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -33,7 +33,7 @@ namespace { uint64_t size = data_num * sizeof(TYPE); \ ge_tensor = MakeShared<GeTensor>(out_desc, size); \ GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \ + GELOGD("node:%s allocate output %zu success, size=%ld", op_desc->GetName().c_str(), i, size); \ ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ } else { \ @@ -72,7 +72,7
@@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { num_size = max_range_size; } if (num_size < 0) { - GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size); + GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%ld.", num_size); return INTERNAL_ERROR; } data_num = static_cast<uint64_t>(num_size); diff --git a/ge/ge_local_engine/proto/task.proto b/ge/ge_local_engine/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/ge_local_engine/proto/task.proto +++ b/ge/ge_local_engine/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 460d5068..4434dc2b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ graph/label/partitioned_call_label_maker.cc \ graph/label/while_label_maker.cc \ graph/load/graph_loader.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/hccl_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/data_dumper.cc \
graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/hccl_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ graph/manager/graph_context.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ @@ -300,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ hybrid/node_executor/controlop/control_op_executor.cc \ hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ hybrid/node_executor/hccl/hccl_node_executor.cc \ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index d032965b..fe7ea3bf 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -670,7 +670,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { if (!is_offline) { - (void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true); + (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); } if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index ed77a7f1..2731e076 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -37,6 +37,8 @@ using domi::BuildMode; namespace { const int32_t kInvalidPerfLevel = -1; +const int64_t kProfilingArStep = 2; +const int64_t kProfilingArStartLogid = 3; enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; } // namespace namespace ge { @@ -187,8 +189,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph return SUCCESS; } -Status GraphBuilder::Build(ComputeGraphPtr 
&comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, - GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { +Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { if (comp_graph == nullptr) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); return GE_GRAPH_PARAM_NULLPTR; @@ -203,18 +204,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetGraphUnknownFlag()) { GE_CHK_STATUS_RET( - BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), + BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), "Build for dynamic shape graph failed."); return SUCCESS; } - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); return SUCCESS; } -Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, +Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { if (ge::GetContext().GetHostExecFlag()) { GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); @@ -222,7 +223,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v } GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); - Status ret = SecondPartition(comp_graph, subgraph_list); + Status ret = SecondPartition(comp_graph); GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); auto subgraph_map = graph_partitioner_.GetSubGraphMap(); @@ -458,6 +459,11 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { if (all_reduce_node_index[i] == node_index) { GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid; + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); continue; } } @@ -470,7 +476,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { } Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, - std::vector<SubGraphInfoPtr> &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Start to build BuildForDynamicShape for dynamic shape."); @@ -517,7 +522,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } // known shape build flow - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); } ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); @@ -719,7 +724,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) return SUCCESS; } -Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { +Status
GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { GE_TIMESTAMP_START(GraphPartition2); auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); if (ret != SUCCESS) { @@ -727,10 +732,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vectorGetName().c_str(), mem_type); + GELOGD("[%s] has attr input_memory_type %u", op_desc->GetName().c_str(), mem_type); for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); @@ -755,7 +758,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { while (true) { const auto &src_desc = src_node->GetOpDesc(); GE_IF_BOOL_EXEC(src_desc == nullptr, continue); - GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), + GELOGD("[%s:%u] set attr output_memory_type %d", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), mem_type); if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, mem_type)) { diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index 524b60e0..fb9ab6bd 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -47,8 +47,7 @@ class GraphBuilder { GraphBuilder(const GraphBuilder &in) = delete; GraphBuilder &operator=(const GraphBuilder &in) = delete; virtual ~GraphBuilder() = default; - Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, - GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); + Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); void SetOptions(const GraphManagerOptions &options); private: @@ -59,12 +58,12 @@ class GraphBuilder { Status UpdateDataInputSize(const ge::NodePtr &node_ptr); Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); - Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list); + Status SecondPartition(ge::ComputeGraphPtr &comp_graph); Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); - Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, + Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); - Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, + Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 21d6a49e..ebd23948 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -24,6 +24,7 @@ #include "graph/buffer.h" #include "graph/ge_attr_value.h" #include "graph/ge_context.h" +#include "graph/types.h" #include "graph/node.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" @@ -542,11 +543,31 @@ void GetMaxBatchAllMemorySize(std::map> &batch_all_ } } +void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const
NodePtr &node) { + auto node_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); + // if input size just one and from variable, no need to reassign continuous memory + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { + auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); + auto in_node = peer_out_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(in_node == nullptr, return); + if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { + GELOGI("node only one input and from variable, set continuous alloced. node_name:%s", node->GetName().c_str()); + (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + } + } +} + void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { vector<int64_t> temp; std::map<std::string, vector<int64_t>> batch_all_memory_size; std::map<std::string, int64_t> batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { + MarkContinuousAllocedForOneInputFromVariable(n); + auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -1131,18 +1152,73 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, return block; } -MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, - const bool is_op_reuse_mem) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); +bool IsOutputIndexRef(const OpDescPtr &op_desc, uint32_t index) { + auto output_tensor = op_desc->GetOutputDescPtr(index); + bool dst_reuse_input = false; + (void)ge::TensorUtils::GetReuseInput(*output_tensor, dst_reuse_input); + if (dst_reuse_input) { + return true; + } + + bool is_ref = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); + if (is_ref) { + string output_name = op_desc->GetOutputNameByIndex(index); + for (const auto &input_name : op_desc->GetAllInputNames()) { + if (output_name == input_name) { + return true; + } + } + } + return false; +} + +void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, + const NodePtr &n) { + const auto node_op_desc = n->GetOpDesc(); + for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { + if (!IsOutputIndexRef(node_op_desc, index)) { + isAllOutputRef = false; + break; + } else { + zero_memory_list_.emplace_back(n, kOutput, index); + isOutputHasRef = true; + } + } +} + + +Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, + const bool is_op_reuse_mem) { + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); + + // continuous output support ref only when all output ref input + bool isAllOutputRef = true; + bool isOutputHasRef = false; + + ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); + + if (isAllOutputRef) { + GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); + return SUCCESS; + } + + if (!isAllOutputRef && isOutputHasRef) { + GELOGE(INTERNAL_ERROR, "continuous output node refs part of its inputs, which is not supported,
node_name:%s", + n->GetName().c_str()); + return INTERNAL_ERROR; + } + MemoryBlock *block = nullptr; int64_t total_size = 0; int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { - return nullptr; + GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + return INTERNAL_ERROR; } if (CheckIsZeroMemNodeType(n->GetType())) { @@ -1152,8 +1228,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec int64_t size = 0; if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { - GELOGI("Get size failed"); - return nullptr; + GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + return INTERNAL_ERROR; } size_t align_size = static_cast(size); AlignMemOffset(align_size); @@ -1176,7 +1252,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec } if (total_size == 0) { - return nullptr; + return SUCCESS; } auto block_size = GetBlockSize(total_size, ranges); @@ -1190,8 +1266,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec // hccl task need align header and tail block->first_continuous_block_ = true; block->last_continuous_block_ = true; + } else { + GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); + return INTERNAL_ERROR; } - return block; + return SUCCESS; } MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector &ranges, @@ -1203,9 +1282,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); - if (output_op_desc != nullptr) { - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); - } + GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); size_t no_align_size = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, "Get no align size failed"); @@ -1231,6 +1309,13 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, AlignMemOffset(align_size); theory_memory_size_ += align_size; } else { + // if ref input is variable, can not find symbol, must judge alone + if (IsOutputIndexRef(node_op_desc, index)) { + zero_memory_list_.emplace_back(n, kOutput, index, false); + GELOGI("ref mode skip out block assign. 
node_name: %s, index:%u", n->GetName().c_str(), index); + return nullptr; + } + int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); @@ -1477,8 +1562,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); if (IsContinuousOutput(node)) { - (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); - return SUCCESS; + return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); } for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { int64_t size = 0; @@ -1486,6 +1570,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector if (output_op_desc != nullptr) { GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } + // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { @@ -1493,6 +1578,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); + std::string peer_name; uint32_t peer_input_index = 0; bool out_node_set_continuous_input = false; @@ -1973,9 +2063,8 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || - (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || - (node_type == HVDCALLBACKBROADCAST); + (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || + (node_type == ASSIGN) || (node_type == HVDWAIT); } bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 78584078..4401108d 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -448,7 +448,11 @@ class BlockMemAssigner : public MemAssigner { bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); - MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); + void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); + + Status ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); + + void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); std::unordered_map<int64_t, std::map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f94eb275..8c5d8940 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -88,6 +88,14 @@ Status VariableMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } +Status
VariableMemoryAssigner::AssignMemory2HasRefAttrNode() { + Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_); + if (result != ge::SUCCESS) { + return result; + } + return ge::SUCCESS; +} + Status GraphMemoryAssigner::AssignMemory() { ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); if (mem_assigner->Assign() != ge::SUCCESS) { @@ -135,6 +143,19 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } +ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { + auto variable_assigner = + std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); + if (variable_assigner == nullptr) { + GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); + return ge::FAILED; + } + if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) { + return ge::FAILED; + } + return ge::SUCCESS; +} + ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { @@ -371,10 +392,10 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Assign continuous input memory bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); if (continuous_input) { int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); @@ -412,6 +433,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Assign continuous output memory bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); if (continuous_output) { + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed."); ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous output memory failed!"); @@ -640,9 +662,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { } int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start; - GE_CHECK_NOTNULL(mem_assigner_); - GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger()); - if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) { + if (atomic_mem_size != 0) { GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); } @@ -1233,8 +1253,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); } - GELOGD("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", - has_mem_type_attr == true ? "Fusion" : "", + GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]", + has_mem_type_attr ?
"Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index a380e594..be6c47b0 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -63,6 +63,8 @@ class VariableMemoryAssigner { /// ge::Status AssignVarAttr2Nodes(); + ge::Status AssignMemory2HasRefAttrNode(); + private: ge::ComputeGraphPtr compute_graph_; }; @@ -99,6 +101,8 @@ class GraphMemoryAssigner { /// ge::Status AssignVarAttr2Nodes(); + ge::Status AssignMemory2HasRefAttrNode(); + ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 055103a9..0f58a040 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -40,6 +40,11 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &me return ge::FAILED; } + if (graph_mem_assigner.AssignMemory2HasRefAttrNode() != ge::SUCCESS) { + GELOGE(ge::FAILED, "Assign memory to node which has ref attr failed!"); + return ge::FAILED; + } + // Assign memory for reference if (graph_mem_assigner.AssignReferenceMemory() != ge::SUCCESS) { GELOGE(ge::FAILED, "Assign reference memory failed!"); diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 639bfaa0..f910d2e2 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -33,10 +33,7 @@ using std::vector; namespace ge { Status VarMemAssignUtil::AssignVarMemory(ge::ComputeGraphPtr &compute_graph) { - GE_CHK_STATUS_RET(AssignMemory2VariableNode(compute_graph)); - GE_CHK_STATUS_RET(AssignMemory2HasRefAttrNode(compute_graph)); - - return SUCCESS; + return AssignMemory2VariableNode(compute_graph); } Status VarMemAssignUtil::AssignConstantOpMemory(ge::ComputeGraphPtr &compute_graph) { @@ -60,9 +57,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr return FAILED); ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); GE_CHECK_NOTNULL(tensor_desc); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) @@ -70,7 +72,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr } uint8_t *dev_ptr = nullptr; - rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 63112ea8..88ffda02 
100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const { return false; } +// Iterator loop: +// StreamSwitch -> StreamActive +// FpBp loop: +// StreamSwitch -> AssignAdd -> StreamActive +NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { + for (auto pre_node : active_node->GetInControlNodes()) { + if (pre_node->GetType() == STREAMSWITCH) { + return pre_node; + } + for (auto pre_pre_node : pre_node->GetInControlNodes()) { + if (pre_pre_node->GetType() == STREAMSWITCH) { + return pre_pre_node; + } + } + } + return nullptr; +} + Status StreamAllocator::SetActiveStreamsForLoop() { vector<uint32_t> loop_active_streams; for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { @@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { bool is_loop_active = false; if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { vector<string> activated_label_list; + + NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); + if (pre_switch_node == nullptr) { + GELOGE(FAILED, "Find switch node before loop active node %s failed.", node->GetName().c_str()); + return FAILED; + } + if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || activated_label_list.empty()) { GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), @@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { // it may cause some stream actived by iterator next step when this stream still alive. // If above situation happen, active message will lose, cause process block in next iteration.
// In order to avoid this abnormal happen, - // add event between each last node and iterator active node in target active stream + // add event between each last node and iterator switch node GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); for (auto iter : stream_id_to_last_node) { if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { @@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { continue; } AddSendEventId(iter.second, event_num_); - AddRecvEventId(node, event_num_); + AddRecvEventId(pre_switch_node, event_num_); event_num_++; } diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index bb72fa8a..8bd7d32e 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -234,6 +234,19 @@ Status TaskGenerator::SaveFusionNodes(map> &fusion return SUCCESS; } +bool TaskGenerator::IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const { + auto parent_graph_ptr = graph->GetParentGraph(); + if (parent_graph_ptr == nullptr) { + return false; + } + auto root_graph_ptr = GraphUtils::FindRootGraph(parent_graph_ptr); + if (root_graph_ptr == nullptr) { + return false; + } + + return root_graph_ptr->GetGraphUnknownFlag(); +} + Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); @@ -274,7 +287,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra }; GE_MAKE_GUARD(release, callback); - uint64_t all_reduce_node_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -293,7 +305,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // Part2: Call auto fusion_task_info = FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -317,8 +329,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra type.c_str()); // Profiling task size_t task_list_size_before = task_def_list.size(); - GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_node_idx)); + GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); int64_t op_id = op_desc->GetId(); // Compatible with dynamic shape scenes, the default is 0 int64_t stream_id = 0; @@ -338,8 +349,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra return ret; } // Profiling task - GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_node_idx)); + GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); size_t task_list_size_after = task_def_list.size(); // If tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -382,7 +392,6 @@ Status
TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info auto &op_name_map = fusion_task_info.op_name_map; auto &profiling_point = fusion_task_info.profiling_point; auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; - auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; // If op_desc have this attr, call nodes with same group key in a stream together if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && (fusion_nodes_seen.count(node.get()) == 0)) { @@ -429,8 +438,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return INTERNAL_ERROR; } // profiling task - (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_idx); + (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); run_context.stream = run_context.graphStreamList[stream_id]; GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); @@ -443,8 +451,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return ret; } // profiling task - (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_idx); + (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); size_t task_list_size_after = task_def_list.size(); // if tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -466,11 +473,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); } - GELOGI( - "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" - " task finished, generate %u task(s).", - op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, - task_list_size_after - task_list_size_before); + GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" + " task finished, generate %zu task(s).", + op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, + task_list_size_after - task_list_size_before); // record nodes which have call generate task successfully fusion_nodes_seen.insert(fusion_node.get()); @@ -681,7 +687,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP } } if (graph->GetNeedIteration()) { - if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { + if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { profiling_point.end_index.insert(current_idx); GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", op_desc->GetName().c_str(), current_idx); @@ -850,6 +856,13 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi GELOGD("Profiling is not open."); return SUCCESS; } + + // subgraph of dynamic graph no need to find index, has been found in parent graph + if (IsSubGraphOfDynamicGraph(graph)) { + GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); + return SUCCESS; + } + GELOGI("Start get FP/BP index."); std::string fp_point_str; std::string bp_point_str; @@ -887,9 +900,47 @@ Status
TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } +Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task) { + bool is_insert_all_reduce_task = false; + int64_t ar_log_id = 0xFFFF; + if (is_insert_bp_profiling_task) { + (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); + is_insert_all_reduce_task = true; + } + if (!is_insert_all_reduce_task) { + for (size_t i = 0; i < all_reduce_nodes.size(); i++) { + if (all_reduce_nodes[i] == node_index) { + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + ar_log_id = i * kProfilingArStep + kProfilingArStartLogid; + is_insert_all_reduce_task = true; + break; + } + } + } + + if (is_insert_all_reduce_task) { + GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); + TaskDef ar_task_def; + ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); + ar_task_def.set_stream_id(op_desc->GetStreamId()); + LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); + if (ar_log_def != nullptr) { + ar_log_def->set_logid(ar_log_id); + ar_log_def->set_notify(false); + } + task_def_list.push_back(ar_task_def); + } + + return SUCCESS; +} + Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list, uint64_t &all_reduce_node_idx) { + vector &task_def_list) { const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); @@ -932,19 +983,31 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const } bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); - uint64_t all_reduce_task_idx = 0; + if (is_all_reduce) { + (void)InsertProfilingArTaskBefore(op_desc, all_reduce_nodes, node_index, + task_def_list, is_insert_bp_profiling_task); + } + + return SUCCESS; +} + +Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task) { bool is_insert_all_reduce_task = false; - if (is_all_reduce && is_insert_bp_profiling_task) { - all_reduce_task_idx = all_reduce_node_idx; + int64_t ar_log_id = 0xFFFF; + if (is_insert_bp_profiling_task) { + (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); + ar_log_id += 1; is_insert_all_reduce_task = true; } - if (is_all_reduce) { - all_reduce_node_idx++; - } if (!is_insert_all_reduce_task) { for (size_t i = 0; i < all_reduce_nodes.size(); i++) { if (all_reduce_nodes[i] == node_index) { - all_reduce_task_idx = i; + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + ar_log_id = i * kProfilingArStep + kProfilingArEndLogid; is_insert_all_reduce_task = true; break; } @@ -952,28 +1015,24 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const } if (is_insert_all_reduce_task) { - GELOGI("The start allreduce operator is %s, idx %u", 
op_desc->GetName().c_str(), node_index); + GELOGI("The end allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); TaskDef ar_task_def; ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); ar_task_def.set_stream_id(op_desc->GetStreamId()); LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); if (ar_log_def != nullptr) { - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), - GELOGE(FAILED, "Multiply result is out of range."); - return FAILED); - auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; - ar_log_def->set_logid(log_id); + ar_log_def->set_logid(ar_log_id); ar_log_def->set_notify(false); - (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); } task_def_list.push_back(ar_task_def); } + return SUCCESS; } Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list, uint64_t all_reduce_node_idx) { + vector &task_def_list) { GE_CHECK_NOTNULL(op_desc); const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || @@ -1018,36 +1077,11 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P task_def_list.emplace_back(end_task_def); } - uint32_t all_reduce_task_idx = 0; - bool is_insert_all_reduce_task = false; - if (is_all_reduce && is_insert_bp_profiling_task) { - all_reduce_task_idx = all_reduce_node_idx; - is_insert_all_reduce_task = true; - } - - for (size_t i = 0; i < all_reduce_nodes.size(); i++) { - if (all_reduce_nodes[i] == node_index) { - all_reduce_task_idx = i; - is_insert_all_reduce_task = true; - break; - } + if (is_all_reduce) { + (void)InsertProfilingArTaskAfter(op_desc, all_reduce_nodes, node_index, + task_def_list, is_insert_bp_profiling_task); } - if (is_insert_all_reduce_task) { - GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); - TaskDef ar_task_def; - ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); - ar_task_def.set_stream_id(op_desc->GetStreamId()); - LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); - GE_CHECK_NOTNULL(ar_log_def); - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), - GELOGE(FAILED, "Multiply result is out of range."); - return FAILED); - auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid; - ar_log_def->set_logid(log_id); - ar_log_def->set_notify(false); - task_def_list.emplace_back(ar_task_def); - } return SUCCESS; } diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h index 5970954c..9f12d568 100755 --- a/ge/graph/build/task_generator.h +++ b/ge/graph/build/task_generator.h @@ -129,10 +129,16 @@ class TaskGenerator { std::vector &all_reduce_nodes) const; Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, std::vector &all_reduce_nodes, uint32_t node_index, - std::vector &task_def_list, uint64_t &all_reduce_node_idx); + std::vector &task_def_list); + Status InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task); Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, std::vector &all_reduce_nodes,
uint32_t node_index, - std::vector &task_def_list, uint64_t all_reduce_node_idx); + std::vector &task_def_list); + Status InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task); static bool IsProfPoint(const OpDescPtr &op, const std::string &name); @@ -155,6 +161,8 @@ class TaskGenerator { Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); + bool IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const; + uint8_t *var_mem_base_ = nullptr; uint64_t var_mem_size_ = 0; }; diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 3c5618e8..79c22a29 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/model_parser/base.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" #include "runtime/mem.h" diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 6272e581..29afc939 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -22,8 +22,8 @@ #include "common/helper/model_helper.h" #include "common/util.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/model_manager/aipp_utils.cc similarity index 98% rename from ge/graph/load/new_model_manager/aipp_utils.cc rename to ge/graph/load/model_manager/aipp_utils.cc index e0e60d2b..8a18c421 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/model_manager/aipp_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/aipp_utils.h" +#include "graph/load/model_manager/aipp_utils.h" #include diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/model_manager/aipp_utils.h similarity index 100% rename from ge/graph/load/new_model_manager/aipp_utils.h rename to ge/graph/load/model_manager/aipp_utils.h diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc similarity index 99% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.cc rename to ge/graph/load/model_manager/cpu_queue_schedule.cc index 430321bd..d9b716ea 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" #include "common/debug/ge_log.h" #include "common/debug/log.h" diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h similarity index 97% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.h rename to ge/graph/load/model_manager/cpu_queue_schedule.h index 8999e975..de4c5327 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/model_manager/cpu_queue_schedule.h @@ -20,8 +20,8 @@ #include #include "common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "runtime/kernel.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc similarity index 99% rename from ge/graph/load/new_model_manager/data_dumper.cc rename to ge/graph/load/model_manager/data_dumper.cc index a12a2b2a..235cffa9 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/model_manager/data_dumper.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/data_dumper.h" +#include "graph/load/model_manager/data_dumper.h" #include #include @@ -29,7 +29,7 @@ #include "framework/common/util.h" #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/util/debug.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" @@ -820,6 +820,7 @@ Status DataDumper::UnloadDumpInfo() { for (const auto &op_iter : op_list_) { aicpu::dump::Task task; task.set_task_id(op_iter.task_id); + task.set_stream_id(op_iter.stream_id); op_mapping_info.mutable_task()->Add(std::move(task)); } auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); @@ -834,7 +835,6 @@ void DataDumper::DumpShrink() { compute_graph_.reset(); input_map_.clear(); ref_info_.clear(); - op_list_.clear(); } void DataDumper::PrintCheckLog(string &dump_list_key) { diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/model_manager/data_dumper.h similarity index 100% rename from ge/graph/load/new_model_manager/data_dumper.h rename to ge/graph/load/model_manager/data_dumper.h diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/model_manager/data_inputer.cc similarity index 94% rename from ge/graph/load/new_model_manager/data_inputer.cc rename to ge/graph/load/model_manager/data_inputer.cc index 5efc710e..0fe75465 100755 --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/model_manager/data_inputer.cc @@ -14,7 +14,7 @@ * limitations under the License. 
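// CheckHasHcomOp in the davinci_model.cc hunk below collapses eleven chained
// op-type comparisons into a single lookup in a set of op types. The pattern
// in isolation (the type strings are illustrative stand-ins for the
// HCOM*/HVDCALLBACK* constants used by the real code):
#include <set>
#include <string>
bool IsHcomOpType(const std::string &op_type) {
  static const std::set<std::string> kHcomTypes{
      "HcomBroadcast", "HcomAllGather", "HcomAllReduce", "HcomSend",
      "HcomReceive", "HcomReduceScatter", "HcomReduce"};  // shortened list
  return kHcomTypes.count(op_type) > 0;  // one lookup instead of N comparisons
}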
*/ -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/model_manager/data_inputer.h similarity index 100% rename from ge/graph/load/new_model_manager/data_inputer.h rename to ge/graph/load/model_manager/data_inputer.h diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc similarity index 96% rename from ge/graph/load/new_model_manager/davinci_model.cc rename to ge/graph/load/model_manager/davinci_model.cc index 2afbdf30..95fd8392 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include #include @@ -36,9 +36,9 @@ #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/graph.h" -#include "graph/load/new_model_manager/cpu_queue_schedule.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -446,23 +446,20 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); } -void DavinciModel::CheckHasHcomOp() { - Graph graph = ge_model_->GetGraph(); - auto compute_graph = GraphUtils::GetComputeGraph(graph); - if (compute_graph == nullptr) { - return; - } +void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { + const set hcom_opp_types({ + HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER, + HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE + }); + for (const auto &node : compute_graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); - GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) || - (op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) || - (op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) || - (op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) || - (op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) || - (op_desc->GetType() == HCOMREDUCE)), - uint32_t stream_id = static_cast(op_desc->GetStreamId()); - (void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue); + if (hcom_opp_types.count(op_desc->GetType()) > 0) { + uint32_t stream_id = static_cast(op_desc->GetStreamId()); + hcom_streams_.emplace(stream_id); + GELOGD("hcom stream: %u.", stream_id); + } } } @@ -624,6 +621,7 @@ void DavinciModel::OpDebugUnRegister() { // initialize op sequence and call initialization function of each op respectively Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { // validating params + GELOGI("Priority is %d", priority_); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(priority_ < 0 || priority_ > 7, return PARAM_INVALID, "Priority 
must be between 0 and 7, now is %d", priority_); GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "GeModel is null."); @@ -641,7 +639,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size name_ = ge_model_->GetName(); (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); - CheckHasHcomOp(); + CheckHasHcomOp(compute_graph); vector huge_stream_list; (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); @@ -722,7 +720,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size /// the aicpu operator needs to destroy history record, and update operator memory address. /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); - (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); string fp_ceiling_mode; if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { @@ -1028,7 +1025,7 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ const vector &output_op_list) { GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); for (auto &item : data_by_index) { - auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); + const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); input_addrs_list_.emplace_back(output_addrs); @@ -1036,14 +1033,18 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); + GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); if (item.second->GetType() == AIPP_DATA_TYPE) { GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); is_dynamic_aipp_ = true; } } + vector out_node_name; + (void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); + GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); for (const auto &op_desc : output_op_list) { - auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); + const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); output_addrs_list_.emplace_back(input_addrs); @@ -1061,10 +1062,11 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ if (InitOutputTensorInfo(op_desc) != SUCCESS) { return INTERNAL_ERROR; } + + GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); } - GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); - return InitOutputDescInfo(output_op_list); + return SUCCESS; } bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { @@ -1815,7 +1817,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { domi::AippOpParams aipp_params; GeAttrValue::NAMED_ATTRS aipp_attr; -
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, + GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, "Data node do not contain param aipp!"); GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", @@ -1875,7 +1877,7 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); for (const auto item : data_list) { if (item.second->GetName() == releated_name) { - GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index); + GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index); aipp_index = item.first; } } @@ -1980,27 +1982,24 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, } } -Status DavinciModel::InitInputDescInfo(const map &data_by_index) { - for (const auto &item : data_by_index) { - const auto op_desc = item.second; - GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); +Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); - InputOutputDescInfo input; - ShapeDescription dims_info; - Format format = op_desc->GetInputDescPtr(0)->GetFormat(); - CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); + InputOutputDescInfo input; + ShapeDescription dims_info; + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); - input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); - input.name = op_desc->GetName(); - int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); - input.size = input_size; - input_formats_.push_back(format); - input_descs_.push_back(input); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); + int64_t input_size = 0; + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); + input.size = input_size; + input_formats_.push_back(format); + input_descs_.push_back(input); - input.shape_info = dims_info; - input_descs_dims_.push_back(input); - } + input.shape_info = dims_info; + input_descs_dims_.push_back(input); return SUCCESS; } @@ -2066,35 +2065,31 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { - GELOGD("Output node size: %zu", output_op_list.size()); - for (const auto &op_desc : output_op_list) { - uint32_t out_size = static_cast(op_desc->GetInputsSize()); - for (uint32_t index = 0; index < out_size; index++) { - string output_name; - InputOutputDescInfo output; - uint32_t format_result; - CreateOutput(index, op_desc, output, format_result); - - std::vector src_name = op_desc->GetSrcName(); - std::vector src_index = op_desc->GetSrcIndex(); - GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, - "construct output_name failed."); - // forward compatbility, if old om has no out_node_name, need to return output follow origin way - if (out_size == out_node_name_.size()) { - // neweast 
plan, the index will add to name during generate model. - bool contains_colon = out_node_name_[index].find(":") != std::string::npos; - output_name = - contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); - } else { - output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + - std::to_string(src_index[index]); - } - output.name = output_name; - output_descs_.push_back(output); - output_formats_.push_back(format_result); +Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name) { + uint32_t out_size = static_cast(op_desc->GetInputsSize()); + for (uint32_t i = 0; i < out_size; ++i) { + string output_name; + InputOutputDescInfo output; + uint32_t format_result; + CreateOutput(i, op_desc, output, format_result); + + std::vector src_name = op_desc->GetSrcName(); + std::vector src_index = op_desc->GetSrcIndex(); + GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, + "construct output_name failed."); + // forward compatibility: if an old om has no out_node_name, return outputs the original way + if (out_size == out_node_name.size()) { + // newest plan: the index is added to the name during model generation. + bool contains_colon = out_node_name[i].find(":") != std::string::npos; + output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]); + } else { + output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]); } + output.name = output_name; + output_descs_.push_back(output); + output_formats_.push_back(format_result); } + return SUCCESS; } @@ -2147,11 +2142,6 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } - for (const auto &item : broadcast_variable_) { - ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_); - GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, - item.first.c_str()); - } return ret; } @@ -2635,12 +2625,6 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b /// Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); - for (const auto item : broadcast_variable_) { - Status ret = VarManager::Instance(session_id_) - ->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_); - GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, - item.first.c_str()); - } GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); std::vector outputs; @@ -3064,6 +3048,64 @@ Status DavinciModel::MallocKnownArgs() { return SUCCESS; } +void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index) { + bool flag = GetL1FusionEnableOption(); + char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; + INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); + int64_t env_flag = (res == EN_OK) ?
std::strtol(skt_enable_env, nullptr, kDecimal) : 0; + if (env_flag != 0) { + flag = true; + } + + TaskDescInfo task_desc_info; + if (!om_name_.empty()) { + task_desc_info.model_name = om_name_; + } else { + task_desc_info.model_name = name_; + } + task_desc_info.op_name = op->GetName(); + task_desc_info.block_dim = task_def.kernel().block_dim(); + task_desc_info.task_id = task->GetTaskID(); + task_desc_info.stream_id = task->GetStreamId(); + task_desc_info.shape_type = "static"; + task_desc_info.cur_iter_num = 0; + // task type + task_desc_info.task_type = kTaskTypeInvalid; + auto model_task_type = static_cast(task_def.type()); + if (model_task_type == RT_MODEL_TASK_KERNEL) { + const domi::KernelDef &kernel_def = task_def.kernel(); + const auto &context = kernel_def.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { + task_desc_info.task_type = kTaskTypeAicore; + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Other kernel type: %u", context.kernel_type()); + } + } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Skip task type: %d", static_cast(model_task_type)); + } + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + if (flag) { + if (task->GetSktTaskID() != 0xFFFFFFFF) { + TaskDescInfo task_desc_info; + string op_name = "super_kernel_" + to_string(task_index); + task_desc_info.op_name = op_name; + task_desc_info.task_id = task->GetSktTaskID(); + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + } + } + return; +} + Status DavinciModel::DistributeTask() { GELOGI("do Distribute."); for (auto &task : cpu_task_list_) { @@ -3075,18 +3117,11 @@ Status DavinciModel::DistributeTask() { } task_desc_info_.clear(); - bool flag = GetL1FusionEnableOption(); - char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; - INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); - int64_t env_flag = (res == EN_OK) ? 
std::strtol(skt_enable_env, nullptr, kDecimal) : 0; - if (env_flag != 0) { - flag = true; - } - const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); + GE_CHECK_NOTNULL(task); GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); // for data dump auto op_index = std::max(task_def.kernel().context().op_index(), @@ -3106,33 +3141,9 @@ Status DavinciModel::DistributeTask() { GE_IF_BOOL_EXEC(no_need_profiling, continue); SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); - // Load task info for profiling - TaskDescInfo task_desc_info; - if (!om_name_.empty()) { - task_desc_info.model_name = om_name_; - } else { - task_desc_info.model_name = name_; - } - task_desc_info.op_name = op->GetName(); - task_desc_info.block_dim = task_def.kernel().block_dim(); - task_desc_info.task_id = task->GetTaskID(); - task_desc_info.stream_id = task->GetStreamId(); - task_desc_info.shape_type = "static"; - task_desc_info.cur_iter_num = 0; - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - if (flag) { - if (task->GetSktTaskID() != 0xFFFFFFFF) { - TaskDescInfo task_desc_info; - string op_name = "super_kernel_" + to_string(task_index); - task_desc_info.op_name = op_name; - task_desc_info.task_id = task->GetSktTaskID(); - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - } - } + + // save task info for profiling + SaveProfilingTaskDescInfo(op, task, task_def, task_index); } // launch dump kernel to aicpu GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); @@ -3949,8 +3960,11 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map void *{ + if (known_node_) { + data_dumper_.SetLoopAddr(known_shape_global_step_, nullptr, nullptr); + } else { + // set loop count addr + auto get_var_addr = [&](const string &name) -> void *{ const auto it = variable_by_name.find(name); if (it != variable_by_name.end()) { const auto output_sizes = ModelUtils::GetOutputSize(it->second); @@ -3963,10 +3977,10 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map &graph_des } else { compute_graph_info.model_name = name_; } + + std::vector format = { FORMAT_NULL }; + std::vector> shape = { {0} }; + std::vector data_type = { DT_UNDEFINED }; compute_graph_info.op_name = op_desc.op_name; compute_graph_info.op_type = op_desc.op_type; - compute_graph_info.input_format = op_desc.input_format; - compute_graph_info.input_shape = op_desc.input_shape; - compute_graph_info.input_data_type = op_desc.input_data_type; - compute_graph_info.output_format = op_desc.output_format; - compute_graph_info.output_shape = op_desc.output_shape; - compute_graph_info.output_data_type = op_desc.output_data_type; + compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; + compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; + compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; + compute_graph_info.output_format = op_desc.output_format.empty() ? 
format : op_desc.output_format; + compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; + compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; uint32_t task_id = 0; uint32_t stream_id = 0; auto iter = profiler_report_op_info_.find(op_desc.op_name); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h similarity index 97% rename from ge/graph/load/new_model_manager/davinci_model.h rename to ge/graph/load/model_manager/davinci_model.h index 4108f2c7..53e9cd4d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -32,12 +32,12 @@ #include "common/types.h" #include "framework/common/util.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/aipp_utils.h" -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/data_inputer.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/aipp_utils.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/data_inputer.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "graph/model.h" #include "graph/node.h" #include "graph/op_desc.h" @@ -470,6 +470,10 @@ class DavinciModel { data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); } + void SetKnownShapeGlobalStep(void *global_step) { + known_shape_global_step_ = global_step; + } + void DumperShrink() { data_dumper_.DumpShrink(); } @@ -623,6 +627,9 @@ class DavinciModel { Status DistributeTask(); + void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index); + uint8_t *MallocFeatureMapMem(size_t data_size); uint8_t *MallocWeightsMem(size_t weights_size); @@ -824,7 +831,7 @@ class DavinciModel { void OpDebugUnRegister(); - void CheckHasHcomOp(); + void CheckHasHcomOp(const ComputeGraphPtr &graph); Status DoTaskSink(); @@ -847,8 +854,8 @@ class DavinciModel { Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - Status InitInputDescInfo(const map &data_by_index); - Status InitOutputDescInfo(const vector &output_op_list); + Status InitInputDescInfo(const OpDescPtr &op_desc); + Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name); Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); @@ -883,7 +890,6 @@ class DavinciModel { GeModelPtr ge_model_; // release after DavinciModel::Init bool need_destroy_aicpu_kernel_{false}; - vector out_node_name_; map op_list_; // release after DavinciModel::Init @@ -1055,6 +1061,9 @@ class DavinciModel { vector input_formats_; vector output_descs_; vector output_formats_; + + // known shape node for dump + void *known_shape_global_step_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/model_manager/davinci_model_parser.cc similarity index 92% rename from ge/graph/load/new_model_manager/davinci_model_parser.cc rename to 
ge/graph/load/model_manager/davinci_model_parser.cc index 76526de2..c6f48b84 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/model_manager/davinci_model_parser.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" namespace ge { DavinciModelParser::DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/model_manager/davinci_model_parser.h similarity index 100% rename from ge/graph/load/new_model_manager/davinci_model_parser.h rename to ge/graph/load/model_manager/davinci_model_parser.h diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc similarity index 99% rename from ge/graph/load/new_model_manager/model_manager.cc rename to ge/graph/load/model_manager/model_manager.cc index edc60e50..4eb3254b 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include @@ -28,8 +28,8 @@ #include "framework/common/util.h" #include "graph/common/ge_call_wrapper.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "graph/utils/attr_utils.h" @@ -527,6 +527,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector &aicpu_op for (uint32_t i = 0; i < res_op_nums; i++) { ReturnCode ret_code = res_ret_code_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); - GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, + GELOGI("Unsupported aicpu op type: %lu, kernel_type:%d, opLen:%lu, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); std::vector op_name; op_name.clear(); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h similarity index 100% rename from ge/graph/load/new_model_manager/model_manager.h rename to ge/graph/load/model_manager/model_manager.h diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc similarity index 92% rename from ge/graph/load/new_model_manager/model_utils.cc rename to ge/graph/load/model_manager/model_utils.cc index 22a657ad..410e9364 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -14,20 +14,13 @@ * limitations under the License.
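// SaveProfilingTaskDescInfo, added in the davinci_model.cc hunk above, maps a
// runtime task to a profiler task type: RT_MODEL_TASK_KERNEL resolves to AI
// Core or AI CPU depending on the kernel context type, while
// RT_MODEL_TASK_KERNEL_EX is always AI CPU. The decision tree in isolation
// (the enumerators below are stand-ins for the rtModelTaskType_t /
// ccKernelType / kTaskType* values used by the real code):
enum class ModelTaskType { kKernel, kKernelEx, kOther };
enum class KernelType { kTe, kAiCpu, kCustAiCpu, kOther };
enum class ProfTaskType { kInvalid, kAiCore, kAiCpu };
ProfTaskType ClassifyTaskForProfiler(ModelTaskType model_task, KernelType kernel) {
  if (model_task == ModelTaskType::kKernel) {
    if (kernel == KernelType::kTe) {
      return ProfTaskType::kAiCore;  // TBE kernels run on AI Core
    }
    if (kernel == KernelType::kAiCpu || kernel == KernelType::kCustAiCpu) {
      return ProfTaskType::kAiCpu;
    }
    return ProfTaskType::kInvalid;  // other kernel types are skipped
  }
  if (model_task == ModelTaskType::kKernelEx) {
    return ProfTaskType::kAiCpu;  // kernel-ex tasks always run on AI CPU
  }
  return ProfTaskType::kInvalid;
}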
*/ -#include "graph/load/new_model_manager/model_utils.h" - +#include "graph/load/model_manager/model_utils.h" #include - #include "common/debug/log.h" #include "common/op/ge_op_utils.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" -#include "runtime/base.h" -#include "runtime/kernel.h" - -#include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" +#include "graph/types.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -342,13 +335,13 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); v_input_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps @@ -380,6 +373,34 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co return v_input_data_addr; } +/// +/// @ingroup ge +/// @brief Get variable address. +/// @return Status +/// +Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr) { + rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); + switch (mem_type) { + case RT_MEMORY_RDMA_HBM: + if (offset < 0) { + GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + return PARAM_INVALID; + } + var_addr = reinterpret_cast(offset); + break; + case RT_MEMORY_HBM: + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + var_addr = model_param.var_base + offset - model_param.logic_var_base; + break; + default: + GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); + return PARAM_INVALID; + } + GE_CHECK_NOTNULL(var_addr); + return SUCCESS; +} + /// /// @ingroup ge /// @brief Get output data address. 
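// ModelUtils::GetVarAddr above resolves a variable's device address from its
// logic offset in one of two ways: RDMA HBM offsets are already physical
// addresses, while ordinary HBM offsets are rebased against the model's
// variable base. A condensed standalone model of that dispatch (the enum and
// parameters stand in for rtMemType_t and RuntimeParam; the range validation
// done by VALIDATE_MEM_RANGE is elided here):
#include <cstdint>
enum class VarMemType { kRdmaHbm, kHbm };
bool ResolveVarAddr(VarMemType type, int64_t offset, uint8_t *var_base,
                    int64_t logic_var_base, uint8_t *&var_addr) {
  switch (type) {
    case VarMemType::kRdmaHbm:
      if (offset < 0) {
        return false;  // a negative RDMA offset cannot be an address
      }
      // the offset itself is the device address
      var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));
      return true;
    case VarMemType::kHbm:
      // rebase the logic offset into the variable memory block
      var_addr = var_base + (offset - logic_var_base);
      return true;
  }
  return false;  // unsupported memory type
}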
@@ -404,19 +425,26 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C return v_output_data_addr; } for (size_t i = 0; i < outputs_size; ++i) { - GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; - v_output_data_addr.push_back(variable_addr); - GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); - continue); const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); if (tensor_desc == nullptr) { GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); continue; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + GELOGD("%s is an optional output, the address doesn't need to be saved.", tensor_desc->GetName().c_str()); + continue; + } + GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); + v_output_data_addr.push_back(variable_addr); + GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); + continue); + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/model_manager/model_utils.h similarity index 90% rename from ge/graph/load/new_model_manager/model_utils.h rename to ge/graph/load/model_manager/model_utils.h index 4b3d7ae7..26f8d700 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/model_manager/model_utils.h @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/types.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" #include "graph/utils/tensor_adapter.h" @@ -107,6 +107,15 @@ class ModelUtils { /// @return Status /// static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); + + private: + /// + /// @ingroup ge + /// @brief Get variable address. + /// @return Status + /// + static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr); }; } // namespace ge diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc rename to ge/graph/load/model_manager/task_info/end_graph_task_info.cc index b8b02f59..c306c650 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License.
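// GetOutputDataAddrs above now skips outputs whose tensor descriptor carries
// ATTR_NAME_MEMORY_SIZE_CALC_TYPE == MemorySizeCalcType::ALWAYS_EMPTY, so no
// address is recorded for optional outputs. The guard in isolation (the
// attribute store is reduced to a map for the sketch, and the enumerator
// values are assumed; the real code reads the attr with ge::AttrUtils::GetInt
// on the GeTensorDesc):
#include <cstdint>
#include <map>
#include <string>
enum class MemorySizeCalcType : int32_t { NORMAL = 0, ALWAYS_EMPTY };  // values assumed
bool IsOptionalEmptyOutput(const std::map<std::string, int32_t> &tensor_attrs,
                           const std::string &attr_name) {
  auto it = tensor_attrs.find(attr_name);
  return it != tensor_attrs.end() &&
         it->second == static_cast<int32_t>(MemorySizeCalcType::ALWAYS_EMPTY);
}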
*/ -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kDumpFlag = 2; diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/model_manager/task_info/end_graph_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.h rename to ge/graph/load/model_manager/task_info/end_graph_task_info.h index 614544f9..efce19b2 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EndGraphTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/model_manager/task_info/event_record_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.cc rename to ge/graph/load/model_manager/task_info/event_record_task_info.cc index 11589258..f736c386 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/model_manager/task_info/event_record_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.h rename to ge/graph/load/model_manager/task_info/event_record_task_info.h index d3f5961e..a79f1d3b 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventRecordTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc rename to ge/graph/load/model_manager/task_info/event_wait_task_info.cc index 5701179b..34058502 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/model_manager/task_info/event_wait_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.h rename to ge/graph/load/model_manager/task_info/event_wait_task_info.h index a92252d7..bd8acab1 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventWaitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.cc index 32c79647..6feea9e4 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.h index b1897533..284a5e0f 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStartTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc index dd4edfd0..22d1589c 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.h index 880ca487..994498d5 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStopTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/model_manager/task_info/hccl_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.cc rename to ge/graph/load/model_manager/task_info/hccl_task_info.cc index 7b18a9a3..2d0ad560 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" #include #include "common/opskernel/ops_kernel_info_store.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace ge { std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/model_manager/task_info/hccl_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.h rename to ge/graph/load/model_manager/task_info/hccl_task_info.h index 777f5bbf..3df155ad 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.h @@ -23,7 +23,7 @@ #include #include "common/opskernel/ge_task_info.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/manager/util/hcom_util.h" namespace ge { class HcclTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 98d9cb78..6da1bf63 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include @@ -24,8 +24,8 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" #include "graph/attr_value.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { @@ -192,7 +192,7 @@ void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; - dump_args_ = input_output_addr_; + dump_args_ = addr; } } diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index f6873c6c..265316ce 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc similarity index 99% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_task_info.cc index 83bf2779..27fe8eb0 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
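// The kernel_ex_task_info.cc hunk above fixes InitDumpTask to record the
// address passed in by the caller (addr) rather than the member
// input_output_addr_, so the dump always targets the buffer resolved for the
// current task. A reduced model of the corrected method (the struct and flag
// value are stand-ins for KernelExTaskInfo and RT_KERNEL_DUMPFLAG):
#include <cstdint>
struct DumpTaskSketch {
  uint32_t dump_flag_ = 0;
  void *dump_args_ = nullptr;
  void InitDumpTask(void *addr, bool layer_need_dump) {
    if (layer_need_dump) {
      dump_flag_ = 0x2;    // stand-in for RT_KERNEL_DUMPFLAG
      dump_args_ = addr;   // the fix: use the per-call address, not a member
    }
  }
};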
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc
similarity index 99%
rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
rename to ge/graph/load/model_manager/task_info/kernel_task_info.cc
index 83bf2779..27fe8eb0 100755
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/kernel_task_info.h"
+#include "graph/load/model_manager/task_info/kernel_task_info.h"
 #include
 #include
 #include
@@ -25,9 +25,9 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/l2_cache_optimize.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_manager.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_manager.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "runtime/kernel.h"
 #include "super_kernel/super_kernel.h"
 #include "super_kernel/super_kernel_factory.h"
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h
similarity index 98%
rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.h
rename to ge/graph/load/model_manager/task_info/kernel_task_info.h
index cea25320..7cabf259 100644
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
+++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h
@@ -22,7 +22,7 @@
 #include
 #include
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
 class KernelTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc
rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
index 393c0b31..1921c85d 100755
--- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h"
+#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h
rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h
index f83cd1d9..25310368 100755
--- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h
+++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelGotoExTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/model_manager/task_info/label_set_task_info.cc
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.cc
rename to ge/graph/load/model_manager/task_info/label_set_task_info.cc
index 5fa96a96..45cb586a 100644
--- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/label_set_task_info.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/label_set_task_info.h"
+#include "graph/load/model_manager/task_info/label_set_task_info.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/model_manager/task_info/label_set_task_info.h
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.h
rename to ge/graph/load/model_manager/task_info/label_set_task_info.h
index bb02ccf0..36e41f1b 100644
--- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h
+++ b/ge/graph/load/model_manager/task_info/label_set_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelSetTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
index ae7865a4..c2997678 100644
--- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h"
+#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 constexpr uint8_t kLabelSwitchIndexNum = 1;
diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h
rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h
index 538b2d68..00ca0844 100644
--- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h
+++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelSwitchByIndexTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
index b95705f0..a1f58e42 100755
--- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h"
+#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace {
 const uint32_t kAlignBytes = 64;
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h
index c7645b9f..4631c67c 100644
--- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
+++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class MemcpyAddrAsyncTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
index fa320d81..22f9267d 100755
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h"
+#include "graph/load/model_manager/task_info/memcpy_async_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.h
index 43b5ba13..728305ff 100755
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
+++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc
similarity index 93%
rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc
rename to ge/graph/load/model_manager/task_info/model_exit_task_info.cc
index ff8057aa..eb200e3f 100644
--- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc
@@ -14,11 +14,11 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/model_exit_task_info.h"
+#include "graph/load/model_manager/task_info/model_exit_task_info.h"
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h b/ge/graph/load/model_manager/task_info/model_exit_task_info.h
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.h
rename to ge/graph/load/model_manager/task_info/model_exit_task_info.h
index c219fcc8..1e4a3923 100644
--- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h
+++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class ModelExitTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
similarity index 93%
rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc
rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
index 533c459a..b8fd1828 100755
--- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h"
+#include "graph/load/model_manager/task_info/profiler_trace_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h
rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.h
index 8989096d..b57ebfae 100755
--- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h
+++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class ProfilerTraceTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc
rename to ge/graph/load/model_manager/task_info/stream_active_task_info.cc
index 33ebea3b..ec807777 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc
@@ -14,12 +14,12 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/stream_active_task_info.h"
+#include "graph/load/model_manager/task_info/stream_active_task_info.h"
 #include
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/model_manager/task_info/stream_active_task_info.h
similarity index 95%
rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.h
rename to ge/graph/load/model_manager/task_info/stream_active_task_info.h
index c6b263b4..dfbf48d1 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h
+++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.h
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class StreamActiveTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc
rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
index 616ba85f..f129950a 100644
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
@@ -14,13 +14,13 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h"
+#include "graph/load/model_manager/task_info/stream_switch_task_info.h"
 #include
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.h
index a72d7de2..0e75e183 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
+++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class StreamSwitchTaskInfo : public TaskInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc
rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
index 27adbbe4..35eb23e3 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
@@ -13,12 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h"
+#include "graph/load/model_manager/task_info/stream_switchn_task_info.h"
 #include
 #include "framework/common/debug/ge_log.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_utils.h"
 namespace {
 const uint8_t kStreamSwitchnInputNum = 1;
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h
rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.h
index 3d65a086..6e6ca190 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h
+++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h
rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h
diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc
similarity index 94%
rename from ge/graph/load/new_model_manager/task_info/task_info.cc
rename to ge/graph/load/model_manager/task_info/task_info.cc
index 674d477f..e521f95c 100755
--- a/ge/graph/load/new_model_manager/task_info/task_info.cc
+++ b/ge/graph/load/model_manager/task_info/task_info.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include
diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h
similarity index 96%
rename from ge/graph/load/new_model_manager/task_info/task_info.h
rename to ge/graph/load/model_manager/task_info/task_info.h
index 26f22564..99ec3c4e 100644
--- a/ge/graph/load/new_model_manager/task_info/task_info.h
+++ b/ge/graph/load/model_manager/task_info/task_info.h
@@ -22,8 +22,8 @@
 #include "cce/customize.h"
 #include "framework/common/taskdown_common.h"
 #include "framework/common/ge_inner_error_codes.h"
-#include "graph/load/new_model_manager/ts_mem_mall.h"
-#include "graph/load/new_model_manager/task_info/task_info_factory.h"
+#include "graph/load/model_manager/ts_mem_mall.h"
+#include "graph/load/model_manager/task_info/task_info_factory.h"
 #include "proto/task.pb.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/model_manager/task_info/task_info_factory.h
similarity index 100%
rename from ge/graph/load/new_model_manager/task_info/task_info_factory.h
rename to ge/graph/load/model_manager/task_info/task_info_factory.h
diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/model_manager/tbe_handle_store.cc
similarity index 100%
rename from ge/graph/load/new_model_manager/tbe_handle_store.cc
rename to ge/graph/load/model_manager/tbe_handle_store.cc
diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/model_manager/tbe_handle_store.h
similarity index 100%
rename from ge/graph/load/new_model_manager/tbe_handle_store.h
rename to ge/graph/load/model_manager/tbe_handle_store.h
diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h
similarity index 100%
rename from ge/graph/load/new_model_manager/ts_mem_mall.h
rename to ge/graph/load/model_manager/ts_mem_mall.h
diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc
similarity index 98%
rename from ge/graph/load/new_model_manager/zero_copy_offset.cc
rename to ge/graph/load/model_manager/zero_copy_offset.cc
index f27d862d..3f8555bb 100644
--- a/ge/graph/load/new_model_manager/zero_copy_offset.cc
+++ b/ge/graph/load/model_manager/zero_copy_offset.cc
@@ -14,12 +14,12 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/zero_copy_offset.h"
+#include "graph/load/model_manager/zero_copy_offset.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
-#include "graph/load/new_model_manager/model_utils.h"
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/model_utils.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 namespace ge {
 namespace {
diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h
similarity index 98%
rename from ge/graph/load/new_model_manager/zero_copy_offset.h
rename to ge/graph/load/model_manager/zero_copy_offset.h
index 66fcd887..fc63fced 100644
--- a/ge/graph/load/new_model_manager/zero_copy_offset.h
+++ b/ge/graph/load/model_manager/zero_copy_offset.h
@@ -25,7 +25,7 @@
 #include "external/ge/ge_api_error_codes.h"
 #include "framework/common/ge_types.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "runtime/mem.h"
diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/model_manager/zero_copy_task.cc
similarity index 97%
rename from ge/graph/load/new_model_manager/zero_copy_task.cc
rename to ge/graph/load/model_manager/zero_copy_task.cc
index b938f14b..367de87a 100755
--- a/ge/graph/load/new_model_manager/zero_copy_task.cc
+++ b/ge/graph/load/model_manager/zero_copy_task.cc
@@ -14,11 +14,11 @@
  * limitations under the License.
 */
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "common/ge_compiler_options.h"
 namespace ge {
diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/model_manager/zero_copy_task.h
similarity index 100%
rename from ge/graph/load/new_model_manager/zero_copy_task.h
rename to ge/graph/load/model_manager/zero_copy_task.h
diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc
index d6027a08..bfef4001 100644
--- a/ge/graph/manager/graph_caching_allocator.cc
+++ b/ge/graph/manager/graph_caching_allocator.cc
@@ -100,14 +100,14 @@ Status CachingAllocator::Initialize(uint32_t device_id) {
     }
     auto bin_ptr = new (std::nothrow) BlockBin(BlockComparator);
     if (bin_ptr == nullptr) {
-      GELOGE(ge::FAILED, "Alloc BlockBin failed.");
-      return ge::FAILED;
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc BlockBin failed.");
+      return ACL_ERROR_GE_MEMORY_ALLOCATION;
     }
     free_block_bins_[i] = bin_ptr;
   }
   memory_allocator_ = MemManager::Instance(memory_type_);
   if (memory_allocator_ == nullptr) {
-    return ge::FAILED;
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }
   return ge::SUCCESS;
 }
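A theme starting with the caching-allocator hunk above and repeated in the allocator files below: generic ge::FAILED / INTERNAL_ERROR returns are replaced with specific ACL codes such as ACL_ERROR_GE_MEMORY_ALLOCATION, so callers can react to the class of failure rather than a catch-all. A self-contained sketch of the caller-side benefit; the numeric values are placeholders, not the real ACL codes:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for the GE/ACL status codes used in the patch.
    using Status = uint32_t;
    constexpr Status ACL_ERROR_GE_MEMORY_ALLOCATION = 245;  // placeholder value
    constexpr Status ACL_ERROR_GE_INTERNAL_ERROR = 246;     // placeholder value

    void HandleInitFailure(Status ret) {
      // With distinct codes a caller can, for example, retry with a smaller
      // pool on allocation failure but abort outright on an internal error.
      if (ret == ACL_ERROR_GE_MEMORY_ALLOCATION) {
        std::printf("allocation failed, consider shrinking the request\n");
      } else if (ret == ACL_ERROR_GE_INTERNAL_ERROR) {
        std::printf("internal error, aborting\n");
      }
    }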
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index b0d412dc..410611b0 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -92,6 +92,7 @@
 #include "graph/passes/unused_args_clean_pass.h"
 #include "graph/passes/global_step_insert_pass.h"
 #include "graph/passes/memcpy_addr_async_pass.h"
+#include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/build/label_allocator.h"
 #include "graph/utils/tensor_adapter.h"
 #include "inc/pass_manager.h"
@@ -2150,6 +2151,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
                                                new (std::nothrow) TransOpWithoutReshapeFusionPass))
   GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
                                                new (std::nothrow) TransOpBreadthFusionPass))
+  GE_CHK_STATUS_RET(
+      after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass));
   GE_TIMESTAMP_START(after_merge_passes);
   auto ret = after_merge_passes.Run(compute_graph);
@@ -2776,7 +2779,7 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector
     }
     GetLocalOmgContext().user_real_input_dims.emplace_back(input_tensor.at(index).dims);
-    GELOGI("Shape dims of %d data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
+    GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
   }
   return SUCCESS;
 }
@@ -3121,9 +3124,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp
     graph_name.append(std::to_string(graph_node->GetGraphId()));
     compute_graph->SetName(graph_name);
   }
-  std::vector sub_graph_list;
-  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model,
-                                                                       session_id);
+
+  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id);
   if (ret != SUCCESS) {
     GELOGE(ret, "SubGraph build Failed.");
     return ret;
   }
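The OptimizeStage1_1 hunk above wires the new HcclMemcpyPass (its implementation appears at the end of this patch) into the standard registration flow: passes are added to a PassManager under a stage-qualified name and then run over the compute graph in order. Condensed from the hunk, with the error-checking macro expanded into a plain check so the control flow is visible:

    // Condensed from the OptimizeStage1_1 hunk above.
    PassManager after_merge_passes;
    Status status = after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass",
                                               new (std::nothrow) HcclMemcpyPass);
    if (status != SUCCESS) {
      return status;  // this is what GE_CHK_STATUS_RET expands to
    }
    auto ret = after_merge_passes.Run(compute_graph);  // each pass sees the whole graph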
diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc
index f3037299..428b08ae 100755
--- a/ge/graph/manager/graph_mem_allocator.cc
+++ b/ge/graph/manager/graph_mem_allocator.cc
@@ -64,9 +64,10 @@ uint8_t *MemoryAllocator::MallocMemory(const string &purpose, size_t memory_size
 Status MemoryAllocator::FreeMemory(uint8_t *memory_addr, uint32_t device_id) const {
   GELOGI("MemoryAllocator::FreeMemory device_id = %u", device_id);
-  if (rtFree(memory_addr) != RT_ERROR_NONE) {
-    GELOGE(ge::INTERNAL_ERROR, "MemoryAllocator::MallocMemory device_id = %u", device_id);
-    return ge::INTERNAL_ERROR;
+  auto rtRet = rtFree(memory_addr);
+  if (rtRet != RT_ERROR_NONE) {
+    GELOGE(rtRet, "MemoryAllocator::FreeMemory device_id = %u", device_id);
+    return RT_ERROR_TO_GE_STATUS(rtRet);
   }
   memory_addr = nullptr;
   return ge::SUCCESS;
@@ -168,31 +169,36 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type)
       memory_allocator_map_[index] = memory_allocator;
       GELOGI("Create MemoryAllocator memory type[%u] success.", index);
     } else {
-      GELOGE(ge::INTERNAL_ERROR, "Alloc MemoryAllocator failed.");
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed.");
     }
   } else {
     memory_allocator = it->second;
   }
   if (memory_allocator == nullptr) {
-    GELOGE(ge::INTERNAL_ERROR, "Create MemoryAllocator failed.");
-    return ge::INTERNAL_ERROR;
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed.");
+    return ACL_ERROR_GE_MEMORY_ALLOCATION;
   } else {
     memory_allocator->Initialize(0);
   }
 }
-  if (InitAllocator(memory_type, caching_allocator_map_) != SUCCESS) {
-    GELOGE(ge::INTERNAL_ERROR, "Create CachingAllocator failed.");
-    return ge::INTERNAL_ERROR;
+  auto ret = InitAllocator(memory_type, caching_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create CachingAllocator failed.");
+    return ret;
   }
-  if (InitAllocator(memory_type, rdma_allocator_map_) != SUCCESS) {
-    GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed.");
-    return ge::INTERNAL_ERROR;
+
+  ret = InitAllocator(memory_type, rdma_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create RdmaAllocator failed.");
+    return ret;
   }
-  if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) {
-    GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed.");
-    return ge::INTERNAL_ERROR;
+
+  ret = InitAllocator(memory_type, host_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create HostMemAllocator failed.");
+    return ret;
   }
   return SUCCESS;
 }
@@ -229,7 +235,7 @@ MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) {
   // Usually impossible
   if (memory_allocator == nullptr) {
-    GELOGE(ge::INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type);
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type);
     static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED);
     return &default_memory_allocator;
   }
diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h
index bd75dbb9..d3468e75 100644
--- a/ge/graph/manager/graph_mem_allocator.h
+++ b/ge/graph/manager/graph_mem_allocator.h
@@ -192,18 +192,18 @@ class MemManager {
       allocate_map[index] = allocator;
       GELOGI("Create Allocator memory type[%u] success.", index);
     } else {
-      GELOGE(INTERNAL_ERROR, "Alloc Allocator failed.");
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed.");
     }
   } else {
     allocator = it->second;
   }
   if (allocator == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Create Allocator failed.");
-    return INTERNAL_ERROR;
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed.");
+    return ACL_ERROR_GE_MEMORY_ALLOCATION;
   } else {
     if (allocator->Initialize() != SUCCESS) {
-      return INTERNAL_ERROR;
+      return ACL_ERROR_GE_INTERNAL_ERROR;
    }
   }
 }
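A second pattern running through the two allocator files above: instead of collapsing every failure into INTERNAL_ERROR, the callee's concrete status is captured and forwarded, either by converting a runtime code via RT_ERROR_TO_GE_STATUS or by returning the InitAllocator result unchanged. A sketch of the forwarding shape; rtFree, RT_ERROR_NONE and RT_ERROR_TO_GE_STATUS are the names the patch itself uses, while the wrapper function is illustrative only:

    // Illustrative wrapper mirroring the FreeMemory hunk above.
    Status FreeDeviceBuffer(uint8_t *memory_addr) {
      auto rt_ret = rtFree(memory_addr);      // keep the concrete error code
      if (rt_ret != RT_ERROR_NONE) {
        return RT_ERROR_TO_GE_STATUS(rt_ret); // map the runtime code to a GE Status
      }
      return SUCCESS;
    }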
diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc
index 821de257..d0292885 100755
--- a/ge/graph/manager/graph_var_manager.cc
+++ b/ge/graph/manager/graph_var_manager.cc
@@ -16,17 +16,10 @@
 #include "graph/manager/graph_var_manager.h"
-#include
-
-#include "common/l2_cache_optimize.h"
-#include "common/types.h"
-#include "framework/common/debug/ge_log.h"
-#include "framework/common/debug/log.h"
-#include "ge/ge_api_types.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/manager/graph_mem_allocator.h"
+#include "graph/manager/rdma_pool_allocator.h"
 #include "graph/manager/trans_var_data_utils.h"
-#include "graph/utils/attr_utils.h"
 #include "graph/utils/type_utils.h"
 using std::map;
@@ -37,7 +30,7 @@ namespace ge {
 VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {}
 VarResource::~VarResource() {
-  var_offset_set_.clear();
+  var_offset_map_.clear();
   var_addr_mgr_map_.clear();
   cur_var_tensor_desc_map_.clear();
   var_broad_cast_info_.clear();
@@ -91,8 +84,10 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
   std::string var_key = VarKey(var_name, tensor_desc);
   GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str());
   if (var_addr_mgr_map_.count(var_key) == 0) {
-    uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() +
-                             static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
+    uint64_t logic_address = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(address));
+    if (memory_type != RT_MEMORY_RDMA_HBM) {
+      logic_address += VarManager::Instance(session_id_)->GetVarMemLogicBase();
+    }
     GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(),
            TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(),
            TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str());
@@ -102,7 +97,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
   var_addr_mgr.tensor_desc = tensor_desc;
   var_addr_mgr.memory_type = memory_type;
   var_addr_mgr_map_[var_key] = var_addr_mgr;
-  var_offset_set_.insert(logic_address);
+  var_offset_map_[logic_address] = memory_type;
   return SUCCESS;
 }
@@ -211,7 +206,14 @@ ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_na
   return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr);
 }
-bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; }
+bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; }
+
+rtMemType_t VarResource::GetVarMemType(const int64_t &offset) {
+  if (var_offset_map_.count(offset) > 0) {
+    return var_offset_map_[offset];
+  }
+  return RT_MEMORY_RESERVED;
+}
 VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) {
   auto iter = var_to_trans_road_.find(var_name);
@@ -252,7 +254,19 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr
 MemResource::MemResource() : total_size_(0), var_mem_size_(0) {}
-Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) {
+MemResource *MemResource::BuildMemResourceFromType(rtMemType_t mem_type) {
+  switch (mem_type) {
+    case RT_MEMORY_HBM:
+      return new (std::nothrow) HbmMemResource();
+    case RT_MEMORY_RDMA_HBM:
+      return new (std::nothrow) RdmaMemResource();
+    default:
+      return nullptr;
+  }
+}
+
+Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id,
+                                    size_t &mem_offset) {
   size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize;
   uint64_t real_size = size;
   total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize();
@@ -282,6 +296,19 @@
   return SUCCESS;
 }
+Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) {
+  uint8_t *buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size);
+  if (buffer == nullptr) {
+    GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %lu", var_name.c_str(), size);
+    return MEMALLOC_FAILED;
+  }
+  address = static_cast<size_t>(reinterpret_cast<uintptr_t>(buffer));
+  var_mem_size_ += size;
+  GELOGI("[IMAS]AssignVarMem Set session_%lu name[%s] output[%d] addr to [%p] size[%lu].",
+         session_id, var_name.c_str(), 0, buffer, size);
+  return SUCCESS;
+}
+
 uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; }
 void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; };
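With MemResource now split, BuildMemResourceFromType is the single place that maps a memory type to a concrete resource: HBM keeps offset-based assignment into the session's variable memory, while RDMA hands out real pool addresses. A usage sketch assembled only from the signatures shown above; the wrapper function itself is illustrative, not part of the patch:

    // Pick a resource by memory type and assign variable memory through the
    // virtual AssignVarMem; mirrors the two VarManager call sites below.
    Status AssignVar(const std::string &var_name, uint64_t size, uint64_t session_id,
                     rtMemType_t mem_type) {
      MemResource *mem_resource = MemResource::BuildMemResourceFromType(mem_type);
      if (mem_resource == nullptr) {
        return ge::INTERNAL_ERROR;  // unknown memory type, as in the callers below
      }
      size_t address = 0;  // HBM: offset into var mem; RDMA: real device address
      return mem_resource->AssignVarMem(var_name, size, session_id, address);
    }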
@@ -428,7 +455,7 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) {
   MemResource *mem_resource = nullptr;
   auto iter = mem_resource_map_.find(memory_type);
   if (iter == mem_resource_map_.end()) {
-    mem_resource = new (std::nothrow) MemResource();
+    mem_resource = MemResource::BuildMemResourceFromType(memory_type);
     if (mem_resource == nullptr) {
       GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type);
       return ge::INTERNAL_ERROR;
@@ -465,7 +492,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen
   MemResource *mem_resource = nullptr;
   auto it = mem_resource_map_.find(memory_type);
   if (it == mem_resource_map_.end()) {
-    mem_resource = new (std::nothrow) MemResource();
+    mem_resource = MemResource::BuildMemResourceFromType(memory_type);
     if (mem_resource == nullptr) {
       GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type);
       return ge::INTERNAL_ERROR;
@@ -629,6 +656,15 @@ bool VarManager::IsVarAddr(const int64_t &offset) {
   return var_resource_->IsVarAddr(offset);
 }
+rtMemType_t VarManager::GetVarMemType(const int64_t &offset) {
+  std::lock_guard<std::mutex> lock(mutex_);
+  if (var_resource_ == nullptr) {
+    GELOGW("VarManager has not been init.");
+    return RT_MEMORY_RESERVED;
+  }
+  return var_resource_->GetVarMemType(offset);
+}
+
 ge::Status VarManager::MallocVarMemory(size_t memory_size) {
   std::lock_guard<std::mutex> lock(mutex_);
   uint8_t *var_mem_base = nullptr;
@@ -654,12 +690,18 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) {
 uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) {
   std::lock_guard<std::mutex> lock(mutex_);
+  if (memory_type == RT_MEMORY_RDMA_HBM) {
+    return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr();
+  }
   string memory_key = std::to_string(session_id_);
   return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key);
 }
 uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) {
   std::lock_guard<std::mutex> lock(mutex_);
+  if (memory_type == RT_MEMORY_RDMA_HBM) {
+    return logic_addr;
+  }
   string mem_key = std::to_string(session_id_);
   uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key);
   if (mem_base == nullptr) {
diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h
index 9cf0068c..924ddcb7 100755
--- a/ge/graph/manager/graph_var_manager.h
+++ b/ge/graph/manager/graph_var_manager.h
@@ -158,13 +158,15 @@ class VarResource {
   bool IsVarAddr(const int64_t &offset);
+  rtMemType_t GetVarMemType(const int64_t &offset);
+
   std::unordered_map<std::string, ge::GeTensorDesc> GetAllVarDesc() const { return cur_var_tensor_desc_map_; }
 private:
   std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc);
   uint64_t session_id_;
-  std::unordered_set<int64_t> var_offset_set_;
+  std::unordered_map<int64_t, rtMemType_t> var_offset_map_;
   std::unordered_map var_addr_mgr_map_;
   std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_;
   std::unordered_map var_to_trans_road_;
@@ -176,19 +178,36 @@
 class MemResource {
  public:
   MemResource();
-  ~MemResource() = default;
+  virtual ~MemResource() = default;
+  static MemResource *BuildMemResourceFromType(rtMemType_t mem_type);
-  Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset);
+  virtual Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) = 0;
   uint64_t GetVarMemSize() const;
   void UpdateVarMemSize(int64_t mem_size);
- private:
+ protected:
   uint64_t total_size_;
   uint64_t var_mem_size_;
 };
+class HbmMemResource : public MemResource {
+ public:
+  HbmMemResource() = default;
+  ~HbmMemResource() override = default;
+
+  Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override;
+};
+
+class RdmaMemResource : public MemResource {
+ public:
+  RdmaMemResource() = default;
+  ~RdmaMemResource() override = default;
+
+  Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override;
+};
+
 class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager {
  public:
  static VarManager *Instance(uint64_t session_id);
@@ -275,6 +294,8 @@
   bool IsVarAddr(const int64_t &offset);
+  rtMemType_t GetVarMemType(const int64_t &offset);
+
   uint8_t *GetVarMemoryBase(rtMemType_t memory_type);
   uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type);
diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc
index 93d1fd1d..ed243801 100644
--- a/ge/graph/manager/rdma_pool_allocator.cc
+++ b/ge/graph/manager/rdma_pool_allocator.cc
@@ -51,7 +51,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type)
 Status RdmaPoolAllocator::Initialize() {
   memory_allocator_ = MemManager::Instance(memory_type_);
   if (memory_allocator_ == nullptr) {
-    return ge::FAILED;
+    return ACL_ERROR_GE_INTERNAL_ERROR;
   }
   return ge::SUCCESS;
 }
diff --git a/ge/graph/manager/rdma_pool_allocator.h b/ge/graph/manager/rdma_pool_allocator.h
index 4d8cf71e..0a895a11 100644
--- a/ge/graph/manager/rdma_pool_allocator.h
+++ b/ge/graph/manager/rdma_pool_allocator.h
@@ -53,6 +53,10 @@ class RdmaPoolAllocator {
   Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size);
+  uint8_t *GetRdmaBaseAddr() { return rdma_base_addr_; }
+
+  size_t GetRdmaMemSize() { return rdma_mem_size_; }
+
  private:
   void MergeBlocks(Block *dst, Block *src);
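The two rdma_pool_allocator getters exist for the VarManager hunks above: under RT_MEMORY_RDMA_HBM a variable's "logic" address is already the real device address handed out by the pool, so no var-mem base arithmetic applies. A condensed view of that special-casing; the wrapper function is illustrative, the calls inside it come straight from the hunks above:

    // RDMA HBM: the base comes from the pool, and a logic address is returned
    // untouched by GetVarMemoryAddr because it is already a device address.
    uint8_t *VarMemoryBase(rtMemType_t memory_type) {
      if (memory_type == RT_MEMORY_RDMA_HBM) {
        return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr();
      }
      return nullptr;  // non-RDMA path unchanged: session-keyed lookup (omitted here)
    }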
diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc
index 6c81b21f..2a60765f 100755
--- a/ge/graph/partition/dynamic_shape_partition.cc
+++ b/ge/graph/partition/dynamic_shape_partition.cc
@@ -51,6 +51,13 @@ using ClusterPtr = std::shared_ptr<Cluster>;
 static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) {
   for (const auto &node : root_graph->GetAllNodes()) {
     GE_CHECK_NOTNULL(node->GetOpDesc());
+    // do not partition in the single-op scene.
+    bool is_singleop = false;
+    (void)AttrUtils::GetBool(node->GetOpDesc(), ATTR_SINGLE_OP_SCENE, is_singleop);
+    if (is_singleop) {
+      return false;
+    }
+
     for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) {
       auto type = input_desc.GetDataType();
       if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) {
@@ -213,6 +220,7 @@ std::string DynamicShapePartitioner::DebugString() const {
   size_t data = 0;
   size_t netoutput = 0;
   size_t is_inputnode = 0;
+  size_t stage = 0;
   std::stringstream ss;
   ss << "All unknown shape nodes:" << std::endl;
   for (const auto &node : unknown_shape_nodes_) {
@@ -229,10 +237,13 @@
       netoutput++;
     } else if (cluster->IsInputNode()) {
       is_inputnode++;
+    } else if (cluster->IsIndependent()) {
+      stage++;
     }
   }
   ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known
-     << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl;
+     << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode
+     << ", stage:" << stage << std::endl;
   for (const auto &cluster : unique_clusters_) {
     ss << "  " << cluster->DebugString() << std::endl;
   }
@@ -272,12 +283,15 @@ Status DynamicShapePartitioner::InitClusters() {
   for (const auto &node : graph->GetDirectNode()) {
     Cluster::Type type = Cluster::DATA;
     bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty();
+    REQUIRE_NOT_NULL(node->GetOpDesc(), "op_desc is null");
     if (node->GetType() == DATA) {
       type = Cluster::DATA;
     } else if (is_input) {
       type = Cluster::INPUT_NODE;
     } else if (node->GetType() == NETOUTPUT) {
       type = Cluster::NETOUTPUT;
+    } else if ((node->GetType() == PARTITIONEDCALL) && (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL))) {
+      type = Cluster::STAGE;
     } else if (unknown_shape_nodes_.count(node) > 0) {
       type = Cluster::UNKNOWN_SHAPE;
     } else {
@@ -360,6 +374,9 @@ static std::string ToString(const std::vector<ClusterPtr> &clusters) {
 void DynamicShapePartitioner::MergeClustersUnknownShape() {
   // Merge unknown shape clusters
   for (const auto &cluster : ordered_cluster_) {
+    if (cluster->IsIndependent()) {
+      continue;
+    }
     for (const auto &in_cluster : cluster->Inputs()) {
       if (!in_cluster->IsUnknownShape()) {
         continue;
@@ -379,6 +396,9 @@ void DynamicShapePartitioner::MergeClustersKnownShape() {
   // Merge known shape clusters
   for (const auto &cluster : ordered_cluster_) {
+    if (cluster->IsIndependent()) {
+      continue;
+    }
     if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) {
       auto in_cluster = *(cluster->Inputs().begin());
       in_cluster->Merge(cluster);
@@ -606,6 +626,7 @@ void Cluster::UpdateRank(size_t rank) {
 bool Cluster::IsData() const { return type_ == DATA; };
 bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; };
 bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; };
+bool Cluster::IsIndependent() const { return type_ == STAGE; };
 bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; };
 bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; };
 bool Cluster::IsRefVariable() const {
@@ -641,6 +662,9 @@ void Cluster::RemoveOutput(ClusterPtr out) {
                          out->in_clusters_.end());
 };
 void Cluster::Merge(ClusterPtr other) {
+  if (other->IsIndependent()) {
+    return;
+  }
   nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end());
   other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()),
                             other->in_clusters_.end());
@@ -689,7 +713,9 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) {
   std::unordered_set<ClusterPtr> forward_reached_clusters;
   std::unordered_set<ClusterPtr> backward_reached_clusters;
   std::vector<ClusterPtr> path_clusters;
-
+  if (other->IsIndependent()) {
+    return path_clusters;
+  }
   if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) ==
       other->out_clusters_.end()) {
     return path_clusters;
@@ -772,7 +798,7 @@ Status Cluster::BuildFrame() {
       }
     }
   }
-  if (IsData()) {
+  if (IsData() || IsIndependent()) {
     for (const auto &anchor : node->GetAllOutDataAnchors()) {
       AddFrameOutput(anchor);
     }
@@ -888,7 +914,7 @@ Status Cluster::CombinePartitionFrame() {
 }
 Status Cluster::BuildPartitionSubgraph() {
-  if (IsData() || IsNetOutput()) {
+  if (IsData() || IsNetOutput() || IsIndependent()) {
     return SUCCESS;
   }
   int64_t parent_node_index = 0;
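The recurring IsIndependent() guards above all enforce one rule: a PARTITIONEDCALL node carrying ATTR_STAGE_LEVEL becomes its own STAGE cluster and must never be merged into a neighbouring known- or unknown-shape cluster, otherwise stage boundaries would dissolve during partitioning. The guard shape, extracted from the Merge hunk with comments added:

    // Extracted from Cluster::Merge above: an independent (STAGE) cluster is a
    // hard boundary, so merging is silently skipped rather than treated as error.
    void Cluster::Merge(ClusterPtr other) {
      if (other->IsIndependent()) {
        return;  // stage clusters keep their own frame and subgraph
      }
      // ... normal merge of nodes_ and in/out cluster edges follows ...
    }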
diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h
index 9772615e..e8408ff9 100644
--- a/ge/graph/partition/dynamic_shape_partition.h
+++ b/ge/graph/partition/dynamic_shape_partition.h
@@ -32,7 +32,7 @@ class DynamicShapePartitioner {
 // DATA:DATA, UNKNOWN_SHAPE:unknownshape, KNOWN_SHAPE:knownshape, NETOUTPUT:NETOUTPUT.
 class Cluster : public std::enable_shared_from_this<Cluster> {
  public:
-  enum Type { DATA, INPUT_NODE, NETOUTPUT, KNOWN_SHAPE, UNKNOWN_SHAPE };
+  enum Type { DATA, INPUT_NODE, NETOUTPUT, STAGE, KNOWN_SHAPE, UNKNOWN_SHAPE };
   Cluster(size_t rank, Type type, NodePtr node, DynamicShapePartitioner *partitioner)
       : id_(rank), min_(rank), max_(rank), type_(type), partitioner_(partitioner) {
     nodes_.push_back(node);
@@ -45,6 +45,7 @@
   bool IsData() const;
   bool IsKnownShape() const;
   bool IsUnknownShape() const;
+  bool IsIndependent() const;
   bool IsNetOutput() const;
   std::vector<std::shared_ptr<Cluster>> Inputs() const;
   std::vector<std::shared_ptr<Cluster>> Outputs() const;
diff --git a/ge/graph/partition/stage_partition.cc b/ge/graph/partition/stage_partition.cc
index 93a06afe..f6e49bbd 100644
--- a/ge/graph/partition/stage_partition.cc
+++ b/ge/graph/partition/stage_partition.cc
@@ -25,6 +25,10 @@
 #include "common/types.h"
 namespace ge {
+namespace {
+const std::set<std::string> kSrcNodeTypes = { DATA, AIPPDATA, ANN_DATA };
+}
+
 Status StagePartitioner::Partition() {
   GE_CHECK_NOTNULL(root_graph_);
   if (root_graph_->GetParentGraph() != nullptr) {
@@ -37,6 +41,10 @@
     if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) {
       continue;
     }
+    if ((kSrcNodeTypes.count(op_desc->GetType()) != 0) && node->GetInAllNodes().empty()) {
+      continue;
+    }
+    GELOGD("original node %s for stage %u", node->GetName().c_str(), level);
     stage_nodes_[level].insert(node);
   }
   if (stage_nodes_.empty()) {
@@ -54,6 +62,13 @@
     return FAILED;
   }
+  root_graph_->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool {
+    uint32_t a_level = UINT32_MAX;
+    (void)AttrUtils::GetInt(a->GetOpDesc(), ATTR_STAGE_LEVEL, a_level);
+    uint32_t b_level = UINT32_MAX;
+    (void)AttrUtils::GetInt(b->GetOpDesc(), ATTR_STAGE_LEVEL, b_level);
+    return a_level < b_level;
+  });
   if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) {
     GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, "
            "maybe stage_level was not set correctly.", root_graph_->GetName().c_str());
@@ -76,20 +91,26 @@ Status StagePartitioner::SplitStageLevel() {
     auto node = nodes.top();
     nodes.pop();
     GE_CHECK_NOTNULL(node->GetOpDesc());
-    if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) {
+    uint32_t tmp_level = cur_stage_level;
+    (void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level);
+    if (tmp_level != cur_stage_level) {
       continue;
     }
     for (const auto &in_node : node->GetInAllNodes()) {
       if (visited_stage_nodes.count(in_node) != 0) {
         continue;
       }
+      if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) {
+        GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", in_node->GetName().c_str());
+        return INTERNAL_ERROR;
+      }
+      GELOGD("Mark stage_level node %s, stage_level=%u", in_node->GetName().c_str(), cur_stage_level);
+      if ((kSrcNodeTypes.count(in_node->GetType()) != 0) && in_node->GetInAllNodes().empty()) {
+        GELOGD("skip data node %s for stage %u", in_node->GetName().c_str(), cur_stage_level);
+        continue;
+      }
       nodes.push(in_node);
     }
-    if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) {
-      GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str());
-      return INTERNAL_ERROR;
-    }
-    GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level);
     visited_stage_nodes.emplace(node);
   }
   for (const auto &node : visited_stage_nodes) {
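The Partition() hunk above first runs a topological sort with a comparator so that, among otherwise schedulable nodes, lower ATTR_STAGE_LEVEL comes first; nodes without the attribute default to UINT32_MAX and therefore sink to the end. A self-contained sketch of just that comparator logic; NodeSketch and its attribute map stand in for ge::NodePtr and AttrUtils::GetInt:

    #include <cstdint>
    #include <map>
    #include <string>

    struct NodeSketch {
      std::map<std::string, uint32_t> int_attrs;  // stand-in for op_desc attrs
    };

    static uint32_t StageLevelOf(const NodeSketch &n) {
      auto it = n.int_attrs.find("_stage_level");
      return it == n.int_attrs.end() ? UINT32_MAX : it->second;  // unstaged sorts last
    }

    static bool StageBefore(const NodeSketch &a, const NodeSketch &b) {
      return StageLevelOf(a) < StageLevelOf(b);
    }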
@@ -219,6 +240,11 @@ NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const
   op_desc->AddSubgraphName("f");
   op_desc->SetSubgraphInstanceName(0, graph_name);
+  if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) {
+    GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed", op_desc->GetName().c_str());
+    return nullptr;
+  }
+
   NodePtr subgraph_node = root_graph_->AddNode(op_desc);
   if (subgraph_node == nullptr) {
     GELOGE(FAILED, "Add node %s failed.", graph_name.c_str());
diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc
index 7d9724fc..3587b03e 100644
--- a/ge/graph/passes/common_subexpression_elimination_pass.cc
+++ b/ge/graph/passes/common_subexpression_elimination_pass.cc
@@ -26,9 +26,6 @@
 namespace ge {
 namespace {
-std::set<std::string> un_compute_attrs = {
-  {ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES},
-};
 std::string GetCseKey(const NodePtr &node) {
   std::stringstream ss;
@@ -53,7 +50,7 @@
     ss << name << "-";
   }
-  ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs);
+  ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc());
   return ss.str();
 }
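The CSE change above is behavioural, not cosmetic: the key now hashes all attributes (GetAllAttrsStr) instead of a string with the dump-origin attribute stripped, so two nodes that differ only in that attribute are no longer folded into one. An illustration of the consequence; the nodes and attribute values are hypothetical:

    // Two nodes identical except for a dump-related attribute. Before this
    // patch the attribute was excluded from the key, so the keys matched and
    // one node was eliminated; after it, the keys differ and both survive.
    std::string key_a = GetCseKey(node_a);  // attrs include _datadump_origin_op_names = "x"
    std::string key_b = GetCseKey(node_b);  // attrs include _datadump_origin_op_names = "y"
    bool folded = (key_a == key_b);         // false after this change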
diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc
index bf2e1170..9ecc79a6 100644
--- a/ge/graph/passes/cond_remove_pass.cc
+++ b/ge/graph/passes/cond_remove_pass.cc
@@ -203,7 +203,7 @@ bool CondRemovePass::CheckIfCondConstInput(const OutDataAnchorPtr &cond_out_anch
   // Get weights from peer node
   auto weights = OpDescUtils::GetWeights(out_node);
   if (weights.size() <= static_cast<size_t>(cond_out_anchor->GetIdx())) {
-    GELOGI("Get weights of node %s out index %d, weight size %u is not fit for data index %d.",
+    GELOGI("Get weights of node %s out index %d, weight size %zu is not fit for data index %d.",
            out_node->GetName().c_str(), cond_out_anchor->GetIdx(), weights.size(), cond_out_anchor->GetIdx());
     return false;
   }
@@ -234,14 +234,14 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c
   const auto &output_desc_size = node->GetOpDesc()->GetOutputsSize();
   // Create subgraph opdesc & node
   auto partitioncall_opdesc =
-      CreateSubgraphOpDesc(save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size);
+      CreateSubgraphOpDesc(node, save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size);
   auto partitioncall_node = node->GetOwnerComputeGraph()->AddNode(partitioncall_opdesc);
   // Link node's peerout anchors to new node's inanchors
   for (const auto &input_anchor : node->GetAllInAnchors()) {
     for (const auto &peerout_anchor : input_anchor->GetPeerAnchors()) {
       if (GraphUtils::AddEdge(peerout_anchor, partitioncall_node->GetInAnchor(
           input_anchor->GetIdx() - kConditionIndexNum)) != ge::GRAPH_SUCCESS) {
-        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d",
+        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu",
               peerout_anchor->GetOwnerNode()->GetName().c_str(), peerout_anchor->GetIdx(),
               partitioncall_node->GetName().c_str(), input_anchor->GetIdx(), input_desc_size,
               output_desc_size);
@@ -254,14 +254,14 @@
   for (const auto &output_anchor : node->GetAllOutAnchors()) {
     for (const auto &peerin_anchor : output_anchor->GetPeerAnchors()) {
       if (GraphUtils::RemoveEdge(node->GetOutAnchor(output_anchor->GetIdx()), peerin_anchor) != ge::GRAPH_SUCCESS) {
-        GELOGE(FAILED, "Remove edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d",
+        GELOGE(FAILED, "Remove edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu",
               node->GetName().c_str(), output_anchor->GetIdx(), peerin_anchor->GetOwnerNode()->GetName().c_str(),
               peerin_anchor->GetIdx(), input_desc_size, output_desc_size);
         return FAILED;
       }
       if (GraphUtils::AddEdge(partitioncall_node->GetOutAnchor(output_anchor->GetIdx()), peerin_anchor) !=
           ge::GRAPH_SUCCESS) {
-        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d",
+        GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%zu, output num:%zu",
               partitioncall_node->GetName().c_str(), output_anchor->GetIdx(),
               peerin_anchor->GetOwnerNode()->GetName().c_str(), peerin_anchor->GetIdx(), input_desc_size,
               output_desc_size);
@@ -289,7 +289,8 @@
 /// @param [in] output_num
 /// @return OpDescPtr
 ///
-OpDescPtr CondRemovePass::CreateSubgraphOpDesc(const std::string &name, size_t input_num, size_t output_num) {
+OpDescPtr CondRemovePass::CreateSubgraphOpDesc(const NodePtr &node, const std::string &name, size_t input_num,
+                                               size_t output_num) {
   OpDescBuilder op_desc_builder(name, PARTITIONEDCALL);
   op_desc_builder.AddDynamicInput("args", input_num).AddDynamicOutput("output", output_num);
@@ -299,6 +300,16 @@
   size_t index = op_desc->GetSubgraphInstanceNames().size();
   op_desc->AddSubgraphName("f");
   op_desc->SetSubgraphInstanceName(static_cast<uint32_t>(index), name);
+
+  auto node_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL_EXEC(node_desc, return nullptr);
+  for (size_t i = 0; i < input_num; ++i) {
+    (void)op_desc->UpdateInputDesc(i, node_desc->GetInputDesc(i + 1));
+  }
+  for (size_t i = 0; i < output_num; ++i) {
+    (void)op_desc->UpdateOutputDesc(i, node_desc->GetOutputDesc(i));
+  }
+
   return op_desc;
 }
diff --git a/ge/graph/passes/cond_remove_pass.h b/ge/graph/passes/cond_remove_pass.h
index 72ca64b8..e466d684 100644
--- a/ge/graph/passes/cond_remove_pass.h
+++ b/ge/graph/passes/cond_remove_pass.h
@@ -70,7 +70,7 @@ class CondRemovePass : public BaseNodePass {
   ///
   Status ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, const ComputeGraphPtr &save_branch);
-  OpDescPtr CreateSubgraphOpDesc(const std::string &name, size_t input_num, size_t output_num);
+  OpDescPtr CreateSubgraphOpDesc(const NodePtr &node, const std::string &name, size_t input_num, size_t output_num);
   int32_t GetCondIndex(const ConstGeTensorPtr &tensor);
 };
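CreateSubgraphOpDesc now receives the original If/Case node so the freshly built PartitionedCall inherits real tensor descriptors instead of empty ones: input i of the new op maps to input i+1 of the original node (index 0 was the condition, which is dropped), and outputs map one-to-one. The index arithmetic from the hunk above, isolated and annotated:

    // i ranges over the new PartitionedCall's inputs/outputs.
    for (size_t i = 0; i < input_num; ++i) {
      // original input 0 is the cond tensor, so shift the source index by one
      (void)op_desc->UpdateInputDesc(i, node_desc->GetInputDesc(i + 1));
    }
    for (size_t i = 0; i < output_num; ++i) {
      (void)op_desc->UpdateOutputDesc(i, node_desc->GetOutputDesc(i));  // 1:1
    }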
31dee390..3b7a0886 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -469,7 +469,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { continue; } GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_data_anchor), - "Add data-edge %s:%d->%s:%d failed.", + "Add data-edge %s:%d->%s:%zu failed.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), peer_out_anchor->GetIdx(), while_node->GetName().c_str(), i); } @@ -480,7 +480,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { GE_CHECK_NOTNULL(out_data_anchor); for (auto &peer_in_anchor : while_info.data_outputs[i]) { GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_data_anchor, peer_in_anchor), - "Add data-edge %s:%d->%s:%d failed.", + "Add data-edge %s:%zu->%s:%d failed.", while_node->GetName().c_str(), i + kWhileOutputIndex, peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); } diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 21747f42..3f607f84 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -28,6 +28,8 @@ namespace { const int32_t kAnchorSize = 1; const int kAnchorNum = 0; +const int32_t kAnchorAssignRefIndex = 0; +const int32_t kAnchorAssignValueIndex = 1; const char *const kInputMutable = "_input_mutable"; } // namespace namespace ge { @@ -35,43 +37,147 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); for (const auto &node : graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, continue); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + Status ret = ContinuousInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed ContinuousInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + ret = MutableInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + ret = P2pmemInputProcess(graph, node); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); + return ret; + } + + } + return SUCCESS; +}
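[Editor's note: Run() above is now a thin dispatcher over three independent checks (continuous input, mutable input, p2p memory). A minimal sketch of how the pass is driven, kept deliberately non-authoritative: GraphPass::Run(ComputeGraphPtr) and AttrUtils::GetBool appear in this patch, while AttrUtils::SetBool is assumed to mirror GetBool, and the helper name RunHcclMemcpyOn is hypothetical.]

```cpp
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/utils/attr_utils.h"

// Sketch: mark a collective op's inputs as mutable, then run the pass so that
// MutableInputProcess() splices an Identity in front of const/variable inputs.
ge::Status RunHcclMemcpyOn(const ge::ComputeGraphPtr &graph, const ge::NodePtr &hcom_node) {
  // "_input_mutable" is the kInputMutable attr that MutableInputProcess() checks.
  (void)ge::AttrUtils::SetBool(hcom_node->GetOpDesc(), "_input_mutable", true);
  ge::HcclMemcpyPass pass;
  return pass.Run(graph);  // dispatches the three *InputProcess checks per node
}
```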
+ +// If a node has the _input_mutable attr, its input memory may be modified while the op executes. +// To avoid affecting other ops that execute with the same input after the data is modified, +// a memcpy (Identity) node needs to be inserted between them. +// This also applies when the input is a variable or const. +Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + bool node_input_mutable = false; + if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { + return SUCCESS; + } + + if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); + return FAILED; + } + if (!node_input_mutable) { + return SUCCESS; + } - bool node_input_mutable = false; - if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { + GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); + for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { + if (hccl_in_anchor == nullptr) { continue; } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(src_out_anchor); - GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); - if (!node_input_mutable) { + int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); + if (src_out_anchor_size == kAnchorSize) { + // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } continue; } - GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } + } + return SUCCESS; +} + +// If a broadcast has more than one input and an input comes from a variable, +// the broadcast inputs must occupy continuous memory, so a separate feature-map buffer is allocated for them. +// In that condition, a move of the data from variable memory to the broadcast input buffer would be executed on every step. +// To keep that move action out of the executed model, a memcpy node is inserted instead of move-action code. +Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + + if (is_input_continuous && op_desc->GetInputsSize() > 1) { + GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); + // if input size is bigger than one, insert memcpy between var and data to support continuous memory allocation for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { continue; } auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - - int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); - if (src_out_anchor_size == kAnchorSize) { - // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. - NodePtr src_node = src_out_anchor->GetOwnerNode(); - std::string src_type = src_node->GetType(); - bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); - if (check_src_type) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; } - continue; } + } + } + return SUCCESS; +} + +// if the input is a variable and the node input needs p2p memory, a memcpy should be inserted between the two +Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { + auto op_desc = node->GetOpDesc(); + vector<int64_t> input_memory_types; + (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); + + if (input_memory_types.empty()) { + return SUCCESS; + } + + for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { + if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { + continue; + } + + GELOGD("p2p input op is:%s.", op_desc->GetName().c_str()); + auto hccl_in_anchor = node->GetInDataAnchor(index); + if (hccl_in_anchor == nullptr) { + continue; + } + auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); + if (src_out_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); @@ -82,8 +188,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { return SUCCESS; } +bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { + return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); +} + /// -/// @brief Add MemcpyAsync Node +/// @brief Add Identity Node /// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr @@ -101,20 +211,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O node_name = CheckDuplicateName(node_name); OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); + GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); return nullptr; } // because of historical reasons, this pass can not run after constant folding, so mark it @@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); return nullptr; }
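[Editor's note: the net effect of ModifyEdgeConnection in the hunk that follows is a plain edge splice around the Identity node created above. A minimal sketch under that assumption, using only GraphUtils calls and macros that appear elsewhere in this diff; whether InsertIdentityBeforeHccl is implemented with exactly these calls is not visible in these hunks, and the helper name SpliceIdentity is hypothetical.]

```cpp
#include "graph/utils/graph_utils.h"

// Sketch: splice `identity` into the data edge var -> hccl.
// Before: var:out -> hccl:in    After: var:out -> identity:0 -> hccl:in
ge::Status SpliceIdentity(const ge::NodePtr &identity,
                          const ge::OutDataAnchorPtr &src_out_anchor,
                          const ge::InDataAnchorPtr &hccl_in_anchor) {
  GE_CHK_GRAPH_STATUS_RET(ge::GraphUtils::RemoveEdge(src_out_anchor, hccl_in_anchor),
                          "Remove var->hccl edge failed.");
  GE_CHK_GRAPH_STATUS_RET(ge::GraphUtils::AddEdge(src_out_anchor, identity->GetInDataAnchor(0)),
                          "Add var->identity edge failed.");
  GE_CHK_GRAPH_STATUS_RET(ge::GraphUtils::AddEdge(identity->GetOutDataAnchor(0), hccl_in_anchor),
                          "Add identity->hccl edge failed.");
  return ge::SUCCESS;
}
```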
"Create Identity op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); return nullptr; } @@ -155,7 +265,38 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { /// Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor) { - GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); + GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); + GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); + + Status ret = InsertIdentityBeforeHccl(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "add identity failed, var_node:%s, hccl_node:%s.", + src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return ret; + } + + ret = InsertAssignAfterBroadcastIfNeed(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "add assign failed, var_node:%s, hccl_node:%s.", + src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return ret; + } + return SUCCESS; +} + +/// +/// @brief Insert Identity node Between Hccl node and variable +/// @param [in] ComputeGraphPtr graph +/// @param [in] OutDataAnchorPtr src_out_anchor +/// @param [in] InDataAnchorPtr hccl_in_anchor +/// @return status +/// +Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { + GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); @@ -182,6 +323,141 @@ Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const } return SUCCESS; } + +/// +/// @brief Insert assign node after broadcast node and variable to refresh variable data +/// @param [in] ComputeGraphPtr graph +/// @param [in] OutDataAnchorPtr var_out_anchor +/// @param [in] InDataAnchorPtr hccl_in_anchor +/// @return status +/// +Status HcclMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &var_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { + if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { + GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + + if (var_out_anchor->GetOwnerNode()->GetType() != VARIABLE) { + GELOGD("%s not variable, no need to insert assign node", var_out_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + + GELOGI("after op %s and op %s need insert assign op.", var_out_anchor->GetOwnerNode()->GetName().c_str(), + hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + + for (auto peer_in_anchor : var_out_anchor->GetPeerInDataAnchors()) { + if (peer_in_anchor->GetOwnerNode()->GetType() == ASSIGN) { + GELOGD("variable %s out assign node is exist.", 
var_out_anchor->GetOwnerNode()->GetName().c_str()); + return SUCCESS; + } + } + + NodePtr assign_node = CreateAssignNode(graph, var_out_anchor); + GE_CHECK_NOTNULL(assign_node); + + OutDataAnchorPtr hccl_out_anchor = hccl_in_anchor->GetOwnerNode()->GetOutDataAnchor(hccl_in_anchor->GetIdx()); + GE_CHECK_NOTNULL(hccl_out_anchor); + + Status ret = hccl_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignValueIndex)); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to anchor of %s.", hccl_out_anchor->GetOwnerNode()->GetName().c_str(), + assign_node->GetName().c_str()); + return FAILED; + } + + ret = var_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignRefIndex)); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to anchor of %s.", var_out_anchor->GetOwnerNode()->GetName().c_str(), + assign_node->GetName().c_str()); + return FAILED; + } + + // add control edge between assign node and node after broadcast node + OutControlAnchorPtr assign_out_control_anchor = assign_node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(assign_out_control_anchor); + + for (auto in_data_anchor : hccl_out_anchor->GetPeerInDataAnchors()) { + if (in_data_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { + continue; + } + ret = assign_out_control_anchor->LinkTo(in_data_anchor->GetOwnerNode()->GetInControlAnchor()); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to control anchor of %s.", + assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str()); + return FAILED; + } + } + + for (auto in_control_anchor : hccl_out_anchor->GetOwnerNode()->GetOutControlAnchor()->GetPeerInControlAnchors()) { + if (in_control_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { + continue; + } + ret = assign_out_control_anchor->LinkTo(in_control_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "The op %s failed to link to control anchor of %s.", + assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), + in_control_anchor->GetOwnerNode()->GetName().c_str()); + return FAILED; + } + } + return SUCCESS; +} + +/// +/// @brief Create Assign node and add it to the graph +/// @param [in] ge::ComputeGraphPtr graph +/// @param [in] ge::OutDataAnchorPtr variable node out anchor +/// @return ge::NodePtr +/// +NodePtr HcclMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { + GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); + NodePtr pre_node = out_data_anchor->GetOwnerNode(); + OpDescPtr pre_op_desc = pre_node->GetOpDesc(); + if (pre_op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "OpDesc of pre node is invalid."); + return nullptr; + } + + std::string node_name = pre_node->GetName() + "_" + ASSIGN; + node_name = CheckDuplicateName(node_name); + OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), ASSIGN); + if (op_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "Create Assign op: MakeShared op_desc fail."); + return nullptr; + } + GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); + + graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); + return nullptr; + } + + ret = op_desc->AddInputDesc("value", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add value input desc fail."); + return nullptr; + } + + ret = op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail."); + return nullptr; + } + + NodePtr assign_node = graph->AddNode(op_desc); + if (assign_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Insert Assign node fail."); + return nullptr; + } + + return assign_node; +} + +
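[Editor's note: the anchor wiring in InsertAssignAfterBroadcastIfNeed above is easier to audit as a picture. The comments below are a sketch of the intended result, not patch content; they follow directly from the LinkTo calls shown in the hunk.]

```cpp
// Topology after the pass, for a broadcast whose input is a variable:
//
//   Var --data--> Identity --data--> HcomBroadcast --data--> downstream op
//    |                                      |                     ^
//    | (ref, input 0)      (value, input 1) |                     | control
//    +-------------> Assign <---------------+                     |
//                       |                                         |
//                       +----------------- control ---------------+
//
// Assign writes the broadcast output back into the variable; its out-control
// anchor is linked to every other consumer of the broadcast output (and to
// the broadcast's existing control successors) so the refresh runs first.
```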
op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail."); + return nullptr; + } + + NodePtr assign_node = graph->AddNode(op_desc); + if (assign_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); + return nullptr; + } + + return assign_node; +} + + /// /// @brief Clear Status, used for subgraph pass /// @return SUCCESS diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index e73a5483..98e05964 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -32,11 +32,28 @@ class HcclMemcpyPass : public GraphPass { private: NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + NodePtr CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); + std::string CheckDuplicateName(const std::string &node_name); Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor); + Status InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor); + + Status InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor); + + Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node); + + bool IsDataNode(const std::string& node_type); + std::unordered_map node_num_map_; }; } // namespace ge diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index b7efa070..17a1e3bb 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -928,7 +928,7 @@ Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) { auto out_data_anchor = node->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue); NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index); - GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %zu data node failed.", + GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %d data node failed.", out_data_anchor->GetIdx()); return INTERNAL_ERROR); for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { GE_IF_BOOL_EXEC(in_anchor == nullptr, continue); diff --git a/ge/graph/passes/remove_same_const_pass.cc b/ge/graph/passes/remove_same_const_pass.cc index e75a4553..3d18a92d 100644 --- a/ge/graph/passes/remove_same_const_pass.cc +++ b/ge/graph/passes/remove_same_const_pass.cc @@ -85,7 +85,7 @@ Status RemoveSameConstPass::Run(ComputeGraphPtr graph) { ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s", node->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s, ret=%u", node->GetName().c_str(), iter->second->GetName().c_str(), ret); return INTERNAL_ERROR; } diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index d1111d52..3d83c301 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -142,17 +142,18 @@ Status 
SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node GE_CHECK_NOTNULL(in_node); // Need insert memcpy - // 1. Const->NetOutput in subgraph + // 1. Const->NetOutput in subgraph & parent graph is known // 2. AtomicOp->NetOutput in subgraph // 3. OutputContinuesRequiredOp->NetOutput in subgraph // 4. Data->NetOutput in subgraph but parent_node is not while // 5. While->NetOutput in known subgraph std::string op_type; - bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || + bool insert_flag = + (NodeUtils::GetConstOpType(in_node, op_type) && !graph->GetParentGraph()->GetGraphUnknownFlag()) || IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && - (kWhileOpTypes.count(in_node->GetType()) != 0)); + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; @@ -310,7 +311,7 @@ Status SubgraphPass::InsertInputMemcpy(const ComputeGraphPtr &graph, const std:: Status SubgraphPass::InsertOutputMemcpy(const ComputeGraphPtr &graph, const NodePtr &output_node, const std::set &bypass_index) { if (output_node->GetAllInDataAnchorsSize() == bypass_index.size()) { - GELOGD("No need to insert output memcpy node in while_body %s, output_size=%zu, bypass_num=%zu.", + GELOGD("No need to insert output memcpy node in while_body %s, output_size=%u, bypass_num=%zu.", graph->GetName().c_str(), output_node->GetAllInDataAnchorsSize(), bypass_index.size()); return SUCCESS; } diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc deleted file mode 100644 index c9218296..00000000 --- a/ge/graph/passes/variable_op_pass_bak.cc +++ /dev/null @@ -1,811 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/variable_op_pass.h" -#include -#include - -#include "common/formats/formats.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/ge_context.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" - -namespace ge { -namespace { -const int kTransOpOutIndex = 0; - -Status ByPassTransNode(NodePtr &front_node, NodePtr &back_node) { - GE_CHECK_NOTNULL(front_node); - GE_CHECK_NOTNULL(back_node); - GELOGD("Begin to bypass trans node %s", front_node->GetName().c_str()); - auto ret = GraphUtils::CopyInCtrlEdges(front_node, back_node); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to move control edges from trans " - "node %s to var-ref %s", - front_node->GetName().c_str(), back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - auto back_node_in_anchor = back_node->GetInDataAnchor(0); - if (back_node_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "The back node %s does not have an " - "input anchor", - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - back_node_in_anchor->UnlinkAll(); - auto trans_in_anchor = front_node->GetInDataAnchor(0); - if (trans_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "Failed to get the in data anchor from trans" - " node %s type %s", - front_node->GetName().c_str(), front_node->GetType().c_str()); - return INTERNAL_ERROR; - } - auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); - if (prev_trans_node_out_anchor == nullptr) { - GELOGW( - "The trans node %s does not have an input, so the ref node %s does" - " not have any inputs after bypass", - front_node->GetName().c_str(), front_node->GetName().c_str()); - } else { - ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, back_node_in_anchor); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to add edge between ref node %s " - "and the prev node of trans node %s", - back_node->GetName().c_str(), front_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } - return SUCCESS; -} - -bool IsTransSupport(const TransNodeInfo &trans_info) { - if (trans_info.output.GetShape().IsUnknownShape()) { - return false; - } - if (trans_info.node_type == RESHAPE || trans_info.node_type == REFORMAT) { - return true; - } else if (trans_info.node_type == TRANSDATA || trans_info.node_type == TRANSPOSED) { - formats::TransArgs args{nullptr, - trans_info.input.GetFormat(), - trans_info.output.GetFormat(), - trans_info.input.GetShape().GetDims(), - trans_info.output.GetShape().GetDims(), - trans_info.input.GetDataType()}; - return formats::IsTransFormatSupport(args); - } else if (trans_info.node_type == CAST) { - formats::CastArgs datatype_args{nullptr, static_cast(trans_info.input.GetShape().GetShapeSize()), - trans_info.input.GetDataType(), trans_info.output.GetDataType()}; - return formats::IsTransDataTypeSupport(datatype_args); - } else { - return false; - } -} - -std::string GetInAndOutDecsDiff(NodePtr &trans_node, bool reverse = false) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto op_desc = trans_node->GetOpDesc(); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - if (reverse) { - GeTensorDesc tmp_desc = input_desc; - input_desc = output_desc; - output_desc = tmp_desc; - } - auto input_format = input_desc.GetFormat(); - auto input_type = 
input_desc.GetDataType(); - auto input_shape = input_desc.GetShape(); - auto output_format = output_desc.GetFormat(); - auto output_type = output_desc.GetDataType(); - auto output_shape = output_desc.GetShape(); - std::stringstream diff_key; - diff_key.str(""); - if (input_format != output_format) { - diff_key << static_cast(input_format) << '-' << static_cast(output_format) << '-'; - } else { - diff_key << "*-"; - } - if (input_type != output_type) { - diff_key << static_cast(input_type) << '-' << static_cast(output_type) << '-'; - } else { - diff_key << "*-"; - } - if (!ge::formats::IsShapeEqual(input_shape, output_shape)) { - for (auto dim : input_shape.GetDims()) { - diff_key << dim << '-'; - } - for (auto dim : output_shape.GetDims()) { - diff_key << dim << '-'; - } - } else { - diff_key << "*"; - } - return diff_key.str(); -} -} // namespace - -Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { - if (graph == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run variable op pass, null graph"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to run variable op pass on graph %s, session %lu, graph id %u", graph->GetName().c_str(), - GetContext().SessionId(), graph->GetGraphID()); - - if (var_accelerate_ctrl_ == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run var op pass, the variable accelerate control is null"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to generate ref map for variable and refs, graph name:%s.", graph->GetName().c_str()); - if (RenewVarDesc(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew var desc on graph"); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - GELOGD("Begin to fusion variables and trans nodes"); - for (auto &var_to_refs : var_and_var_ref_map_) { - auto &node = var_to_refs.first; - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(var_accelerate_ctrl_); - if (!var_accelerate_ctrl_->IsVarPermitToChangeFormats(node->GetName())) { - GELOGD("The var %s does not permit to change formats, skip it", node->GetName().c_str()); - continue; - } - - VarTransRoad fusion_road; - auto ret = FusionIfNeed(node, fusion_road); - if (ret != SUCCESS) { - return ret; - } - - if (fusion_road.empty()) { - GELOGD("No need to fusion variable %s because it's fusion road is empty", node->GetName().c_str()); - continue; - } - - ret = RenewTransRoadDesc(node, fusion_road); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew description fusion road for var %s", node->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - auto start_iter = fusion_road.begin(); - auto end_iter = fusion_road.rbegin(); - GELOGD( - "Trans variable data for %s from format %s to %s, shape %s to %s " - "data-type %s to %s, path len %zu success", - node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), - TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), - formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), - formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), - TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), - TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); - - ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); - if (ret != SUCCESS) { - 
GELOGE(INTERNAL_ERROR, "Failed to update the format fusion road for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph->GetGraphID()); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to update the graph id for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - var_accelerate_ctrl_->SetVarChanged(node->GetName()); - - GELOGD("Begin to update format info for var %s.", node->GetName().c_str()); - std::set node_set({node}); - if (UpdateIOFormatInfo(end_iter->output, node_set) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - // renew var desc if the trans_road is all reshape or reformat - ret = RenewVarDesc(graph->GetSessionID(), node, fusion_road); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - } - - return SUCCESS; -} - -Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road) { - auto var_desc = var->GetOpDesc(); - GE_CHECK_NOTNULL(var_desc); - TransNodeInfo prev_node_info; - prev_node_info.node_type = var->GetType(); - prev_node_info.output = var_desc->GetOutputDesc(0); - // two cases - // fisrt Var->cast->transdata which transdata in fusion road - // the input of transdata is not equal with output of var - // case 1 : suppose input dtype of transdata equal with out dtype - // but not equal with var - // so we make input dtype and output dytpe of transroad equal with var - // case 2: suppose input format of transdata not equal with out format - // and input format not equal with var - // so we make input format equal with var - for (auto &cur_trans : fusion_road) { - if (cur_trans.input.GetFormat() == cur_trans.output.GetFormat()) { - cur_trans.output.SetFormat(prev_node_info.output.GetFormat()); - } - if (cur_trans.input.GetDataType() == cur_trans.output.GetDataType()) { - cur_trans.output.SetDataType(prev_node_info.output.GetDataType()); - } - if (ge::formats::IsShapeEqual(cur_trans.input.GetShape(), cur_trans.output.GetShape())) { - cur_trans.output.SetShape(prev_node_info.output.GetShape()); - } - cur_trans.input = prev_node_info.output; - prev_node_info.output = cur_trans.output; - } - return SUCCESS; -} - -Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road) { - bool can_fusion = false; - while (true) { - map> trans_type_to_trans_ops ; - map> trans_type_to_changed_desc; - // record the order of trans op in first path - vector first_path_trans_order; - auto ret = CheckIfCouldBeOptimized(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, can_fusion); - if (ret != SUCCESS) { - GELOGE(FAILED, "Check trans ops after vatiable could be optimized or not failed"); - return ret; - } - - if (!can_fusion) { - break; - } - - vector> delete_var_ref_trans_nodes; - ret = GetAndCheckTransOpOfVarRef(var, can_fusion, trans_type_to_changed_desc, delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - GELOGE(FAILED, "get and check trans op of varref failed"); - return ret; - } - - if (!can_fusion) { - break; - } - - ret = UpdateTransRoad(fusion_road, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Update trans road failed"); - return ret; - } - - if (fusion_road.empty()) { - return SUCCESS; - } - - ret = DealFusion(var, fusion_road, trans_type_to_changed_desc, - trans_type_to_trans_ops, 
delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - return ret; - } - } - return SUCCESS; -} - -Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops){ - vector delete_trans_type; - for (auto &trans_type : first_path_trans_order) { - if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) { - continue; - } - bool delete_flag = false; - for (auto &trans_node : trans_type_to_trans_ops[trans_type]) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto out_op_desc = trans_node->GetOpDesc(); - GE_CHECK_NOTNULL(out_op_desc); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - if (!IsTransSupport(trans_node_info)) { - delete_flag = true; - GELOGD("The trans node %s does not support, skip the variable accelerating", trans_node_info.node_type.c_str()); - break; - } - } - if (delete_flag) { - delete_trans_type.push_back(trans_type); - } else { - auto &trans_node = *trans_type_to_trans_ops[trans_type].begin(); - auto out_op_desc = trans_node->GetOpDesc(); - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - fusion_road.emplace_back(trans_node_info); - } - } - for (auto &trans_type : delete_trans_type) { - trans_type_to_changed_desc.erase(trans_type); - } - return SUCCESS; -} - -Status VariableOpPass::DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes) { - GE_CHECK_NOTNULL(var_node); - GELOGD("Begin to fusion var %s with trans", var_node->GetName().c_str()); - auto graph = var_node->GetOwnerComputeGraph(); - for (auto &trans_type : trans_type_to_changed_desc) { - for (auto &trans_node : trans_type_to_trans_ops[trans_type.first]) { - GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), - trans_node->GetType().c_str(), var_node->GetName().c_str()); - if (RenewTransOpDesc(trans_node, true) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::RemoveNodeWithoutRelink(graph, trans_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - - // Iterate delete_trans_nodes backward, eg a->b->c, delete_trans_nodes:{{b,c},{a,b}} - // we should delete {a,b} first , then b->c,then we can delete {b,c} - // if we delete {b,c} first, then a->c, then we can not get b when we delete {a,b} - for (auto iter = delete_trans_nodes.rbegin(); iter != delete_trans_nodes.rend(); ++iter) { - auto front_node = iter->first; - auto back_node = iter->second; - if (RenewTransOpDesc(front_node, false) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (front_node->GetOutDataNodes().size() > 1) { - GELOGD("The trans node %s type %s connecting with var-ref %s has more" - " than one output data nodes, unlink the edge between them", - front_node->GetName().c_str(), front_node->GetType().c_str(), 
back_node->GetName().c_str()); - if (ByPassTransNode(front_node, back_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to node %s", front_node->GetName().c_str(), - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } else { - GELOGD("The trans node %s type %s connecting with %s has only" - " one output data nodes, isolate and remove it.", - front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); - if (GraphUtils::IsolateNode(front_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (GraphUtils::RemoveNodeWithoutRelink(graph, front_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewTransOpDesc(ge::NodePtr &node, bool is_reverse) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(node->GetType()); - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - GeTensorDesc renew_desc = is_reverse ? output_desc : input_desc; - bool format_changed = false; - bool shape_changed = false; - bool dtype_changed = false; - if (input_desc.GetFormat() != output_desc.GetFormat()) { - format_changed = true; - } - if (input_desc.GetDataType() != output_desc.GetDataType()) { - dtype_changed = true; - } - if (!ge::formats::IsShapeEqual(input_desc.GetShape(), output_desc.GetShape())) { - shape_changed = true; - } - auto cur_node = node; - while (TransOpUtil::IsTransOp(cur_node)) { - tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - auto next_node = is_reverse ? NodeUtils::GetInDataNodeByIndex(*cur_node, tran_in_index) : - cur_node->GetOutDataNodes().at(kTransOpOutIndex); - if (!TransOpUtil::IsTransOp(next_node)) { - break; - } - auto prev_desc = next_node->GetOpDesc(); - tran_in_index = TransOpUtil::GetTransOpDataIndex(next_node->GetType()); - auto mutable_output_desc = prev_desc->MutableOutputDesc(kTransOpOutIndex); - auto mutable_input_desc = prev_desc->MutableInputDesc(tran_in_index); - GE_CHECK_NOTNULL(prev_desc->MutableOutputDesc(kTransOpOutIndex)); - GE_CHECK_NOTNULL(prev_desc->MutableInputDesc(tran_in_index)); - if (shape_changed) { - mutable_input_desc->SetShape(renew_desc.GetShape()); - mutable_output_desc->SetShape(renew_desc.GetShape()); - } - if (dtype_changed) { - mutable_input_desc->SetDataType(renew_desc.GetDataType()); - mutable_output_desc->SetDataType(renew_desc.GetDataType()); - } - if (format_changed) { - mutable_input_desc->SetFormat(renew_desc.GetFormat()); - mutable_output_desc->SetFormat(renew_desc.GetFormat()); - } - cur_node = next_node; - } - return SUCCESS; -} - -Status VariableOpPass::CheckIfCouldBeOptimized(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag) { - bool is_match = true; - auto ret = GetSameTransOP(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, is_match); - - if (ret != SUCCESS) { - GELOGE(FAILED, "Get same trans op of variable node: %s failed", var->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (!is_match) { - flag = false; - GELOGI("trans nodes after variable do not meet the condition"); - return SUCCESS; - } - - flag = true; - return SUCCESS; -} - -Status VariableOpPass::GetSameTransOP(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> 
&trans_type_to_trans_ops, bool &is_match) { - GELOGD("Begin to get Node: %s trans op info of first path", var->GetName().c_str()); - auto ret = GetFisrtPathTransInfo(var, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Get var: %s first path trans info failed", var->GetName().c_str()); - return FAILED; - } - - if (first_path_trans_order.empty()) { - GELOGD("var %s first path has no trans op, not need to pass", var->GetName().c_str()); - is_match = false; - return SUCCESS; - } - - GELOGD("Begin to depth first search Node: %s ", var->GetName().c_str()); - VariableDFS(var, trans_type_to_changed_desc, trans_type_to_trans_ops, is_match); - - return SUCCESS; -} - -void VariableOpPass::VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match) { - std::stack node_stack; - std::stack> path_stack; - for (auto &out_node : node->GetOutDataNodes()) { - if (!is_match) { - break; - } - if (out_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(out_node)) { - is_match = false; - break; - } - node_stack.push(out_node); - path_stack.emplace(vector{out_node}); - while (!node_stack.empty() && is_match) { - auto cur_node = node_stack.top(); - auto cur_path = path_stack.top(); - node_stack.pop(); - path_stack.pop(); - if (cur_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(cur_node)) { - UpdateTransInfo(cur_path, is_match, trans_type_to_changed_desc, trans_type_to_trans_ops); - continue; - } - for (auto &next_node : cur_node->GetOutDataNodes()) { - node_stack.push(next_node); - auto next_path = cur_path; - next_path.push_back(next_node); - path_stack.emplace(next_path); - } - } - } -} - -Status VariableOpPass::UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - GELOGD("Begin to update trans info by path"); - std::set trans_op_occured; - for (auto &trans_node : cur_path) { - auto trans_node_type = trans_node->GetType(); - if (trans_op_occured.find(trans_node_type) != trans_op_occured.end() || - !ge::TransOpUtil::IsTransOp(trans_node_type)) { - continue; - } - trans_op_occured.insert(trans_node_type); - auto desc_diff = GetInAndOutDecsDiff(trans_node); - if (trans_type_to_changed_desc.find(trans_node_type) != trans_type_to_changed_desc.end() && - desc_diff == trans_type_to_changed_desc[trans_node_type].first) { - trans_type_to_changed_desc[trans_node_type].second = true; - auto iter = find(trans_type_to_trans_ops[trans_node_type].begin(), - trans_type_to_trans_ops[trans_node_type].end(), - trans_node); - if (iter == trans_type_to_trans_ops[trans_node_type].end()) { - trans_type_to_trans_ops[trans_node_type].push_back(trans_node); - } - } - } - std::set delete_trans_types; - for (auto &trans_item : trans_type_to_changed_desc) { - if (!trans_item.second.second) { - delete_trans_types.insert(trans_item.first); - } else { - trans_item.second.second = false; - } - } - for (auto& delete_item : delete_trans_types) { - trans_type_to_changed_desc.erase(delete_item); - } - if (trans_type_to_changed_desc.empty()) { - is_match = false; - } - return SUCCESS; -} - -Status VariableOpPass::GetFisrtPathTransInfo(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - auto cur_node = var; - while (cur_node->GetOutDataNodesSize() != 0) { - cur_node = cur_node->GetOutDataNodes().at(0); - GE_CHECK_NOTNULL(cur_node); - if 
(!ge::TransOpUtil::IsTransOp(cur_node)) { - break; - } - auto cur_node_type = cur_node->GetType(); - // only get the the first occurrence operator of same type - if (trans_type_to_changed_desc.find(cur_node_type) == trans_type_to_changed_desc.end()) { - auto desc_diff = GetInAndOutDecsDiff(cur_node); - trans_type_to_changed_desc[cur_node->GetType()] = make_pair(desc_diff, false); - trans_type_to_trans_ops[cur_node->GetType()] = vector{cur_node}; - first_path_trans_order.push_back(cur_node->GetType()); - } - } - GELOGD("get var %s first path trans info success", var->GetName().c_str()); - return SUCCESS; -} - -Status VariableOpPass::GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes) { - auto iterator = var_and_var_ref_map_.find(var_node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGD("there is no var_ref of node %s", var_node->GetName().c_str()); - return SUCCESS; - } - vector delete_trans_type; - for (auto &trans_type : trans_type_to_changed_desc) { - delete_trans_type.push_back(trans_type.first); - } - for (auto &ref_node : iterator->second) { - GE_CHECK_NOTNULL(ref_node); - auto cur_node = *ref_node->GetInDataNodes().begin(); - auto behind_node = ref_node; - GE_CHECK_NOTNULL(cur_node); - vector tmp_delete_trans_type = delete_trans_type; - while (TransOpUtil::IsTransOp(cur_node)) { - GE_CHECK_NOTNULL(cur_node); - auto iter = find(tmp_delete_trans_type.begin(), tmp_delete_trans_type.end(), cur_node->GetType()); - if (iter != tmp_delete_trans_type.end()) { - CheckTransOpOfVarAndVarRefSymmetry(cur_node, trans_type_to_changed_desc[cur_node->GetType()].first, - pass_check); - if (!pass_check) { - GELOGD("trans op : %s of var ref %s is illegal", cur_node->GetName().c_str(), ref_node->GetName().c_str()); - return SUCCESS; - } - tmp_delete_trans_type.erase(iter); - delete_var_ref_trans_nodes.emplace_back(std::make_pair(cur_node, behind_node)); - } - int tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - behind_node = cur_node; - cur_node = cur_node->GetInDataNodes().at(tran_in_index); - } - if (!tmp_delete_trans_type.empty()) { - pass_check = false; - return SUCCESS; - } - } - return SUCCESS; -} - -Status VariableOpPass::CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, - bool &is_symmetry){ - auto var_ref_trans_op_desc_diff = GetInAndOutDecsDiff(var_ref_trans_op, true); - is_symmetry = (var_ref_trans_op_desc_diff == desc_diff); - return SUCCESS; -} - -Status VariableOpPass::UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node) { - if (node == nullptr || node->GetOpDesc() == nullptr) { - GELOGE(FAILED, "node or opdesc is nullptr"); - return FAILED; - } - const Format &format = final_output.GetFormat(); - const DataType &data_type = final_output.GetDataType(); - const GeShape &shape = final_output.GetShape(); - GELOGD("last ref is (%s, %s, %lu), var_ref_name is %s.", TypeUtils::DataTypeToSerialString(data_type).c_str(), - TypeUtils::FormatToSerialString(format).c_str(), shape.GetDims().size(), node->GetName().c_str()); - - auto node_desc = node->GetOpDesc()->GetOutputDesc(0); - CopyVariableFormatDataTypeAndShape(final_output, node_desc); - if (node->GetOpDesc()->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGE(FAILED, "update output desc fail."); - return FAILED; - } - GELOGD("node ref is (%s, %s, %lu), var_ref_name is %s.", - 
TypeUtils::DataTypeToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetDataType()).c_str(), - TypeUtils::FormatToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetFormat()).c_str(), - node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims().size(), node->GetName().c_str()); - - auto iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - auto graph = node->GetOwnerComputeGraph(); - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGW("The var node %s which belongs to graph %s can not be found on the graph", node->GetName().c_str(), - node->GetOwnerComputeGraph()->GetName().c_str()); - return SUCCESS; - } - - for (const auto &var_ref_node : iterator->second) { - auto var_ref_node_description = var_ref_node->GetOpDesc(); - GE_CHECK_NOTNULL(var_ref_node_description); - - GELOGD("var_ref_node before is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(format).c_str(), - shape.GetDims().size(), var_ref_node->GetName().c_str()); - if (var_ref_node_description->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateOutputDesc fail."); - } - if (var_ref_node_description->UpdateInputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateInputDesc fail."); - } - const auto &input_desc = var_ref_node_description->MutableInputDesc(0); - const auto &output_desc = var_ref_node_description->MutableOutputDesc(0); - GE_CHECK_NOTNULL(input_desc); - GE_CHECK_NOTNULL(output_desc); - GELOGD("var_ref_node ref is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), - TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), output_desc->GetShape().GetDims().size(), - var_ref_node->GetName().c_str()); - } - - return SUCCESS; -} - -Status VariableOpPass::GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph) { - std::map names_to_var; - std::map> names_to_refs; - GE_CHECK_NOTNULL(compute_graph); - for (auto &node : compute_graph->GetDirectNode()) { - if (node->GetType() != VARIABLE) { - continue; - } - std::string ref_var_name; - if (!ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name)) { - names_to_var[node->GetName()] = node; - } else { - names_to_refs[ref_var_name].insert(node); - } - } - - for (auto &name_to_var : names_to_var) { - var_and_var_ref_map_[name_to_var.second] = names_to_refs[name_to_var.first]; - } - return SUCCESS; -} - -void VariableOpPass::CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, - GeTensorDesc &dst_tensor_desc) { - dst_tensor_desc.SetShape(src_tensor_desc.GetShape()); - dst_tensor_desc.SetFormat(src_tensor_desc.GetFormat()); - dst_tensor_desc.SetDataType(src_tensor_desc.GetDataType()); -} - -Status VariableOpPass::UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes) { - for (auto &need_set_node : nodes) { - auto ret = UpdateVarAndRefOutputFormatInfo(final_output, need_set_node); - if (ret != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { - GE_CHECK_NOTNULL(graph); - // renew var manager desc - Status ret = SUCCESS; - for (auto &node : graph->GetDirectNode()) { - 
bool is_var_node = - (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); - if (is_var_node) { - if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - continue; - } - GELOGD("var manager exist var node[%s], graph name[%s]", node->GetName().c_str(), graph->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - ret = ge::VarManager::Instance(graph->GetSessionID())->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road) { - // renew var desc if the trans_road is all reshape or reformat - for (auto &road : fusion_road) { - if (road.node_type != RESHAPE && road.node_type != REFORMAT) { - return SUCCESS; - } - } - - if (!ge::VarManager::Instance(session_id)->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - return SUCCESS; - } - GELOGD("var manager exist var node[%s]", node->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - Status ret = ge::VarManager::Instance(session_id)->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - - return SUCCESS; -} - -} // namespace ge diff --git a/ge/graph/passes/variable_op_pass_bak.h b/ge/graph/passes/variable_op_pass_bak.h deleted file mode 100644 index fccd063b..00000000 --- a/ge/graph/passes/variable_op_pass_bak.h +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#define GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#include -#include -#include -#include "graph/common/transop_util.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/utils/node_utils.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/manager/util/variable_accelerate_ctrl.h" -#include "inc/graph_pass.h" - -namespace ge { -namespace variable_op { -struct NodeDesc { - ge::GeTensorDesc input; - ge::GeTensorDesc output; - bool is_update = false; -}; -} // namespace variable_op -class VariableOpPass : public GraphPass { - public: - explicit VariableOpPass(VarAccelerateCtrl *ctrl) : var_accelerate_ctrl_(ctrl) {} - - ~VariableOpPass() override = default; - - Status Run(ge::ComputeGraphPtr graph) override; - - private: - Status UpdateTransRoad(VarTransRoad &fusion_road, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes); - - Status RenewTransOpDesc(ge::NodePtr &node, bool is_reverse); - - Status RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road); - - Status CheckIfCouldBeOptimized(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag); - - Status FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road); - - Status GetSameTransOP(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status GetFisrtPathTransInfo(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - void VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes); - - Status CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, bool &is_symmetry); - - Status UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node); - - Status GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph); - - void CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, GeTensorDesc &dst_tensor_desc); - - Status UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes); - - Status RenewVarDesc(ge::ComputeGraphPtr &graph); - - Status RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road); - - map> var_and_var_ref_map_; - - VarAccelerateCtrl *var_accelerate_ctrl_; -}; -} // namespace ge -#endif // GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 91fab280..63f1b131 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -49,7 +49,6 @@ #include "graph/passes/for_pass.h" #include "graph/passes/guarantee_const_pass.h" #include "graph/passes/hccl_group_pass.h" -#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/infershape_pass.h" #include 
"graph/passes/merge_pass.h" @@ -935,7 +934,10 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, return PARAM_INVALID; } for (auto &shape_range_str : shape_range_set) { - if (shape_range_str.empty()) { + if (shape_range_str.size() < 3) { + // shape_range_str should be "[2~3,1" + // or ",[2~3,1". because we should trim '[' or ',[' + // so shape_range_str.size() < 3 is invalid continue; } // trim start bytes, after that, single input should be "1~20,3,3~6,-1" @@ -956,7 +958,7 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, // fix dim auto range_value = StringToLongNoThrow(range_pair_set.at(0).c_str()); if (range_value < 0) { - range_pair = std::make_pair(0, range_value); + range_pair = std::make_pair(1, range_value); } else { range_pair = std::make_pair(range_value, range_value); } @@ -1017,36 +1019,32 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, return PARAM_INVALID; } for (size_t i = 0; i < origin_shape.GetDimNum(); ++i) { - if (current_shape_range_vec.at(i).first == current_shape_range_vec.at(i).second) { + auto curr_dim = origin_shape.GetDim(i); + auto left_range = current_shape_range_vec.at(i).first; + auto right_range = current_shape_range_vec.at(i).second; + if (left_range == right_range) { // given shape_range is known dim, check is same as origin or not - if (origin_shape.GetDim(i) != current_shape_range_vec.at(i).first) { + if (curr_dim != left_range) { GELOGE(PARAM_INVALID, "Given shape range is %ld, current dim shape is %ld, not match.Pleace Check.", - current_shape_range_vec.at(i).first, origin_shape.GetDim(i)); + left_range, curr_dim); return PARAM_INVALID; } - origin_shape.SetDim(i, current_shape_range_vec.at(i).first); + origin_shape.SetDim(i, left_range); } else { - origin_shape.SetDim(i, -1); + // given shape_range is fix range, check input_shape is in this range or not + if (right_range != UNKNOWN_DIM) { + if ((curr_dim < left_range) || (curr_dim > right_range)) { + GELOGE(PARAM_INVALID, "Given shape range is [%ld~%ld], current dim shape is %ld, out of range.Pleace Check.", + left_range, right_range, curr_dim); + return PARAM_INVALID; + } + } + origin_shape.SetDim(i, UNKNOWN_DIM); } } desc.SetShape(origin_shape); desc.SetShapeRange(current_shape_range_vec); - int64_t dynamic_shape_size = 1; - for (const auto range_pair : range_vec.at(index)) { - FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); - dynamic_shape_size *= range_pair.second; - } - auto data_type_size = GetSizeByDataType(desc.GetDataType()); - if (data_type_size < 0) { - GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.", - TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); - return PARAM_INVALID; - } - FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); - dynamic_shape_size *= data_type_size; - GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); - ge::TensorUtils::SetSize(desc, dynamic_shape_size); graphStatus graph_ret = op->UpdateInputDesc(0, desc); GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); graph_ret = op->UpdateOutputDesc(0, desc); @@ -1666,6 +1664,9 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { auto ge_tensor_desc = ge_tensor_ptr->GetTensorDesc(); int64_t shape_size = ge_tensor_desc.GetShape().GetShapeSize(); auto data_type = ge_tensor_desc.GetDataType(); + if (data_type == DT_STRING) { + return SUCCESS; + } uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, 
diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
index 3b37003f..b1534eb4 100755
--- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
+++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
@@ -540,7 +540,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map aipp_params(new (std::nothrow) domi::AippOpParams());
   ge::GeAttrValue::NAMED_ATTRS aipp_attr;
-  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
+  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST,
                          "Data node does not contain param aipp!");
   GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
@@ -644,7 +644,7 @@ Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) {
     std::vector aipps;
     GE_RETURN_IF_ERROR(GetAllAipps(data_node, *aipps_or_switchs_or_case.begin(), aipps));
-    GELOGI("RecordAIPPInfoToData: Data: name[%s], type[%s], batch size[%u]", data_node->GetName().c_str(),
+    GELOGI("RecordAIPPInfoToData: Data: name[%s], type[%s], batch size[%zu]", data_node->GetName().c_str(),
            data_node->GetType().c_str(), aipps.size());
     for (auto aipp_it : aipps) {
diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc
index 5506435e..e43c5dd2 100644
--- a/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -371,7 +371,7 @@ Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(mapGetName().c_str());
+      GELOGE(FAILED, "Get attr frame_name of enter[%s] failed.", node->GetName().c_str());
       return FAILED;
     }
     frame_enter[frame_name].emplace_back(node);
@@ -850,19 +850,19 @@ NodePtr MultiBatchGraphCopyer::FindSwitchnNodeForDataEdge(const OutDataAnchorPtr
   if (is_getnext_sink_data) {
     auto output_idx = data_out_anchor->GetIdx();
     size_t referenced_index = 0;
-    GELOGI("The output idx %zu has %zu referenced nums.", output_idx, data_out_anchor->GetPeerInDataAnchors().size());
+    GELOGI("The output idx %d has %zu referenced nums.", output_idx, data_out_anchor->GetPeerInDataAnchors().size());
     for (const auto &peer_in_anchor : data_out_anchor->GetPeerInDataAnchors()) {
       if (peer_in_anchor->GetOwnerNode()->GetOpDesc() == nullptr) {
         GELOGE(INTERNAL_ERROR, "Op desc should not be nullptr.");
         return nullptr;
       }
       if (getnext_nodes_to_switchn_.at(output_idx).empty()) {
-        GELOGI("Output idx %zu of %s is static output.", output_idx, data_node->GetName().c_str());
+        GELOGI("Output idx %d of %s is static output.", output_idx, data_node->GetName().c_str());
         return nullptr;
       }
       if (output_idx >= static_cast(getnext_nodes_to_switchn_.size()) ||
           referenced_index >= getnext_nodes_to_switchn_.at(output_idx).size()) {
-        GELOGE(INTERNAL_ERROR, "Output idx is %zu, referenced index is %zu", output_idx, referenced_index);
+        GELOGE(INTERNAL_ERROR, "Output idx is %d, referenced index is %zu", output_idx, referenced_index);
         return nullptr;
       }
       if (peer_in_anchor->GetOwnerNode()->GetOpDesc()->GetName() == origin_node->GetName()) {
@@ -1203,7 +1203,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNAndUpdateMaxShape(const NodePtr &node
   for (size_t i = 0; i < getnext_sink_dynamic_out_mapping_.size(); ++i) {
     if(UpdateMaxShapeToData(node, i) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Failed to update max shape of %zu out anchor", node->GetName().c_str(), i);
+      GELOGE(PARAM_INVALID, "Failed to update %s max shape of %zu out anchor", node->GetName().c_str(), i);
       return PARAM_INVALID;
     }
   }
diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc
index 8aab0981..84f38fa6 100644
--- a/ge/graph/preprocess/multi_batch_options.cc
+++ b/ge/graph/preprocess/multi_batch_options.cc
@@ -435,7 +435,7 @@ Status CheckDynamicParams(const vector> &shapes) {
                     "E10035", {"shapesize", "minshapesize"},
                     {std::to_string(shapes.size()), std::to_string(kMinShapesCount - 1)});
     GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s "
-           "value size [%zu] must be greater than [%zu].",
+           "value size [%zu] must be greater than [%d].",
           shapes.size(), kMinShapesCount - 1);
     return PARAM_INVALID;
   }
@@ -444,7 +444,7 @@ Status CheckDynamicParams(const vector> &shapes) {
                     "E10036", {"shapesize", "maxshapesize"},
                     {std::to_string(shapes.size()), std::to_string(kMaxShapesCount + 1)});
     GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s "
-           "value size [%zu] must be less than [%zu].",
+           "value size [%zu] must be less than [%d].",
           shapes.size(), kMaxShapesCount + 1);
     return PARAM_INVALID;
   }
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
index a6e00f4a..7f709f03 100644
--- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
+++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
@@ -32,5 +32,8 @@ REGISTER_OP_CREATOR(Assign, HostOp);
 REGISTER_OP_CREATOR(RandomUniform, HostOp);
 REGISTER_OP_CREATOR(Add, HostOp);
 REGISTER_OP_CREATOR(Mul, HostOp);
+REGISTER_OP_CREATOR(ConcatV2, HostOp);
+REGISTER_OP_CREATOR(Data, HostOp);
+REGISTER_OP_CREATOR(Fill, HostOp);
 }  // namespace host_cpu
 }  // namespace ge
diff --git a/ge/host_cpu_engine/proto/task.proto b/ge/host_cpu_engine/proto/task.proto
index d0c09840..0da5631e 100644
--- a/ge/host_cpu_engine/proto/task.proto
+++ b/ge/host_cpu_engine/proto/task.proto
@@ -57,6 +57,7 @@ message TaskDef {
     LabelSetDef label_set = 37;
     LabelGotoExDef label_goto_ex = 38;
     LabelSwitchByIndexDef label_switch_by_index = 39;
+    KernelDefWithHandle kernel_with_handle = 40;
 }

 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
     uint32 kernel_ext_info_size = 19;
 }

+message KernelDefWithHandle {
+    KernelContext context = 1;
+
+    uint64 handle = 10;
+    string dev_func = 11;
+    uint32 block_dim = 12;
+    uint32 args_size = 13;
+    bytes args = 14;
+    bytes sm_desc = 15;
+    string original_kernel_key = 16;
+    string node_info = 17;
+}
+
 message KernelContext {
     uint32 kernel_type = 1;
     uint32 op_id = 2;  // OP type in CCE
diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc
index 32611b03..3037934e 100644
--- a/ge/host_kernels/dynamic_stitch_kernel.cc
+++ b/ge/host_kernels/dynamic_stitch_kernel.cc
@@ -126,10 +126,10 @@ void DynamicStitchKernel::ComputeMergedShape(const vector &inp
   vector merged_dim_vec = {merged_first_dim + 1};
   if (step > 0) {
     merged_dim_vec.emplace_back(step);
-    GELOGD("merged_shape is [ %ld, %ld].", merged_first_dim, step);
+    GELOGD("merged_shape is [ %d, %ld].", merged_first_dim, step);
   }
   merged_shape = GeShape(merged_dim_vec);
-  GELOGD("merged_shape is [ %ld ].", merged_first_dim);
+  GELOGD("merged_shape is [ %d ].", merged_first_dim);
 }

 Status DynamicStitchKernel::GenData(const vector &input, GeTensorPtr &output_ptr) {
@@ -196,14 +196,14 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec
     // if index repeated, need new data replace old data , so give more allowance
     if (indices_set.find(input_indices[j]) != indices_set.end()) {
       if (ge::CheckInt64AddOverflow(input_indices[j], data_unit) != SUCCESS) {
-        GELOGW("Check int64 mul overflow failed. Indices is %ld, data_unit is %ld.", input_indices[j], data_unit);
+        GELOGW("Check int64 add overflow failed. Indices is %d, data_unit is %ld.", input_indices[j], data_unit);
         return NOT_CHANGED;
       }
       allowance += data_unit;
     }
     indices_set.insert(input_indices[j]);
     if (!CheckInt64MulOverflow(input_indices[j], data_unit)) {
-      GELOGW("Check int64 mul overflow failed. Indices is %ld, data_unit is %ld.", input_indices[j], data_unit);
+      GELOGW("Check int64 mul overflow failed. Indices is %d, data_unit is %ld.", input_indices[j], data_unit);
       return NOT_CHANGED;
     }
     dst_offset = input_indices[j] * data_unit;
diff --git a/ge/host_kernels/pack_kernel.cc b/ge/host_kernels/pack_kernel.cc
index 476005ef..bf7a2a1f 100644
--- a/ge/host_kernels/pack_kernel.cc
+++ b/ge/host_kernels/pack_kernel.cc
@@ -124,7 +124,7 @@ Status PackKernel::ValidateInputs(const ge::OpDescPtr &op_desc_ptr, const std::v
   int64_t num = 1;
   for (auto dim : dst_shape.GetDims()) {
     if (dim < 0) {
-      GELOGW("Invalid dim ld% in the shape %s", dim, formats::ShapeToString(shape).c_str());
+      GELOGW("Invalid dim %ld in the shape %s", dim, formats::ShapeToString(shape).c_str());
       return NOT_CHANGED;
     }
     num *= dim;
diff --git a/ge/host_kernels/rank_kernel.cc b/ge/host_kernels/rank_kernel.cc
index 1de9478c..b246b976 100755
--- a/ge/host_kernels/rank_kernel.cc
+++ b/ge/host_kernels/rank_kernel.cc
@@ -42,7 +42,7 @@ Status RankKernel::Compute(const NodePtr &node, std::vector &v_outp
   GE_CHECK_NOTNULL(op_desc);
   size_t input_node_size = op_desc->GetInputsSize();
   if (input_node_size != kRankInputSize) {
-    GELOGW("input node size must be %d", kRankInputSize);
+    GELOGW("input node size must be %zu", kRankInputSize);
     return NOT_CHANGED;
   }
diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc
index b1bfb10a..c7e4b2c8 100644
--- a/ge/host_kernels/strided_slice_kernel.cc
+++ b/ge/host_kernels/strided_slice_kernel.cc
@@ -250,16 +250,16 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &lis
   run_flag_ = true;
   listener_ = listener;
   future_ = std::async(std::launch::async, [&]() -> Status {
+    GetThreadLocalContext() = *executor_->GetContext()->ge_context;
     GetContext().SetSessionId(executor_->GetContext()->session_id);
     return RunInternal();
   });
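The hunk directly above copies the caller's GE context into the std::async worker before touching the session id: GetThreadLocalContext() is thread-local state, so it does not follow the task into the new thread and has to be rebound at the top of the lambda. A minimal sketch of the pattern, with a hypothetical thread_local Context type standing in for the GE one:

#include <future>

struct Context { int session_id = 0; };

// Hypothetical accessor; thread-local, like GetThreadLocalContext() in GE.
inline Context &GetThreadLocalCtx() {
  thread_local Context ctx;
  return ctx;
}

std::future<int> LaunchWorker() {
  Context parent_ctx = GetThreadLocalCtx();  // snapshot in the calling thread
  return std::async(std::launch::async, [parent_ctx]() -> int {
    GetThreadLocalCtx() = parent_ctx;  // rebind inside the worker thread
    return GetThreadLocalCtx().session_id;
  });
}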
@@ -220,7 +221,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
     auto &tensor_desc = input_tensor_desc_[input_index];
     tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
     args.input_desc[input_index] = tensor_desc;
-    GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+    GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
     GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
                             "Failed to calc tensor size, index = %zu, shape = [%s]",
                             input_index,
@@ -229,11 +230,15 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
     }

     GE_CHECK_GE(tensor_size, 0);
-    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    AllocationAttr attr;
+    if (GetContext().GetHostExecFlag()) {
+      attr.SetMemType(HOST_DDR);
+    }
+    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr);
     GE_CHECK_NOTNULL(tensor_buffer);
     args.inputs.emplace_back(std::shared_ptr(tensor_buffer.release()));

-    GELOGD("To copy input data for input[%u]", input_index);
+    GELOGD("To copy input data for input[%zu]", input_index);
     const DataBuffer &data_buf = blobs[input_index];
     auto mem_size = static_cast(tensor_size);
     GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
@@ -242,7 +247,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
                            data_buf.length,
                            mem_size);

-    GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]",
+    GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%zu] memaddr[%p] mem_size[%zu] datasize[%lu]",
            model_->root_runtime_param_.graph_id,
            input_index,
            args.inputs[input_index].GetData(),
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index 21d2d033..a69cc45f 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -21,7 +21,7 @@
 #include
 #include "external/ge/ge_api_error_codes.h"
 #include "external/ge/ge_api_types.h"
-#include "graph/load/new_model_manager/data_inputer.h"
+#include "graph/load/model_manager/data_inputer.h"
 #include "hybrid/executor/hybrid_model_executor.h"
 #include "runtime/stream.h"
diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h
index 6299d4ff..6b2e52b4 100644
--- a/ge/hybrid/executor/hybrid_model_executor.h
+++ b/ge/hybrid/executor/hybrid_model_executor.h
@@ -17,7 +17,7 @@
 #ifndef GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_
 #define GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_
 #include "common/thread_pool.h"
-#include "graph/load/new_model_manager/data_inputer.h"
+#include "graph/load/model_manager/data_inputer.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/executor/rt_callback_manager.h"
 #include "hybrid/executor/subgraph_executor.h"
diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc
index 171ddaf3..00921705 100644
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -188,6 +188,14 @@ Status NodeState::WaitForPrepareDone() {
   return SUCCESS;
 }

+void NodeState::SetTaskContext(std::shared_ptr &task_context) {
+  task_context_ = task_context;
+}
+
+std::shared_ptr NodeState::GetTaskContext() {
+  return task_context_;
+}
+
 Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
   GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
   HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled");
diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h
index 02a362b4..c68a19ac 100644
--- a/ge/hybrid/executor/node_state.h
+++ b/ge/hybrid/executor/node_state.h
@@ -29,6 +29,7 @@ namespace hybrid {
 class NodeTask;
 struct GraphExecutionContext;
 class SubgraphContext;
+class TaskContext;

 class ShapeFuture {
  public:
@@ -103,6 +104,9 @@ struct NodeState {

   Status AwaitInputTensors(GraphExecutionContext &context) const;

+  void SetTaskContext(std::shared_ptr &task_context);
+  std::shared_ptr GetTaskContext();
+
  private:
   const NodeItem *node_item_ = nullptr;
   std::shared_ptr kernel_task_ = nullptr;
@@ -110,6 +114,7 @@ struct NodeState {
   OpDescPtr op_desc_;
   ShapeInferenceState shape_inference_state_;
   SubgraphContext *subgraph_context_;
+  std::shared_ptr task_context_ = nullptr;
   std::mutex mu_;
 };
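With the two accessors added above, a TaskContext can be created once while a node is being prepared and fetched again when the node is launched, instead of being rebuilt on every launch. A simplified sketch of that handoff (hypothetical types, not the GE classes):

#include <memory>

struct TaskContext { bool force_infer_shape = false; };

struct NodeState {
  void SetTaskContext(std::shared_ptr<TaskContext> &ctx) { task_context_ = ctx; }
  std::shared_ptr<TaskContext> GetTaskContext() { return task_context_; }
 private:
  std::shared_ptr<TaskContext> task_context_;
};

void Prepare(NodeState &state) {
  auto ctx = std::make_shared<TaskContext>();  // created exactly once per node
  state.SetTaskContext(ctx);
}

void Launch(NodeState &state) {
  auto ctx = state.GetTaskContext();  // reused, instead of re-created per launch
  if (ctx != nullptr) {
    ctx->force_infer_shape = true;
  }
}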
diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc
index f7b063c7..f8f122b1 100644
--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@@ -231,6 +231,15 @@ Status SubgraphExecutor::PrepareNodes() {
       } else {
         node_state->SetKernelTask(node_item.kernel_task);
       }
+      auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get());
+      GE_CHECK_NOTNULL(unique_task_context);
+      const auto &task = node_state->GetKernelTask();
+      if (task == nullptr) {
+        GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str());
+        return INTERNAL_ERROR;
+      }
+      auto shared_task_context = std::shared_ptr(unique_task_context.release());
+      node_state->SetTaskContext(shared_task_context);
     }
   }

@@ -267,6 +276,19 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
   } else {
     node_state.SetKernelTask(node_item.kernel_task);
   }
+  auto unique_task_context = TaskContext::Create(*node_state.GetNodeItem(), context_, subgraph_context_.get());
+  GE_CHECK_NOTNULL(unique_task_context);
+  const auto &task = node_state.GetKernelTask();
+  if (task == nullptr) {
+    GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str());
+    return INTERNAL_ERROR;
+  }
+  auto shared_task_context = std::shared_ptr(unique_task_context.release());
+  node_state.SetTaskContext(shared_task_context);
+  GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context));
+  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] start");
+  GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context));  // update op_desc before alloc ws
+  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] end");
   return SUCCESS;
 }

@@ -295,10 +317,9 @@ Status SubgraphExecutor::LaunchTasks() {
     GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone());

     GELOGD("[%s] Start to execute.", node_state->GetName().c_str());
-    auto task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get());
-    GE_CHECK_NOTNULL(task_context);
-    task_context->SetForceInferShape(force_infer_shape_);
-    auto shared_task_context = std::shared_ptr(task_context.release());
+    auto shared_task_context = node_state->GetTaskContext();
+    GE_CHECK_NOTNULL(shared_task_context);
+    shared_task_context->SetForceInferShape(force_infer_shape_);
     HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
                           "[%s] Execute node failed.",
                           node_state->GetName().c_str());
diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h
index d1949947..4523e2c4 100644
--- a/ge/hybrid/executor/subgraph_executor.h
+++ b/ge/hybrid/executor/subgraph_executor.h
@@ -75,7 +75,7 @@ class SubgraphExecutor {
   Status GetOutputs(std::vector &outputs, std::vector &output_desc);

  private:
-  static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);
+  Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);

   static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state);

   Status Init(const std::vector &inputs, const std::vector &input_desc);
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index b5de2a70..a6386b27 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
   }

   GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
-  auto op_desc = node->GetOpDesc();
-  std::string op_name = op_desc->GetName();
-  std::string dynamic_model_name = model->GetModelName();
-  uint32_t task_id = context_->GetTaskId();
-  uint32_t stream_id = context_->GetStreamId();
-  TaskDescInfo tmp_task_desc_info;
-  tmp_task_desc_info.model_name = dynamic_model_name;
-  tmp_task_desc_info.op_name = op_name;
-  tmp_task_desc_info.block_dim = 0;
-  auto task_defs = model->GetTaskDefs(node);
-  if (task_defs != nullptr && (*task_defs).size() > 0) {
-    const auto &task_def = (*task_defs)[0];
-    tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
-  }
-  tmp_task_desc_info.task_id = task_id;
-  tmp_task_desc_info.stream_id = stream_id;
-  tmp_task_desc_info.shape_type = "dynamic";
-  tmp_task_desc_info.cur_iter_num = graph_context_->iteration;
-  GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
-         node->GetName().c_str(), task_id, stream_id);
-  task_desc_info.emplace_back(tmp_task_desc_info);
+  task_desc_info = context_->GetProfilingTaskDescInfo();
+  context_->ClearProfilingTaskDescInfo();
+
   return SUCCESS;
 }

@@ -189,22 +171,18 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
   GE_CHECK_NOTNULL(model);

   GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str());
+  compute_graph_info = context_->GetProfilingGraphDescInfo();
+  context_->ClearProfilingGraphDescInfo();

-  std::string dynamic_model_name = model->GetModelName();
   auto op_desc = node->GetOpDesc();
-  if (op_desc == nullptr) {
-    GELOGE(PARAM_INVALID, "op_desc is nullptr.");
-    return PARAM_INVALID;
-  }
-
-  auto op_mode = static_cast(domi::ImplyType::INVALID);
-  if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
-      op_mode == static_cast(domi::ImplyType::TVM)) {
-    ComputeGraphDescInfo tmp_compute_graph_info;
-    tmp_compute_graph_info.model_name = dynamic_model_name;
-    tmp_compute_graph_info.op_name = op_desc->GetName();
-    tmp_compute_graph_info.op_type = op_desc->GetType();
-
+  GE_CHECK_NOTNULL(op_desc);
+  for (auto &tmp_compute_graph_info : compute_graph_info) {
+    // default
+    if (op_desc->GetAllInputsSize() == 0) {
+      tmp_compute_graph_info.input_format = { FORMAT_NULL };
+      tmp_compute_graph_info.input_shape = { {0} };
+      tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
+    }
     for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
       GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
       if (input_desc == nullptr) {
@@ -215,17 +193,19 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
       tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
     }

+    if (op_desc->GetOutputsSize() == 0) {
+      tmp_compute_graph_info.output_format = { FORMAT_NULL };
+      tmp_compute_graph_info.output_shape = { {0} };
+      tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
+    }
     for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
       GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
       tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
       tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
       tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
     }
-    tmp_compute_graph_info.task_id = context_->GetTaskId();
-    tmp_compute_graph_info.stream_id = context_->GetStreamId();
-    compute_graph_info.emplace_back(tmp_compute_graph_info);
-    GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str());
   }
+
   return SUCCESS;
 }

@@ -247,7 +227,6 @@ Status NodeDoneCallback::ProfilingReport() {
   GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str());

   std::vector task_desc_info;
-  TaskDescInfo tmp_task_desc_info;
   auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info);
   if (profiling_ret != RT_ERROR_NONE) {
     GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str());
diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc
index 56ae3ea3..46ee6bd6 100755
--- a/ge/hybrid/executor/worker/shape_inference_engine.cc
+++ b/ge/hybrid/executor/worker/shape_inference_engine.cc
@@ -68,7 +68,6 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
   // Do shape inference
   GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str());
   {
-    std::lock_guard lk(mu_);
     RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
     GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
                       "Invoke InferShapeAndType failed.");
diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h
index 5349390c..369c732a 100644
--- a/ge/hybrid/hybrid_davinci_model.h
+++ b/ge/hybrid/hybrid_davinci_model.h
@@ -19,7 +19,7 @@

 #include
 #include "external/ge/ge_api_error_codes.h"
-#include "graph/load/new_model_manager/data_inputer.h"
+#include "graph/load/model_manager/data_inputer.h"
 #include "model/ge_root_model.h"

 namespace ge {
diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc
index 91b6a549..7e5d8fe5 100644
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -17,7 +17,7 @@
 #include "hybrid_model.h"
 #include
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/node_utils.h"
 #include "graph/utils/tensor_utils.h"
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index e521b776..72495cad 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -21,8 +21,8 @@
 #include
 #include
 #include "framework/common/ge_inner_error_codes.h"
-#include "graph/load/new_model_manager/data_inputer.h"
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/data_inputer.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/node.h"
 #include "hybrid/common/tensor_value.h"
 #include "hybrid/model/node_item.h"
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index d1f61985..b314c6a7 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -20,8 +20,8 @@
 #include "graph/ge_context.h"
 #include "graph/build/memory/var_mem_assign_util.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/model_utils.h"
-#include "graph/load/new_model_manager/model_manager.h"
+#include "graph/load/model_manager/model_utils.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "graph/manager/graph_var_manager.h"
 #include "graph/manager/host_mem_manager.h"
 #include "graph/manager/trans_var_data_utils.h"
@@ -772,7 +772,12 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_
                     var_name.c_str(),
                     hybrid_model_.GetSessionId());

-  uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM);
+  rtMemType_t memory_type = RT_MEMORY_HBM;
+  uint32_t mem_type = 0;
+  if (AttrUtils::GetInt(var_node->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) {
+    memory_type = RT_MEMORY_RDMA_HBM;
+  }
+  uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type);
   if (dev_mem == nullptr) {
     GELOGE(INTERNAL_ERROR,
            "Failed to copy var %s from device, cant not get "
@@ -934,7 +939,7 @@ Status HybridModelBuilder::InitVariableTensors() {
       GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
       return MEMALLOC_FAILED;
     }
-    GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size);
+    GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size);

     std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(),
                                                           tensor_size));
@@ -1603,16 +1608,19 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
   GE_CHECK_NOTNULL(compute_graph);

   NodePtr node_ptr = nullptr;
-  vector task_def_list;
+  map> node_task_map;
   // create fp node
   bool is_insert_fp_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
   if (is_insert_fp_profiling_task) {
+    vector task_def_list;
     (void)GenerateFpProfilingTask(op_desc, task_def_list);
     auto fp_desc = MakeShared(kProfilingFpNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(fp_desc);
     fp_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(fp_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create fp profiling node success before.");
   }
   // create all reduce start node
@@ -1620,6 +1628,7 @@
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
   if (is_all_reduce && is_insert_bp_profiling_task) {
+    vector task_def_list;
     int64_t log_id = 0;
     (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
     GELOGD("All reduce node profiling task log id: %ld before", log_id);
@@ -1629,18 +1638,24 @@
     GE_CHECK_NOTNULL(ar_desc_start);
     ar_desc_start->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(ar_desc_start);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create all reduce start profiling node success before.");
   }
-  if (node_ptr != nullptr) {
-    for (const auto &task_def : task_def_list) {
-      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+  if (!node_task_map.empty()) {
+    for (const auto &node_task : node_task_map) {
+      NodePtr profiling_node = node_task.first;
+      vector task_def_lists = node_task.second;
+      for (const auto &task_def : task_def_lists) {
+        hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
+      }
+      NodeItem *node_item = nullptr;
+      GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
+      node_item->input_start = 0;
+      node_item->output_start = 0;
+      graph_item.node_items_.emplace_back(node_item);
     }
-    NodeItem *node_item = nullptr;
-    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
-    node_item->input_start = 0;
-    node_item->output_start = 0;
-    graph_item.node_items_.emplace_back(node_item);
   } else {
     GELOGD("No need to create profiling node before.");
   }
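The restructure above replaces the single node_ptr plus one shared task_def_list (which could mix up task defs when both an fp node and an all-reduce start node are created in the same call) with a per-node map. A sketch of the registration loop with hypothetical simplified types:

#include <map>
#include <memory>
#include <vector>

struct Node {};
struct TaskDef {};
using NodePtr = std::shared_ptr<Node>;

void RegisterProfilingNodes(const std::map<NodePtr, std::vector<TaskDef>> &node_task_map,
                            std::map<NodePtr, std::vector<TaskDef>> &model_task_defs) {
  for (const auto &node_task : node_task_map) {
    for (const auto &task_def : node_task.second) {
      model_task_defs[node_task.first].push_back(task_def);  // task defs stay tied to their node
    }
    // ... here the NodeItem for node_task.first would be created and appended to the graph item
  }
}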
@@ -1656,12 +1671,13 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
   GE_CHECK_NOTNULL(compute_graph);

   NodePtr node_ptr = nullptr;
-  vector task_def_list;
+  map> node_task_map;
   // Create all reduce end node
   bool is_insert_bp_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
   if (is_all_reduce && is_insert_bp_profiling_task) {
+    vector task_def_list;
     int64_t log_id = 0;
     (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
     GELOGD("All reduce node profiling task log id: %ld after", log_id);
@@ -1671,38 +1687,50 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
     GE_CHECK_NOTNULL(ar_desc_end);
     ar_desc_end->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(ar_desc_end);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create all reduce end profiling node success after.");
   }
   // create bp node
   if (!is_all_reduce && is_insert_bp_profiling_task) {
+    vector task_def_list;
     (void) GenerateBpProfilingTask(op_desc, task_def_list);
     auto bp_op_desc = MakeShared(kProfilingBpNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(bp_op_desc);
     bp_op_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(bp_op_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create bp profiling node success after.");
   }
   // create end node
   bool is_insert_end_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
   if (is_insert_end_profiling_task) {
+    vector task_def_list;
     (void)GenerateEndProfilingTask(op_desc, task_def_list);
     auto end_desc = MakeShared(kProfilingEndNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(end_desc);
     end_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(end_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create end profiling node success after.");
   }
-  if (node_ptr != nullptr) {
-    for (const auto &task_def : task_def_list) {
-      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+  if (!node_task_map.empty()) {
+    for (const auto &node_task : node_task_map) {
+      NodePtr profiling_node = node_task.first;
+      vector task_def_lists = node_task.second;
+      for (const auto &task_def : task_def_lists) {
+        hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
+      }
+      NodeItem *node_item = nullptr;
+      GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
+      node_item->input_start = 0;
+      node_item->output_start = 0;
+      graph_item.node_items_.emplace_back(node_item);
    }
-    NodeItem *node_item = nullptr;
-    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
-    node_item->input_start = 0;
-    node_item->output_start = 0;
-    graph_item.node_items_.emplace_back(node_item);
   } else {
     GELOGD("No need to create profiling node after.");
   }
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index 55a19b6c..045bf3ef 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -21,7 +21,7 @@
 #include
 #include
 #include "framework/common/ge_inner_error_codes.h"
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/node.h"
 #include "hybrid/model/hybrid_model.h"
 #include "hybrid/model/node_item.h"
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index 2abc5b03..cb5a7d4c 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -182,16 +182,17 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function
     }
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
     GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
+    // save profiling data
     uint32_t task_id = 0;
     uint32_t stream_id = 0;
-    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
+    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);  // must be called after Launch kernel
     if (rt_ret != RT_ERROR_NONE) {
       GELOGE(rt_ret, "Get task_id and stream_id failed.");
-      return rt_ret;
+      return FAILED;
     }
-    context.SetTaskId(task_id);
-    context.SetStreamId(stream_id);
-    GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
+    GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
+    (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
+    (void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
   }
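The reordering above relies on the runtime reporting the ids of the most recently launched kernel, so the query has to directly follow LaunchKernel. A toy model of that constraint (illustrative names only, not the real rt* API):

#include <cstdint>

namespace toy_rt {
static uint32_t g_last_task_id = 0;
inline void KernelLaunch() { ++g_last_task_id; }          // each launch gets a new id
inline void GetTaskId(uint32_t *task_id) { *task_id = g_last_task_id; }
}  // namespace toy_rt

struct ProfRecord { uint32_t task_id; };

ProfRecord LaunchAndRecord() {
  toy_rt::KernelLaunch();
  uint32_t task_id = 0;
  toy_rt::GetTaskId(&task_id);  // read immediately; a second launch would overwrite it
  return ProfRecord{task_id};
}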
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 80ea579b..f1bd6466 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -19,7 +19,8 @@
 #include "framework/common/debug/log.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/node_executor/aicore/aicore_task_builder.h"
-#include "graph/load/new_model_manager/tbe_handle_store.h"
+#include "graph/load/model_manager/tbe_handle_store.h"
+#include "graph/types.h"

 using optiling::OpRunInfo;

@@ -34,6 +35,23 @@ constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
 Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
   GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
   GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
+
+  GE_CHECK_LE(op_desc.GetOutputsSize(), static_cast(INT_MAX));
+  int outputs_size = static_cast(op_desc.GetOutputsSize());
+
+  for (int i = 0; i < outputs_size; ++i) {
+    const GeTensorDescPtr tensor_desc = op_desc.MutableOutputDesc(i);
+    if (tensor_desc == nullptr) {
+      GELOGW("Op: %s, Index: %d, Tensor Desc is null", op_desc.GetName().c_str(), i);
+      continue;
+    }
+
+    int32_t calc_type = 0;
+    bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
+    if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
+      output_indices_to_skip_.push_back(i);
+    }
+  }
   return SUCCESS;
 }

@@ -221,7 +239,8 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info)
 }

 Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
-  size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces();
+  size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces()
+                              - output_indices_to_skip_.size();
   if (tiling_buffer_ != nullptr) {
     ++expected_arg_count;
   }
@@ -244,6 +263,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
   for (int i = 0; i < task_context.NumOutputs(); ++i) {
     const auto output = task_context.GetOutput(i);
     GE_CHECK_NOTNULL(output);
+    if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) {
+      GELOGD("Node:%s output[%d] is optional, its address does not need to be saved.",
+             task_context.GetNodeName(), i);
+      continue;
+    }
     arg_base_[index++] = reinterpret_cast(output->GetData());
   }
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 5818f384..3f350531 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -48,6 +48,8 @@ class AiCoreOpTask {

   bool GetClearAtomic() const {return clear_atomic_;}

+  uint32_t GetBlockDim() const {return block_dim_;}
+
  protected:
   Status UpdateTilingInfo(TaskContext &context);
   virtual std::string GetKeyForOpParamSize() const;
@@ -70,6 +72,7 @@ class AiCoreOpTask {
   uint32_t args_size_ = 0;
   uint32_t block_dim_ = 1;
   bool clear_atomic_ = true;
+  std::vector output_indices_to_skip_;
 };

 class AtomicAddrCleanOpTask : public AiCoreOpTask {
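Per the hunks above, outputs whose memory-size calc type is ALWAYS_EMPTY get no device address slot, so both the expected arg count and the packing loop skip them. A standalone sketch of that filtering under simplified, assumed types:

#include <algorithm>
#include <cstdint>
#include <vector>

// Pack output device addresses into the kernel arg table, skipping optional
// (always-empty) outputs, mirroring the skip-list logic above.
std::vector<uintptr_t> PackOutputArgs(const std::vector<uintptr_t> &output_addrs,
                                      const std::vector<int> &indices_to_skip) {
  std::vector<uintptr_t> args;
  args.reserve(output_addrs.size() - indices_to_skip.size());
  for (int i = 0; i < static_cast<int>(output_addrs.size()); ++i) {
    if (std::find(indices_to_skip.begin(), indices_to_skip.end(), i) != indices_to_skip.end()) {
      continue;  // optional output: no address slot is written for it
    }
    args.push_back(output_addrs[i]);
  }
  return args;
}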
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
index b8acbf0e..e9c7c604 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
@@ -29,8 +29,9 @@ constexpr int64_t kDimEndFlag = INT64_MIN;
 Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
   GELOGI("Node[%s] parse ext info start.", node_name_.c_str());
   if (ext_info.empty()) {
-    GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str());
-    return PARAM_INVALID;
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.",
+           node_name_.c_str());
+    return ACL_ERROR_GE_PARAM_INVALID;
   }

   ext_info_len_ = ext_info.size();
@@ -38,8 +39,8 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
   GE_CHECK_NOTNULL(ext_info_);

   if (memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()) != EOK) {
-    GELOGE(FAILED, "[%s] Failed to copy ext info", node_name_.c_str());
-    return FAILED;
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to copy ext info", node_name_.c_str());
+    return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }

   input_shape_and_type_.clear();
@@ -72,7 +73,7 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
     offset += aicpu_ext_info->infoLen;
   }

-  GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] ext_info format error, parse not reach end, offset=%zu, ext_info_len=%zu.",
                          node_name_.c_str(), offset, ext_info_len_);
   GELOGI("Node[%s] parse ext info end.", node_name_.c_str());
@@ -80,13 +81,13 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
 }

 Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.",
                          node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen);

   auto type = reinterpret_cast(aicpu_ext_info->infoMsg);

-  GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext shape type failed as need %d but %d.",
                          node_name_.c_str(), unknown_type_, *type);
   GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
@@ -95,7 +96,7 @@ Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {

 Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) {
   auto need_len = input_num_ * sizeof(AicpuShapeAndType);
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext input shape failed as infoLen must be "
                          "input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
                          node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
@@ -116,7 +117,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
     return SUCCESS;
   }
   auto need_len = output_num_ * sizeof(AicpuShapeAndType);
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext output shape failed as infoLen must be "
                          "output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
                          node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
@@ -130,7 +131,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
 }

 Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) {
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), PARAM_INVALID,
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), ACL_ERROR_GE_PARAM_INVALID,
                          "Node[%s] parse ext session info failed as infoLen must be %zu but %u.",
                          node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen);
@@ -173,7 +174,7 @@ Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const
 }

 Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc) {
-  GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), INTERNAL_ERROR,
+  GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), ACL_ERROR_GE_INTERNAL_ERROR,
                          "Node[%s] is depend_compute type, no need to update output shape and type by ext.",
                          node_name_.c_str());
   GE_CHECK_LE(output_index, output_num_);
@@ -183,7 +184,7 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons
   if (unknown_type_ == DEPEND_SHAPE_RANGE) {
     std::vector> range;
     auto range_ret = output_desc.GetShapeRange(range);
-    GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR,
+    GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, ACL_ERROR_GE_INTERNAL_ERROR,
                            "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.",
                            node_name_.c_str(), range_ret);
     for (size_t k = 0; k < range.size(); ++k) {
@@ -210,9 +211,9 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da
                                                AicpuShapeAndType *shape_and_type) {
   auto dim_num = shape.GetDimNum();
   if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) {
-    GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.",
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.",
           dim_num, aicpu::FWKAdapter::kMaxShapeDims);
-    return PARAM_INVALID;
+    return ACL_ERROR_GE_PARAM_INVALID;
   }
   size_t index = 0;
   for (; index < dim_num; ++index) {
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 63ce65e9..1c160eea 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -18,11 +18,10 @@
 #include "framework/common/taskdown_common.h"
 #include "common/formats/formats.h"
 #include "aicpu/common/aicpu_task_struct.h"
-#include "graph/load/new_model_manager/model_manager.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "graph/utils/node_utils.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/model/hybrid_model.h"
-#include "opskernel_manager/ops_kernel_builder_manager.h"

 namespace ge {
 namespace hybrid {
@@ -190,17 +189,17 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionnode);
+  if (unknown_type_ == DEPEND_COMPUTE) {
+    GE_CHK_STATUS_RET_NOLOG(SetMemCopyTask((*task_defs)[1]));
+  }
   GELOGI("Node[%s] init end.", node_name_.c_str());
   return SUCCESS;
 }

+Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) {
+  if (node_item_->num_outputs == 0) {
+    GELOGD("Node[%s] type[%s] has no output, no need set mem_copy task.",
+           node_name_.c_str(), node_item_->node_type.c_str());
+    return SUCCESS;
+  }
+
+  GELOGD("Start to set memcpy task for node[%s].", node_name_.c_str());
+  const domi::KernelExDef &kernel_def = task_def.kernel_ex();
+  if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
+    GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
+           sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
+    return PARAM_INVALID;
+  }
+  STR_FWK_OP_KERNEL aicpu_task = {0};
+  auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL),
+                          kernel_def.args().data(), kernel_def.args_size());
+  if (sec_ret != EOK) {
+    GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
+    return FAILED;
+  }
+
+  GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), copy_workspace_buf_),
+                    "Node[%s] alloc copy task workspace buf failed, size=%u.",
+                    node_name_.c_str(), kernel_def.task_info_size());
+
+  GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(),
+                         kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE));
+
+  aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData());
+  aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(copy_workspace_buf_->GetData());
+  aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
+  aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
+
+  GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
+                         &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE));
+  GELOGD("Set memcpy task for node[%s] successfully.", node_name_.c_str());
+  return SUCCESS;
+}
+
 uint64_t AicpuTfNodeTask::GetStepIdAddr(const HybridModel &model) {
   // get step_id_addr
   auto var_tensor = model.GetVariable(NODE_NAME_GLOBAL_STEP);
@@ -407,32 +450,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
                          "Node[%s] has %d outputs but out shape is %zu.",
                          node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size());

-  uint64_t copy_num = 0;
-  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num));
-
-  STR_FWK_OP_KERNEL aicpu_task = {0};
-  std::string task_info;
-  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] Start");
-  GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info));
-  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] End");
-
-  std::unique_ptr kernel_workspace_buf;
-  GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), kernel_workspace_buf),
-                    "Node[%s] alloc copy task workspace buf failed, size=%zu.",
-                    node_name_.c_str(), task_info.size());
-
-  GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(),
-                         task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE));
-
-  aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData());
-  aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData());
-  aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
-  aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
-
-  GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
-                         &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm));

   RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start");
   GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
@@ -445,8 +463,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
 }

 Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context,
-                                          const std::vector> &out_shape_hbm,
-                                          uint64_t &copy_num) {
+                                          const std::vector> &out_shape_hbm) {
   std::vector copy_input_release_flag;
   std::vector copy_input_data_size;
   std::vector copy_input_src;
@@ -458,34 +475,23 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context,
               node_name_.c_str(), i, summary.shape_data_ptr, summary.shape_data_size,
               summary.raw_data_ptr, summary.raw_data_size);

-    if (summary.raw_data_size > 0) {
-      auto output = context.GetOutput(i);
-      GE_CHECK_NOTNULL(output);
-      GE_CHECK_NOTNULL(output->GetData());
-      copy_input_release_flag.emplace_back(kReleaseFlag);
-      copy_input_data_size.emplace_back(summary.raw_data_size);
-      copy_input_src.emplace_back(summary.raw_data_ptr);
-      copy_input_dst.emplace_back(reinterpret_cast(output->GetData()));
-    }
-
-    if (summary.shape_data_size > 0) {
-      const auto &shape_buffer = out_shape_hbm[i];
-      GE_CHECK_NOTNULL(shape_buffer);
-      GE_CHECK_NOTNULL(shape_buffer->GetData());
-      copy_input_release_flag.emplace_back(kReleaseFlag);
-      copy_input_data_size.emplace_back(summary.shape_data_size);
-      copy_input_src.emplace_back(summary.shape_data_ptr);
-      copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData()));
-    }
+    auto output = context.GetOutput(i);
+    GE_CHECK_NOTNULL(output);
+    copy_input_release_flag.emplace_back(kReleaseFlag);
+    copy_input_data_size.emplace_back(summary.raw_data_size);
+    copy_input_src.emplace_back(summary.raw_data_ptr);
+    copy_input_dst.emplace_back(reinterpret_cast(output->GetData()));
+
+    const auto &shape_buffer = out_shape_hbm[i];
+    GE_CHECK_NOTNULL(shape_buffer);
+    copy_input_release_flag.emplace_back(kReleaseFlag);
+    copy_input_data_size.emplace_back(summary.shape_data_size);
+    copy_input_src.emplace_back(summary.shape_data_ptr);
+    copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData()));
   }

-  copy_num = copy_input_release_flag.size();
-
-  GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR,
-                         "Node[%s] need copy num is 0", node_name_.c_str());
-
-  // copy task need copy output and output shape
-  const size_t copy_input_buf_len = copy_num * sizeof(uint64_t);
+  // copy task need copy all output_data and output_shape, len is 2 * output_num
+  const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t);

   GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_->GetData(), copy_input_release_flag_dev_->GetSize(),
                          &copy_input_release_flag[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE));
@@ -498,15 +504,6 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context,
   return SUCCESS;
 }

-Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, std::string &task_info) {
-  static constexpr const char *const kKernelLibName = "aicpu_tf_kernel";
-  auto kernel_builder = OpsKernelBuilderManager::Instance().GetOpsKernelBuilder(kKernelLibName);
-  GE_CHK_BOOL_RET_STATUS(kernel_builder != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName);
-  auto ret = kernel_builder->GenMemCopyTask(copy_num, task, task_info);
-  GE_CHK_STATUS_RET(ret, "Call aicpu GenMemCopyTask failed, copy_num=%lu, ret=%u", copy_num, ret);
-  return SUCCESS;
-}
-
 Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context,
                                                const std::vector> &out_shape_hbm) {
   GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs),
@@ -813,9 +810,9 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model,
     GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID,
                            "Node[%s] task_def num[%zu] != 1", node->GetName().c_str(), (*task_defs).size());
   } else {
-    // The number of tasks of the fourth type operator may be 2
-    GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID,
-                           "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2",
+    // The number of tasks of the fourth type operator must be 2
+    GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 2, PARAM_INVALID,
+                           "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 2",
                            node->GetName().c_str(), (*task_defs).size());
   }
   const auto &task_def = (*task_defs)[0];
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
index 8f0b1d0a..b9cc8256 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
@@ -98,6 +98,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase {
   Status UpdateIoAddr(TaskContext &context) override;

  private:
+  Status SetMemCopyTask(const domi::TaskDef &task_def);
+
   Status InitForDependComputeTask();

   Status UpdateShapeAndDataByResultSummary(TaskContext &context);
@@ -117,11 +119,9 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase {
                            const std::vector> &out_shape_hbm);

   Status PrepareCopyInputs(const TaskContext &context,
-                           const std::vector> &out_shape_hbm,
-                           uint64_t &copy_num);
+                           const std::vector> &out_shape_hbm);

   static Status EnsureSessionCreated(uint64_t session_id);
-  static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info);
   static uint64_t GetStepIdAddr(const HybridModel &model);

  private:
   // kernel buf, device mem
@@ -145,6 +145,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase {
   std::unique_ptr copy_input_src_dev_;
   std::unique_ptr copy_input_dst_dev_;
   bool need_sync_ = false;
+
+  std::unique_ptr copy_workspace_buf_;
 };

 class AicpuNodeTask : public AicpuNodeTaskBase {
+119,9 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { const std::vector> &out_shape_hbm); Status PrepareCopyInputs(const TaskContext &context, - const std::vector> &out_shape_hbm, - uint64_t ©_num); + const std::vector> &out_shape_hbm); static Status EnsureSessionCreated(uint64_t session_id); - static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info); static uint64_t GetStepIdAddr(const HybridModel &model); private: // kernel buf, device mem @@ -145,6 +145,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { std::unique_ptr copy_input_src_dev_; std::unique_ptr copy_input_dst_dev_; bool need_sync_ = false; + + std::unique_ptr copy_workspace_buf_; }; class AicpuNodeTask : public AicpuNodeTaskBase { diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 7f2c6288..0837ffff 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -21,8 +21,8 @@ #include "common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" #include "hybrid/executor/hybrid_execution_context.h" namespace ge { @@ -126,6 +126,12 @@ Status KnownNodeTask::Init(TaskContext &context) { auto dump_properties = context.GetDumpProperties(); if (dump_properties.IsDumpOpen()) { davinci_model_->SetDumpProperties(dump_properties); + void *global_step = nullptr; + TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP); + if (varible_global_step != nullptr) { + global_step = varible_global_step->MutableData(); + } + davinci_model_->SetKnownShapeGlobalStep(global_step); } int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 2dde993b..6e9740ad 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -19,7 +19,7 @@ #include "hybrid/node_executor/node_executor.h" #include "hybrid/model/hybrid_model.h" #include "graph/op_desc.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { namespace hybrid { diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 94c734ca..5387a176 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -15,23 +15,25 @@ */ #include "hybrid/node_executor/hccl/hccl_node_executor.h" -#include "common/ge/ge_util.h" #include "common/ge/plugin_manager.h" #include "common/math/math_util.h" -#include "framework/common/debug/ge_log.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/util/hcom_util.h" #include "graph/runtime_inference_context.h" -#include "hccl/hcom.h" +#include "graph/utils/type_utils.h" +#include "hybrid/executor/hybrid_execution_context.h" +namespace ge { namespace { -const size_t kVarTableDims = 2; -const size_t kVarTableRowCnt = 3; -const size_t kVarTableIdxAddr = 1; -const size_t kVarTableIdxLen = 2; +constexpr size_t 
kVarTableDims = 2; +constexpr size_t kVarTableRowCnt = 3; +constexpr size_t kVarTableIdxAddr = 1; +constexpr size_t kVarTableIdxLen = 2; +const std::set<std::string> kRdmaReadTypes = { HCOMREMOTEREAD, HCOMREMOTEREFREAD }; +const std::set<std::string> kRdmaWriteTypes = { HCOMREMOTEWRITE, HCOMREMOTESCATTERWRITE }; +const std::set<std::string> kRdmaScatterTypes = { HCOMREMOTEREFREAD, HCOMREMOTESCATTERWRITE }; } // namespace -namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HCCL, HcclNodeExecutor); @@ -142,11 +144,22 @@ Status RdmaNodeTask::Init(TaskContext &context) { GE_CHECK_NOTNULL(peer_node->GetOpDesc()); remote_index_ = {peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx()}; - if (node_item.node->GetType() == HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(node_item.node->GetType()) > 0) { local_index_ = 0; } else { local_index_ = op_desc->GetInputIndexByName("local"); } + int32_t offset_idx = node_item.op_desc->GetInputIndexByName("local_offset"); + if ((offset_idx != -1) && (node_item.op_desc->GetInputDescPtr(offset_idx) != nullptr)) { + skip_flag_ = true; + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()); + offset_index_ = { + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()->GetId(), + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetIdx() }; + } return SUCCESS; } @@ -158,8 +171,13 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector<HcomRemoteAccessAddrInfo> &addr_infos) { GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); auto data = reinterpret_cast<uint64_t *>(remote_tensor.GetData()); if (data == nullptr) { - GELOGE(FAILED, "Tensor data is nullptr."); - return FAILED; + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + GELOGE(FAILED, "Tensor data is nullptr."); + return FAILED; + } } auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { @@ -183,30 +201,63 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(tensor_buffer.release())))); } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) } TensorValue *tv; - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - tv = context.MutableOutput(0); + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { + tv = context.MutableOutput(local_index_); } else { tv = context.MutableInput(local_index_); } GE_CHECK_NOTNULL(tv); - auto local_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(tv->MutableData())); auto row_num = dims.front(); addr_infos.resize(row_num); - auto device_len = tv->GetSize() / row_num; - if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { - GELOGE(FAILED, "Local embedding length is out of range."); - return FAILED; - } + if (skip_flag_) { + int32_t offset_idx = context.GetNodeItem().op_desc->GetInputIndexByName("local_offset"); + GE_CHECK_NOTNULL(context.GetNodeItem().op_desc->GetInputDescPtr(offset_idx)); + auto data_type =
context.GetNodeItem().op_desc->GetInputDesc(offset_idx).GetDataType(); + + Tensor offset_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(offset_index_.first, offset_index_.second, offset_tensor)) + if (static_cast<int64_t>(offset_tensor.GetSize() / GetSizeByDataType(data_type)) != row_num) { + GELOGE(PARAM_INVALID, "offset count and remote addr count mismatch, offset size=%zu, remote_addr size=%lld, dtype=%s", + offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); + return PARAM_INVALID; + } - for (auto idx = 0; idx < row_num; ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - addr_infos[idx] = {static_cast<uint32_t>(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, - device_len}; - local_addr += device_len; + auto addr_offset = reinterpret_cast<uint64_t *>(offset_tensor.GetData()); + GE_CHECK_NOTNULL(addr_offset); + auto base_addr = reinterpret_cast(tv->MutableData()); + GE_CHECK_NOTNULL(base_addr); + + for (auto idx = 0; idx < row_num; idx++) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast<uint32_t>(data[line_idx]), + data[line_idx + kVarTableIdxAddr], + reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(base_addr + addr_offset[idx])), + data[line_idx + kVarTableIdxLen] }; + } + } else { + auto local_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(tv->MutableData())); + auto device_len = tv->GetSize() / row_num; + if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { + GELOGE(FAILED, "Local embedding length is out of range, expected at most %lld, but got %lld.", + data[kVarTableIdxLen], device_len); + return FAILED; + } + + for (auto idx = 0; idx < row_num; ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast<uint32_t>(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, + device_len }; + local_addr += device_len; + } } return SUCCESS; @@ -226,6 +277,10 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do } vector<HcomRemoteAccessAddrInfo> addr_infos; GE_CHK_STATUS_RET(ExtractTensor(context, addr_infos)); + if (addr_infos.empty()) { + done_callback(); + return SUCCESS; + } auto callback = [this](HcclResult status) { if (status != HCCL_SUCCESS) { @@ -235,6 +290,11 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do this->cond_.notify_all(); GELOGI("rdma callback success."); }; + + std::string executor_type = context.GetNodeItem().NodeType(); + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + executor_type = context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD ? HCOMREMOTEREAD : HCOMREMOTEWRITE; + } HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); @@ -262,7 +322,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const GE_CHK_STATUS_RET(task.Init(context), "hccl node load hccl so failed."); // allocate output mem, output mem or remote read will be calculated when node execute.
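[Editorial aside, not part of the patch] The ExtractTensor hunks above revolve around a small address table: the remote tensor carries one row per transfer, shaped [row_num, kVarTableRowCnt] with columns (type, remote address, length) picked out by kVarTableIdxAddr and kVarTableIdxLen. In the contiguous mode the local buffer is simply cut into row_num slices of device_len bytes; the new skip_flag_ mode replaces the uniform slicing with explicit per-row offsets from the "local_offset" input. A minimal sketch of the contiguous mode; RowAddrInfo and BuildRowAddrInfos are hypothetical stand-ins for the real HcomRemoteAccessAddrInfo and the member logic:

#include <cstdint>
#include <vector>

struct RowAddrInfo {  // hypothetical mirror of HcomRemoteAccessAddrInfo
  uint32_t remote_type;
  uint64_t remote_addr;
  uint64_t local_addr;
  uint64_t length;
};

// One table row is kVarTableRowCnt (3) uint64 values: {type, remote addr, length}.
std::vector<RowAddrInfo> BuildRowAddrInfos(const uint64_t *table, int64_t row_num,
                                           uint64_t local_base, uint64_t device_len) {
  std::vector<RowAddrInfo> infos(static_cast<size_t>(row_num));
  for (int64_t i = 0; i < row_num; ++i) {
    const uint64_t *row = table + i * 3;
    infos[i] = {static_cast<uint32_t>(row[0]), row[1], local_base, device_len};
    local_base += device_len;  // next contiguous local slice
  }
  return infos;
}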
- if (context.GetNodeItem().NodeType() != HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) == 0) { GE_CHK_STATUS_RET(context.AllocateOutputs(), "hccl node task allocate output failed."); } @@ -274,7 +334,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const Status HcclNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { GELOGI("[%s] HcclNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); - if (node->GetType() == HCOMREMOTEREAD || node->GetType() == HCOMREMOTEWRITE) { + if ((kRdmaReadTypes.count(node->GetType()) > 0) || (kRdmaWriteTypes.count(node->GetType()) > 0)) { task = MakeShared<RdmaNodeTask>(); } else { task = MakeShared<HcclNodeTask>(); } diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 07dd848b..873f259f 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -55,9 +55,11 @@ class RdmaNodeTask : public NodeTask { private: Status ExtractTensor(TaskContext &context, vector<HcomRemoteAccessAddrInfo> &addr_infos); std::pair<int64_t, int32_t> remote_index_; + std::pair<int64_t, int32_t> offset_index_; int32_t local_index_ = 0; std::mutex hccl_mutex_; std::condition_variable cond_; + bool skip_flag_ = false; }; class HcclNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 01fd391d..d54195d6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -29,8 +29,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status AssignKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto ref_tensor = context.MutableInput(kAssignRefInputIndex); GE_CHECK_NOTNULL(ref_tensor); const auto value_tensor = context.GetInput(kAssignValueInputIndex); @@ -50,7 +48,7 @@ Status AssignKernel::Compute(TaskContext& context) { GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc new file mode 100644 index 00000000..e34f601a --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "hybrid/node_executor/host_cpu/kernel/data_kernel.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "hybrid/node_executor/host_cpu/kernel_factory.h" + +namespace { +constexpr size_t kDataInputIndex = 0; +constexpr size_t kDataOutputIndex = 0; +} + +namespace ge { +namespace hybrid { +namespace host_cpu { +Status DataKernel::Compute(TaskContext& context) { + auto input = context.MutableInput(kDataInputIndex); + GE_CHECK_NOTNULL(input); + GE_CHK_STATUS_RET(context.SetOutput(kDataOutputIndex, *input), "[%s] Failed to set output.", context.GetNodeName()) + GELOGD("[%s] compute success.", node_->GetName().c_str()); + return SUCCESS; +} + +REGISTER_KERNEL_CREATOR(Data, DataKernel); +} // namespace host_cpu +} // namespace hybrid +} // namespace ge diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h new file mode 100644 index 00000000..ca42d647 --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ +#define GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ + +#include "hybrid/node_executor/host_cpu/kernel/kernel.h" + +namespace ge { +namespace hybrid { +namespace host_cpu { +class DataKernel : public Kernel { + public: + DataKernel(const NodePtr &node) : Kernel(node) {} + ~DataKernel() override = default; + DataKernel &operator=(const DataKernel &op) = delete; + DataKernel(const DataKernel &op) = delete; + + /** + * @brief compute for node_task. 
+ * @return result + */ + Status Compute(TaskContext& context) override; +}; +} // namespace host_cpu +} // namespace hybrid +} // namespace ge + +#endif // GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index ff5a7c6d..b1b4e68c 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -23,7 +23,7 @@ namespace ge { namespace hybrid { namespace host_cpu { Status NoOpKernel::Compute(TaskContext& context) { - GELOGI("[%s] no need to compute.", node_->GetName().c_str()); + GELOGD("[%s] no need to compute.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc index 37b07e37..52d48821 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc @@ -30,8 +30,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status RandomUniformKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - int64_t seed = 0; int64_t seed2 = 0; (void)AttrUtils::GetInt(node_->GetOpDesc(), "seed", seed); @@ -66,7 +64,7 @@ Status RandomUniformKernel::Compute(TaskContext& context) { return UNSUPPORTED; } - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index 2a836458..16738c2a 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -23,8 +23,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status VariableKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto tensor = context.GetVariable(node_->GetName()); if (tensor == nullptr) { GELOGE(PARAM_INVALID, "tensor is NULL."); @@ -32,7 +30,7 @@ Status VariableKernel::Compute(TaskContext& context) { } // Constant & Variable Op has and only has one output GE_CHK_STATUS_RET(context.SetOutput(0, *tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 02427b91..e74256f2 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -38,7 +38,6 @@ const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; } Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws GE_CHK_STATUS_RET_NOLOG(context.AllocateWorkspaces()); GE_CHK_STATUS_RET_NOLOG(task.UpdateArgs(context)); return SUCCESS; @@ -118,11 +117,11 @@ Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executo auto executor_type = ResolveExecutorType(node); const auto it = executors_.find(executor_type); if (it == executors_.end()) { - GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", executor_type); + 
GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", static_cast(executor_type)); return INTERNAL_ERROR; } - GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), executor_type); + GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast(executor_type)); *executor = it->second.get(); return SUCCESS; } @@ -166,7 +165,7 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { TensorUtils::SetSize(output_tensor, output_mem_size); GE_CHK_STATUS_RET(op_desc->UpdateOutputDesc(static_cast(i), output_tensor), "hccl update output size failed."); - GELOGD("%s output desc[%u], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, + GELOGD("%s output desc[%zu], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, output_tensor.GetShape().GetDimNum(), output_mem_size); } return SUCCESS; @@ -190,14 +189,14 @@ Status NodeExecutorManager::InitializeExecutors() { GE_CHECK_NOTNULL(build_fn); auto executor = std::unique_ptr(build_fn()); if (executor == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", engine_type); + GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", static_cast(engine_type)); return INTERNAL_ERROR; } - GELOGD("Executor of engine type = %d was created successfully", engine_type); + GELOGD("Executor of engine type = %d was created successfully", static_cast(engine_type)); auto ret = executor->Initialize(); if (ret != SUCCESS) { - GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", engine_type); + GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", static_cast(engine_type)); for (auto &executor_it : executors_) { executor_it.second->Finalize(); } diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 6488fbbe..bc318124 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -18,9 +18,11 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/debug/log.h" #include "graph/utils/tensor_utils.h" +#include "graph/types.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/subgraph_executor.h" +#include "common/profiling/profiling_manager.h" namespace ge { namespace hybrid { @@ -212,6 +214,13 @@ Status TaskContext::AllocateOutput(int index, return SUCCESS; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + outputs_start_[index] = TensorValue(); + return SUCCESS; + } + auto it = node_item_->ref_outputs.find(index); if (it != node_item_->ref_outputs.end()) { auto &ref_node = it->second; @@ -498,5 +507,60 @@ bool TaskContext::NeedCallback() { Status TaskContext::Synchronize() { return execution_context_->Synchronize(GetStream()); } + +Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, + uint32_t task_type, uint32_t block_dim) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string op_name = op_desc->GetName(); + std::string 
dynamic_model_name = model->GetModelName(); + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = dynamic_model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_type = task_type; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + tmp_task_desc_info.shape_type = "dynamic"; + tmp_task_desc_info.cur_iter_num = iteration_ + 1; + task_desc_info.emplace_back(tmp_task_desc_info); + } + + return SUCCESS; +} + +Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string dynamic_model_name = model->GetModelName(); + auto op_mode = static_cast(domi::ImplyType::INVALID); + if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && + op_mode == static_cast(domi::ImplyType::TVM)) { + ComputeGraphDescInfo tmp_compute_graph_info; + tmp_compute_graph_info.model_name = dynamic_model_name; + tmp_compute_graph_info.op_name = op_desc->GetName(); + tmp_compute_graph_info.op_type = op_desc->GetType(); + tmp_compute_graph_info.task_id = task_id; + tmp_compute_graph_info.stream_id = stream_id; + compute_graph_info.emplace_back(tmp_compute_graph_info); + } + } + return SUCCESS; +} + } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 6a4bcb8c..e7ee4fc8 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -22,6 +22,7 @@ #include #include "common/properties_manager.h" #include "external/ge/ge_api_error_codes.h" +#include "framework/common/ge_types.h" #include "hybrid/common/tensor_value.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/executor/rt_callback_manager.h" @@ -108,6 +109,14 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; + const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } + Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); + void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + + const std::vector& GetProfilingGraphDescInfo() const { return compute_graph_info; } + Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); + void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } + private: TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, @@ -127,6 +136,8 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; + std::vector task_desc_info; + std::vector compute_graph_info; }; } // namespace hybrid } // namespace ge diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index b81632bd..1a97b6f8 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -37,7 +37,7 @@ #include "graph/common/ge_call_wrapper.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_manager.h" #include 
"graph/manager/graph_var_manager.h" diff --git a/ge/offline/proto/task.proto b/ge/offline/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/offline/proto/task.proto +++ b/ge/offline/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/proto/task.proto b/ge/proto/task.proto index d0c09840..0da5631e 100644 --- a/ge/proto/task.proto +++ b/ge/proto/task.proto @@ -57,6 +57,7 @@ message TaskDef { LabelSetDef label_set = 37; LabelGotoExDef label_goto_ex = 38; LabelSwitchByIndexDef label_switch_by_index = 39; + KernelDefWithHandle kernel_with_handle = 40; } message KernelDef { @@ -74,6 +75,19 @@ message KernelDef { uint32 kernel_ext_info_size = 19; } +message KernelDefWithHandle { + KernelContext context = 1; + + uint64 handle = 10; + string dev_func = 11; + uint32 block_dim = 12; + uint32 args_size = 13; + bytes args = 14; + bytes sm_desc = 15; + string original_kernel_key = 16; + string node_info = 17; +} + message KernelContext { uint32 kernel_type = 1; uint32 op_id = 2; // OP type in CCE diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index c4f8a53b..5a67f7cd 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -29,7 +29,7 @@ #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" #include "graph/common/local_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 37b279a2..47073fc0 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -659,7 +659,7 @@ Status ParseOutNodes(const string &out_nodes) { auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); int32_t index = stoi(StringUtils::Trim(key_value_v[1])); - GELOGD("Get output info: node[%s] and index[%ld]", key_value_v[0].c_str(), index); + GELOGD("Get output info: node[%s] and index[%d]", key_value_v[0].c_str(), index); if (iter != domi::GetContext().out_nodes_map.end()) { iter->second.emplace_back(index); } else { @@ -1007,7 +1007,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js } else { ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); - GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. 
Please check --om param."); } if (model.model_data != nullptr) { diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 5d5a299a..3c531747 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -20,7 +20,7 @@ #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/util/rt_context_util.h" using std::map; diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 1f3fc5c5..4f32bd6b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -22,11 +22,11 @@ #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" #include "single_op/task/build_task_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { namespace { @@ -57,9 +57,10 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { std::vector task_desc_info; uint32_t task_id = 0; uint32_t stream_id = 0; - if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); - return ACL_ERROR_GE_PARAM_INVALID; + auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Get task_id and stream_id failed."); + return RT_ERROR_TO_GE_STATUS(rt_ret); } TaskDescInfo tmp_task_desc_info; @@ -70,6 +71,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = shape_type; tmp_task_desc_info.cur_iter_num = 0; + tmp_task_desc_info.task_type = op_task->GetTaskType(); GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); task_desc_info.emplace_back(tmp_task_desc_info); diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index d523d355..ccbdbe3f 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -141,7 +141,7 @@ Status SingleOpManager::GetResourceId(rtStream_t stream, uintptr_t &resource_id) auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); if (rt_err != RT_ERROR_NONE) { GELOGE(rt_err, "get current context failed, runtime result is %d", static_cast(rt_err)); - return rt_err; + return RT_ERROR_TO_GE_STATUS(rt_err); } // use current context as resource key instead GELOGI("use context as resource key instead when default stream"); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 2a1a14e6..7d092091 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" @@ -438,8 +438,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { auto task_type = static_cast(task_def.type()); if (task_type == RT_MODEL_TASK_KERNEL) { if (single_op.op_task_ != 
nullptr) { - GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks."); - return UNSUPPORTED; + GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); + return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; } GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 6d0109fe..6637271c 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -24,7 +24,7 @@ #include #include "common/helper/model_helper.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "single_op/single_op.h" #include "single_op/stream_resource.h" diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 2a5f968f..6580ea31 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -16,7 +16,7 @@ #include "single_op/task/aicpu_kernel_task_builder.h" #include "framework/common/taskdown_common.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "build_task_utils.h" namespace ge { diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 1bfbcb3c..a01ee0f0 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -19,8 +19,8 @@ #include "single_op/task/build_task_utils.h" #include "runtime/mem.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) @@ -30,8 +30,8 @@ namespace ge { auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_def_.args().data(), kernel_def_.args().size()); if (sec_ret != EOK) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "memcpy failed, ret: %d", sec_ret); - return ACL_ERROR_GE_INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } auto io_addr_val = static_cast(reinterpret_cast(io_addr)); @@ -46,7 +46,7 @@ namespace ge { auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "malloc arg memory failed, ret = %d", rt_ret); - return rt_ret; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, @@ -54,7 +54,7 @@ namespace ge { if (rt_ret != RT_ERROR_NONE) { (void)rtFree(fwk_op_args); GELOGE(rt_ret, "copy args failed, ret = %d", rt_ret); - return rt_ret; + return RT_ERROR_TO_GE_STATUS(rt_ret); } *args = fwk_op_args; return SUCCESS; @@ -96,7 +96,7 @@ namespace ge { // get kernel_ext_info auto &kernel_ext_info = kernel_def_.kernel_ext_info(); auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); - GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, ACL_ERROR_GE_PARAM_INVALID, "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), kernel_ext_info_size); 
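[Editorial aside, not part of the patch] A pattern repeated across these single-op hunks: runtime failures now keep the rtError_t value instead of being flattened to FAILED/RT_FAILED, with RT_ERROR_TO_GE_STATUS doing the conversion on return. A compact sketch of the idiom; AllocDeviceBuffer is a made-up helper name, and RT_ERROR_TO_GE_STATUS is assumed to be essentially a numeric cast into GE's Status space (check ge_inner_error_codes.h for the authoritative definition):

#include "framework/common/debug/ge_log.h"
#include "runtime/mem.h"

namespace ge {
Status AllocDeviceBuffer(void *&dev_buf, size_t size) {
  rtError_t rt_ret = rtMalloc(&dev_buf, size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    // Log and return the runtime's own code so callers can tell rtMalloc
    // failures apart from generic GE-internal errors.
    GELOGE(rt_ret, "rtMalloc failed, size = %zu, ret = %d", size, static_cast<int>(rt_ret));
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
}  // namespace ge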
GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info, kernel_id), "Init ext info failed."); diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 071e514b..9e4d55e1 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -17,7 +17,7 @@ #include "single_op/task/build_task_utils.h" #include "runtime/rt.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/type_utils.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index cc63e811..3d001d8b 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -45,7 +45,7 @@ void FreeHbm(void *var) { Status OpTask::OpenDump(rtStream_t stream) { if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { - GELOGI("Dump is open in single op,start to set dump info"); + GELOGI("Dump is open in single op, start to set dump info"); std::vector<void *> input_addrs; std::vector<void *> output_adds; auto input_size = op_desc_->GetInputsSize(); @@ -54,10 +54,10 @@ Status OpTask::OpenDump(rtStream_t stream) { size_t arg_num = 0; GetIoAddr(arg_base, arg_num); if (arg_num < input_size + output_size) { - GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", arg_num, input_size + output_size); - return FAILED; + return ACL_ERROR_GE_INTERNAL_ERROR; } for (size_t i = 0; i < input_size; i++) { @@ -120,11 +120,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam &param, bool keep_works size_t arg_num = 0; GetIoAddr(arg_base, arg_num); if (arg_num < all_addresses.size()) { - GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", op_desc_->GetName().c_str(), all_addresses.size(), arg_num); - return INTERNAL_ERROR; + return ACL_ERROR_GE_INTERNAL_ERROR; } for (void *addr : all_addresses) { @@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, return UNSUPPORTED; } +uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } + TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { (void)rtMemFreeManaged(sm_desc_); @@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } const std::string &TbeOpTask::GetStubName() const { return stub_name_; } +uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } + Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); @@ -174,8 +178,8 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { } if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->stub_name_.c_str()); - return RT_FAILED; + GELOGE(ret, "Invoke rtKernelLaunch failed.
ret = %d, task = %s", ret, this->stub_name_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); auto status = OpenDump(stream); @@ -195,8 +199,8 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve run_info.block_dim = 0; auto ret = optiling::OpParaCalculate(*node_, run_info); if (ret != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to invoke OpParaCalculate. ret = %u", ret); - return FAILED; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to invoke OpParaCalculate. ret = %u", ret); + return ACL_ERROR_GE_INTERNAL_ERROR; } block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); @@ -219,8 +223,8 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc } else { std::vector storage_shape; if (!AttrUtils::GetListInt(src_tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) { - GELOGE(PARAM_INVALID, "Failed to get storage_shape while storage_format was set"); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to get storage_shape while storage_format was set"); + return ACL_ERROR_GE_INTERNAL_ERROR; } GELOGD("Storage format set. update shape to [%s], and original shape to [%s]", @@ -269,7 +273,9 @@ Status TbeOpTask::AllocateWorkspaces(const vector &workspace_sizes) { std::vector ws_offsets; for (auto ws_size : workspace_sizes) { // alignment and padding should be done in OpParaCalculate - GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); + if (CheckInt64AddOverflow(total_size, ws_size) != SUCCESS) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } ws_offsets.emplace_back(total_size); total_size += ws_size; } @@ -317,8 +323,9 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, } if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { - GELOGE(INTERNAL_ERROR, "[%s] Failed to update kernel args.", node_->GetName().c_str()); - return INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to update kernel args.", + node_->GetName().c_str()); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); @@ -356,7 +363,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint num_inputs_, num_outputs_, unknown_type_)); - GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); + GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); if (ret != SUCCESS) { @@ -414,7 +421,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, "Input[%zu] update input shape failed.", input_index); continue; } - GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, "Input_desc size is %zu, but get non_const_index is %zu", input_desc.size(), non_const_index); GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), @@ -503,11 +510,11 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vecto if (input_index < input_is_const_.size() && input_is_const_[input_index]) { // const input no need update addr GE_CHECK_NOTNULL(arg_base); - GELOGD("AICpuTask input[%zu] addr = %u", input_index, *arg_base); + GELOGD("AICpuTask input[%zu] addr = %lu", input_index, *arg_base); arg_base++; continue; } - 
GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), ACL_ERROR_GE_PARAM_INVALID, "Input size is %zu, but get non_const_index is %zu", inputs.size(), non_const_index); auto addr = inputs[non_const_index].data; @@ -557,15 +564,15 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { RT_MEMCPY_HOST_TO_DEVICE_EX, stream); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); - return RT_FAILED; + GELOGE(ret, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("To invoke rtKernelLaunchEx. task = %s", this->op_type_.c_str()); ret = rtKernelLaunchEx(args_, arg_size_, 0, stream); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); - return RT_FAILED; + GELOGE(ret, "Invoke rtKernelLaunchEx failed. ret = %d, task = %s", ret, this->op_type_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); @@ -706,7 +713,7 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output Status AiCpuTask::InitForSummaryAndCopy() { if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { - GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_); + GELOGI("Unknown_type is %d, output num is %zu.", unknown_type_, num_outputs_); return SUCCESS; } @@ -743,9 +750,9 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { - GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); - return PARAM_INVALID; + return ACL_ERROR_GE_PARAM_INVALID; } GE_CHK_RT_RET(rtMalloc(&copy_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), @@ -755,8 +762,8 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), kernel_def.args().data(), kernel_def.args().size()); if (sec_ret != EOK) { - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); - return FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_); @@ -802,6 +809,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) { return DoUpdateArgTable(param, false); } +uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } + void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); arg_count = io_addr_host_.size(); @@ -838,7 +847,7 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { sm_desc, stream, dump_flag_); if (ret != RT_ERROR_NONE) { GELOGE(ret, "Invoke rtCpuKernelLaunch failed.
ret = %d", ret); - return ret; + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); GELOGD("Invoke rtCpuKernelLaunch succeeded"); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 2d0740a6..78e1f6f0 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -52,6 +52,7 @@ class OpTask { std::vector &output_desc, std::vector &output_buffers, rtStream_t stream); + virtual uint32_t GetTaskType() const; protected: Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); @@ -85,6 +86,7 @@ class TbeOpTask : public OpTask { size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); + uint32_t GetTaskType() const override; private: friend class SingleOpModel; @@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask { ~AiCpuBaseTask() override; UnknowShapeOpType GetUnknownType() const { return unknown_type_; } Status UpdateArgTable(const SingleOpModelParam ¶m) override; + uint32_t GetTaskType() const override; + protected: Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); Status SetInputConst(); diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 594352aa..6eee61d0 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -20,7 +20,7 @@ #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "runtime/rt.h" #include "single_op/task/build_task_utils.h" @@ -242,7 +242,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); if (rtRet != RT_ERROR_NONE) { GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rtRet)); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } const domi::KernelContext &context = kernel_def_.context(); @@ -261,7 +261,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); if (rtRet != RT_ERROR_NONE) { GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast(rtRet)); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } } @@ -287,7 +287,7 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); if (rtRet != SUCCESS) { GELOGE(rtRet, "rtGetFunctionByName failed."); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } task.SetStubFunc(stub_name_, stub_func); diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index 3d63aced..e77f817c 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -109,8 +109,13 @@ GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid."); 
+GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, "Format is invalid when transferring shape."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Shape is invalid when transferring shape."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Datatype is invalid when transferring shape."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); diff --git a/inc/external/ge/ge_error_codes.h b/inc/external/ge/ge_error_codes.h index 20a7e0f9..041fc7ae 100644 --- a/inc/external/ge/ge_error_codes.h +++ b/inc/external/ge/ge_error_codes.h @@ -38,7 +38,12 @@ static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; +static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022; static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; +static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; @@ -49,6 +54,7 @@ static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; + #ifdef __cplusplus } // namespace ge #endif diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 4a32af36..07cd1664 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -38,75 +38,53 @@ extern "C" { enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; class GeLog { -public: + public: + static uint64_t GetTid() { #ifdef __GNUC__ -static pid_t GetTid() { - thread_local static pid_t tid = syscall(__NR_gettid); - return tid; -} + thread_local static uint64_t tid = static_cast(syscall(__NR_gettid)); #else -static int GetTid() { - thread_local static int tid = static_cast(GetCurrentThreadId()); - return tid; -} + thread_local static uint64_t tid = static_cast(GetCurrentThreadId()); #endif + return tid; + } }; inline bool IsLogEnable(int module_name, int log_level) { int32_t enable = CheckLogLevel(module_name, log_level); // 1:enable, 0:disable - if (enable == 1) { - return true; - } - return false; + return (enable == 1); } -#define GELOGE(ERROR_CODE, fmt, ...) \ +#define GELOGE(ERROR_CODE, fmt, ...) \ dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) -#define GELOGW(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGI(fmt, ...) 
\ - if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGD(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGW(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ + dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGI(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \ + dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGD(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \ + dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) + #define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGO(fmt, ...) \ - Dlog(GE_MODULE_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGT(VALUE, fmt, ...) \ - do { \ - TraceStatus stat = VALUE; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ + +#define GELOGT(VALUE, fmt, ...) \ + do { \ + TraceStatus stat = VALUE; \ + const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ + int idx = static_cast(stat); \ + char *k = const_cast("status"); \ + char *v = const_cast(TraceStatStr[idx]); \ + KeyValue kv = {k, v}; \ + DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \ + ##__VA_ARGS__); \ } while (0) -#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ +#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) -#define GE_LOG_WARN(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_WARN)) dlog_warn(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_INFO(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_INFO)) dlog_info(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_DEBUG(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_DEBUG)) dlog_debug(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_EVENT(MOD_NAME, fmt, ...) dlog_event(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_OPLOG(MOD_NAME, fmt, ...) \ - Dlog(MOD_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) - -#define GE_LOG_TRACE(MOD_NAME, value, fmt, ...) \ - do { \ - TraceStatus stat = value; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(MOD_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ - } while (0) // print memory when it is greater than 1KB. 
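[Editorial aside, not part of the patch] The macro reflow above keeps two hygiene levels: GELOGT and GE_LOG_TRACE stay wrapped in do { ... } while (0), while GELOGW/GELOGI/GELOGD remain a bare if. The bare-if form is fine for a plain GELOGI(...); statement, but it binds a following else to the macro's hidden if. A toy illustration (LOG_IF_BARE and LOG_IF_SAFE are made-up names):

#include <cstdio>

static bool LogEnabled() { return true; }  // stand-in for IsLogEnable(GE_MODULE_NAME, ...)

#define LOG_IF_BARE(fmt, ...) \
  if (LogEnabled()) std::printf(fmt "\n", ##__VA_ARGS__)

#define LOG_IF_SAFE(fmt, ...)                                \
  do {                                                       \
    if (LogEnabled()) std::printf(fmt "\n", ##__VA_ARGS__);  \
  } while (0)

void Demo(bool cond) {
  if (cond)
    LOG_IF_SAFE("on the cond path, id = %d", 1);
  else
    std::puts("this else binds to if (cond), as intended");
  // With LOG_IF_BARE in the same position, the else would silently attach to
  // the macro's hidden if (LogEnabled()) instead of to if (cond).
}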
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 72dba126..31281cd6 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -261,6 +261,12 @@ ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ } +#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ + { \ + GELOGW("%s", errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ + } + #define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ do { \ bool b = (expr); \ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index f7e6d679..9ca77f1c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -41,12 +41,7 @@ enum FrameworkType { }; const std::map kFwkTypeToStr = { - {"0", "Caffe"}, - {"1", "MindSpore"}, - {"3", "TensorFlow"}, - {"4", "Android_NN"}, - {"5", "Onnx"} -}; + {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; enum OpEngineType { ENGINE_SYS = 0, // default engine @@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; +// profiling data +const uint32_t kTaskTypeAicore = 0; +const uint32_t kTaskTypeAicpu = 1; +const uint32_t kTaskTypeInvalid = 0xFFFF; + // Data cache, including data address and length struct DataBuffer { public: @@ -256,6 +256,7 @@ struct TaskDescInfo { uint32_t stream_id; std::string shape_type; int64_t cur_iter_num; + uint32_t task_type; }; // Profiling info of graph diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 4d4c54d1..2dbb1753 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -437,6 +437,7 @@ REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index 62c9c750..f3b7f00a 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -238,8 +238,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *COSH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SINH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUAREDDIFFERENCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char - *REQUIREDSPACETOBATCHPADDINGS; // for retinanet scope fusion +// for retinanet scope fusion +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REQUIREDSPACETOBATCHPADDINGS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPOSTPROCESSOR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETBOXES; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINAMULTIANCHORS; @@ -370,7 +370,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREDUCESC FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *HCOMSEND; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMRECEIVE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREFREAD; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEWRITE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTESCATTERWRITE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARASSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARISINITIALIZEDOP; diff --git a/metadef b/metadef index fcd0833c..8ab60be2 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fcd0833cffcd201701f71d17db0c696c1bb01715 +Subproject commit 8ab60be2870b80b1ec952bb21c7f05ae2a624984 diff --git a/parser b/parser index 1601d66b..98f17f4a 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 1601d66b6187c83cbf38e762beb5538ce2c7c573 +Subproject commit 98f17f4a2a37f283797858eabefa9dba1d06a66b diff --git a/tests/depends/omg/src/omg_stub.cc b/tests/depends/omg/src/omg_stub.cc index a6221570..13ddf8bb 100644 --- a/tests/depends/omg/src/omg_stub.cc +++ b/tests/depends/omg/src/omg_stub.cc @@ -315,7 +315,7 @@ long GetFileLength(const std::string &input_file) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1, "open file failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %ld is out of limit: %d.", + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.", file_length, MAX_FILE_SIZE_LIMIT); return file_length; } diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 91a6620d..a1ec8248 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -132,7 +132,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" "${GE_CODE_DIR}/ge/session/session_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" "${GE_CODE_DIR}/ge/session/inner_session.cc" @@ -140,15 +140,15 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" "${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model_stub.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/base.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" 
"${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" @@ -254,13 +254,13 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_offset.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_task.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/aipp_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_offset.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" "${GE_CODE_DIR}/ge/omm/csa_interact.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" "${GE_CODE_DIR}/ge/common/kernel_store.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/auth/file_saver.cc" @@ -386,32 +386,32 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/common/model_parser/base.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/util.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model_parser.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + 
"${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_record_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_wait_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/hccl_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_set_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_active_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" @@ -573,7 +573,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/data_dumper_unittest.cc" #"graph/load/new_model_manager_data_inputer_unittest.cc" #"graph/load/new_model_manager_davinci_model_unittest.cc" - #"graph/load/new_model_manager_model_manager_unittest.cc" + "graph/load/new_model_manager_model_manager_unittest.cc" #"graph/load/new_model_manager_task_build_unittest.cc" "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" "graph/load/end_graph_task_unittest.cc" @@ -589,6 +589,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" "graph/load/model_helper_unittest.cc" + "graph/load/model_utils_unittest.cc" ) set(PASS_TEST_FILES @@ -682,7 +683,7 @@ set(MULTI_PARTS_TEST_FILES "common/format_transfer_nchw_fractalz_unittest.cc" "common/format_transfer_hwcn_fractalz_unittest.cc" "common/format_transfer_nhwc_fractalz_unittest.cc" - #"common/format_transfer_fractal_nz_unittest.cc" + "common/format_transfer_fractal_nz_unittest.cc" "common/format_transfer_fractal_zz_unittest.cc" "common/format_transfer_nhwc_5d_unittest.cc" "common/format_transfer_5d_nchw_unittest.cc" @@ -696,6 +697,7 @@ set(MULTI_PARTS_TEST_FILES "graph/variable_accelerate_ctrl_unittest.cc" "graph/build/logical_stream_allocator_unittest.cc" "graph/build/mem_assigner_unittest.cc" + "graph/preprocess/graph_preprocess_unittest.cc" "session/omg_omg_unittest.cc" ) diff --git 
a/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc b/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc
index 6e5158df..b0a39396 100644
--- a/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc
@@ -679,7 +679,7 @@ TEST_F(UtestFormatTransfer5dNhwc, nc1hwc0_to_nhwc_float2) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransfer5dNhwc, invalid_src_format) {
diff --git a/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc b/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc
index e809cf1b..3f195ef2 100644
--- a/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc
@@ -158,7 +158,7 @@ TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_fp16_success_lt_cube) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_gp16_success_eq_cube) {
diff --git a/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc b/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc
index fe3dd452..70c07d45 100644
--- a/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc
@@ -249,8 +249,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape1_uint8_3) {
 }
 */
 
-
-TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) {
+/*TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) {
   uint8_t data[32 * 32] = {
       47,  78,  47,  180, 246, 76,  157, 127, 63,  0,   168, 23,  148, 198, 180, 190, 43,  187, 76,  67,  77,  246, 11, 149, 240, 236, 136, 123, 51,  95,  7,   163, 163, 64,  157, 230, 247, 122, 67,  106, 150, 20,  231, 118, 43,  208,
@@ -2157,7 +2156,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape3_fp16) {
   for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) {
     EXPECT_EQ((reinterpret_cast<uint16_t *>(result2.data.get()))[i], data[i]);
   }
-}
+}*/
 
 TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) {
   uint16_t data[2 * 2 * 17 * 4] = {
@@ -2333,7 +2332,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) {
   }
   EXPECT_EQ(
       transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
-      UNSUPPORTED);
+      ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, nd_shape5_fp16) {
@@ -4785,6 +4784,8 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp32) {
   for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) {
     EXPECT_EQ((reinterpret_cast<float *>(result2.data.get()))[i], data[i]);
   }
+  EXPECT_EQ(transfer2.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, nchw_shape4_fp32) {
@@ -9059,7 +9060,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_shape) {
   FormatTransferFractalNz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) {
@@ -9079,7 +9080,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) {
   FormatTransferFractalNz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) {
@@ -9094,8 +9095,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) {
   FormatTransferFractalNz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
-  EXPECT_EQ(TransFormat(args, result), UNSUPPORTED);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractNz, invalid_dst_shape) {
@@ -9136,6 +9136,24 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type2) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
 }
 
+TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type3) {
+  uint16_t data[1 * 1 * 1 * 16 * 16] = {0};
+  TransArgs args{reinterpret_cast<uint8_t *>(data),
+                 FORMAT_FRACTAL_NZ,
+                 FORMAT_NHWC,
+                 {1, 1, 1, 16, 16},
+                 {
+                     1,
+                     1,
+                     4,
+                     4,
+                 },
+                 DT_VARIANT};
+  TransResult result;
+  FormatTransferFractalNzND transfer;
+  EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
+}
+
 TEST_F(UtestFormatTransferNdFractNz, invalid_dst_format2) {
   uint16_t data[1 * 1 * 1 * 1 * 16 * 16] = {0};
   TransArgs args{reinterpret_cast<uint8_t *>(data),
diff --git a/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc b/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc
index 6278b958..8b1afa24 100644
--- a/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc
@@ -1894,7 +1894,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16_1) {
   }
   EXPECT_EQ(
       transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
-      UNSUPPORTED);
+      ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) {
@@ -2071,7 +2071,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) {
   }
   EXPECT_EQ(
       transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
-      UNSUPPORTED);
+      ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, nd_shape5_fp16) {
@@ -7879,7 +7879,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_shape) {
   FormatTransferFractalZz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) {
@@ -7899,7 +7899,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) {
   FormatTransferFractalZz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) {
@@ -7914,7 +7914,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) {
   FormatTransferFractalZz transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
-            PARAM_INVALID);
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
   EXPECT_EQ(TransFormat(args, result), UNSUPPORTED);
 }
 
diff --git a/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc b/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc
index 6c18aa34..25caa741 100644
--- a/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc
@@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_eq_cube) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_gt_cube) {
diff --git a/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc b/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc
index 46d3ae86..93160070 100644
--- a/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc
@@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_eq_cube) {
   }
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_gt_cube) {
diff --git a/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc b/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc
index e468f5ac..1e6b90dd 100644
--- a/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc
@@ -75,7 +75,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_format_nchw) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format_nc1khkwhwc0) {
@@ -142,7 +142,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_shape3) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, PARAM_INVALID);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
 }
 
 TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format) {
diff --git a/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc b/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc
index 67104bf8..610bd7d3 100644
--- a/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc
@@ -633,5 +633,14 @@ TEST_F(UtestFormatTransferNchw5d, unsupport_dst_format) {
   TransResult result;
   EXPECT_NE(transfer.TransFormat(args, result), SUCCESS);
 }
+
+TEST_F(UtestFormatTransferNchw5d, invalid_data_format) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferNchwNc1hwc0 transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc b/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc
index 0944afd7..bc5a8754 100644
--- a/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc
@@ -719,7 +719,7 @@ TEST_F(UtestFormatTransferNhwc5d, invalid_src_format) {
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
   Status status =
       transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape);
-  EXPECT_EQ(status, UNSUPPORTED);
+  EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
 }
 
 TEST_F(UtestFormatTransferNhwc5d, invalid_dst_shape2) {
@@ -751,5 +751,20 @@ TEST_F(UtestFormatTransferNhwc5d, unsupport_dst_format) {
   FormatTransferNhwcNc1hwc0 transfer;
   EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID);
 }
+
+TEST_F(UtestFormatTransferNhwc5d, invalid_data_shape) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferNhwcNc1hwc0 transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+
+  TransArgs args2{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_STRING};
+  FormatTransferNhwcNc1hwc0 transfer2;
+  EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc b/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc
index f6017fb7..a6dfffb0 100644
--- a/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc
@@ -5353,5 +5353,44 @@ TEST_F(UtestFormatTransferNhwcFz, build_transfer_uint8) {
   auto transfer = BuildFormatTransfer(args);
   EXPECT_NE(transfer, nullptr);
 }
+
+TEST_F(UtestFormatTransferNhwcFz, invalid_data_type) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_VARIANT};
+  FormatTransferFractalZ transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID);
+}
+
+TEST_F(UtestFormatTransferNhwcFz, invalid_data_format) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_CHWN, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
+
+TEST_F(UtestFormatTransferNhwcFz, invalid_data_shape) {
+  uint16_t data[1 * 4 * 4 * 1] = {0};
+  TransArgs args{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer;
+  EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+
+  TransArgs args2{
+      reinterpret_cast<uint8_t *>(data), FORMAT_HWCN, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer2;
+  EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+
+  TransArgs args3{
+      reinterpret_cast<uint8_t *>(data), FORMAT_NCHW, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16};
+  FormatTransferFractalZ transfer3;
+  EXPECT_EQ(transfer3.TransShape(args3.src_format, args3.src_shape, args3.src_data_type, args3.dst_format, args3.dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/common/format_transfer_transpose_unittest.cc b/tests/ut/ge/common/format_transfer_transpose_unittest.cc
index 258b77fc..d56e06c0 100644
--- a/tests/ut/ge/common/format_transfer_transpose_unittest.cc
+++ b/tests/ut/ge/common/format_transfer_transpose_unittest.cc
@@ -4654,5 +4654,27 @@ TEST_F(UtestFormatTranspose, chwn_to_hwcn2) {
     EXPECT_EQ((reinterpret_cast<float *>(result.data.get()))[i], ret[i]);
   }
 }
+
+TEST_F(UtestFormatTranspose, invalid_data_shape) {
+  FormatTransferTranspose transfer;
+  std::vector<int64_t> dst_shape;
+  EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, std::vector<int64_t>({}), DT_FLOAT16, FORMAT_HWCN, dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID);
+}
+
+TEST_F(UtestFormatTranspose, invalid_src_format) {
+  FormatTransferTranspose transfer;
+  std::vector<int64_t> dst_shape;
+  EXPECT_EQ(transfer.TransShape(FORMAT_NC1HWC0, std::vector<int64_t>({1, 3, 8, 8}), DT_FLOAT16, FORMAT_HWCN, dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
+
+TEST_F(UtestFormatTranspose, invalid_dst_format) {
+  FormatTransferTranspose transfer;
+  std::vector<int64_t> dst_shape;
+  std::vector<int64_t> src_shape;
+  EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, src_shape, DT_FLOAT16, FORMAT_C1HWNC0, dst_shape),
+            ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID);
+}
 }  // namespace formats
 }  // namespace ge
diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc
index 3d04fd0c..3ef8a750 100644
--- a/tests/ut/ge/graph/ge_executor_unittest.cc
+++ b/tests/ut/ge/graph/ge_executor_unittest.cc
@@ -33,11 +33,11 @@
 #include "common/properties_manager.h"
 #include "common/types.h"
 #include "graph/load/graph_loader.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
-#include "graph/load/new_model_manager/model_manager.h"
-#include "graph/load/new_model_manager/task_info/kernel_task_info.h"
-#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/model_manager.h"
+#include "graph/load/model_manager/task_info/kernel_task_info.h"
+#include "graph/load/model_manager/task_info/kernel_ex_task_info.h"
 #include "ge/common/dump/dump_properties.h"
 #include "graph/manager/graph_mem_allocator.h"
 #include "graph/utils/graph_utils.h"
diff --git a/tests/ut/ge/graph/graph_load_unittest.cc b/tests/ut/ge/graph/graph_load_unittest.cc
index af9d5a37..54972af7 100644
--- a/tests/ut/ge/graph/graph_load_unittest.cc
+++ b/tests/ut/ge/graph/graph_load_unittest.cc
@@ -24,7 +24,7 @@
 #include "common/helper/model_helper.h"
 #include "common/op/ge_op_utils.h"
 #include "common/types.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/op_desc.h"
 #include "graph/types.h"
 #include "graph/utils/attr_utils.h"
@@ -35,7 +35,7 @@
#include "graph/load/graph_loader.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_manager_utils.h" #include "model/ge_model.h" #undef private diff --git a/tests/ut/ge/graph/load/data_dumper_unittest.cc b/tests/ut/ge/graph/load/data_dumper_unittest.cc index e53b76f4..1866f4eb 100644 --- a/tests/ut/ge/graph/load/data_dumper_unittest.cc +++ b/tests/ut/ge/graph/load/data_dumper_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 0c03c934..47968345 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -20,7 +20,7 @@ #define protected public #include "graph/utils/graph_utils.h" #include "common/profiling/profiling_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" using namespace std; @@ -46,7 +46,7 @@ class UtestDavinciModel : public testing::Test { } }; -TEST_F(UtestDavinciModel, init_success) { +/*TEST_F(UtestDavinciModel, init_success) { DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); ProfilingManager::Instance().is_load_profiling_ = true; @@ -130,7 +130,7 @@ TEST_F(UtestDavinciModel, init_success) { EXPECT_EQ(outputs.size(), 1); ProfilingManager::Instance().is_load_profiling_ = false; -} +}*/ TEST_F(UtestDavinciModel, init_data_op) { DavinciModel model(0, nullptr); @@ -334,7 +334,7 @@ TEST_F(UtestDavinciModel, Init_variable_op) { EXPECT_EQ(model.InitNodes(graph), SUCCESS); EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); - EXPECT_NE(model.SyncVarData(), SUCCESS); + EXPECT_EQ(model.SyncVarData(), SUCCESS); } TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) { diff --git a/tests/ut/ge/graph/load/end_graph_task_unittest.cc b/tests/ut/ge/graph/load/end_graph_task_unittest.cc index 29e7a53a..a66aaaff 100644 --- a/tests/ut/ge/graph/load/end_graph_task_unittest.cc +++ b/tests/ut/ge/graph/load/end_graph_task_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc index 5c056007..6a2468ee 100644 --- a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { class UtestHcclTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc 
b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc index 443d2975..53436820 100644 --- a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "cce/aicpu_engine_struct.h" namespace ge { diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index fe886b49..a3a27a7b 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { extern OpDescPtr CreateOpDesc(string name, string type); diff --git a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc index 9348d49e..1652841d 100644 --- a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" namespace ge { class UtestMemcpyAddrAsyncTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc index 8769ec39..afc04130 100644 --- a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" namespace ge { diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc b/tests/ut/ge/graph/load/model_utils_unittest.cc new file mode 100644 index 00000000..ac886cea --- /dev/null +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#define protected public +#define private public +#include "graph/load/model_manager/model_utils.h" +#include "graph/manager/graph_var_manager.h" + +using namespace std; + +namespace ge { +class UtestModelUtils : public testing::Test { + protected: + void TearDown() {} +}; + +// test ModelUtils::GetVarAddr +TEST_F(UtestModelUtils, get_var_addr_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + runtime_param.var_size = 16; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(runtime_param.var_base + offset - runtime_param.logic_var_base, var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} + +TEST_F(UtestModelUtils, get_var_addr_rdma_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(reinterpret_cast(offset), var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} +} // namespace ge diff --git a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc index 56e673f7..43c2ad15 100644 --- a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc @@ -17,7 +17,7 @@ #include -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc index 00069930..38a250ad 100644 --- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc @@ -24,29 +24,29 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include #include "runtime/dev.h" #include "runtime/kernel.h" #include "cce/fwk_adpt_struct.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" -#include "graph/load/new_model_manager/task_info/task_info.h" 
-#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" -#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc index 43e094b5..a68fb307 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -30,9 +30,9 @@ #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" //#include "new_op_test_utils.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc index 1c6e5a10..688e73d4 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc @@ -15,24 +15,18 @@ */ #include - -#include +#include #include "common/debug/log.h" -#include "common/model_parser/base.h" -#include "common/properties_manager.h" #include "common/types.h" -#include "common/l2_cache_optimize.h" - +#include "graph/utils/graph_utils.h" #define private 
public #define protected public -#include "graph/load/new_model_manager/model_manager.h" - +#include "graph/load/model_manager/model_manager.h" #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "new_op_test_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #undef private #undef protected @@ -87,7 +81,6 @@ class UtestModelManagerModelManager : public testing::Test { data.model_data = new uint8_t[data.model_len]; uint8_t data_ori[model_len]; memset(data_ori, 10, model_len); - uint32_t out_len; ModelFileHeader *header = (ModelFileHeader *)data.model_data; header->magic = MODEL_FILE_MAGIC_NUM; header->version = MODEL_VERSION; @@ -97,7 +90,7 @@ class UtestModelManagerModelManager : public testing::Test { void LoadStandardModelData(ge::ModelData &data) { static const std::string STANDARD_MODEL_DATA_PATH = - "llt/framework/domi/ut/ome/test/data/standard_partition_model.txt"; + "llt/framework/domi/ut/ome/test/data/standard_partition_model.txt"; ge::proto::ModelDef model_def; ReadProtoFromText(STANDARD_MODEL_DATA_PATH.c_str(), &model_def); @@ -113,9 +106,8 @@ class DModelListener : public ge::ModelListener { uint32_t OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t resultCode) { return 0; } }; -shared_ptr UTEST_CALL_BACK_FUN(new DModelListener()); -TEST_F(UtestModelManagerModelManager, case_load_incorrect_param) { +/*TEST_F(UtestModelManagerModelManager, case_load_incorrect_param) { ModelManager mm; uint32_t model_id = 0; ge::ModelData model; @@ -307,7 +299,7 @@ TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_fail) { } -/* +*//* // test GetInputOutputDescInfo fail TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_zero_copy_fail) { ModelManager manager; @@ -316,7 +308,7 @@ TEST_F(UtestModelManagerModelManager, get_input_output_desc_info_zero_copy_fail) vector output_shape; EXPECT_EQ(ge::PARAM_INVALID, manager.GetInputOutputDescInfoForZeroCopy(2, input_shape, output_shape)); } -*/ +*//* // test Stop TEST_F(UtestModelManagerModelManager, stop_fail) { @@ -347,6 +339,20 @@ TEST_F(UtestModelManagerModelManager, destroy_aicpu_session) { manager.sess_ids_.insert(0); manager.DestroyAicpuSession(0); +}*/ +// test DataInputTensor +TEST_F(UtestModelManagerModelManager, test_data_input_tensor) { + shared_ptr g_label_call_back(nullptr); + auto model = std::make_shared(0, g_label_call_back); + ModelManager mm; + uint32_t model_id = 1; + mm.model_map_[1] = model; + mm.hybrid_model_map_[1] = std::make_shared(); + + auto input_tensor = InputTensorInfo(); + vector inputs; + inputs.emplace_back(input_tensor); + auto ret = mm.DataInputTensor(model_id,inputs); + EXPECT_EQ(ge::UNSUPPORTED, ret); } - } // namespace ge diff --git a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc index 620fac09..f10ccd7f 100644 --- a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc @@ -30,7 +30,7 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include 
"common/op/ge_op_utils.h" #include diff --git a/tests/ut/ge/graph/load/new_op_test_utils.h b/tests/ut/ge/graph/load/new_op_test_utils.h index 4cbc78ac..984cbfb4 100644 --- a/tests/ut/ge/graph/load/new_op_test_utils.h +++ b/tests/ut/ge/graph/load/new_op_test_utils.h @@ -40,7 +40,7 @@ #define private public #include "graph/compute_graph.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/node.h" #include "graph/op_desc.h" #include "graph/utils/attr_utils.h" diff --git a/tests/ut/ge/graph/load/output_net_output_unittest.cc b/tests/ut/ge/graph/load/output_net_output_unittest.cc index ecd28fe3..97246dad 100644 --- a/tests/ut/ge/graph/load/output_net_output_unittest.cc +++ b/tests/ut/ge/graph/load/output_net_output_unittest.cc @@ -23,8 +23,8 @@ #define private public #include "common/debug/memory_dumper.h" #include "common/op/ge_op_utils.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "new_op_test_utils.h" #include "proto/om.pb.h" diff --git a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc index a98e14c6..82ffb388 100644 --- a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc +++ b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc @@ -18,7 +18,7 @@ #define protected public #define private public -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "runtime/kernel.h" #undef protected #undef private diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index b51908e2..d6af6de9 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -849,7 +849,7 @@ class VariableOpPassSimulator { if (variable_ref_node_format != FORMAT_NC1HWC0 || variable_ref_node_data_type != DT_FLOAT || variable_ref_node_shape.size() != 5) { GELOGI("wanted data format is (%d,%d,%u)", FORMAT_NC1HWC0, DT_FLOAT, 5); - GELOGI("variable_ref_node_format is (%d,%d,%u)", variable_ref_node_format, variable_ref_node_data_type, + GELOGI("variable_ref_node_format is (%d,%d,%zu)", variable_ref_node_format, variable_ref_node_data_type, variable_ref_node_shape.size()); std::cout << "var ref format not changed !" << std::endl; @@ -918,7 +918,7 @@ class VariableOpPassSimulator { if (variable_ref_node_format != FORMAT_NCHW || variable_ref_node_data_type != DT_INT32 || variable_ref_node_shape.size() != 4) { GELOGI("wanted data format is (%d,%d,%u)", FORMAT_NCHW, DT_INT32, 4); - GELOGI("variable_ref_node_format is (%d,%d,%u)", variable_ref_node_format, variable_ref_node_data_type, + GELOGI("variable_ref_node_format is (%d,%d,%zu)", variable_ref_node_format, variable_ref_node_data_type, variable_ref_node_shape.size()); std::cout << "var ref format not changed !" 
<< std::endl;
diff --git a/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc
new file mode 100644
index 00000000..2f149761
--- /dev/null
+++ b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc
@@ -0,0 +1,77 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include
+
+#include "common/ge_inner_error_codes.h"
+#include "common/types.h"
+#include "common/util.h"
+#include "graph/passes/graph_builder_utils.h"
+#include "graph/utils/attr_utils.h"
+#include "graph/debug/ge_attr_define.h"
+
+#define private public
+#define protected public
+#include "graph/preprocess/graph_preprocess.h"
+#include "ge/ge_api.h"
+#undef private
+#undef protected
+
+using namespace std;
+namespace ge {
+class UtestGraphPreproces : public testing::Test {
+ protected:
+  void SetUp() {
+  }
+  void TearDown() {
+  }
+};
+
+ComputeGraphPtr BuildGraph1(){
+  auto builder = ut::GraphBuilder("g1");
+  auto data1 = builder.AddNode("data1",DATA,1,1);
+  auto data_opdesc = data1->GetOpDesc();
+  AttrUtils::SetInt(data_opdesc, ATTR_NAME_INDEX, 0);
+  data1->UpdateOpDesc(data_opdesc);
+  return builder.GetGraph();
+}
+
+TEST_F(UtestGraphPreproces, test_dynamic_input_shape_parse) {
+  ge::GraphPrepare graph_prepare;
+  graph_prepare.compute_graph_ = BuildGraph1();
+  // prepare user_input & graph option
+  ge::GeTensorDesc tensor1;
+  tensor1.SetFormat(ge::FORMAT_NCHW);
+  tensor1.SetShape(ge::GeShape({3, 12, 5, 5}));
+  tensor1.SetDataType(ge::DT_FLOAT);
+  GeTensor input1(tensor1);
+  std::vector<GeTensor> user_input = {input1};
+  std::map<string, string> graph_option = {{"ge.exec.dynamicGraphExecuteMode","dynamic_execute"},
+                                           {"ge.exec.dataInputsShapeRange","[3,1~20,2~10,5]"}};
+  auto ret = graph_prepare.UpdateInput(user_input, graph_option);
+  EXPECT_EQ(ret, ge::SUCCESS);
+  // check data node output shape_range and shape
+  auto data_node = graph_prepare.compute_graph_->FindNode("data1");
+  auto data_output_desc = data_node->GetOpDesc()->GetOutputDescPtr(0);
+  vector<int64_t> expect_shape = {3,-1,-1,5};
+  auto result_shape = data_output_desc->GetShape();
+  EXPECT_EQ(result_shape.GetDimNum(), expect_shape.size());
+  for(size_t i =0; i< expect_shape.size(); ++i){
+    EXPECT_EQ(result_shape.GetDim(i), expect_shape.at(i));
+  }
+}
+}
\ No newline at end of file
diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc
index b6b97d89..ab909e11 100644
--- a/tests/ut/ge/single_op/single_op_model_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_model_unittest.cc
@@ -18,7 +18,7 @@
 #include <gtest/gtest.h>
 
 //#include "cce/taskdown_common.hpp"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "graph/utils/graph_utils.h"
 #include "runtime/rt.h"
 
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h
index 7fbe9eb4..5b246eed 100644
--- a/third_party/fwkacllib/inc/runtime/base.h
+++
b/third_party/fwkacllib/inc/runtime/base.h @@ -112,6 +112,12 @@ typedef void *rtEvent_t; */ typedef void *rtLabel_t; +/** + * @ingroup dvrt_base + * @brief model handle. + */ +typedef void *rtModel_t; + /** * @ingroup profiling_base * @brief runtime handle. @@ -217,6 +223,16 @@ typedef void *rtNotify_t; */ RTS_API rtError_t rtLabelCreate(rtLabel_t *label); +/** + * @ingroup dvrt_base + * @brief create label instance + * @param [out] label created label + * @param [in] model label set model + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model); + /** * @ingroup dvrt_base * @brief set label and stream instance @@ -314,6 +330,17 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d */ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); +/** + * @ingroup dvrt_base + * @brief labels to dev info + * @param [out] label created label handle + * @param [in] model label bind model + * @param [in] stream label bind stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); + /** * @ingroup dvrt_base * @brief get current thread last stream id and task id diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index d3eadd59..dc16ca58 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -376,7 +376,6 @@ RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kern const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); -typedef void *rtModel_t; /** * @ingroup rt_kernel * @brief L1 fusion dump addr transfered to device diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index b72b142d..482486a8 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -278,7 +278,6 @@ typedef struct tagLabelDevInfo_t { uint16_t labelId; }rtLabelDevInfo; -typedef void *rtModel_t; typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); /** diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h index b642cbc8..bef5c05d 100644 --- a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h +++ b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h @@ -15,23 +15,23 @@ extern "C" { struct SoftDpProcsessInfo { - uint8_t* inputBuffer; - uint32_t inputBufferSize; + uint8_t* inputBuffer; + uint32_t inputBufferSize; - uint8_t* outputBuffer; - uint32_t outputBufferSize; + uint8_t* outputBuffer; + uint32_t outputBufferSize; - uint32_t outputWidth; - uint32_t outputHeight; + uint32_t outputWidth; + uint32_t outputHeight; - uint32_t reserved; + uint32_t reserved; }; struct DpCropInfo { - uint32_t left; - uint32_t right; - uint32_t up; - uint32_t down; + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; }; /* @@ -49,4 +49,4 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); */ uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); } -#endif // EXTERNALSOFTDP_H +#endif // EXTERNALSOFTDP_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index 7c4f7be2..683dabf1 100644 
--- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -381,13 +381,13 @@ DLL_EXPORT void DlogFlush(void); * @ingroup slog * @brief Internal log interface, other modules are not allowed to call this interface */ -void DlogErrorInner(int moduleId, const char *fmt, ...); -void DlogWarnInner(int moduleId, const char *fmt, ...); -void DlogInfoInner(int moduleId, const char *fmt, ...); -void DlogDebugInner(int moduleId, const char *fmt, ...); -void DlogEventInner(int moduleId, const char *fmt, ...); -void DlogInner(int moduleId, int level, const char *fmt, ...); -void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); +void DlogErrorInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogWarnInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogInfoInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogDebugInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogEventInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +void DlogInner(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); +void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); #ifdef __cplusplus #ifndef LOG_CPP @@ -500,8 +500,8 @@ DLL_EXPORT void DlogFlushForC(void); * @ingroup slog * @brief Internal log interface, other modules are not allowed to call this interface */ -void DlogInnerForC(int moduleId, int level, const char *fmt, ...); -void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); +void DlogInnerForC(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); +void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); #ifdef __cplusplus }
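A note on the slog.h hunk above: the `__attribute__((format(printf, N, M)))` annotations tell gcc/clang which parameter is the printf-style format string (N) and where the variadic arguments begin (M), so format-string/argument mismatches, like the `%u` vs `%zu` and `%ld` vs `%llu` fixes made elsewhere in this change, are flagged at compile time under -Wformat. The following is a minimal standalone sketch of the mechanism, not part of the patch; demo_log is a hypothetical stand-in for DlogErrorInner and friends.

#include <cstdarg>
#include <cstdio>

// fmt is parameter 2 and the variadic arguments start at parameter 3,
// mirroring DlogErrorInner(int moduleId, const char *fmt, ...).
void demo_log(int module_id, const char *fmt, ...) __attribute__((format(printf, 2, 3)));

void demo_log(int module_id, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  std::fprintf(stderr, "[module %d] ", module_id);  // prefix with the module id
  std::vfprintf(stderr, fmt, args);                 // forward the varargs to vfprintf
  va_end(args);
}

int main() {
  size_t shape_size = 5;
  demo_log(0, "shape size is %zu\n", shape_size);    // OK: %zu matches size_t
  // demo_log(0, "shape size is %u\n", shape_size);  // with the attribute, the compiler warns here
  return 0;
}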