
Merge branch 'master' of gitee.com:mindspore/graphengine into master

wangjiming · 4 years ago
commit 1ac3a89866
100 changed files with 1773 additions and 1859 deletions
  1. +0 -10  CMakeLists.txt
  2. +12 -11  build.sh
  3. +0 -2  ge/CMakeLists.txt
  4. +30 -0  ge/client/ge_api.cc
  5. +1 -1  ge/common/CMakeLists.txt
  6. +1 -0  ge/common/ge/plugin_manager.cc
  7. +4 -11  ge/common/helper/model_cache_helper.cc
  8. +3 -9  ge/common/helper/model_helper.cc
  9. +3 -2  ge/common/helper/om_file_helper.cc
  10. +4 -7  ge/common/model_parser/model_parser.cc
  11. +0 -0  ge/common/model_parser/model_parser.h
  12. +121 -112  ge/common/profiling/profiling_manager.cc
  13. +6 -5  ge/common/profiling/profiling_manager.h
  14. +1 -3  ge/executor/CMakeLists.txt
  15. +14 -12  ge/executor/ge_executor.cc
  16. +14 -10  ge/generator/ge_generator.cc
  17. +20 -9  ge/graph/build/memory/graph_mem_assigner.cc
  18. +3 -2  ge/graph/build/memory/graph_mem_assigner.h
  19. +2 -2  ge/graph/build/task_generator.cc
  20. +0 -4  ge/graph/execute/graph_execute.cc
  21. +3 -11  ge/graph/load/graph_loader.cc
  22. +2 -2  ge/graph/load/graph_loader.h
  23. +117 -200  ge/graph/load/model_manager/davinci_model.cc
  24. +3 -3  ge/graph/load/model_manager/davinci_model.h
  25. +0 -23  ge/graph/load/model_manager/davinci_model_parser.cc
  26. +0 -46  ge/graph/load/model_manager/davinci_model_parser.h
  27. +2 -10  ge/graph/load/model_manager/model_manager.cc
  28. +9 -5  ge/graph/manager/graph_caching_allocator.cc
  29. +4 -4  ge/graph/manager/graph_caching_allocator.h
  30. +22 -18  ge/graph/manager/graph_manager.cc
  31. +3 -0  ge/graph/manager/graph_manager.h
  32. +1 -1  ge/graph/manager/graph_mem_allocator.h
  33. +3 -3  ge/graph/passes/assign_remove_pass.cc
  34. +2 -4  ge/graph/passes/constant_folding_pass.cc
  35. +19 -6  ge/graph/passes/flow_ctrl_pass.cc
  36. +3 -0  ge/graph/passes/flow_ctrl_pass.h
  37. +13 -9  ge/graph/passes/hccl_continuous_memcpy_pass.cc
  38. +3 -3  ge/graph/passes/inplace_support_check_pass.cc
  39. +1 -1  ge/graph/passes/net_output_pass.cc
  40. +33 -6  ge/graph/passes/no_use_reshape_remove_pass.cc
  41. +3 -0  ge/graph/passes/no_use_reshape_remove_pass.h
  42. +1 -3  ge/graph/passes/prune_pass.cc
  43. +1 -1  ge/graph/passes/reshape_remove_pass.cc
  44. +1 -1  ge/graph/passes/subgraph_const_migration_pass.cc
  45. +5 -4  ge/graph/preprocess/graph_preprocess.cc
  46. +3 -3  ge/graph/preprocess/graph_preprocess.h
  47. +1 -1  ge/graph/preprocess/multi_batch_options.h
  48. +37 -1  ge/host_kernels/slice_kernel.cc
  49. +2 -2  ge/hybrid/executor/hybrid_execution_context.h
  50. +4 -2  ge/hybrid/executor/hybrid_model_async_executor.cc
  51. +2 -1  ge/hybrid/executor/hybrid_model_pipeline_executor.cc
  52. +1 -1  ge/hybrid/executor/node_state.h
  53. +2 -2  ge/hybrid/executor/subgraph_executor.cc
  54. +9 -53  ge/hybrid/executor/worker/execution_engine.cc
  55. +3 -1  ge/hybrid/model/hybrid_model_builder.cc
  56. +2 -3  ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  57. +158 -28  ge/hybrid/node_executor/aicore/aicore_op_task.cc
  58. +34 -0  ge/hybrid/node_executor/aicore/aicore_op_task.h
  59. +2 -3  ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  60. +11 -34  ge/hybrid/node_executor/task_context.cc
  61. +3 -6  ge/hybrid/node_executor/task_context.h
  62. +6 -6  ge/offline/main.cc
  63. +3 -2  ge/session/omg.cc
  64. +7 -23  ge/single_op/single_op.cc
  65. +6 -3  ge/single_op/single_op_manager.cc
  66. +4 -2  ge/single_op/single_op_manager.h
  67. +13 -9  ge/single_op/single_op_model.cc
  68. +1 -2  ge/single_op/single_op_model.h
  69. +53 -15  ge/single_op/task/op_task.cc
  70. +12 -4  ge/single_op/task/op_task.h
  71. +144 -44  ge/single_op/task/tbe_task_builder.cc
  72. +32 -1  ge/single_op/task/tbe_task_builder.h
  73. +4 -0  inc/external/ge/ge_api.h
  74. +5 -13  inc/framework/common/ge_types.h
  75. +6 -0  inc/framework/executor/ge_executor.h
  76. +2 -1  inc/framework/generator/generator_api.h
  77. +2 -1  inc/framework/memory/memory_api.h
  78. +1 -1  metadef
  79. +1 -1  parser
  80. +0 -1  tests/CMakeLists.txt
  81. +5 -0  tests/depends/mmpa/CMakeLists.txt
  82. +7 -3  tests/depends/mmpa/src/mmpa_stub.cc
  83. +0 -59  tests/depends/omg/CMakeLists.txt
  84. +0 -878  tests/depends/omg/src/omg_stub.cc
  85. +12 -4  tests/depends/runtime/src/runtime_stub.cc
  86. +44 -28  tests/ut/ge/CMakeLists.txt
  87. +42 -0  tests/ut/ge/executor/ge_executor_unittest.cc
  88. +78 -0  tests/ut/ge/generator/ge_generator_unittest.cc
  89. +95 -32  tests/ut/ge/graph/build/mem_assigner_unittest.cc
  90. +20 -1  tests/ut/ge/graph/ge_executor_unittest.cc
  91. +0 -1  tests/ut/ge/graph/graph_load_unittest.cc
  92. +7 -0  tests/ut/ge/graph/load/davinci_model_unittest.cc
  93. +0 -1  tests/ut/ge/graph/load/model_manager_unittest.cc
  94. +1 -2  tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc
  95. +87 -0  tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc
  96. +113 -0  tests/ut/ge/hybrid/ge_hybrid_unittest.cc
  97. +28 -4  tests/ut/ge/single_op/single_op_model_unittest.cc
  98. +117 -0  tests/ut/ge/single_op/single_op_task_unittest.cc
  99. +35 -0  third_party/fwkacllib/inc/runtime/kernel.h
  100. +13 -0  third_party/fwkacllib/inc/runtime/rt_model.h

+0 -10  CMakeLists.txt

@@ -76,9 +76,7 @@ if (ENABLE_OPEN_SRC)
find_module(runtime libruntime.so ${GE_LIB_PATH})
find_module(runtime_compile libruntime_compile.so ${GE_LIB_PATH})
find_module(resource libresource.so ${GE_LIB_PATH})
find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
@@ -86,11 +84,9 @@ if (ENABLE_OPEN_SRC)
else()
find_module(slog libalog.so ${ASCEND_ATC_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
if(PRODUCT STREQUAL "flr3")
@@ -100,8 +96,6 @@ if (ENABLE_OPEN_SRC)
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
elseif(PRODUCT STREQUAL "flr1")
@@ -114,11 +108,9 @@ if (ENABLE_OPEN_SRC)
elseif(PLATFORM STREQUAL "all")
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
else()
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
@@ -144,7 +136,6 @@ elseif (ENABLE_D OR ENABLE_ACL)

# common libraries
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

if (ENABLE_D)
@@ -164,7 +155,6 @@ elseif(ENABLE_MS_TESTCASES)

# common libraries
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)


+12 -11  build.sh

@@ -76,8 +76,8 @@ checkopts()
ENABLE_GE_ST="on"
;;
t)
ENABLE_GE_UT="on"
;;
ENABLE_GE_UT="on"
;;
c)
ENABLE_GE_COV="on"
;;
@@ -185,7 +185,7 @@ build_graphengine()
# build all the target
TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}"
fi
make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install
if [ $? -ne 0 ]
then
@@ -214,13 +214,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH}
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH}

${OUTPUT_PATH}/ut_libgraph &&
${OUTPUT_PATH}/ut_libge_multiparts_utest &&
${OUTPUT_PATH}/ut_libge_distinct_load_utest &&
${OUTPUT_PATH}/ut_libge_others_utest &&
${OUTPUT_PATH}/ut_libge_kernel_utest
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libgraph && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_multiparts_utest && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_distinct_load_utest && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_others_utest && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_kernel_utest && ${RUN_TEST_CASE}
if [[ "$?" -ne 0 ]]; then
echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!"
echo -e "\033[31m${RUN_TEST_CASE}\033[0m"
exit 1;
fi
echo "Generating coverage statistics, please wait..."
@@ -249,8 +250,8 @@ generate_package()
NNENGINE_PATH="plugin/nnengine/ge_config"
OPSKERNEL_PATH="plugin/opskernel"

ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so")
FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so")
ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so")
FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so")
PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt")
PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so")

@@ -269,7 +270,7 @@ generate_package()
mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}"
mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
cd "${OUTPUT_PATH}"

find ./ -name graphengine_lib.tar -exec rm {} \;


+0 -2  ge/CMakeLists.txt

@@ -133,7 +133,6 @@ set(TRAIN_SRC_LIST
"graph/load/model_manager/data_dumper.cc"
"graph/load/model_manager/data_inputer.cc"
"graph/load/model_manager/davinci_model.cc"
"graph/load/model_manager/davinci_model_parser.cc"
"graph/load/model_manager/model_manager.cc"
"graph/load/model_manager/model_utils.cc"
"graph/load/model_manager/aipp_utils.cc"
@@ -613,7 +612,6 @@ set(INFER_SRC_LIST
"graph/load/model_manager/model_manager.cc"
"graph/load/model_manager/data_inputer.cc"
"graph/load/model_manager/davinci_model.cc"
"graph/load/model_manager/davinci_model_parser.cc"
"graph/load/model_manager/model_utils.cc"
"graph/load/model_manager/aipp_utils.cc"
"graph/load/model_manager/tbe_handle_store.cc"


+30 -0  ge/client/ge_api.cc

@@ -32,6 +32,7 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#include "common/util/error_manager/error_manager.h"
#include "toolchain/plog.h"

using domi::OpRegistry;
@@ -79,6 +80,8 @@ Status CheckOptionsValid(const std::map<string, string> &options) {
// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitializeImpl(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "GEInitialize start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// 0.check init status
if (g_ge_initialized) {
GELOGW("GEInitialize is called more than once");
@@ -157,6 +160,8 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
// GE finalize, releasing all resources
Status GEFinalize() {
GELOGT(TRACE_INIT, "GEFinalize start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
if (!g_ge_initialized) {
GELOGW("GEFinalize is called before GEInitialize");
@@ -202,9 +207,19 @@ Status GEFinalize() {
return ret;
}

std::string GEGetErrorMsg() {
return ErrorManager::GetInstance().GetErrorMessage();
}

std::string GEGetWarningMsg() {
return ErrorManager::GetInstance().GetWarningMessage();
}

// Initialize session,which calls innerSession
Session::Session(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "Session Constructor start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
sessionId_ = 0;
if (!g_ge_initialized) {
@@ -235,6 +250,8 @@ Session::Session(const std::map<string, string> &options) {

Session::Session(const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Session Constructor start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
sessionId_ = 0;
if (!g_ge_initialized) {
@@ -311,11 +328,13 @@ Session::~Session() {

Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
std::map<std::string, std::string> options;
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
return AddGraph(graph_id, graph, options);
}

Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -334,6 +353,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<s
Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -360,6 +380,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
}

Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::map<AscendString, AscendString> options;
return AddGraphWithCopy(graph_id, graph, options);
}
@@ -367,6 +388,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -389,6 +411,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
Status Session::RemoveGraph(uint32_t graph_id) {
GELOGT(TRACE_INIT, "Session RemoveGraph start");

ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
// call RemoveGraph
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (!instance_ptr || !instance_ptr->InitFlag()) {
@@ -457,6 +480,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) {
Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) {
GELOGT(TRACE_INIT, "Session RunGraph start");

ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::vector<Tensor> graph_inputs = inputs;
// call RunGraph
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
@@ -483,10 +507,12 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, s
}

Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
}

Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
std::string str_key;
if (key != nullptr) {
str_key = key;
@@ -495,6 +521,7 @@ Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFu
}

Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -511,6 +538,7 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo>

Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs,
RunAsyncCallback callback) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -529,6 +557,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn
}

Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -544,6 +573,7 @@ Status Session::GetVariables(const std::vector<std::string> &var_names, std::vec
}

Status Session::GetVariables(const std::vector<AscendString> &var_names, std::vector<Tensor> &var_values) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");


+1 -1  ge/common/CMakeLists.txt

@@ -54,7 +54,7 @@ set(SRC_LIST
"util.cc"
"properties_manager.cc"
"types.cc"
"model_parser/base.cc"
"model_parser/model_parser.cc"
"kernel_store.cc"
"tbe_kernel_store.cc"
"cust_aicpu_kernel_store.cc"


+1 -0  ge/common/ge/plugin_manager.cc

@@ -53,6 +53,7 @@ string PluginManager::GetPath() {
GELOGW("Failed to read the shared library file path!");
return string();
} else {
GE_IF_BOOL_EXEC(dl_info.dli_fname == nullptr, return string());
std::string so_path = dl_info.dli_fname;
char path[MMPA_MAX_PATH] = {0};
if (so_path.length() >= MMPA_MAX_PATH) {


+4 -11  ge/common/helper/model_cache_helper.cc

@@ -14,22 +14,15 @@
* limitations under the License.
*/

#include <climits>
#include "common/helper/model_cache_helper.h"

#include <cstdio>
#include <fstream>
#include <functional>

#include "common/ge/ge_util.h"
#include "common/helper/model_cache_helper.h"
#include "common/types.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_types.h"
#include "common/model_parser/model_parser.h"
#include "framework/common/helper/model_helper.h"
#include "framework/common/util.h"
#include "graph/detail/attributes_holder.h"
#include "graph/detail/model_serialize_imp.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/model.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"
#include "init/gelib.h"
@@ -1682,7 +1675,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const {
string key_path;
int32_t priority = 0;
ModelData model_data;
ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
ret = ModelParserBase::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
if (ret != SUCCESS) {
GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret);
return ret;


+3 -9  ge/common/helper/model_helper.cc

@@ -16,16 +16,10 @@

#include "framework/common/helper/model_helper.h"

#include "common/ge/ge_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/log.h"
#include "framework/common/util.h"
#include "framework/common/debug/ge_log.h"
#include "common/model_parser/model_parser.h"
#include "framework/omg/model_tool.h"
#include "framework/omg/version.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"

using std::string;
@@ -465,7 +459,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
return ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA;
}

Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
if (status != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
return ACL_ERROR_GE_PARAM_INVALID;
@@ -514,7 +508,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
return INTERNAL_ERROR;
}

Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
if (status != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
return ACL_ERROR_GE_PARAM_INVALID;


+3 -2  ge/common/helper/om_file_helper.cc

@@ -165,7 +165,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
return ACL_ERROR_GE_PARAM_INVALID;
}
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
GELOGD("ModelPartitionTable num:%u, ModelFileHeader length:%zu, ModelPartitionTable length:%zu",
partition_table->num, sizeof(ModelFileHeader), mem_offset);
if (model_data_size <= mem_offset) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
@@ -207,7 +207,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
"ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
if (model_data_size <= cur_offset) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID,
"invalid model data, partition_table->num:%u, model data size %u",
partition_table->num, model_data_size);
return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
}


ge/common/model_parser/base.cc → ge/common/model_parser/model_parser.cc

@@ -14,16 +14,13 @@
* limitations under the License.
*/

#include "common/model_parser/base.h"
#include "common/helper/model_helper.h"
#include <securec.h>
#include "common/model_parser/model_parser.h"

#include <fstream>
#include <memory>
#include <string>

#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/util.h"
#include "securec.h"
#include "common/helper/model_helper.h"

namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelParserBase::ModelParserBase() {}

ge/common/model_parser/base.h → ge/common/model_parser/model_parser.h


+121 -112  ge/common/profiling/profiling_manager.cc

@@ -20,6 +20,8 @@
#include "framework/common/debug/log.h"
#include "framework/common/string_util.h"
#include "graph/ge_context.h"
#include "graph/utils/type_utils.h"
#include "graph/types.h"
#include "runtime/base.h"
#include "graph/load/model_manager/davinci_model.h"

@@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point";
#ifdef DAVINCI_SUPPORT_PROFILING
const size_t kReportMaxLen = 2048;
const int32_t kMaxDeviceNum = 256;
const uint32_t kInteval = 2;
const std::string kConfigNumsdev = "devNums";
const std::string kConfigDevIdList = "devIdList";
const std::string kProfStart = "prof_start";
const std::string kProfStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
const std::string kModelName = "model_name";
const std::string kModelId = "model_id";
const std::string kOpNmae = "op_name";
const std::string kOptype = "op_type";
const std::string kBlockDim = "block_dims";
const std::string kTaskId = "task_id";
const std::string kStreamId = "stream_id";
const std::string kShapeType = "shape_type";
const std::string kCurIterNum = "cur_iter_num";
const std::string kTaskType = "task_type";
const std::string kInput = "input";
const std::string kOutput = "output";
const std::string kFormat = "format";
const std::string kDataType = "data_type";
const std::string kShape = "shape";
const std::string kIdx = "idx";

#endif
} // namespace

@@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo(
const TaskDescInfo &task, Json &task_json) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::string data;
for (const auto &task : task_desc_info) {
std::string model_name = task.model_name;
std::string op_name = task.op_name;
uint32_t block_dim = task.block_dim;
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
uint32_t task_type = task.task_type;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append(" ")
.append(std::to_string(task_type)).append("\n");

ReporterData reporter_data{};
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info"));
if (ret != EOK) {
GELOGE(ret, "Report data tag of task_desc_info memcpy error!");
return;
}

int32_t cb_ret = CallMsprofReport(reporter_data);
if (cb_ret != 0) {
GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret);
return;
}
for (size_t i = 0; i < task.input_format.size(); i++) {
Json tmp_input;
tmp_input[kIdx] = i;
Format format = task.input_format[i];
tmp_input[kFormat] = TypeUtils::FormatToSerialString(format);
DataType data_type = task.input_data_type[i];
tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
tmp_input[kShape] = task.input_shape[i];
task_json[kInput] += tmp_input;
}

for (size_t i = 0; i < task.output_format.size(); i++) {
Json tmp_output;
tmp_output[kIdx] = i;
Format format = task.output_format[i];
tmp_output[kFormat] = TypeUtils::FormatToSerialString(format);
DataType data_type = task.output_data_type[i];
tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
tmp_output[kShape] = task.output_shape[i];
task_json[kOutput] += tmp_output;
}

data.clear();
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::string data;
for (const auto &graph : compute_graph_desc_info) {
data.append("model_name:")
.append(graph.model_name)
.append(" op_name:")
.append(graph.op_name)
.append(" op_type:")
.append(graph.op_type);
for (size_t i = 0; i < graph.input_format.size(); ++i) {
data.append(" input_id:")
.append(std::to_string(i))
.append(" input_format:")
.append(std::to_string(graph.input_format.at(i)))
.append(" input_data_type:")
.append(std::to_string(graph.input_data_type.at(i)))
.append(" input_shape:\"");
size_t input_shape_len = graph.input_shape.at(i).size();
if (input_shape_len == 0) {
data.append("");
} else if (input_shape_len == 1) {
data.append(std::to_string(graph.input_shape.at(i).at(0)));
} else {
for (size_t j = 0; j < input_shape_len - 1; ++j) {
data.append(std::to_string(graph.input_shape.at(i).at(j))).append(",");
}
data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1)));
}

data.append("\"");
}

for (size_t i = 0; i < graph.output_format.size(); ++i) {
data.append(" output_id:")
.append(std::to_string(i))
.append(" output_format:")
.append(std::to_string(graph.output_format.at(i)))
.append(" output_data_type:")
.append(std::to_string(graph.output_data_type.at(i)))
.append(" output_shape:\"");
size_t output_shape_len = graph.output_shape.at(i).size();
if (output_shape_len == 0) {
data.append("");
} else if (output_shape_len == 1) {
data.append(std::to_string(graph.output_shape.at(i).at(0)));
} else {
for (size_t j = 0; j < output_shape_len - 1; ++j) {
data.append(std::to_string(graph.output_shape.at(i).at(j))).append(",");
}
data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1)));
}
data.append("\"");
for (const auto &task : task_desc_info) {
Json task_info;
task_info[kModelName] = task.model_name;
task_info[kModelId] = model_id;
task_info[kOpNmae] = task.op_name;
task_info[kOptype] = task.op_type;
task_info[kBlockDim] = task.block_dim;
task_info[kTaskType] = task.task_type;
task_info[kTaskId] = task.task_id;
task_info[kStreamId] = task.stream_id;
task_info[kCurIterNum] = task.cur_iter_num;
task_info[kShapeType] = task.shape_type;
ProfilingOpInputOutInfo(task, task_info);

std::string reported_data;
try {
reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
return ;
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
return;
}

data.append(" model_id:").append(std::to_string(model_id));
data.append(" task_id:").append(std::to_string(graph.task_id));
data.append(" stream_id:").append(std::to_string(graph.stream_id));
data.append("\n");

GraphDescReport(device_id, data);
data.clear();
reported_data.append(",")
.append("\n");
ReportData(device_id, reported_data, "task_desc_info");
}
#endif
}

void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData(
const int32_t &device_id, const string &data, const string &tag_name) {
#ifdef DAVINCI_SUPPORT_PROFILING
ReporterData reporter_data{};
int ret = -1;
@@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d
size_t index = data.size() / kReportMaxLen;
if (index >= 1) {
reporter_data.deviceId = device_id;
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);
for (size_t i = 0; i < index; ++i) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i;
reporter_data.dataLen = kReportMaxLen;
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
reporter_data.dataLen = data.size() - kReportMaxLen * index;
if (reporter_data.dataLen != 0) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index;
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
} else {
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);

cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) {
#ifdef DAVINCI_SUPPORT_PROFILING
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
GELOGD("current logic_device_id:%d", logic_device_id);
GELOGD("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
GELOGD("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
GELOGD("Report profiling data for GE end.");
#endif
}
@@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs
static_cast<void *>(&reporter_data), sizeof(ReporterData));
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo(
const OpDescPtr &op, TaskDescInfo &task_desc_info) const {
std::vector<Format> input_format;
std::vector<std::vector<int64_t>> input_shape;
std::vector<DataType> input_data_type;
for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
if (input_tensor_desc == nullptr) {
continue;
}
input_format.emplace_back(input_tensor_desc->GetFormat());
input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
input_data_type.emplace_back(input_tensor_desc->GetDataType());
}
std::vector<Format> output_format;
std::vector<std::vector<int64_t>> output_shape;
std::vector<DataType> output_data_type;
for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
if (output_tensor_desc == nullptr) {
continue;
}
output_format.emplace_back(output_tensor_desc->GetFormat());
output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
output_data_type.emplace_back(output_tensor_desc->GetDataType());
}

std::vector<Format> format_default = { FORMAT_NULL };
std::vector<std::vector<int64_t>> shape_default = { {0} };
std::vector<DataType> data_type_default = { DT_UNDEFINED };
task_desc_info.input_format = input_format.empty() ? format_default : input_format;
task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape;
task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type;
task_desc_info.output_format = output_format.empty() ? format_default : output_format;
task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape;
task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint(
std::string &fp_point, std::string &bp_point) {
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init


+6 -5  ge/common/profiling/profiling_manager.h

@@ -54,6 +54,8 @@ namespace {

} // namespace
namespace ge {
class OpDesc;
using OpDescPtr = std::shared_ptr<OpDesc>;
struct DeviceSubsInfo {
uint64_t module;
uint32_t subscribe_count;
@@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
bool ProfilingModelExecuteOn() const;
// is_execute_profiling_ only used by ge option and env
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info);
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const int32_t &device_id);
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
const int32_t &device_id);
void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json);
Status PluginInit() const;
void PluginUnInit() const;
Status CallMsprofReport(ReporterData &reporter_data) const;
@@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name);
private:
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
Status ParseOptions(const std::string &options);
@@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
vector<int32_t> &device_list);
uint64_t GetProfilingModule();
void GraphDescReport(const int32_t &device_id, const string &data);
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list);
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);



+1 -3  ge/executor/CMakeLists.txt

@@ -33,7 +33,6 @@ set(SRC_LIST
"../model/ge_model.cc"
"../model/ge_root_model.cc"
"../graph/load/model_manager/davinci_model.cc"
"../graph/load/model_manager/davinci_model_parser.cc"
"../graph/load/model_manager/model_manager.cc"
"../graph/load/model_manager/tbe_handle_store.cc"
"../graph/load/model_manager/cpu_queue_schedule.cc"
@@ -250,15 +249,14 @@ target_link_options(ge_executor_shared PRIVATE
target_link_libraries(ge_executor_shared PRIVATE
$<BUILD_INTERFACE:intf_pub>
msprofiler
static_mmpa
-Wl,--no-as-needed
ge_common
runtime
slog
mmpa
graph
register
error_manager
ascend_hal_stub
ascend_protobuf
c_sec
-Wl,--as-needed


+14 -12  ge/executor/ge_executor.cc

@@ -16,7 +16,6 @@

#include "executor/ge_executor.h"
#include <cce/cce.h>
#include <cce/compiler_stub.h>
#include <ctime>
#include <iostream>
#include "common/debug/log.h"
@@ -24,19 +23,11 @@
#include "common/helper/model_helper.h"
#include "common/profiling/profiling_manager.h"
#include "common/dump/dump_manager.h"
#include "common/util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/execute/graph_execute.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/model.h"
#include "graph/utils/graph_utils.h"
#include "mmpa/mmpa_api.h"
#include "single_op/single_op_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"

@@ -454,7 +445,8 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &
if (all_data_dims[i] < 0) {
cur_dynamic_dims.push_back(dynamic_dims[i]);
} else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) {
GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld",
GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID,
"Static dims should be same, index: %zu value: %lu should be %ld",
i, dynamic_dims[i], all_data_dims[i]);
return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
}
@@ -930,12 +922,22 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size

Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op);
return LoadSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op);
return LoadDynamicSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,


+14 -10  ge/generator/ge_generator.cc

@@ -147,7 +147,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi
return FAILED;
}

static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index,
static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index,
bool attr) {
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
@@ -671,6 +671,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor>
Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline) {
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
if (!is_offline) {
(void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true);
}
@@ -709,8 +711,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GELOGI("ATC parser success in single op build.");

GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model));
map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs();
GE_CHECK_NOTNULL(ge_root_model);
@@ -723,7 +723,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph();
GeModelPtr &ge_model = name_to_ge_model.begin()->second;
GE_CHK_STATUS_RET_NOLOG(CheckDynamicSupport(ge_model, root_graph));
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
GELOGI("After build model, The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());

bool all_shape = false;
(void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape);
@@ -738,6 +738,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
} else {
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
}
GELOGI("Start save GeModel to Model buffer");
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff));
return SUCCESS;
}
@@ -753,10 +754,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
*/
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, const string &model_file_name) {
GELOGI("Start to build single op offline model.");
GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size());
ModelBufferData model_buff;
OpEngineType engine_type = ENGINE_SYS;
return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
GELOGI("Finish build single offline model, status: %u", status);
return status;
}

/**
@@ -772,8 +775,10 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type,
ModelBufferData &model_buff) {
GELOGI("Start to build single op online");
return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size());
Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
GELOGI("Finish build single online model, status: %u", status);
return status;
}

Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
@@ -798,8 +803,7 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor
}
} else {
for (const auto &in_desc : inputs) {
GeTensorDesc input_desc = in_desc.GetTensorDesc();
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true));
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true));
arg_index++;
}
}


+20 -9  ge/graph/build/memory/graph_mem_assigner.cc

@@ -157,8 +157,8 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
}

ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
int64_t dim_index, int64_t &output_mem_size,
int64_t &batch_dim_num, int64_t &out_size) {
int64_t dim_index, int64_t &output_mem_size,
int64_t &batch_dim_num, int64_t &out_size) {
graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
if (graph_status != GRAPH_SUCCESS) {
GELOGE(FAILED, "Opdesc GetSize failed!");
@@ -430,7 +430,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str());
return FAILED;
}
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second),
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
"Assign node %s continuous input memory failed.", node->GetName().c_str())
}
for (auto pair : memory_offset_) {
@@ -441,7 +441,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
}

Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) {
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
auto iter = memory_offset_.find(memory_type);
if (iter == memory_offset_.end()) {
@@ -508,12 +508,16 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
std::map<int32_t, int32_t> out2ins;
GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str());
// output is beginning offset, set offset for input; only support this case now
if (out2ins.size() == 1 && out2ins.begin()->second == 0) {
if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
peer_op_desc->SetOutputOffset(output_list);
GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
output_list_this.at(out2ins.begin()->first), peer_output_offset);
} else {
GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first,
out2ins.begin()->second, out2ins.size());
GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(),
out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
}
// first input is beginning offset
mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
@@ -1535,6 +1539,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int3
bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
if (in_node->GetType() == VARIABLE) {
GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(),
in_node->GetName().c_str());
return true;
}
auto iter = node_2_continuous_type.find(in_node);
// In node's topo order in the front, so function can not be exception
auto continuous_type = iter->second;
@@ -1560,13 +1569,15 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
}

ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
uint32_t continuous_type) {
uint32_t continuous_type,
bool reverse_refresh) {
int64_t mem_clean_start = 0;
int64_t mem_clean_size = 0;
int64_t memory_type = RT_MEMORY_HBM;

GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed.");
auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type);
auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
continuous_type, reverse_refresh);
if (ret != ge::SUCCESS) {
GELOGE(ret, "Assign continuous input memory failed!");
return ret;


+3 -2  ge/graph/build/memory/graph_mem_assigner.h

@@ -131,13 +131,14 @@ class GraphMemoryAssigner {
std::map<NodePtr, uint32_t> &node_2_continuous_type);

ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
uint32_t continuous_type);
uint32_t continuous_type, bool reverse_refresh=false);

ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
map<string, vector<NodePtr>> &connecting_output_atomic_nodes);

ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type);
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type,
bool reverse_refresh = false);

ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type);



+2 -2  ge/graph/build/task_generator.cc

@@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi

// subgraph of dynamic graph no need to find index, has been found in parent graph
if (IsSubGraphOfDynamicGraph(graph)) {
GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str());
GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str());
return SUCCESS;
}

@@ -1042,7 +1042,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
}
GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
profiling_point.end_index.size() );
profiling_point.end_index.size());

bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {


+0 -4  ge/graph/execute/graph_execute.cc

@@ -19,12 +19,8 @@
#include <memory>
#include <string>

#include "common/ge_inner_error_codes.h"
#include "common/model_parser/base.h"
#include "graph/load/model_manager/model_manager.h"
#include "omm/csa_interact.h"
#include "runtime/dev.h"
#include "runtime/mem.h"

namespace ge {
GraphExecutor::GraphExecutor()


+3 -11  ge/graph/load/graph_loader.cc

@@ -20,19 +20,13 @@
#include <vector>

#include "common/helper/model_helper.h"
#include "common/util.h"
#include "common/model_parser/model_parser.h"
#include "graph/ge_context.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "omm/csa_interact.h"
#include "runtime/dev.h"

namespace ge {
GraphLoader::GraphLoader() = default;

GraphLoader::~GraphLoader() = default;

Status GraphLoader::UnloadModel(uint32_t model_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
@@ -120,7 +114,6 @@ Status GraphLoader::GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size) {

Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string &key_path, int32_t priority,
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
@@ -132,16 +125,15 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return ACL_ERROR_GE_PARAM_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}
return SUCCESS;
return ret;
}

Status GraphLoader::CommandHandle(const Command &command) {


+ 2
- 2
ge/graph/load/graph_loader.h View File

@@ -32,9 +32,9 @@
namespace ge {
class GraphLoader {
public:
GraphLoader();
GraphLoader() = default;

virtual ~GraphLoader();
virtual ~GraphLoader() = default;

GraphLoader(const GraphLoader &in) = delete;
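
With DavinciModelParser removed, GraphLoader::LoadDataFromFile now calls ModelParserBase directly. A minimal sketch of that call pattern, mirroring the diff above (the path, empty key path, and priority value are placeholders; an empty key path is assumed to mean an unencrypted model):

std::string path = "/tmp/model.om";  // placeholder path
std::string key_path;                // placeholder; empty assumed to mean no decryption key
ge::ModelData model_data;
Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), 0, model_data);
if (ret != SUCCESS && model_data.model_data != nullptr) {
  // release the partially loaded buffer on failure, as the updated loader does
  delete[] static_cast<char *>(model_data.model_data);
  model_data.model_data = nullptr;
}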



+ 117
- 200
ge/graph/load/model_manager/davinci_model.cc View File

@@ -92,9 +92,35 @@ const uint32_t kEndOfSequence = 0x0704000a;
const uint32_t kEndOfSequenceNew = 507005;
const int32_t kModelAbortNormal = 0x0704000e;
const int32_t kModelAbortNormalNew = 507024;
const uint32_t kInteval = 2;
const char *const kModelName = "model_name";
const char *const kModeleId = "model_id";
const char *const kLoadStartTime = "load_start_time";
const char *const kLoadEndTime = "load_end_time";
const char *const kFusionOpInfo = "fusion_op_info";
const char *const kFusionOpName = "fusion_op_name";
const char *const kOriginalOpNum = "origin_op_num";
const char *const kOriginalOpName = "origin_op_name";
const char *const kStreamId = "stream_id";
const char *const kFusionOpMemoryInfo = "memory_info";
const char *const kInputSize = "input_size";
const char *const kOutputSize = "output_size";
const char *const kWeightSize = "weight_size";
const char *const kWorkSpaceSize = "workspace_size";
const char *const kTotalSize = "total_size";
const char *const kTaskCount = "task_count";
const char *const kTaskId = "task_id";
const char* const kRequestId = "request_id";
const char* const kThreadId = "thread_id";
const char* const kInputBeginTime = "input_begin_time";
const char* const kInputEndTime = "input_end_time";
const char* const kInferBeginTime = "infer_begin_time";
const char* const kInferEndTime = "infer_end_time";
const char* const kOutputBeginTime = "output_start_time";
const char* const kOutputEndTime = "output_end_time";

inline bool IsDataOp(const std::string &node_type) {
return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE;
return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE);
}

inline bool IsTbeTask(const OpDescPtr &op_desc) {
@@ -187,12 +213,12 @@ DavinciModel::~DavinciModel() {
UnbindTaskSinkStream();
for (size_t i = 0; i < label_list_.size(); ++i) {
if (label_list_[i] != nullptr) {
GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i);
GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index:%zu", i);
}
}

for (size_t i = 0; i < stream_list_.size(); ++i) {
GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i);
GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index:%zu", i);
}

for (size_t i = 0; i < event_list_.size(); ++i) {
@@ -360,7 +386,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu].",
runtime_param_.graph_id, mem_base_, data_size);

if (!is_inner_weight_base_) {
@@ -381,7 +407,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
is_inner_p2p_mem_base_ = true;
}

GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed");
runtime_param_.mem_base = mem_base_;
runtime_param_.weight_base = weights_mem_base_;
runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_;
@@ -391,7 +417,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
Status DavinciModel::InitVariableMem() {
// malloc variable memory base
var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM);
if (TotalVarMemSize() && var_mem_base_ == nullptr) {
if (TotalVarMemSize() && (var_mem_base_ == nullptr)) {
Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize());
if (ret != SUCCESS) {
GELOGE(ret, "Malloc variable memory failed.");
@@ -500,25 +526,25 @@ Status DavinciModel::DoTaskSink() {
}

GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_));
GELOGI("do task_sink. AiCpu deploy type is: %x.", deploy_type_);
GELOGI("do task_sink. AiCpu deploy type is: %x", deploy_type_);

GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");

if (known_node_) {
GE_CHK_STATUS_RET(MallocKnownArgs(), "Malloc known node args failed.");
GE_CHK_STATUS_RET(MallocKnownArgs(), "Malloc known node args failed");
}

GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");

GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");

GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");

GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");

GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");

GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");

GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));

@@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

Status DavinciModel::ReportProfilingData() {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
Status ret = GetComputeGraphInfo(compute_graph_desc_info);
if (ret != SUCCESS) {
GELOGE(ret, "GetComputeGraphInfo failed.");
return ret;
}
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo());
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");

return SUCCESS;
@@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() {
}

Status DavinciModel::SinkModelProfile() {
// profiling plugin must be registered
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};
// report model data tag name
std::string tag_name("model_load_info_" + std::to_string(this->Id()));
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");

// Model Header
std::string name = om_name_.empty() ? name_ : om_name_;
size_t name_len = name.size();
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)name.c_str();
reporter_data.dataLen = name.size();
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

uint32_t model_id = this->Id();
reporter_data.data = (unsigned char *)&model_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// Load Start/End Time
int64_t start_time = this->GetLoadBeginTime();
reporter_data.data = (unsigned char *)&start_time;
reporter_data.dataLen = sizeof(int64_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

int64_t end_time = this->GetLoadEndTime();
reporter_data.data = (unsigned char *)&end_time;
reporter_data.dataLen = sizeof(int64_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Json model_load_info;
model_load_info[kModelName] = name;
model_load_info[kModeleId] = model_id;
model_load_info[kLoadStartTime] = start_time;
model_load_info[kLoadEndTime] = end_time;
// fusion op info
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const ProfileInfo &profile : profile_list_) {
// op name after fusion
Json fusion_op_info;
string fusion_op_name = profile.fusion_info.op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
uint32_t op_num = profile.fusion_info.original_op_names.size();
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

vector<string> original_name;
for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = profile.fusion_info.original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
original_name.emplace_back(profile.fusion_info.original_op_names[k]);
}
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find(fusion_op_name);
if (iter != profiler_report_op_info_.end()) {
stream_id = iter->second.second;
}
reporter_data.data = (unsigned char *)&stream_id;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
reporter_data.data = (unsigned char *)&profile.memory_info;
reporter_data.dataLen = sizeof(profile.memory_info);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&profile.task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

fusion_op_info[kFusionOpName] = fusion_op_name;
fusion_op_info[kOriginalOpNum] = op_num;
fusion_op_info[kOriginalOpName] = original_name;
fusion_op_info[kStreamId] = stream_id;
fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size;
fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size;
fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size;
fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size;
fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size;
fusion_op_info[kTaskCount] = profile.task_count;
vector<uint32_t> task_id;
Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
task_id.push_back(idx->second);
}
fusion_op_info[kTaskId] = task_id;
model_load_info[kFusionOpInfo] += fusion_op_info;
}

std::string tag_name("model_load_info_" + std::to_string(this->Id()));
std::string reported_data;
try {
reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
}
reported_data.append(",")
.append("\n");
prof_mgr.ReportData(device_id_, reported_data, tag_name);
return SUCCESS;
}

Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
// profiling plugin must be registered
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};

string name = om_name_.empty() ? name_ : om_name_;
Json model_time_info;
model_time_info[kModelName] = name;
model_time_info[kModeleId] = this->Id();
model_time_info[kRequestId] = current_data.request_id;
model_time_info[kThreadId] = GetDataInputTid();
model_time_info[kInputBeginTime] = time_info_.processBeginTime;
model_time_info[kInputEndTime] = time_info_.processEndTime;
model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime;
model_time_info[kInferEndTime] = time_info_.inferenceEndTime;
model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime;
model_time_info[kOutputEndTime] = time_info_.dumpEndTime;

// report model data tag name
std::string tag_name;
tag_name.append("model_time_info_")
.append(std::to_string(this->Id()))
.append("_")
.append(std::to_string(current_data.index));

GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");
// device id
reporter_data.deviceId = device_id_;

// Model Header
string name;
if (!om_name_.empty()) {
name = om_name_;
} else {
name = name_;
}
size_t name_len = name.size();
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)name.c_str();
reporter_data.dataLen = name.size();
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// request id
uint64_t request_id = current_data.request_id;
reporter_data.data = (unsigned char *)&request_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

// thread id
int32_t thread_id = GetDataInputTid();
reporter_data.data = (unsigned char *)&thread_id;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

// time info
time_info_.modelId = this->Id();
reporter_data.data = (unsigned char *)&time_info_;
reporter_data.dataLen = sizeof(struct timeInfo);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);
.append(std::to_string(this->Id()))
.append("_")
.append(std::to_string(current_data.index));
std::string reported_data;
try {
reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
}
reported_data.append(",")
.append("\n");
prof_mgr.ReportData(device_id_, reported_data, tag_name);

return SUCCESS;
}
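
SinkModelProfile and SinkTimeProfile now serialize one JSON object per report instead of streaming raw ReporterData fields. A rough sketch of how a load-info payload is assembled and handed to the profiling manager, using the key constants introduced above (the model name, id, times, and tag value are placeholders):

Json model_load_info;
model_load_info[kModelName] = "example_model";  // placeholder value
model_load_info[kModeleId] = 1U;                // placeholder value
model_load_info[kLoadStartTime] = 0L;           // placeholder value
model_load_info[kLoadEndTime] = 0L;             // placeholder value
std::string reported = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
reported.append(",").append("\n");              // trailing separator, as in the diff
prof_mgr.ReportData(device_id_, reported, "model_load_info_1");  // tag is normally "model_load_info_" + model id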
@@ -2641,6 +2589,7 @@ void *DavinciModel::Run(DavinciModel *model) {
bool seq_end_flag = false;
uint32_t model_id = model->Id();
uint32_t device_id = model->GetDeviceId();
GetContext().SetWorkStreamId(model->GetWorkStreamId());

GELOGI("Model Run thread start, model_id:%u.", model_id);
rtError_t rt_ret = rtSetDevice(static_cast<int32_t>(device_id));
@@ -2807,6 +2756,7 @@ Status DavinciModel::ModelRunStart() {
int64_t maxDumpOpNum = std::strtol(opt.c_str(), nullptr, kDecimal);
maxDumpOpNum_ = maxDumpOpNum;

work_stream_id_ = GetContext().WorkStreamId();
CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this);
GELOGI("model tread create success, model id:%u.", model_id_);
return SUCCESS;
@@ -3069,13 +3019,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
task_desc_info.model_name = name_;
}
task_desc_info.op_name = op->GetName();
task_desc_info.op_type = op->GetType();
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
// task type
task_desc_info.task_type = kTaskTypeInvalid;
auto &prof_mgr = ProfilingManager::Instance();
prof_mgr.GetOpInputOutputInfo(op, task_desc_info);
auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (model_task_type == RT_MODEL_TASK_KERNEL) {
const domi::KernelDef &kernel_def = task_def.kernel();
@@ -3107,7 +3059,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
task_desc_info_.emplace_back(task_desc_info);
}
}
return;
}

Status DavinciModel::DistributeTask() {
@@ -3332,7 +3283,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp
///
Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) {
string input_or_output = "input";
string input_or_output;
is_input ? input_or_output = "input" : input_or_output = "output";
if (blobs.size() != data_info.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu",
@@ -3342,7 +3293,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &

for (const auto &data : data_info) {
if (data.first >= blobs.size()) { // check data index.
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu",
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"Verify %s data num failed: can not find No.%u data, because user only feeds %zu",
input_or_output.c_str(), data.first, blobs.size());
return ACL_ERROR_GE_PARAM_INVALID;
}
@@ -4007,41 +3959,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea
main_follow_stream_mapping_[main_stream_id].emplace_back(stream);
}

Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
for (auto &op_desc : all_op_desc) {
ComputeGraphDescInfo compute_graph_info;
if (!om_name_.empty()) {
compute_graph_info.model_name = om_name_;
} else {
compute_graph_info.model_name = name_;
}

std::vector<Format> format = { FORMAT_NULL };
std::vector<std::vector<int64_t>> shape = { {0} };
std::vector<DataType> data_type = { DT_UNDEFINED };
compute_graph_info.op_name = op_desc.op_name;
compute_graph_info.op_type = op_desc.op_type;
compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format;
compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape;
compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type;
compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format;
compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape;
compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type;
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find(op_desc.op_name);
if (iter != profiler_report_op_info_.end()) {
task_id = iter->second.first;
stream_id = iter->second.second;
}
compute_graph_info.task_id = task_id;
compute_graph_info.stream_id = stream_id;
graph_desc_info.emplace_back(compute_graph_info);
}
return SUCCESS;
}

void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) {
if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) {
tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_;
@@ -4133,10 +4050,10 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op
int64_t data_input_size;
(void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size);
GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s",
index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
}
}



+ 3
- 3
ge/graph/load/model_manager/davinci_model.h View File

@@ -412,6 +412,8 @@ class DavinciModel {
///
uint64_t GetSessionId() const { return session_id_; }

uint64_t GetWorkStreamId() const { return work_stream_id_; }

///
/// @ingroup ge
/// @brief SetDeviceId
@@ -840,9 +842,6 @@ class DavinciModel {

Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

// get desc info of graph for profiling
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);

void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);

Status InitL1DataDumperArgs();
@@ -960,6 +959,7 @@ class DavinciModel {
vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.

uint64_t session_id_;
uint64_t work_stream_id_;

uint32_t device_id_;



+ 0
- 23
ge/graph/load/model_manager/davinci_model_parser.cc View File

@@ -1,23 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/load/model_manager/davinci_model_parser.h"

namespace ge {
DavinciModelParser::DavinciModelParser() {}

DavinciModelParser::~DavinciModelParser() {}
} // namespace ge

+ 0
- 46
ge/graph/load/model_manager/davinci_model_parser.h View File

@@ -1,46 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_

#include <securec.h>
#include <memory>

#include "common/debug/log.h"
#include "common/ge_types.h"
#include "common/model_parser/base.h"
#include "common/types.h"
#include "common/util.h"

namespace ge {
class DavinciModelParser : public ModelParserBase {
public:
///
/// @ingroup hiai
/// @brief constructor
///
DavinciModelParser();

///
/// @ingroup hiai
/// @brief destructor
///
~DavinciModelParser();
};
} // namespace ge

#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_

+ 2
- 10
ge/graph/load/model_manager/model_manager.cc View File

@@ -18,23 +18,15 @@

#include <string>

#include "mmpa/mmpa_api.h"
#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
#include "common/model_parser/model_parser.h"
#include "common/dump/dump_manager.h"
#include "common/l2_cache_optimize.h"
#include "common/profiling/profiling_manager.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "graph/utils/attr_utils.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"

namespace ge {
thread_local uint32_t device_count = 0;
@@ -1403,7 +1395,7 @@ Status ModelManager::LaunchCustAicpuSo() {
Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size) {
uint8_t *model_data = nullptr;
uint32_t model_len = 0;
Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len);
Status ret = ModelParserBase::ParseModelContent(model, model_data, model_len);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!");

OmFileLoadHelper om_file_helper;


+ 9
- 5
ge/graph/manager/graph_caching_allocator.cc View File

@@ -28,10 +28,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize,
kBinSizeUnit8 * kMByteSize,
kBinSizeUnit32 * kMByteSize,
kBinSizeUnit128 * kMByteSize,
kGByteSize,
kBinSizeUnit4 * kGByteSize,
kBinSizeUnit16 * kGByteSize,
kBinSizeUnit26 * kGByteSize};
kBinSizeUnit256 * kMByteSize,
kBinSizeUnit512 * kMByteSize,
kGByteSize};

static bool BlockComparator(const Block *left, const Block *right) {
if (left->size != right->size) {
@@ -63,7 +62,10 @@ size_t GetBinIndex(size_t size) {

size_t GetAllocationSize(size_t size) {
size_t index = GetBinIndex(size);
return bin_ranges[index];
if (bin_ranges[index] >= size) {
return bin_ranges[index];
}
return kGByteSize * ((size + kGByteSize - 1) / kGByteSize);
}

///
@@ -119,6 +121,7 @@ void CachingAllocator::Finalize(uint32_t device_id) {
}

uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) {
GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id);
uint8_t *ptr = nullptr;
size = GetBlockSize(size);
Block *block = FindFreeBlock(size, org_ptr, device_id);
@@ -253,6 +256,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui
}

Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) {
GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id);
auto memory_size = GetAllocationSize(size);
const std::string purpose = "Memory for caching.";
auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id);


+ 4
- 4
ge/graph/manager/graph_caching_allocator.h View File

@@ -36,17 +36,17 @@ namespace ge {
constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes
constexpr size_t kBinSizeUnit4 = 4;
constexpr size_t kBinSizeUnit8 = 8;
constexpr size_t kBinSizeUnit16 = 16;
constexpr size_t kBinSizeUnit26 = 26;
constexpr size_t kBinSizeUnit32 = 32;
constexpr size_t kBinSizeUnit128 = 128;
constexpr size_t kBinSizeUnit256 = 256;
constexpr size_t kBinSizeUnit512 = 512;

constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSplitThreshold
constexpr double kSplitThreshold = 0.5; // split when malloc size <= small block size * kSplitThreshold
constexpr size_t kKByteSize = 1024;
constexpr size_t kMByteSize = 1048576; // 1024 * 1024
constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024

static const uint32_t kNumBins = 8;
static const uint32_t kNumBins = 7;

class MemoryAllocator;
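
The bin table now tops out at 1 GB (seven bins), and GetAllocationSize falls back to rounding oversized requests up to a whole number of gigabytes instead of always returning a bin size. A standalone sketch of that rounding, matching the ceiling division used in the .cc change above:

// Sizes above the largest bin are rounded up to a multiple of kGByteSize,
// e.g. a 1.5 GB request becomes a 2 GB allocation.
size_t RoundUpToGigabytes(size_t size) {
  return kGByteSize * ((size + kGByteSize - 1) / kGByteSize);
}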



+ 22
- 18
ge/graph/manager/graph_manager.cc View File

@@ -293,7 +293,7 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) {
return FAILED;
}
if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) {
GELOGI("Need to process multi batch for compute graph.");
GELOGI("Need to process multi batch for compute graph. op_type:%s", op_desc->GetType().c_str());
GetLocalOmgContext().need_multi_batch = true;
break;
}
@@ -348,7 +348,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
for (auto &subgraph : compute_graph->GetAllSubgraphs()) {
(void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
}
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]");
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0].");
}

GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
@@ -541,7 +541,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
compute_graph->GetName(), session_id, GetContext().WorkStreamId(),
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -557,7 +557,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
compute_graph->GetName(), session_id, GetContext().WorkStreamId(),
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -734,8 +734,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
}

Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) {
GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id,
static_cast<int>(mode), ge::GetContext().DeviceId());
GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.",
session_id, graph_id, static_cast<int>(mode), ge::GetContext().DeviceId());

rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
@@ -758,7 +758,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) {

GE_TIMESTAMP_START(RunCustomPass);
GraphPtr graph = std::const_pointer_cast<Graph>(const_graph);
GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.",
GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail",
comp_graph->GetName().c_str());
GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass");
return SUCCESS;
@@ -776,7 +776,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
GE_CHK_STATUS_RET(analyzer_instance->BuildJsonObject(session_id, compute_graph->GetGraphID()),
"BuildJsonObject Failed")

GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s",
GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s.",
compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(),
compute_graph->GetName().c_str());
GE_DUMP(compute_graph, "PreRunBegin");
@@ -797,7 +797,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
if (run_optimize_original_graph) {
Status ret = PreRunOptimizeOriginalGraph(graph_node, inputs, compute_graph, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s.", compute_graph->GetName().c_str());
GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s", compute_graph->GetName().c_str());
return ret;
}
}
@@ -869,7 +869,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
// release rts generate context
RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId());
if (ret != SUCCESS) {
GELOGE(ret, "PreRun Failed.");
GELOGE(ret, "PreRun Failed. graph_id:%u", graph_node->GetGraphId());
return ret;
}
}
@@ -1209,7 +1209,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const

Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) {
GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id);
GELOGD("[BuildGraph] start to build graph, graph_id:%u.", graph_id);
if (inputs.empty()) {
GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs");
}
@@ -1241,7 +1241,7 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id);
graph_node->SetRunFlag(false);
if (ret != SUCCESS) {
GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed!");
GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed! graph_id:%u", graph_id);
return GE_GRAPH_PRERUN_FAILED;
}

@@ -2254,9 +2254,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass",
new (std::nothrow)
LinkGenMaskNodesPass(options_.stream_max_parallel_num)));
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass",
new (std::nothrow) HcclContinuousMemcpyPass));
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass",
new (std::nothrow) HcclContinuousMemcpyPass));

GE_TIMESTAMP_START(after_merge_passes);
auto ret = after_merge_passes.Run(compute_graph);
@@ -2509,8 +2508,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
const SubGraphInfoPtr &sub_graph_info_ptr,
const std::string &root_graph_name,
uint64_t session_id,
uint64_t work_stream_id,
const GEThreadLocalContext &ge_context) {
if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) {
GetContext().SetWorkStreamId(work_stream_id);
GetContext().SetSessionId(session_id);
GetThreadLocalContext() = ge_context;
graph_manager->UpdateLocalOmgContext(root_graph_id);
@@ -2557,7 +2558,8 @@ Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vector<ge
uint64_t session_id, RunAsyncCallback callback) {
GELOGI("[GraphManager] Start to run graph async, graph_id=%u, inputsSize=%zu.", graph_id, inputs.size());

bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id, GetThreadLocalContext(), callback}));
bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id,
GetContext().WorkStreamId(), GetThreadLocalContext(), callback}));
if (!ret) {
GELOGE(FAILED, "[GraphManager] Run graph async failed, graph_id=%u.", graph_id);
return FAILED;
@@ -2644,6 +2646,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {

GELOGI("A new loop start.");

GetContext().SetWorkStreamId(args.work_stream_id);
GetContext().SetSessionId(args.session_id);
GetThreadLocalContext() = args.context;
graph_manager->UpdateLocalOmgContext(args.graph_id);
@@ -2725,8 +2728,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
ge_root_model = graph_node->GetGeRootModel();
}

graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor,
ge_root_model, GetThreadLocalContext(), args.callback }));
graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.work_stream_id,
args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback }));
GELOGI("Loop end.");
}
}
@@ -2825,6 +2828,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) {

GELOGI("A new loop start.");

GetContext().SetWorkStreamId(args.work_stream_id);
GetContext().SetSessionId(args.session_id);
GetThreadLocalContext() = args.context;
graph_manager->UpdateLocalOmgContext(args.graph_id);


+ 3
- 0
ge/graph/manager/graph_manager.h View File

@@ -196,6 +196,7 @@ class GraphManager {
GraphId graph_id;
std::vector<ge::InputTensorInfo> input_tensor;
uint64_t session_id;
uint64_t work_stream_id;
GEThreadLocalContext context;
RunAsyncCallback callback;
};
@@ -204,6 +205,7 @@ class GraphManager {
GraphNodePtr graph_node;
GraphId graph_id;
uint64_t session_id;
uint64_t work_stream_id;
std::vector<ge::InputTensorInfo> input_tensor;
GeRootModelPtr ge_root_model;
GEThreadLocalContext context;
@@ -221,6 +223,7 @@ class GraphManager {
const SubGraphInfoPtr &sub_graph_info_ptr,
const std::string &root_graph_name,
uint64_t session_id,
uint64_t work_stream_id,
const GEThreadLocalContext &ge_context);
Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor);
void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor);
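
PreRunArgs and RunArgs now carry a work_stream_id so the pre-run and run worker threads can restore the caller's work stream alongside the session id. A minimal sketch of the capture/restore pattern the graph_manager.cc changes follow (names taken from the diff):

// On the caller's thread, when the job is queued:
uint64_t work_stream_id = GetContext().WorkStreamId();
// ... stored in PreRunArgs / RunArgs, then restored on the worker thread
// before any graph processing:
GetContext().SetWorkStreamId(work_stream_id);
GetContext().SetSessionId(session_id);
GetThreadLocalContext() = ge_context;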


+ 1
- 1
ge/graph/manager/graph_mem_allocator.h View File

@@ -26,6 +26,7 @@

#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/node.h"
#include "runtime/mem.h"

@@ -139,7 +140,6 @@ class MemoryAllocator {
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;
class CachingAllocator;
class RdmaPoolAllocator;
class HostMemAllocator;
class MemManager {
public:
MemManager();


+ 3
- 3
ge/graph/passes/assign_remove_pass.cc View File

@@ -24,9 +24,9 @@ namespace {
constexpr uint32_t kValidInputNodeOutputNum = 1;
constexpr int32_t kAssignRefInputIndex = 0;
constexpr int32_t kAssignValueInputIndex = 1;
static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
}

Status AssignRemovePass::Run(NodePtr &node) {


+ 2
- 4
ge/graph/passes/constant_folding_pass.cc View File

@@ -50,13 +50,11 @@ Status RunOpKernelWithCheck(NodePtr &node,
return FoldingPass::RunOpKernel(node, inputs, outputs);
}

const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
&ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
return statistic_of_ge_constant_folding_;
}

const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
&ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
return statistic_of_op_constant_folding_;
}



+ 19
- 6
ge/graph/passes/flow_ctrl_pass.cc View File

@@ -37,7 +37,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) {
return NOT_CHANGED;
}

GELOGI("FlowCtrl pass begin");
GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str());
bool graph_change = false;
// 1. Add FP/BP flow ctrl (big cycle)
for (auto &node : compute_graph->GetDirectNode()) {
@@ -80,6 +80,16 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) {
graph_change = true;
}
}

// the add-edge operations below depend on the memcpy node in the iterator loop being assigned a single stream, or they may cause a block
for (auto &active_node : active_nodes_in_iter_loop_) {
auto ret = GraphUtils::AddEdge(active_node->GetOutControlAnchor(),
assign_add_node_in_fpbp_loop_->GetInControlAnchor());
if (ret != GRAPH_SUCCESS) {
GELOGW("add control edge between iter_loop_node:%s and fpbp_loop_node:%s fail, may cause block",
active_node->GetName().c_str(), assign_add_node_in_fpbp_loop_->GetName().c_str());
}
}
GELOGI("FlowCtrl pass end, graph is %s.", graph_change ? "changed" : "not changed");
return graph_change ? SUCCESS : NOT_CHANGED;
}
@@ -279,16 +289,16 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co
* loopIncrement
*/
// Insert AssignAdd node
NodePtr assign_add_node =
assign_add_node_in_fpbp_loop_ =
InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node);
if (assign_add_node == nullptr || switch_node == nullptr) {
if (assign_add_node_in_fpbp_loop_ == nullptr || switch_node == nullptr) {
GELOGE(PARAM_INVALID, "assign add node or switch node is null");
return FAILED;
}

string active_name = switch_node->GetName() + "_StreamActive";
// add attr for stream assign model to break branch.
GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node, active_name), "set stream label failed");
GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node_in_fpbp_loop_, active_name), "set stream label failed");

// used for stream assign to find true branch
GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed");
@@ -304,13 +314,15 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co
DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED);

// add ctrl edges
graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_add_node->GetInControlAnchor());
graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(),
assign_add_node_in_fpbp_loop_->GetInControlAnchor());
if (add_ret != GRAPH_SUCCESS) {
GELOGE(FAILED, "Add switch_node to assign_add_node ctrl edge failed, add_ret=%u.", add_ret);
return FAILED;
}

add_ret = GraphUtils::AddEdge(assign_add_node->GetOutControlAnchor(), active_node->GetInControlAnchor());
add_ret = GraphUtils::AddEdge(assign_add_node_in_fpbp_loop_->GetOutControlAnchor(),
active_node->GetInControlAnchor());
if (add_ret != GRAPH_SUCCESS) {
GELOGE(FAILED, "Add assign_add_node to active_node ctrl edge failed, add_ret=%u.", add_ret);
return FAILED;
@@ -533,6 +545,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph,
GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed");
// used for stream assign to find active stream
GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed");
active_nodes_in_iter_loop_.push_back(active_node);
return SUCCESS;
}
} // namespace ge

+ 3
- 0
ge/graph/passes/flow_ctrl_pass.h View File

@@ -142,6 +142,9 @@ class FlowCtrlPass : public GraphPass {
/// false: only one dataSet exist
///
bool CheckMultiDataSet(ComputeGraphPtr &compute_graph);

NodePtr assign_add_node_in_fpbp_loop_ = nullptr;
std::vector<NodePtr> active_nodes_in_iter_loop_;
};
} // namespace ge



+ 13
- 9
ge/graph/passes/hccl_continuous_memcpy_pass.cc View File

@@ -140,7 +140,8 @@ bool HcclContinuousMemcpyPass::IsDataNode(const std::string& node_type) {
/// @param [in] ge::OutDataAnchorPtr in_node
/// @return ge::NodePtr
///
NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) {
NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &out_data_anchor) {
GE_CHECK_NOTNULL_EXEC(graph, return nullptr);
NodePtr pre_node = out_data_anchor->GetOwnerNode();
OpDescPtr pre_op_desc = pre_node->GetOpDesc();
@@ -205,8 +206,9 @@ std::string HcclContinuousMemcpyPass::CheckDuplicateName(const std::string &node
/// @param [in] InDataAnchorPtr hccl_in_anchor
/// @return status
///
Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode());
GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode());

@@ -235,8 +237,9 @@ Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &gra
/// @param [in] InDataAnchorPtr hccl_in_anchor
/// @return status
///
Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(),
hccl_in_anchor->GetOwnerNode()->GetName().c_str());
NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor);
@@ -274,8 +277,8 @@ Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr
/// @return status
///
Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &var_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
const OutDataAnchorPtr &var_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) {
GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str());
return SUCCESS;
@@ -354,8 +357,9 @@ Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeG
/// @param [in] ge::OutDataAnchorPtr variable node out anchor
/// @return ge::NodePtr
///
NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) {
GE_CHECK_NOTNULL_EXEC(graph , return nullptr);
NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &out_data_anchor) {
GE_CHECK_NOTNULL_EXEC(graph, return nullptr);
NodePtr pre_node = out_data_anchor->GetOwnerNode();
OpDescPtr pre_op_desc = pre_node->GetOpDesc();
if (pre_op_desc == nullptr) {


+ 3
- 3
ge/graph/passes/inplace_support_check_pass.cc View File

@@ -23,9 +23,9 @@ namespace ge {
namespace {
constexpr uint32_t kInplaceSupportOutputIndex = 0;
constexpr uint32_t kInplaceSupportOutputNum = 1;
static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
}
Status InplaceSupportCheckPass::Run(NodePtr &node) {
GELOGD("InplaceSupportCheckPass running");


+ 1
- 1
ge/graph/passes/net_output_pass.cc View File

@@ -458,7 +458,7 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) {
GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null.");
return GE_GRAPH_PARAM_NULLPTR;
}
GELOGI("NetOutputPass Run.");
GELOGI("NetOutputPass Run.graph is [%s]", graph->GetName().c_str());
NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT);
// save user targets node
SaveAndRemoveTargets(graph);


+ 33
- 6
ge/graph/passes/no_use_reshape_remove_pass.cc View File

@@ -82,14 +82,41 @@ Status NoUseReshapeRemovePass::Run(ge::NodePtr &node) {
}
}
if (to_be_deleted) {
GELOGI("NoUseReshapeRemovePass remove useless node:%s", node->GetName().c_str());
auto ret = PassUtils::UnlinkNodeWithControlCopy(node, kReshapeShapeIndex);
if (ret != SUCCESS) {
GELOGE(ret, "DimensionAdjustPass unlink node with control copy fail.");
return ret;
}
auto ret = TryRemoveConstShapeInput(node);
GE_CHK_STATUS_RET_NOLOG(ret);
GELOGI("NoUseReshapeRemovePass remove useless reshape node:%s", node->GetName().c_str());
return IsolateAndDeleteNode(node, {kReshapeDataIndex});
}
return SUCCESS;
}

Status NoUseReshapeRemovePass::TryRemoveConstShapeInput(ge::NodePtr &reshape_node) {
auto shape_input_anchor = reshape_node->GetInDataAnchor(kReshapeShapeIndex);
if (shape_input_anchor == nullptr) {
return SUCCESS;
}
GE_CHECK_NOTNULL(shape_input_anchor->GetPeerOutAnchor());
auto shape_input = shape_input_anchor->GetPeerOutAnchor()->GetOwnerNode();
GE_CHECK_NOTNULL(shape_input);
if (shape_input->GetType() != CONSTANT && shape_input->GetType() != CONSTANTOP) {
return SUCCESS;
}
//   op(x)   const(shape)
//      \      /
//       reshape
// const input can unlink but should copy control_dependency
auto ret = PassUtils::UnlinkNodeWithControlCopy(reshape_node, kReshapeShapeIndex);
if (ret != SUCCESS) {
GELOGE(ret, "Unlink node %s with control copy failed.", shape_input->GetName().c_str());
return ret;
}

// remove const without any data_output
if (shape_input->GetOutDataNodesSize() == 0) {
auto ret = IsolateAndDeleteNode(shape_input, {});
GE_CHK_GRAPH_STATUS_RET(ret, "Fail to remove node %s", shape_input->GetName().c_str());
GELOGI("Remove useless shape input const %s.", shape_input->GetName().c_str());
}
return SUCCESS;
}
} // namespace ge

+ 3
- 0
ge/graph/passes/no_use_reshape_remove_pass.h View File

@@ -32,6 +32,9 @@ class NoUseReshapeRemovePass : public BaseNodePass {
/// @author
///
Status Run(ge::NodePtr &node) override;

private:
Status TryRemoveConstShapeInput(NodePtr &reshape_node);
};
} // namespace ge



+ 1
- 3
ge/graph/passes/prune_pass.cc View File

@@ -27,12 +27,11 @@

namespace ge {
Status PrunePass::Run(ge::ComputeGraphPtr graph) {
GELOGD("PrunePass Start");
GELOGD("PrunePass Start, graph is [%s]", graph->GetName().c_str());
if (graph == nullptr) {
GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL.");
return GE_GRAPH_ISNULL;
}

std::vector<NodePtr> out_nodes;
std::unordered_set<NodePtr> nodes;
for (NodePtr &node_ptr : graph->GetDirectNode()) {
@@ -42,7 +41,6 @@ Status PrunePass::Run(ge::ComputeGraphPtr graph) {
out_nodes.push_back(node_ptr);
}
}

if (out_nodes.empty()) {
GELOGW("graph [%s] does not contain NETOUTPUT type node,no return value. Do nothing!", graph->GetName().c_str());
return ge::SUCCESS;


+ 1
- 1
ge/graph/passes/reshape_remove_pass.cc View File

@@ -43,7 +43,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()];
switch(key) {
switch (key) {
case kReshapeType: {
bool is_shape_unknown = false;
if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) {


+ 1
- 1
ge/graph/passes/subgraph_const_migration_pass.cc View File

@@ -385,7 +385,7 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra

// Break Move and follow, Link Data and follow.
const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex);
const auto in_anchors =out_anchor->GetPeerInDataAnchors();
const auto in_anchors = out_anchor->GetPeerInDataAnchors();
for (const auto in_anchor : in_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());


+ 5
- 4
ge/graph/preprocess/graph_preprocess.cc View File

@@ -991,7 +991,6 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range,
Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option,
vector<vector<std::pair<int64_t, int64_t>>> &range_vec) {
// check both mode and shape_range option are all enabled

auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE);
bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute");
if (!enable_dynamic_execute_mode) {
@@ -1272,9 +1271,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) {
return SUCCESS;
}

Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option) {
Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input,
const std::map<string, string> &graph_option) {
// Get shape range of input in dynamic_execute mode
vector<vector<std::pair<int64_t,int64_t>>> dynamic_shape_range_vec;
vector<vector<std::pair<int64_t, int64_t>>> dynamic_shape_range_vec;
auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec);
GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode.");
compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format));
@@ -2012,7 +2012,8 @@ Status GraphPrepare::ProcessNetOutput() {
return SUCCESS;
}

Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,const std::map<string,string> &graph_option) {
Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,
const std::map<string, string> &graph_option) {
compute_graph_->SetInputSize(user_input.size());
if (user_input.empty()) {
return SUCCESS;


+ 3
- 3
ge/graph/preprocess/graph_preprocess.h View File

@@ -23,7 +23,7 @@
#include <vector>
#include "common/debug/log.h"
#include "common/debug/memory_dumper.h"
#include "common/model_parser/base.h"
#include "common/model_parser/model_parser.h"
#include "common/properties_manager.h"
#include "common/string_util.h"
#include "common/types.h"
@@ -63,8 +63,8 @@ class GraphPrepare {
Status CheckRefOp();
Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode);
Status AdjustDataOpOutput(const NodePtr &node);
Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option);
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option);
Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
Status CheckConstOp();
Status VerifyConstOp(const NodePtr &node);
Status CheckUserInput(const std::vector<GeTensor> &user_input);


+ 1
- 1
ge/graph/preprocess/multi_batch_options.h View File

@@ -105,7 +105,7 @@ GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, con
/// @return 0: true/false
///
GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
const std::string &input_format);
const std::string &input_format);

} // namespace multibatch
} // namespace ge


+ 37
- 1
ge/host_kernels/slice_kernel.cc View File

@@ -16,6 +16,8 @@

#include "host_kernels/slice_kernel.h"

#include <set>

#include "common/ge_inner_error_codes.h"
#include "common/op/ge_op_utils.h"
#include "common/types.h"
@@ -31,6 +33,30 @@ const size_t kSliceInputSize = 3;
const size_t kSliceInputIndexX = 0;
const size_t kSliceInputIndexBegin = 1;
const size_t kSliceInputIndexSize = 2;
const std::set<ge::DataType> kSupportedDataTypeToLength = {
DT_BOOL,
DT_INT64,
DT_UINT64,
DT_FLOAT,
DT_INT32,
DT_UINT32,
DT_INT8,
DT_UINT8,
DT_INT16,
DT_UINT16,
DT_FLOAT16,
DT_DOUBLE,
DT_DUAL,
DT_DUAL_SUB_INT8,
DT_DUAL_SUB_UINT8,
DT_COMPLEX64,
DT_COMPLEX128,
DT_QINT8,
DT_QINT16,
DT_QINT32,
DT_QUINT8,
DT_QUINT16,
};
} // namespace

Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input,
@@ -56,6 +82,16 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso

// data type in input_x
auto data_type = x_->GetTensorDesc().GetDataType();
// check supported
if (kSupportedDataTypeToLength.count(data_type) == 0) {
GELOGW("input_x data_type is [%s], does not supported!", TypeUtils::DataTypeToSerialString(data_type).c_str());
return NOT_CHANGED;
}
uint32_t type_size = 0;
bool is_success = TypeUtils::GetDataTypeLength(data_type, type_size);
if (!is_success) {
return NOT_CHANGED;
}
// check data type of begin and size
if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) {
GELOGW("Data type of begin and size for slice are not DT_INT32.");
@@ -69,7 +105,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso
GE_CHECK_NOTNULL(begin_data);
GE_CHECK_NOTNULL(size_data);

size_t data_size = x_->GetData().size() / sizeof(int32_t);
size_t data_size = x_->GetData().size() / type_size;
size_t begin_size = begin->GetData().size() / sizeof(int32_t);
size_t size_size = size->GetData().size() / sizeof(int32_t);
const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape();
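
A minimal standalone sketch of the size computation this hunk introduces, with an illustrative (not GE) type table: the element count is derived from the tensor's own type length instead of a hard-coded sizeof(int32_t), and unsupported types are rejected up front.

#include <cstddef>
#include <iostream>
#include <map>
#include <set>

enum class DType { kInt32, kFloat16, kBool };  // illustrative subset of ge::DataType

const std::set<DType> kSupported = {DType::kInt32, DType::kFloat16, DType::kBool};
const std::map<DType, size_t> kTypeLength = {{DType::kInt32, 4}, {DType::kFloat16, 2}, {DType::kBool, 1}};

// Returns the element count of a raw buffer, or -1 if the type is unsupported.
long ElementCount(DType dtype, size_t byte_size) {
  if (kSupported.count(dtype) == 0) {
    return -1;  // mirrors the NOT_CHANGED early-out above
  }
  const size_t type_size = kTypeLength.at(dtype);
  return static_cast<long>(byte_size / type_size);  // previously byte_size / sizeof(int32_t)
}

int main() {
  std::cout << ElementCount(DType::kFloat16, 32) << std::endl;  // 16 elements, not 8
  return 0;
}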


+ 2
- 2
ge/hybrid/executor/hybrid_execution_context.h

@@ -62,9 +62,9 @@ struct GraphExecutionContext {
rtStream_t stream = nullptr;
rtContext_t rt_context = nullptr;
rtContext_t rt_gen_context = nullptr;
std::unique_ptr<CallbackManager> callback_manager;
std::unique_ptr<CallbackManager> callback_manager = nullptr;
NpuMemoryAllocator *allocator = nullptr;
mutable std::unique_ptr<HybridProfiler> profiler;
mutable std::unique_ptr<HybridProfiler> profiler = nullptr;
DumpProperties dump_properties;
bool trace_enabled = false;
bool dump_enabled = false;


+ 4
- 2
ge/hybrid/executor/hybrid_model_async_executor.cc

@@ -26,6 +26,7 @@ namespace hybrid {
namespace {
const int kDataOutputIndex = 0;
const size_t kMinimumPiplineStages = 2;
const int kIntBase = 10;
}
HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model)
: model_(model), run_flag_(false) {
@@ -150,7 +151,7 @@ Status HybridModelAsyncExecutor::RunInternal() {
GELOGI("HybridModel will execute in pipeline mode");
auto iter_per_run = std::getenv("ITER_NUM");
if (iter_per_run) {
args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, 10));
args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, kIntBase));
}
ret = pipe_executor_->Execute(args);
} else {
@@ -250,7 +251,8 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
if (k >= shape.GetDimNum()) {
break;
}
if (shape.GetDim(k) < range[k].first || shape.GetDim(k) > range[k].second) {
// range[k].second can be -1
if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) {
GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]",
input_index, k, shape.GetDim(k), range[k].first, range[k].second);
return PARAM_INVALID;
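
A self-contained sketch of the relaxed range check above, assuming (as the hunk's comment states) that a negative upper bound means the dynamic dim is unbounded; the helper name is made up for illustration.

#include <cassert>
#include <cstdint>
#include <utility>

// True if dim lies in [range.first, range.second]; range.second < 0 means no upper bound.
bool DimInRange(int64_t dim, const std::pair<int64_t, int64_t> &range) {
  if (dim < range.first) {
    return false;
  }
  return range.second < 0 || dim <= range.second;
}

int main() {
  assert(DimInRange(128, {1, -1}));   // -1 upper bound accepts any dim >= 1
  assert(!DimInRange(0, {1, -1}));
  assert(!DimInRange(9, {1, 8}));
  return 0;
}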


+ 2
- 1
ge/hybrid/executor/hybrid_model_pipeline_executor.cc

@@ -8,6 +8,7 @@ namespace ge {
namespace hybrid {
namespace {
constexpr int kNumExecutors = 2;
const int kMinLoopCount = 2;
const int kIntBase = 10;
const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
}
@@ -208,7 +209,7 @@ Status HybridModelPipelineExecutor::InitStageExecutors() {

Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
int loop_count = args.num_loops;
GE_CHECK_GE(loop_count, 2);
GE_CHECK_GE(loop_count, kMinLoopCount);

auto &inputs = args.inputs;
auto &input_desc = args.input_desc;


+ 1
- 1
ge/hybrid/executor/node_state.h

@@ -30,7 +30,7 @@ class NodeTask;
struct GraphExecutionContext;
class SubgraphContext;
class TaskContext;
class NodeState;
struct NodeState;

class ShapeFuture {
public:


+ 2
- 2
ge/hybrid/executor/subgraph_executor.cc

@@ -275,10 +275,10 @@ Status SubgraphExecutor::PrepareNodes(int group) {
Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
GetContext().SetSessionId(context_->context_id);
HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
"[%s] Failed to InferShape.", node_state.GetName().c_str());
"[%s] Failed to InferShape.", node_state.GetName().c_str());
GetContext().SetSessionId(context_->session_id);
HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
"[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
"[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
return SUCCESS;
}



+ 9
- 53
ge/hybrid/executor/worker/execution_engine.cc

@@ -70,8 +70,6 @@ class NodeDoneCallback {
Status PrepareConstInputs(const NodeItem &node_item);
Status DumpDynamicNode();
Status ProfilingReport();
Status GetGraphDescInfo(const NodePtr node, const HybridModel *model,
std::vector<ComputeGraphDescInfo> &compute_graph_info);
Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
std::vector<TaskDescInfo> &task_desc_info);
GraphExecutionContext *graph_context_;
@@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
}

GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
auto &prof_mgr = ProfilingManager::Instance();
task_desc_info = context_->GetProfilingTaskDescInfo();
context_->ClearProfilingTaskDescInfo();

return SUCCESS;
}

Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model,
std::vector<ComputeGraphDescInfo> &compute_graph_info) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(model);

GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str());
compute_graph_info = context_->GetProfilingGraphDescInfo();
context_->ClearProfilingGraphDescInfo();

auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (auto &tmp_compute_graph_info : compute_graph_info) {
// default
if (op_desc->GetAllInputsSize() == 0) {
tmp_compute_graph_info.input_format = { FORMAT_NULL };
tmp_compute_graph_info.input_shape = { {0} };
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
}
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
continue;
}
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
}

if (op_desc->GetOutputsSize() == 0) {
tmp_compute_graph_info.output_format = { FORMAT_NULL };
tmp_compute_graph_info.output_shape = { {0} };
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
}
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
}
for (auto &tmp_task_desc : task_desc_info) {
// save op input and output info
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc);
}

return SUCCESS;
@@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() {
return profiling_ret;
}

std::vector<ComputeGraphDescInfo> compute_graph_info;
profiling_ret = GetGraphDescInfo(node, model, compute_graph_info);
if (profiling_ret != RT_ERROR_NONE) {
GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str());
return profiling_ret;
}

auto &profiling_manager = ProfilingManager::Instance();
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info);
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info);
return SUCCESS;
}

@@ -323,6 +277,8 @@ Status NodeDoneCallback::OnNodeDone() {
node_item.NodeName().c_str());
}

// release workspace
context_->ReleaseWorkspace();
// release inputs
for (int i = 0; i < context_->NumInputs(); ++i) {
context_->ReleaseInput(i);


+ 3
- 1
ge/hybrid/model/hybrid_model_builder.cc

@@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() {
op_index = task_def.kernel_ex().op_index();
} else if (task_type == RT_MODEL_TASK_HCCL) {
op_index = task_def.kernel_hccl().op_index();
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
op_index = task_def.kernel_with_handle().context().op_index();
} else {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
continue;
@@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
}

auto &node = iter->second;
if (task_type == RT_MODEL_TASK_KERNEL) {
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
}
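
A rough sketch of the dispatch this hunk extends, using much-reduced stand-in types rather than the real domi protobufs: the op index is read from a different sub-message per task type, and kernel-with-handle tasks are now treated like plain kernel tasks when binding TBE kernel binaries.

#include <cstdint>

enum class TaskType { kKernel, kKernelEx, kHccl, kAllKernel, kOther };  // stand-ins for RT_MODEL_TASK_*

struct TaskDef {  // heavily simplified stand-in for domi::TaskDef
  TaskType type;
  uint32_t kernel_op_index;
  uint32_t kernel_ex_op_index;
  uint32_t hccl_op_index;
  uint32_t with_handle_op_index;
};

int64_t GetOpIndex(const TaskDef &task) {  // returns -1 for task types that are skipped
  switch (task.type) {
    case TaskType::kKernel:    return task.kernel_op_index;
    case TaskType::kKernelEx:  return task.kernel_ex_op_index;
    case TaskType::kHccl:      return task.hccl_op_index;
    case TaskType::kAllKernel: return task.with_handle_op_index;  // the branch added in this change
    default:                   return -1;                         // unknown task types are skipped
  }
}

bool NeedsTbeKernelBin(TaskType type) {
  return type == TaskType::kKernel || type == TaskType::kAllKernel;
}

int main() {
  TaskDef task{TaskType::kAllKernel, 0, 0, 0, 7};
  return (GetOpIndex(task) == 7 && NeedsTbeKernelBin(task.type)) ? 0 : 1;
}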



+ 2
- 3
ge/hybrid/node_executor/aicore/aicore_node_executor.cc

@@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return FAILED;
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 158
- 28
ge/hybrid/node_executor/aicore/aicore_op_task.cc

@@ -33,6 +33,20 @@ constexpr char const *kAttrOpParamSize = "op_para_size";
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
} // namespace

TbeHandleHolder::TbeHandleHolder(void *bin_handle)
: bin_handle_(bin_handle) {}

TbeHandleHolder::~TbeHandleHolder() {
if (bin_handle_ != nullptr) {
GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_));
}
}

bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
auto ret = registered_handles_.emplace(std::move(holder));
return ret.second;
}

Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
@@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
if (rt_ret != RT_ERROR_NONE || is_single_op_) {
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
@@ -96,7 +110,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
} else {
GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
kernel_store.ReferTBEHandle(stub_name_.c_str());
}
std::string kernel_name;
@@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());
Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
TbeHandleRegistry &registry = TbeHandleRegistry::GetInstance();
auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str());
return INTERNAL_ERROR;
}

void *bin_handle = nullptr;
GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string),
GELOGI("Get original type of session_graph_id."));
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else {
GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
return PARAM_INVALID;
}
binary.version = 0;
binary.data = tbe_kernel->GetBinData();
binary.length = tbe_kernel->GetBinDataSize();
GELOGI("TBE: binary.length: %lu", binary.length);
GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
handle_ = bin_handle;
auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc.GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
return SUCCESS;
}

Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
stub_name_ = kernel_def.stub_func();

GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc));

GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_));
args_size_ = kernel_def.args_size();
block_dim_ = kernel_def.block_dim();

// malloc args memory
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_def.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);
if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
@@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
block_dim_,
arg_base_,
args_size_);
return SUCCESS;
}

Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) {
const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
const domi::KernelContext &context = kernel_with_handle.context();

GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc));
original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_";
node_info_ = kernel_with_handle.node_info() + "/";
args_size_ = kernel_with_handle.args_size();
block_dim_ = kernel_with_handle.block_dim();
// malloc args memory
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_with_handle.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_);

if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
return INTERNAL_ERROR;
}

if (context.args_offset().size() < sizeof(uint16_t)) {
GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
return INTERNAL_ERROR;
}

const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint32_t offset = *args_offset_buffer;
if (offset > args_size_) {
GELOGE(INTERNAL_ERROR,
"[%s] Arg offset out of range. offset = %u, arg size = %u",
GetName().c_str(),
offset,
args_size_);
return INTERNAL_ERROR;
}

arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
max_arg_count_ = (args_size_ - offset) / sizeof(void *);
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());

if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) {
GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def));
} else {
GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def));
}
return SUCCESS;
}

Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type != RT_MODEL_TASK_KERNEL) {
if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) {
GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type));
return INTERNAL_ERROR;
}

const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
@@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
}

Status AiCoreOpTask::PrepareWithShape(TaskContext &context) {
if (tiling_buffer_ != nullptr) {
if (is_dynamic_) {
return UpdateTilingInfo(context);
}

return SUCCESS;
}

@@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
clear_atomic_ = tiling_info.clear_atomic;

tiling_data_ = tiling_info.tiling_data.str();
tiling_key_ = tiling_info.tiling_key;
GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
if (tiling_data_.empty()) {
GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str());
GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str());
return SUCCESS;
}
if (tiling_buffer_ == nullptr) {
GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!");
return INTERNAL_ERROR;
}

@@ -238,6 +351,9 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info)
GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info),
"Failed calc tiling data of node %s.",
node->GetName().c_str());
if (is_single_op_) {
tiling_info.clear_atomic = false;
}
GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str());
return SUCCESS;
}
@@ -296,16 +412,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
}

Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
if (handle_ != nullptr) {
std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + std::to_string(tiling_key_);
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
} else {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
}
return SUCCESS;
}

Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) {
bool dynamic_supported = false;
(void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported);
if (!dynamic_supported) {
(void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_);
if (!is_dynamic_) {
GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str());
return SUCCESS;
}
@@ -314,22 +440,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) {
int64_t max_size = -1;
(void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size);
GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size);
if (max_size <= 0) {
if (max_size < 0) {
GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size);
return PARAM_INVALID;
}

auto allocator = NpuMemoryAllocator::GetAllocator();
GE_CHECK_NOTNULL(allocator);
tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size));
GE_CHECK_NOTNULL(tiling_buffer_);
if (max_size > 0) {
tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size));
GE_CHECK_NOTNULL(tiling_buffer_);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size);
} else {
GELOGD("op_param_size is 0, no need to create tiling buffer.");
}

GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size);
return SUCCESS;
}

bool AiCoreOpTask::IsDynamicShapeSupported() {
return tiling_buffer_ != nullptr;
return is_dynamic_;
}

const std::string &AiCoreOpTask::GetName() const {
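
A condensed, hypothetical sketch of the launch selection added above (the launch functions below are stand-ins; the real calls are rtKernelLaunch and rtKernelLaunchWithHandle): when a binary handle has been registered, the device function name is rebuilt from the original kernel key plus the tiling key, otherwise the task falls back to the stub-function path.

#include <cstdint>
#include <iostream>
#include <string>

// Stand-ins for the runtime launch APIs; signatures are illustrative only.
void LaunchByStub(const std::string &stub_name) { std::cout << "stub launch: " << stub_name << std::endl; }
void LaunchByHandle(const std::string &dev_func, const std::string &kernel_info) {
  std::cout << "handle launch: " << dev_func << " (" << kernel_info << ")" << std::endl;
}

void Launch(bool has_handle, const std::string &stub_name, const std::string &original_kernel_key,
            const std::string &node_info, uint32_t tiling_key) {
  if (has_handle) {
    // dev_func and kernel_info are keyed by tiling_key, matching the "_" / "/" suffixes appended in Init.
    LaunchByHandle(original_kernel_key + std::to_string(tiling_key), node_info + std::to_string(tiling_key));
  } else {
    LaunchByStub(stub_name);
  }
}

int main() {
  Launch(true, "model/relu_tvmbin", "relu_kernel_", "graph/relu/", 3);  // handle-based path
  Launch(false, "model/relu_tvmbin", "", "", 0);                        // legacy stub path
  return 0;
}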


+ 34
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h

@@ -28,6 +28,32 @@

namespace ge {
namespace hybrid {
class TbeHandleHolder {
public:
TbeHandleHolder(void *bin_handle);
~TbeHandleHolder();

void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
void *GetBinHandle() { return bin_handle_; }

private:
friend class TbeHandleRegistry;
void *bin_handle_ = nullptr;
};

class TbeHandleRegistry {
public:
static TbeHandleRegistry &GetInstance() {
static TbeHandleRegistry instance;
return instance;
}

bool AddHandle(std::unique_ptr<TbeHandleHolder> &&holder);

private:
std::set<std::unique_ptr<TbeHandleHolder>> registered_handles_;
};

class AiCoreOpTask {
public:
AiCoreOpTask() = default;
@@ -67,6 +93,9 @@ class AiCoreOpTask {
Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
Status InitTilingInfo(const OpDesc &op_desc);
Status RegisterTbeHandle(const OpDesc &op_desc);
Status RegisterKernelHandle(const OpDesc &op_desc);
Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def);
Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def);

std::string stub_name_;
void *stub_func_ = nullptr;
@@ -76,6 +105,11 @@ class AiCoreOpTask {
bool clear_atomic_ = true;
bool is_single_op_ = false;
std::vector<int> output_indices_to_skip_;
string original_kernel_key_;
string node_info_;
uint32_t tiling_key_ = 0;
void *handle_ = nullptr;
bool is_dynamic_ = false;
};

class AtomicAddrCleanOpTask : public AiCoreOpTask {
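
The holder/registry pair declared above is essentially an RAII wrapper: ownership of each device binary handle moves into a unique_ptr kept in a process-wide set, and the holder's destructor unregisters the binary. A generic sketch of the same pattern, with plain C++ in place of the runtime calls:

#include <iostream>
#include <memory>
#include <set>

class HandleHolder {
 public:
  explicit HandleHolder(void *handle) : handle_(handle) {}
  ~HandleHolder() { std::cout << "unregister " << handle_ << std::endl; }  // real code calls rtDevBinaryUnRegister
  void *Get() const { return handle_; }

 private:
  void *handle_ = nullptr;
};

class HandleRegistry {
 public:
  static HandleRegistry &GetInstance() {
    static HandleRegistry instance;
    return instance;
  }
  bool Add(std::unique_ptr<HandleHolder> &&holder) {
    return holders_.emplace(std::move(holder)).second;  // the registry now owns the handle
  }

 private:
  std::set<std::unique_ptr<HandleHolder>> holders_;
};

int main() {
  static int dummy = 0;
  HandleRegistry::GetInstance().Add(std::unique_ptr<HandleHolder>(new HandleHolder(&dummy)));
  return 0;
}  // holders are released when the registry's static storage is torn down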


+ 2
- 3
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc

@@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return FAILED;
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0);
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");


+ 11
- 34
ge/hybrid/node_executor/task_context.cc

@@ -36,10 +36,6 @@ TaskContext::TaskContext(GraphExecutionContext *execution_context,

TaskContext::~TaskContext() {
GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str());
for (auto ws_addr : workspaces_) {
execution_context_->allocator->Deallocate(ws_addr);
}

// release output
for (int i = 0; i < NumOutputs(); ++i) {
auto output_tensor = MutableOutput(i);
@@ -49,6 +45,13 @@ TaskContext::~TaskContext() {
}
}

void TaskContext::ReleaseWorkspace() {
GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str());
for (auto ws_addr : workspaces_) {
execution_context_->allocator->Deallocate(ws_addr);
}
}

std::unique_ptr<TaskContext> TaskContext::Create(NodeState *node_state,
GraphExecutionContext *execution_context,
SubgraphContext *subgraph_context) {
@@ -512,21 +515,21 @@ Status TaskContext::Synchronize() {
}

Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
uint32_t task_type, uint32_t block_dim) {
const std::string &task_type, uint32_t block_dim) {
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
const NodeItem &node_item = GetNodeItem();
auto op_desc = node_item.GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const GraphExecutionContext * graph_context = GetExecutionContext();
const GraphExecutionContext *graph_context = GetExecutionContext();
GE_CHECK_NOTNULL(graph_context);
const HybridModel *model = graph_context->model;
GE_CHECK_NOTNULL(model);

std::string op_name = op_desc->GetName();
std::string dynamic_model_name = model->GetModelName();
TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_name;
tmp_task_desc_info.op_name = op_desc->GetName();
tmp_task_desc_info.op_type = op_desc->GetType();
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_type = task_type;
tmp_task_desc_info.task_id = task_id;
@@ -543,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const {
return node_state_;
}

Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
const NodeItem &node_item = GetNodeItem();
auto op_desc = node_item.GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const GraphExecutionContext * graph_context = GetExecutionContext();
GE_CHECK_NOTNULL(graph_context);
const HybridModel *model = graph_context->model;
GE_CHECK_NOTNULL(model);

std::string dynamic_model_name = model->GetModelName();
auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
ComputeGraphDescInfo tmp_compute_graph_info;
tmp_compute_graph_info.model_name = dynamic_model_name;
tmp_compute_graph_info.op_name = op_desc->GetName();
tmp_compute_graph_info.op_type = op_desc->GetType();
tmp_compute_graph_info.task_id = task_id;
tmp_compute_graph_info.stream_id = stream_id;
compute_graph_info.emplace_back(tmp_compute_graph_info);
}
}
return SUCCESS;
}

} // namespace hybrid
} // namespace ge

+ 3
- 6
ge/hybrid/node_executor/task_context.h

@@ -56,6 +56,7 @@ class TaskContext {
void ReleaseInputsAndOutputs();
bool NeedCallback();
void ReleaseInput(int index);
void ReleaseWorkspace();
const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const;
TensorValue *MutableOutput(int index);
@@ -112,13 +113,10 @@ class TaskContext {
void *handle_ = nullptr;

const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim);
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
const std::string &task_type, uint32_t block_dim);
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }

const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; }
Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id);
void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); }

private:
TaskContext(GraphExecutionContext *execution_context,
NodeState *node_state,
@@ -140,7 +138,6 @@ class TaskContext {
uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
std::vector<TaskDescInfo> task_desc_info;
std::vector<ComputeGraphDescInfo> compute_graph_info;
};
} // namespace hybrid
} // namespace ge


+ 6
- 6
ge/offline/main.cc

@@ -62,19 +62,18 @@ using std::shared_ptr;
using std::string;
using std::vector;

namespace {
static bool is_dynamic_input = false;

const char *const kModeSupport = "only support 0(model to framework model), "
"1(framework model to json), 3(only pre-check), "
"5(pbtxt to json), 6(display model info)";
const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)";

static const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model";
static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model";
static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model";

const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model";
const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model";
const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model";
// limit available mem size 2G
const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024
} // namespace

DEFINE_string(model, "", "The model file.");
DEFINE_string(output, "", "The output file path&name.");
@@ -1326,6 +1325,7 @@ int init(int argc, char* argv[]) {
return ret;
}

ErrorManager::GetInstance().GenWorkStreamIdDefault();
return 0;
}



+ 3
- 2
ge/session/omg.cc

@@ -23,7 +23,7 @@
#include "common/debug/memory_dumper.h"
#include "common/ge/ge_util.h"
#include "common/helper/model_helper.h"
#include "common/model_parser/base.h"
#include "common/model_parser/model_parser.h"
#include "common/model_saver.h"
#include "common/properties_manager.h"
#include "common/string_util.h"
@@ -965,7 +965,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js
} else {
ErrorManager::GetInstance().ATCReportErrMessage("E10003",
{"parameter", "value", "reason"}, {"om", model_file, "invalid om file"});
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param.");
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"ParseModelContent failed because of invalid om file. Please check --om param.");
}

if (model.model_data != nullptr) {


+ 7
- 23
ge/single_op/single_op.cc

@@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
return SUCCESS;
}

string model_name;
string op_name;
TaskDescInfo tmp_task_desc_info;
uint32_t model_id;
uint32_t block_dim;
if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) {
if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed");
return ACL_ERROR_GE_PARAM_INVALID;
}
GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str());
std::vector<TaskDescInfo> task_desc_info;
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("ProfilingReport of op[%s] model[%s] start.",
tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str());

TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = model_name;
tmp_task_desc_info.op_name = op_name;
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = shape_type;
tmp_task_desc_info.cur_iter_num = 0;
tmp_task_desc_info.task_type = op_task->GetTaskType();
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);

std::vector<ComputeGraphDescInfo> compute_graph_info;
std::vector<TaskDescInfo> task_desc_info;
task_desc_info.emplace_back(tmp_task_desc_info);

auto &profiling_manager = ProfilingManager::Instance();
profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info);
profiling_manager.ReportProfilingData(model_id, task_desc_info);
return SUCCESS;
}
} // namespace


+ 6
- 3
ge/single_op/single_op_manager.cc

@@ -30,8 +30,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManag
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFromModel(const std::string &model_name,
const ModelData &model_data,
void *stream,
SingleOp **single_op) {
GELOGI("GetOpFromModel in. model name = %s", model_name.c_str());
SingleOp **single_op,
const uint64_t model_id) {
GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id);
if (single_op == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "single op is null");
return ACL_ERROR_GE_INTERNAL_ERROR;
@@ -99,7 +100,9 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) {
Status SingleOpManager::GetDynamicOpFromModel(const string &model_name,
const ModelData &model_data,
void *stream,
DynamicSingleOp **single_op) {
DynamicSingleOp **single_op,
const uint64_t model_id) {
GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id);
if (!tiling_func_registered_) {
RegisterTilingFunc();
}


+ 4
- 2
ge/single_op/single_op_manager.h

@@ -37,12 +37,14 @@ class SingleOpManager {
Status GetOpFromModel(const std::string &model_name,
const ge::ModelData &model_data,
void *stream,
SingleOp **single_op);
SingleOp **single_op,
const uint64_t model_id);

Status GetDynamicOpFromModel(const std::string &model_name,
const ge::ModelData &model_data,
void *stream,
DynamicSingleOp **dynamic_single_op);
DynamicSingleOp **dynamic_single_op,
const uint64_t model_id);

StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream);



+ 13
- 9
ge/single_op/single_op_model.cc

@@ -190,7 +190,7 @@ Status SingleOpModel::LoadAllNodes() {
auto node = nodes.at(i);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
op_list_[i] = node;
op_list_[op_desc->GetId()] = node;
auto op_type = op_desc->GetType();
GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str());

@@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
auto ret = BuildKernelTask(task_def.kernel(), &tbe_task);
auto ret = BuildKernelTask(task_def, &tbe_task);
if (ret != SUCCESS) {
return ret;
}
@@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) {
}
}

Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) {
Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) {
GE_CHECK_NOTNULL(task);
const auto &context = kernel_def.context();
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
auto iter = op_list_.find(context.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
@@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def);
auto builder = TbeTaskBuilder(model_name_, iter->second, task_def);
auto ret = builder.BuildTask(*tbe_task, model_params_);
if (ret != SUCCESS) {
delete tbe_task;
@@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
}

Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
const domi::KernelDef &kernel_def = task_def.kernel();
const auto &context = kernel_def.context();
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();

auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_);
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
@@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
task_def.DebugString().c_str());
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type == RT_MODEL_TASK_KERNEL) {
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;


+ 1
- 2
ge/single_op/single_op_model.h

@@ -24,7 +24,6 @@
#include <vector>

#include "common/helper/model_helper.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "single_op/single_op.h"
#include "single_op/stream_resource.h"

@@ -67,7 +66,7 @@ class SingleOpModel {

Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task);
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);


+ 53
- 15
ge/single_op/task/op_task.cc

@@ -23,6 +23,7 @@
#include "aicpu/common/aicpu_task_struct.h"
#include "common/dump/dump_manager.h"
#include "common/dump/dump_op.h"
#include "common/profiling/profiling_manager.h"
#include "common/formats/formats.h"
#include "common/math/math_util.h"
#include "framework/common/debug/log.h"
@@ -93,6 +94,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size
op_desc_ = op_desc;
}

void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc,
const domi::KernelDefWithHandle &kernel_def_with_handle) {
SetKernelArgs(std::move(args), arg_size, block_dim, op_desc);
original_kernel_key_ = kernel_def_with_handle.original_kernel_key();
node_info_ = kernel_def_with_handle.node_info();
}

void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; }

void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
@@ -100,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
model_id_ = model_id;
}

Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id,
uint32_t &block_dim) {
model_name = model_name_;
model_id = model_id_;
block_dim = block_dim_;
Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) {
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_CHECK_NOTNULL(op_desc_);
op_name = op_desc_->GetName();
string op_name = op_desc_->GetName();
GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
model_id = model_id_;
task_desc_info.model_name = model_name_;
task_desc_info.block_dim = block_dim_;
task_desc_info.task_id = task_id;
task_desc_info.stream_id = stream_id;
task_desc_info.op_name = op_name;
task_desc_info.op_type = op_desc_->GetType();
auto &prof_mgr = ProfilingManager::Instance();
prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info);
return SUCCESS;
}

Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
return UNSUPPORTED;
}
@@ -145,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
return UNSUPPORTED;
}

uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }
const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; }

TbeOpTask::~TbeOpTask() {
if (sm_desc_ != nullptr) {
@@ -163,7 +186,11 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; }

const std::string &TbeOpTask::GetStubName() const { return stub_name_; }

uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }

void TbeOpTask::SetHandle(void *handle) {
this->handle_ = handle;
}

Status TbeOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
@@ -204,8 +231,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
}
block_dim_ = run_info.block_dim;
tiling_data_ = run_info.tiling_data.str();
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_,
tiling_data_.size());
tiling_key_ = run_info.tiling_key;
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
tiling_data_.size(), tiling_key_);

GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces");
return SUCCESS;
@@ -329,8 +357,17 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
}

GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
if (handle_ == nullptr) {
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
} else {
std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
}

return SUCCESS;
}

@@ -363,7 +400,8 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint
num_inputs_,
num_outputs_,
unknown_type_));
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!");
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION,
"Malloc aicpu_ext_handle mem failed!");

Status ret = aicpu_ext_handle_->Parse(kernel_ext_info);
if (ret != SUCCESS) {
@@ -401,7 +439,7 @@ Status AiCpuBaseTask::SetInputConst() {
return SUCCESS;
}

Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
std::vector<GeTensorDesc> &output_desc,
rtStream_t stream) {
GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_);
@@ -811,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) {
return DoUpdateArgTable(param, false);
}

uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }
const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }

void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data());
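
A trimmed illustration of the profiling refactor above, with a stand-in record type and a fake id source (the real code queries rtGetTaskIdAndStreamID and the profiling manager): instead of handing back loose strings, the task now packs everything the profiler needs into one descriptor.

#include <cstdint>
#include <iostream>
#include <string>

struct TaskRecord {  // stand-in for TaskDescInfo
  std::string model_name;
  std::string op_name;
  std::string op_type;
  uint32_t block_dim = 0;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
};

// Hypothetical id source; the real implementation asks the runtime right after kernel launch.
void GetIds(uint32_t &task_id, uint32_t &stream_id) {
  task_id = 42;
  stream_id = 1;
}

TaskRecord CollectProfilingArgs(const std::string &model, const std::string &op, const std::string &type,
                                uint32_t block_dim) {
  TaskRecord record;
  record.model_name = model;
  record.op_name = op;
  record.op_type = type;
  record.block_dim = block_dim;
  GetIds(record.task_id, record.stream_id);
  return record;  // the caller appends this to the report instead of rebuilding it field by field
}

int main() {
  TaskRecord record = CollectProfilingArgs("resnet50", "conv1", "Conv2D", 32);
  std::cout << record.op_name << " task_id=" << record.task_id << std::endl;
  return 0;
}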


+ 12
- 4
ge/single_op/task/op_task.h

@@ -43,7 +43,7 @@ class OpTask {
const vector<GeTensorDesc> &output_desc);
virtual Status UpdateArgTable(const SingleOpModelParam &param);
void SetModelArgs(std::string model_name, uint32_t model_id);
Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim);
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
const OpDescPtr &GetOpdesc() const {return op_desc_;}
Status OpenDump(rtStream_t stream);
virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0;
@@ -52,7 +52,7 @@ class OpTask {
std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers,
rtStream_t stream);
virtual uint32_t GetTaskType() const;
virtual const std::string &GetTaskType() const;

protected:
Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace);
@@ -78,6 +78,8 @@ class TbeOpTask : public OpTask {
void SetSmDesc(void *sm_desc);
void SetStubFunc(const std::string &name, const void *stub_func);
void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc);
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle);

Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc) override;
@@ -86,7 +88,8 @@ class TbeOpTask : public OpTask {
size_t GetArgSize() const;
const std::string &GetStubName() const;
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
uint32_t GetTaskType() const override;
const std::string &GetTaskType() const override;
void SetHandle(void *handle);

private:
friend class SingleOpModel;
@@ -107,6 +110,11 @@ class TbeOpTask : public OpTask {
std::string tiling_data_;
std::vector<void *> workspaces_;
NodePtr node_;

uint32_t tiling_key_ = 0;
void* handle_ = nullptr;
std::string original_kernel_key_;
std::string node_info_;
};

class AiCpuBaseTask : public OpTask {
@@ -115,7 +123,7 @@ class AiCpuBaseTask : public OpTask {
~AiCpuBaseTask() override;
UnknowShapeOpType GetUnknownType() const { return unknown_type_; }
Status UpdateArgTable(const SingleOpModelParam &param) override;
uint32_t GetTaskType() const override;
const std::string &GetTaskType() const override;

protected:
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);


+ 144
- 44
ge/single_op/task/tbe_task_builder.cc

@@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() {
}
}

HandleHolder::HandleHolder(void *bin_handle)
: bin_handle_(bin_handle) {}

HandleHolder::~HandleHolder() {
if (bin_handle_ != nullptr) {
GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_));
}
}

const char *KernelBinRegistry::GetUnique(const string &stub_func) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = unique_stubs_.find(stub_func);
@@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr<
return ret.second;
}

TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def)
bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) {
auto ret = registered_handles_.emplace(std::move(holder));
return ret.second;
}

TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def)
: node_(node),
op_desc_(node->GetOpDesc()),
kernel_def_(kernel_def),
task_def_(task_def),
kernel_def_(task_def.kernel()),
kernel_def_with_handle_(task_def.kernel_with_handle()),
stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {}

Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle,
@@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi
binary.data = kernel_bin.GetBinData();
binary.length = kernel_bin.GetBinDataSize();
binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC;
auto ret = rtDevBinaryRegister(&binary, bin_handle);
Status ret = 0;
if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) {
ret = rtRegisterAllKernel(&binary, bin_handle);
} else {
ret = rtDevBinaryRegister(&binary, bin_handle);
}
if (ret != RT_ERROR_NONE) {
GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
param.core_type, static_cast<int>(ret));
return ret;
}
@@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam

Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle,
const SingleOpModelParam &param) {
std::string kernel_name;
GetKernelName(op_desc_, kernel_name);

void *handle = nullptr;
auto ret = DoRegisterBinary(tbe_kernel, &handle, param);
if (ret != SUCCESS) {
return ret;
}
if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) {
*bin_handle = handle;
return SUCCESS;
}

ret = DoRegisterMeta(handle);
if (ret != SUCCESS) {
@@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const
return ret;
}

std::string kernel_name;
GetKernelName(op_desc_, kernel_name);
ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str());
if (ret != SUCCESS) {
GE_CHK_RT(rtDevBinaryUnRegister(handle));
@@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam

void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param);
if (ret == SUCCESS) {
holder->SetBinHandle(bin_handle);
if (!registry.AddKernel(stub_name_, std::move(holder))) {
// should not happen. only one thread can reach here
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
holder->SetBinHandle(bin_handle);
if (!registry.AddKernel(stub_name_, std::move(holder))) {
// should not happen. only one thread can reach here
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
}

@@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam
return SUCCESS;
}

Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam &param) {
GELOGD("RegisterKernelWithHandle begin.");
HandleRegistry &registry = HandleRegistry::GetInstance();
auto tbe_kernel = GetTbeKernel(op_desc_);
if (tbe_kernel == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
handle_ = bin_handle;
auto holder = std::unique_ptr<HandleHolder>(new (std::nothrow) HandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

return SUCCESS;
}

Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const {
const std::string &sm_desc_str = kernel_def_.sm_desc();
if (sm_desc_str.empty()) {
@@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param
}
}

auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rtRet));
return rtRet;
auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rt_ret));
return rt_ret;
}

rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rtRet != RT_ERROR_NONE) {
rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
(void)rtMemFreeManaged(*sm_desc);
GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast<int>(rtRet));
return rtRet;
GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast<int>(rt_ret));
return rt_ret;
}
}

@@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);

auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet));
return RT_ERROR_TO_GE_STATUS(rtRet);
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

const domi::KernelContext &context = kernel_def_.context();
@@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet));
return RT_ERROR_TO_GE_STATUS(rtRet);
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);

return SUCCESS;
}

Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param,
const OpDescPtr &op_desc) {
size_t arg_size = kernel_def_with_handle_.args_size();
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);

auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return rt_ret;
}

const domi::KernelContext &context = kernel_def_with_handle_.context();
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint16_t offset = *args_offset_tmp;

bool is_dynamic = false;
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
if (is_dynamic) {
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
} else {
// copy args
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
return rt_ret;
}
}
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc,
kernel_def_with_handle_);

return SUCCESS;
}

Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam &param) {
GELOGD("Build tbe task begin");
auto ret = SetKernelArgs(task, param, op_desc_);
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) :
SetKernelArgs(task, param, op_desc_);
if (ret != SUCCESS) {
return ret;
}

ret = RegisterKernel(task, param);
ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) :
RegisterKernel(task, param);
task.SetHandle(handle_);
if (ret != SUCCESS) {
return ret;
}

auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_);
GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str());

void *stub_func = nullptr;
auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func);
if (rtRet != SUCCESS) {
GELOGE(rtRet, "rtGetFunctionByName failed.");
return RT_ERROR_TO_GE_STATUS(rtRet);
if (task_type != RT_MODEL_TASK_ALL_KERNEL) {
void *stub_func = nullptr;
auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func);
if (rt_ret != SUCCESS) {
GELOGE(rt_ret, "rtGetFunctionByName failed.");
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
task.SetStubFunc(stub_name_, stub_func);
}

task.SetStubFunc(stub_name_, stub_func);
return SUCCESS;
}
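The rewritten BuildTask branches on the task type carried in the TaskDef: RT_MODEL_TASK_ALL_KERNEL tasks go through the new handle-based registration (rtRegisterAllKernel / rtKernelLaunchWithHandle, stubbed further down in runtime_stub.cc), while ordinary kernel tasks keep the stub-name path. A hedged sketch of the two launch paths, assuming the rt* declarations from the runtime headers; LaunchTbeTask itself is hypothetical:

// Sketch only: assumes the runtime header that declares the rt* API.
rtError_t LaunchTbeTask(bool all_kernel, void *handle, const void *dev_func, const char *stub_name,
                        uint32_t block_dim, void *args, uint32_t args_size, rtStream_t stream) {
  if (all_kernel) {
    // handle-based path: the kernel binary was registered once via rtRegisterAllKernel
    return rtKernelLaunchWithHandle(handle, dev_func, block_dim, args, args_size, nullptr, stream, nullptr);
  }
  // legacy path: resolve the stub function by name, then launch it
  void *stub_func = nullptr;
  rtError_t rt_ret = rtGetFunctionByName(stub_name, &stub_func);
  if (rt_ret != RT_ERROR_NONE) {
    return rt_ret;
  }
  return rtKernelLaunch(stub_func, block_dim, args, args_size, nullptr, stream);
}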

@@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) {
int64_t max_size = -1;
(void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size);
GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size);
if (max_size <= 0) {
if (max_size < 0) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size);
return ACL_ERROR_GE_PARAM_INVALID;
}

void *tiling_buffer = nullptr;
GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM));
GE_CHECK_NOTNULL(tiling_buffer);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
if (max_size > 0) {
GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM));
GE_CHECK_NOTNULL(tiling_buffer);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
}

task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size));
return SUCCESS;
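With this change an op_param_size attribute of 0 is no longer an error: only a negative value is rejected, and the HBM tiling buffer is allocated only when the size is positive, so a dynamic-shape op without tiling data simply records a null buffer of size 0. A standalone restatement of the rule, with malloc standing in for rtMalloc and InitTilingBuffer being a hypothetical name:

#include <cstdint>
#include <cstdlib>

int InitTilingBuffer(int64_t max_size, void **tiling_buffer) {
  *tiling_buffer = nullptr;
  if (max_size < 0) {    // only a negative attribute is invalid now
    return -1;
  }
  if (max_size > 0) {    // 0 means "no tiling data needed": keep the null buffer
    *tiling_buffer = std::malloc(static_cast<size_t>(max_size));
    if (*tiling_buffer == nullptr) {
      return -1;
    }
  }
  return 0;
}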


+ 32
- 1
ge/single_op/task/tbe_task_builder.h View File

@@ -42,6 +42,19 @@ class KernelHolder {
std::shared_ptr<ge::OpKernelBin> kernel_bin_;
};

class HandleHolder {
public:
HandleHolder(void *bin_handle);
~HandleHolder();

void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
void *GetBinHandle() { return bin_handle_; }

private:
friend class HandleRegistry;
void *bin_handle_ = nullptr;
};

class KernelBinRegistry {
public:
static KernelBinRegistry &GetInstance() {
@@ -61,9 +74,22 @@ class KernelBinRegistry {
std::mutex mutex_;
};

class HandleRegistry {
public:
static HandleRegistry &GetInstance() {
static HandleRegistry instance;
return instance;
}

bool AddHandle(std::unique_ptr<HandleHolder> &&holder);

private:
std::set<std::unique_ptr<HandleHolder>> registered_handles_;
};
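HandleRegistry mirrors KernelBinRegistry but owns the binary handles produced by the handle-based registration path; since registered_handles_ is a set of unique_ptr, AddHandle presumably just move-inserts the holder so its lifetime (and any cleanup in ~HandleHolder) is tied to the process-wide singleton. One plausible implementation, sketched rather than copied from tbe_task_builder.cc:

bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) {
  auto result = registered_handles_.emplace(std::move(holder));
  return result.second;  // false if the same holder was already registered
}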

class TbeTaskBuilder {
public:
TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def);
TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def);
~TbeTaskBuilder() = default;

Status BuildTask(TbeOpTask &task, const SingleOpModelParam &param);
@@ -71,9 +97,11 @@ class TbeTaskBuilder {
private:
Status InitTilingInfo(TbeOpTask &task);
Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const;

Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam &param);
Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam &param);
Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle,
const SingleOpModelParam &param);
Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam &param) const;
@@ -83,8 +111,11 @@ class TbeTaskBuilder {

const NodePtr node_;
const OpDescPtr op_desc_;
const domi::TaskDef &task_def_;
const domi::KernelDef &kernel_def_;
const domi::KernelDefWithHandle &kernel_def_with_handle_;
const std::string stub_name_;
void *handle_ = nullptr;
};
} // namespace ge



+ 4
- 0
inc/external/ge/ge_api.h View File

@@ -42,6 +42,10 @@ GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString
// Finalize GE, release all resources
GE_FUNC_VISIBILITY Status GEFinalize();

GE_FUNC_VISIBILITY std::string GEGetErrorMsg();

GE_FUNC_VISIBILITY std::string GEGetWarningMsg();

class GE_FUNC_VISIBILITY Session {
public:
ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &))
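GEGetErrorMsg and GEGetWarningMsg expose the text presumably collected by the error manager, so a caller can report why a GE call failed without scraping logs. A hedged usage sketch; the include path follows the inc/external layout and the options map is left empty for brevity:

#include <iostream>
#include <map>
#include "ge/ge_api.h"

int main() {
  std::map<ge::AscendString, ge::AscendString> options;
  if (ge::GEInitialize(options) != ge::SUCCESS) {
    std::cerr << "GEInitialize failed: " << ge::GEGetErrorMsg() << std::endl;
    return -1;
  }
  std::cout << "warnings: " << ge::GEGetWarningMsg() << std::endl;
  return ge::GEFinalize() == ge::SUCCESS ? 0 : -1;
}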


+ 5
- 13
inc/framework/common/ge_types.h View File

@@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";

// profiling data
const uint32_t kTaskTypeAicore = 0;
const uint32_t kTaskTypeAicpu = 1;
const uint32_t kTaskTypeInvalid = 0xFFFF;
const std::string kTaskTypeAicore = "AI_CORE";
const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";
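The profiling task type is now carried as a string rather than a numeric code, so consumers compare against the named constants instead of magic numbers. A minimal standalone sketch; the constants are restated locally so the snippet compiles on its own, and IsAiCoreTask is a hypothetical helper:

#include <string>

const std::string kTaskTypeAicore = "AI_CORE";            // mirrors inc/framework/common/ge_types.h
const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";

bool IsAiCoreTask(const std::string &task_type) {
  return task_type == kTaskTypeAicore;                     // previously: task_type == 0
}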

// Data cache, including data address and length
struct DataBuffer {
@@ -251,27 +251,19 @@ struct Options {
struct TaskDescInfo {
std::string model_name;
std::string op_name;
std::string op_type;
uint32_t block_dim;
uint32_t task_id;
uint32_t stream_id;
std::string shape_type;
int64_t cur_iter_num;
uint32_t task_type;
};

// Profiling info of graph
struct ComputeGraphDescInfo {
std::string model_name;
std::string op_name;
std::string op_type;
std::string task_type;
std::vector<Format> input_format;
std::vector<std::vector<int64_t>> input_shape;
std::vector<DataType> input_data_type;
std::vector<Format> output_format;
std::vector<std::vector<int64_t>> output_shape;
std::vector<DataType> output_data_type;
uint32_t task_id;
uint32_t stream_id;
};

struct OpDescInfo {


+ 6
- 0
inc/framework/executor/ge_executor.h View File

@@ -260,12 +260,18 @@ class GE_FUNC_VISIBILITY GeExecutor {
static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream,
SingleOp **single_op);

static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream,
SingleOp **single_op, const uint64_t model_id);

static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
std::vector<DataBuffer> &outputs);

static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op);

static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op, const uint64_t model_id);

static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs);
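The V2 loaders add a caller-supplied model_id that is threaded through to the single-op executor (useful when attributing profiling or dump data to a particular load); the original overloads remain for compatibility. A hedged usage sketch using the same include path as the new executor unit test; the model id, name, and LoadMySingleOp wrapper are illustrative:

#include "executor/ge_executor.h"

ge::Status LoadMySingleOp(const ge::ModelData &model_data, void *stream, ge::SingleOp **op) {
  const uint64_t model_id = 1U;  // caller-chosen id propagated to the executor
  return ge::GeExecutor::LoadSingleOpV2("my_single_op", model_data, stream, op, model_id);
}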


+ 2
- 1
inc/framework/generator/generator_api.h View File

@@ -55,7 +55,8 @@ typedef void *OpTensor_t;
/// @return 0 for success / others for fail
///
GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num,
const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr,
const char *om_file);

///
/// @ingroup ge


+ 2
- 1
inc/framework/memory/memory_api.h View File

@@ -52,7 +52,8 @@ GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_ME
/// \param var_info [in] host variable addr infos.
/// \param mem_type [in] memory type for rdma pool.
/// \return Status result of function
GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info,
rtMemType_t mem_type = RT_MEMORY_HBM);

///
/// \param tensor_info [in] description for tensor stored shared memory.


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit a2b80cb22a62a6757c7dd31e684ca632e0b79268
Subproject commit 4a9bfd772cad72ff281a2e21d59b8d225a26789c

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit cfabf622b803d5957563a73652a0ce5086aab99d
Subproject commit 86162f60807c063f7344f902e443fc99657be637

+ 0
- 1
tests/CMakeLists.txt View File

@@ -19,7 +19,6 @@ add_subdirectory(depends/cce)
add_subdirectory(depends/slog)
add_subdirectory(depends/mmpa)
add_subdirectory(depends/runtime)
add_subdirectory(depends/omg)
add_subdirectory(depends/hccl)
add_subdirectory(depends/profiler)
add_subdirectory(depends/error_manager)


+ 5
- 0
tests/depends/mmpa/CMakeLists.txt View File

@@ -29,6 +29,11 @@ include_directories(${GE_CODE_DIR}/inc/framework)
include_directories(${GE_CODE_DIR}/metadef/inc/external)

add_library(mmpa_stub SHARED ${SRCS})

target_compile_options(mmpa_stub PRIVATE
-g
)

target_link_libraries(mmpa_stub PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed


+ 7
- 3
tests/depends/mmpa/src/mmpa_stub.cc View File

@@ -231,8 +231,12 @@ INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone)
INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen)
{
INT32 ret = EN_OK;
char *pRet = realpath(path, realPath);
if (pRet == NULL) {
if (path == nullptr || realPath == nullptr || realPathLen < MMPA_MAX_PATH) {
return EN_INVALID_PARAM;
}

char *ptr = realpath(path, realPath);
if (ptr == nullptr) {
ret = EN_ERROR;
}
return ret;
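The stub now validates its arguments before calling realpath, matching the mmpa contract that the output buffer must hold at least MMPA_MAX_PATH bytes. A short usage sketch against this stub; the path is illustrative:

CHAR resolved[MMPA_MAX_PATH] = {};
INT32 rc = mmRealPath("./build.sh", resolved, MMPA_MAX_PATH);
if (rc == EN_INVALID_PARAM) {
  // null argument or a buffer smaller than MMPA_MAX_PATH
} else if (rc == EN_ERROR) {
  // realpath itself failed (e.g. the file does not exist)
}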
@@ -260,7 +264,7 @@ INT32 mmDlclose(VOID *handle)

CHAR *mmDlerror()
{
return "";
return dlerror();
}

INT32 mmDladdr(VOID *addr, mmDlInfo *info)


+ 0
- 59
tests/depends/omg/CMakeLists.txt View File

@@ -1,59 +0,0 @@
# Copyright 2019-2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

#cmake_minimum_required(VERSION 2.8)

project(OMG_CCE)

set(CMAKE_CXX_STANDARD 11)

include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc)
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce)
include_directories(${GE_CODE_DIR}/inc)
include_directories(${GE_CODE_DIR}/metadef/inc)
include_directories(${GE_CODE_DIR}/inc/framework)
include_directories(${GE_CODE_DIR}/metadef/inc/graph)
include_directories(${GE_CODE_DIR}/inc/external)
include_directories(${GE_CODE_DIR}/metadef/inc/external)
include_directories(${GE_CODE_DIR}/metadef/inc/external/graph)
include_directories(${GE_CODE_DIR}/ge)
include_directories(${CMAKE_BINARY_DIR})
include_directories(${CMAKE_BINARY_DIR}/proto/ge)
set(PROTO_LIST
"${GE_CODE_DIR}/metadef/proto/om.proto"
"${GE_CODE_DIR}/metadef/proto/task.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

set(SRCS
# "${GE_CODE_DIR}/src/ge/common/util.cc"
"src/omg_stub.cc"
)

add_library(omg_stub SHARED ${SRCS} ${PROTO_SRCS} ${PROTO_HDRS})

target_compile_definitions(omg_stub PRIVATE
google=ascend_private
)

target_link_libraries(omg_stub PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
ascend_protobuf
-Wl,--as-needed
c_sec
json
)

+ 0
- 878
tests/depends/omg/src/omg_stub.cc View File

@@ -1,878 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <map>
#include <fstream>
#include <unordered_map>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>

#include "mmpa/mmpa_api.h"
#include "common/debug/log.h"
#include "common/debug/memory_dumper.h"
#include "common/types.h"
#include "common/util.h"
#include "common/string_util.h"
#include "common/properties_manager.h"
#include "common/model_parser/base.h"
#include "graph/model.h"
#include "cce/dnn.h"
#include "ge/ge_api_types.h"
#include "framework/common/ge_types.h"
#include "graph/utils/op_desc_utils.h"
#include "common/profiling/profiling_manager.h"

using domi::domiTensorFormat_t;
using namespace cce;
using namespace ge;

struct PROC_PARAM {
uint8_t *model_name;

// ISV Ek buffer
uint8_t *model_key;
uint32_t model_key_len;

// ISV root certificate buffer
uint8_t *root_cert;
uint32_t root_cert_len;

// ISV private key buffer
uint8_t *pri_key;
uint32_t pri_key_len;

// Raw AI Module Image buffer
uint8_t *ai_image;
uint32_t ai_image_len;

// ISV HW key buffer
uint8_t *hw_key;
uint32_t hw_key_len;
};

#ifdef __cplusplus
extern "C" {
#endif
using namespace ge;
namespace {
const char FMK_STATUS_FILE_DIR_ENV[] = "FMK_STATUS_FILE_DIR";
const char JOBSTATE_FILE_NAME[] = "jobstateupdate_framework";
const char HCOM_DETECT_FILE_NAME[] = "hcom_detection_result";
const char FILE_SEPARATE[] = "/";
} // namespace

#ifdef __cplusplus
}
#endif

namespace ge {
struct GeModelPartition {
ModelPartitionType type_ = MODEL_DEF;
uint8_t *data_ = nullptr;
size_t size_ = 0;

GeModelPartition() = default;

GeModelPartition(const GeModelPartition &partition){};

GeModelPartition &operator=(const GeModelPartition &partition) = delete;

~GeModelPartition() {
if (data_ != nullptr) {
delete[] data_;
data_ = nullptr;
}
}

Status SetData(uint8_t *data, size_t size) {
size_ = size;
data_ = new (std::nothrow) uint8_t[size]();
errno_t err;
err = memcpy_s(data_, size_, data, size);
if (err) {
GELOGE(ge::FAILED, "[GeModel Partition] Error occur when copy GeModel Partition data.");
return FAILED;
}
return SUCCESS;
}

Status SetType(ModelPartitionType type) {
type_ = type;
return SUCCESS;
}
};
struct OmFileContext {
vector<GeModelPartition> partition_datas_;
vector<char> partition_table_;
uint32_t model_data_len_;
};

class SubGraphInfo;
using SubGraphInfoPtr = std::shared_ptr<ge::SubGraphInfo>;

using GeModelPartitionPtr = std::shared_ptr<GeModelPartition>;
using ModelPtr = std::shared_ptr<ge::Model>;
class GeModel {
public:
explicit GeModel(const ModelPtr &model_ptr);
~GeModel() = default;
GeModel(const GeModel &other) = delete;
GeModel &operator=(const GeModel &other) = delete;

ModelPtr GetModelPtr() const;
Status AddPartition(uint8_t *data, size_t size, ModelPartitionType type);
Status GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition);
uint8_t GetPlatformType() const;
void SetPlatformType(const uint8_t platform_type) { platform_type_ = platform_type; }

private:
std::map<ModelPartitionType, GeModelPartitionPtr> partitions_;
ModelPtr model_ = nullptr;
uint8_t platform_type_ = {0};
};
using GeModelPtr = std::shared_ptr<ge::GeModel>;

GeModel::GeModel(const ModelPtr &model_ptr) { this->model_ = model_ptr; }

ModelPtr GeModel::GetModelPtr() const { return this->model_; }

uint8_t GeModel::GetPlatformType() const { return platform_type_; }

Status GeModel::AddPartition(uint8_t *data, size_t size, ModelPartitionType type) {
if (size == 0) {
return FAILED;
}

if (data == nullptr) {
return FAILED;
}

auto iter = partitions_.find(type);
if (iter != partitions_.end()) {
return FAILED;
}

GeModelPartitionPtr partition = nullptr;
GE_MAKE_SHARED(partition = std::make_shared<ge::GeModelPartition>(), return FAILED);
Status ret = partition->SetType(type);
if (ret != SUCCESS) {
return FAILED;
}
ret = partition->SetData(data, size);
if (ret != SUCCESS) {
return FAILED;
}

partitions_.insert(std::pair<ModelPartitionType, GeModelPartitionPtr>(type, partition));
return SUCCESS;
}

Status GeModel::GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition) {
auto iter = partitions_.find(type);
if (iter == partitions_.end()) {
return FAILED;
}

partition = iter->second;
return SUCCESS;
}
class OmFileSaveHelper {
public:
OmFileSaveHelper();
~OmFileSaveHelper();
vector<GeModelPartition> &GetModelPartitions();
ModelPartitionTable *GetPartitionTable();
ModelFileHeader model_header_;
ModelFileHeader &GetModelFileHeader() { return model_header_; }
void AddPartition(GeModelPartition &partition);

private:
OmFileContext context_;
};

OmFileSaveHelper::OmFileSaveHelper() {}

OmFileSaveHelper::~OmFileSaveHelper() {}

vector<GeModelPartition> &OmFileSaveHelper::GetModelPartitions() {
static std::vector<GeModelPartition> tmp;
return tmp;
}

ModelPartitionTable *OmFileSaveHelper::GetPartitionTable() { return nullptr; }

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OmFileSaveHelper::AddPartition(GeModelPartition &partition) {
context_.partition_datas_.push_back(partition);
context_.model_data_len_ += partition.size_;
}
class ModelBuilder {
public:
ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs,
const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode);
virtual ~ModelBuilder();
Status BuildModel(ge::Model &model_def);
Status SaveWeightsToModel(ge::Model &model);
Status SaveDataToModel(ge::Model &model, ge::GeModel &ge_model);
Status PreBuildModel();
Status BuildModelForGetTask(ge::Model &model_def);
ge::Buffer GetWeightBuffer() const;
void SetModelVersion(ge::Model &model_def);

public:
ge::Buffer weight_buffer_;
};

ModelBuilder::ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs,
const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode) {
weight_buffer_ = ge::Buffer(4100000);
}

ModelBuilder::~ModelBuilder() {}

Status ModelBuilder::SaveWeightsToModel(ge::Model &model) { return SUCCESS; }

Status ModelBuilder::BuildModel(ge::Model &model_def) { return SUCCESS; }

Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { return SUCCESS; }

Status ModelBuilder::PreBuildModel() { return SUCCESS; }

Status ModelBuilder::BuildModelForGetTask(ge::Model &model_def) { return SUCCESS; }

void ModelBuilder::SetModelVersion(ge::Model &model_def) { return; }

ge::Buffer ModelBuilder::GetWeightBuffer() const { return ge::Buffer(4100000); }

} // namespace ge

using ProcParam = struct PROC_PARAM;

namespace ge {
#include <iostream>
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N = 0;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C = 1;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_H = 2;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_W = 3;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_N = 0;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_H = 1;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_W = 2;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_C = 3;

const uint32_t MODEL_FILE_MAGIC_NUM = 0x444F4D49;
const uint32_t MODEL_FILE_HEAD_LEN = 256;
const uint32_t MODEL_VERSION = 0x10000000;
const int MAX_FILE_SIZE_LIMIT = INT_MAX;
bool FC_WEIGHT_COMPRESS_FLAG = false;

bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length) {
length = 10;
*buffer = new (std::nothrow) char[10]();
GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(*buffer == nullptr, false, "new an object failed.");
return true;
}
bool ReadProtoFromText(const char *file, google::protobuf::Message *message) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((nullptr == file || nullptr == message), return false,
"incorrect parameter. nullptr == file || nullptr == message");
string real_path = RealPath(file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "proto file path '%s' not valid", file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path.c_str()) == -1, return false, "file size not valid.");
std::ifstream fs(real_path.c_str(), std::ifstream::in);

if (!fs.is_open()) {
GELOGE(ge::FAILED, "proto file '%s' open fail.", file);
return false;
}
google::protobuf::io::IstreamInputStream input(&fs);
bool ret = google::protobuf::TextFormat::Parse(&input, message);
GE_IF_BOOL_EXEC(ret != true,
GELOGI("call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file."));
fs.close();
return ret;
}

uint64_t GetCurrentTimestap() { return 0; }

// get length of file
long GetFileLength(const std::string &input_file) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(input_file.empty(), return -1, "input_file path is null.");
string real_path = RealPath(input_file.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str());
unsigned long long file_length = 0;
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1,
"open file failed.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.",
file_length, MAX_FILE_SIZE_LIMIT);
return file_length;
}
string RealPath(const char *path) {
string s = path;
if (s.size() >= PATH_MAX) {
return "";
}
if (s == "." || s == "1") {
return path;
// for insert_aipp_op unittest
} else if (s.substr(0, 3) == "llt") {
return path;
} else {
return "22";
}
}

bool CheckInputPathValid(const string &file_path) { return true; }
bool ReadProtoFromArray(const void *data, int size, Message *proto) { return true; }

struct ModelPartition {
ModelPartitionType type;
uint8_t *data = 0;
uint32_t size = 0;
};

class InsertNewOpUtil {
public:
InsertNewOpUtil();
~InsertNewOpUtil();
Status InsertNewOps(const ComputeGraphPtr &graph);
Status InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path);
Status Parse(const char *conf_path);
};

InsertNewOpUtil::InsertNewOpUtil() {}

Status InsertNewOpUtil::InsertNewOps(const ComputeGraphPtr &graph) { return SUCCESS; }

Status InsertNewOpUtil::InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path) { return SUCCESS; }

Status InsertNewOpUtil::Parse(const char *conf_path) { return SUCCESS; }

Status InitOME() { return SUCCESS; }
class GraphOptimizer {
public:
Status Optimize();
Status OptimizeAfterCal();
Status AdjustDataOpDesc();
Status InsertTransOp();
Status FusionFmkop();
Status Optimize4Cloud();
Status Optimize4FlowCtrl();
Status OptimizeBeforeBuild();
};
Status GraphOptimizer::Optimize() { return SUCCESS; }

Status Init(Options options) { return SUCCESS; }

Status Shutdown(Options options) { return SUCCESS; }

class Session {
public:
// singleton
static Session *Instance();
const uint32_t &DeviceId() const;
};

const uint32_t &Session::DeviceId() const { return 0; }

Session *Session::Instance() {
static Session instance;
return &instance;
}
struct OmgContext {
domiTensorFormat_t format;

// get input format from cmd
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map;
std::vector<domiTensorFormat_t> output_formats;

// user-designated input dims
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims;
// global input dims
std::map<std::string, std::vector<int64_t>> input_dims;

// solve rename op e.g: Detectionoutput:SsdDetectiontOutput
std::map<std::string, std::string> op_conf_map;
// save output node of network: key is op name, value = index, index is the output index of op
std::map<std::string, std::vector<int32_t>> out_nodes_map;
// user-designated output nodes (used for determining the order)
std::vector<std::pair<std::string, int32_t>> user_out_nodes;
// save the path of custom_aicpu
std::vector<std::string> aicpu_op_run_paths;
// save ddk
std::string ddk_version;
// save format
domiTensorFormat_t net_format;

FrameworkType type;
// RunMode run_mode;
bool train_flag = false;

std::string output_type;

/// save the name of network
/// eg:faster-rcnn, based on FirstStageProcessor after scope_fusion is faster-rcnn
/// then reorder conv+reshape of FirstStageBoxPredictor/BoxEncodingPredictor
/// need to delete op of reshape
std::string net_name;
};
} // namespace ge

namespace domi {
ge::OmgContext &GetContext() {
static ge::OmgContext tmp;
return tmp;
}
} // namespace domi

namespace ge {
class OpUtils {
public:
static Status InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor);
static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim,
ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt);
static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor);
};
Status OpUtils::InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor) {
ccCreatePoolingMaskDescriptor(&cc_tensor);
return SUCCESS;
}
Status OpUtils::InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim,
ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) {
Status ret = SUCCESS;
return ret;
}

class FileSaver {
public:
Status SaveToFile(const string &file_path, ModelFileHeader &model_file_header,
ModelPartitionTable &model_partition_table, const std::vector<ModelPartition> &partition_datas);
Status SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param,
const ModelFileHeader *model_file_header, bool check_sum);
};

Status FileSaver::SaveToFile(const string &file_path, ModelFileHeader &model_file_header,
ModelPartitionTable &model_partition_table,
const std::vector<ModelPartition> &partition_datas) {
return SUCCESS;
}

Status FileSaver::SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param,
const ModelFileHeader *model_file_header, bool check_sum) {
return SUCCESS;
}

class ModelSaver : public FileSaver {};

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::DestroyTensorDescriptor(
ccTensorDescriptor_t &cc_tensor) {
if (nullptr != cc_tensor) {
ccStatus_t ret = ccDestroyTensorDescriptor(&cc_tensor);
GE_LOGE_IF(CC_STATUS_SUCCESS != ret, "ccDestroyTensorDescriptor failed. ret = %d", ret);
cc_tensor = nullptr;
}
}

} // namespace ge

namespace domi {
class OpRegistrationData {};

class OpRegistry {
public:
static OpRegistry *Instance();
std::vector<OpRegistrationData> registration_datas;

ImplyType GetImplyType(const std::string &op_type);
void GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type);
};

OpRegistry *OpRegistry::Instance() {
static OpRegistry instance;
return &instance;
}

void OpRegistry::GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type) {
if (imply_type == ImplyType::AI_CPU) {
vec_op_type.push_back("square");
}
}

class OpRegistrationTbe {
public:
static OpRegistrationTbe *Instance();

bool Finalize(OpRegistrationData &reg_data, bool is_train);
};

OpRegistrationTbe *OpRegistrationTbe::Instance() {
static OpRegistrationTbe instance;
return &instance;
}

bool OpRegistrationTbe::Finalize(OpRegistrationData &reg_data, bool is_train) { return true; }
} // namespace domi

namespace ge {
class GraphPrepare {
private:
Status OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph);
};

Status GraphPrepare::OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph) { return SUCCESS; }
} // namespace ge

namespace ge {

Status GetOriginalType(const ge::NodePtr &node, string &type) {
type = node->GetType();
GE_IF_BOOL_EXEC(type != FRAMEWORKOP, return SUCCESS);
ge::AttrUtils::GetStr(node->GetOpDesc(), "original_type", type);
return SUCCESS;
}

Status SetCycleEvent(const ge::NodePtr &node) { return SUCCESS; }

Status SetStreamLabel(const ge::NodePtr &node, const std::string &label) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = AttrUtils::CloneOpDesc(node->GetOpDesc());
GE_CHECK_NOTNULL(tmp_desc);

if (!AttrUtils::SetStr(tmp_desc, "_stream_label", label)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_STREAM_LABEL failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetActiveLabelList(const ge::NodePtr &node, const std::vector<std::string> &label) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add list of active_label
if (!AttrUtils::SetListStr(tmp_desc, "_active_label", label)) {
GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_ACTIVE_LABEL_LIST failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetSwitchBranchNodeLabel(const ge::NodePtr &node, const std::string &branch_label) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add branch_label of switch
if (!AttrUtils::SetStr(tmp_desc, "_switch_branch_node_label", branch_label)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_BRANCH_NODE_LABEL failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetSwitchTrueBranchFlag(const ge::NodePtr &node, bool value) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add switch_true_branch_flag
if (!AttrUtils::SetBool(tmp_desc, "_switch_true_branch_flag", value)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetOriginalNodeName(const ge::NodePtr &node, const std::string &orig_name) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// record original_node_name
if (!AttrUtils::SetStr(tmp_desc, "_original_node_name", orig_name)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_ORIG_NODE_NAME failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetCyclicDependenceFlag(const ge::NodePtr &node) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add cyclic_dependence_flag
if (!AttrUtils::SetBool(tmp_desc, "_cyclic_dependence_flag", true)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_CYCLIC_DEPENDENCE_FLAG failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetNextIteration(const ge::NodePtr &node, const std::string &next) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);

if (!AttrUtils::SetStr(tmp_desc, "_next_iteration_node", next)) {
GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_NEXT_ITERATION failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}
} // namespace ge

namespace cce {
bool ccGetFuncState(ccFuncParamType_t type) { return true; }
} // namespace cce

namespace ge {
Status UnloadModel(uint32_t model_id) { return SUCCESS; }

Status GetInputOutputDescInfo(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc) {
return SUCCESS;
}

Status DataInput(const InputData *input_data, OutputData *output_data) { return SUCCESS; }
/*
class ModelManager {
public:
static std::shared_ptr<ModelManager> GetInstance();
static void FinalizeForPtr(ModelManager *) {}
Status DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs,
std::vector<ge::TensorInfo> &outputs);
Status DataInput(const InputData &input_data, OutputData &output_data);
Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc);
Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);
Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats, std::vector<uint32_t> &output_formats);
Status Stop(uint32_t model_id);
Status Unload(uint32_t model_id);
Status LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model,
std::shared_ptr<ModelListener> listener);
Status Start(uint32_t model_id);
Status GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size);
Status LoadModelOffline(uint32_t &model_id, const ModelData &model, std::shared_ptr<ModelListener> listener = nullptr,
void *dev_ptr = nullptr, size_t mem_size = 0, void *weight_ptr = nullptr,
size_t weight_size = 0);
Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids);

Status HandleCommand(const Command &command);
Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
OutputData &output_data);
void DestroyAicpuSession(uint64_t session_id);
};
void ModelManager::DestroyAicpuSession(uint64_t session_id) {}
std::shared_ptr<ModelManager> ModelManager::GetInstance() {
static std::shared_ptr<ModelManager> instance_ptr =
shared_ptr<ModelManager>(new ModelManager(), ModelManager::FinalizeForPtr);
return instance_ptr;
}

Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs,
std::vector<ge::TensorInfo> &outputs) {
return SUCCESS;
}

Status ModelManager::DataInput(const InputData &input_data, OutputData &output_data) { return SUCCESS; }

Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc) {
return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
return SUCCESS;
}

Status ModelManager::Stop(uint32_t model_id) { return SUCCESS; }

Status ModelManager::Unload(uint32_t model_id) { return SUCCESS; }

Status ModelManager::LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model,
std::shared_ptr<ModelListener> listener) {
return SUCCESS;
}

Status ModelManager::Start(uint32_t model_id) { return SUCCESS; }

Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { return SUCCESS; }

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
return SUCCESS;
}

Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data,
const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids) {
return SUCCESS;
}

Status ModelManager::HandleCommand(const Command &command) { return SUCCESS; }

Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
OutputData &output_data) {
return SUCCESS;
}

*/

} // namespace ge

namespace ge {

enum JobState {
JOBSTATE_WAITING = 1,
JOBSTATE_RUNNING,
JOBSTATE_KILLING,
JOBSTATE_SUCCEED,
JOBSTATE_FAILED,
JOBSTATE_KILLED,
JOBSTATE_UNKOWN
};

enum JobSubState {
JOBSUBSTATE_ENV_INIT = 201,
JOBSUBSTATE_ENV_FIN,
JOBSUBSTATE_RESOUCE_ALLOC,
JOBSUBSTATE_MODEL_COMPILE,
JOBSUBSTATE_GRAPH_PREPARE,
JOBSUBSTATE_GRAPH_SPLIT,
JOBSUBSTATE_GRAPH_OPTIMIZE,
JOBSUBSTATE_GRAPH_BUILD,
JOBSUBSTATE_GRAPH_LOAD,
JOBSUBSTATE_GRAPH_EXEC,
JOBSUBSTATE_GRAPH_UNLOAD,
JOBSUBSTATE_OTHER
};

enum ErrorModule {
ERROR_MODULE_DRIVER = 0x01,
ERROR_MODULE_RUNTIME = 0x04,
ERROR_MODULE_CCE = 0x06,
ERROR_MODULE_FMK = 0x08,
ERROR_MODULE_HCCL = 0x12
};

class CsaInteract {
public:
CsaInteract &GetInstance();
void WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state);
void Init(int32_t dev_index, int64_t job_id);
Status WriteJobState(JobState job_state, JobSubState job_sub_state = JOBSUBSTATE_OTHER,
uint32_t module_ret_errcode = SUCCESS, ErrorModule error_module = ERROR_MODULE_FMK);
// device index
int32_t dev_index_;
// job id
int64_t job_id_;
// is initialization complete
bool is_init_;
// current job state
JobState curr_state_;
// job state file
std::string job_state_file_;
// network connectivity detect file
std::string hcom_detect_file_;
// identification of internal errors that occurred during the training
bool is_have_internal_error_;
};

CsaInteract &CsaInteract::GetInstance() {
static CsaInteract instance;
return instance;
}

void CsaInteract::Init(int32_t dev_index, int64_t job_id) {
if (!is_init_) {
dev_index_ = dev_index;
job_id_ = job_id;
string csa_path_prefix;
if (std::getenv(FMK_STATUS_FILE_DIR_ENV) != nullptr) {
csa_path_prefix = std::getenv(FMK_STATUS_FILE_DIR_ENV);
}
if (!csa_path_prefix.empty()) {
std::string job_state_file = csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + JOBSTATE_FILE_NAME;
std::string hcom_detect_file =
csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + HCOM_DETECT_FILE_NAME;
job_state_file_ = RealPath(job_state_file.c_str());
hcom_detect_file_ = RealPath(hcom_detect_file.c_str());
}
is_init_ = true;
}
}

void CsaInteract::WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state) {}

} // namespace ge

Status ModelParserBase::LoadFromFile(const char *model_path, const char *key, int32_t priority,
ge::ModelData &model_data) {
return SUCCESS;
}

Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode,
ErrorModule error_module) {
return SUCCESS;
}

namespace ge {

static std::map<ge::DataType, uint32_t> data_type_to_length = {
{DT_BOOL, sizeof(bool)}, {DT_INT64, sizeof(int64_t)}, {DT_UINT64, sizeof(int64_t)}, {DT_FLOAT, sizeof(float)},
{DT_INT32, sizeof(int32_t)}, {DT_UINT32, sizeof(int32_t)}, {DT_INT8, sizeof(char)}, {DT_UINT8, sizeof(char)},
{DT_INT16, sizeof(int16_t)}, {DT_UINT16, sizeof(int16_t)}, {DT_FLOAT16, sizeof(int16_t)}, {DT_DOUBLE, sizeof(double)},
};

class TypeUtils {
public:
static bool GetDataTypeLength(ge::DataType data_type, uint32_t &length);
static bool CheckUint64MulOverflow(uint64_t a, uint32_t b);
};

bool TypeUtils::GetDataTypeLength(ge::DataType data_type, uint32_t &length) {
auto it = data_type_to_length.find(data_type);
if (it != data_type_to_length.end()) {
length = it->second;
return true;
} else {
return false;
}
}

bool TypeUtils::CheckUint64MulOverflow(uint64_t a, uint32_t b) {
// Not overflow
if (a == 0) {
return false;
}
if ((ULLONG_MAX / a) >= b) {
return false;
}
return true;
}
} // namespace ge

+ 12
- 4
tests/depends/runtime/src/runtime_stub.cc View File

@@ -27,8 +27,8 @@ rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) {
}

rtError_t rtCtxGetCurrent(rtContext_t *ctx) {
int x = 1;
*ctx = (void *)x;
uintptr_t x = 1;
*ctx = (rtContext_t *)x;
return RT_ERROR_NONE;
}

@@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char

rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }

rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }

rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; }

rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) {
return RT_ERROR_NONE;
}

rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc,
rtStream_t stream) {
return RT_ERROR_NONE;
@@ -156,7 +163,7 @@ rtError_t rtSetKernelReportCallback(rtKernelReportCallback callback) {
rt_kernel_info.module_addr = (void *)100;
rt_kernel_info.module_size = 100;

rtStream_t stream;
rtStream_t stream = nullptr;
callback(stream, &rt_kernel_info);
return RT_ERROR_NONE;
}
@@ -193,7 +200,8 @@ rtError_t rtModelCreate(rtModel_t *model, uint32_t flag) {
}

rtError_t rtModelDestroy(rtModel_t model) {
delete model;
uint32_t *stub = static_cast<uint32_t *>(model);
delete stub;
return RT_ERROR_NONE;
}
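The old delete model; deleted through an opaque handle, which is undefined behaviour because the compiler cannot determine the object's real type or pick the matching deallocation; casting back to the uint32_t* that the stub's rtModelCreate presumably allocates makes the delete well-defined. The same pattern in isolation:

#include <cstdint>

void DestroyOpaqueHandle(void *handle) {
  // deleting a void* is undefined behaviour; restore the concrete type first
  uint32_t *stub = static_cast<uint32_t *>(handle);
  delete stub;
}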



+ 44
- 28
tests/ut/ge/CMakeLists.txt View File

@@ -18,23 +18,23 @@ project(ut_ge)
set(CMAKE_CXX_STANDARD 11)

set(PROTO_LIST
"${GE_CODE_DIR}/metadef/proto/om.proto"
"${GE_CODE_DIR}/metadef/proto/ge_ir.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/insert_op.proto"
"${GE_CODE_DIR}/metadef/proto/dump_task.proto"
"${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto"
"${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto"
"${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto"
"${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto"
)
"${GE_CODE_DIR}/metadef/proto/om.proto"
"${GE_CODE_DIR}/metadef/proto/ge_ir.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/insert_op.proto"
"${GE_CODE_DIR}/metadef/proto/dump_task.proto"
"${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto"
"${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto"
"${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto"
"${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

@@ -135,6 +135,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/types.cc"
"${GE_CODE_DIR}/ge/common/fmk_error_codes.cc"
"${GE_CODE_DIR}/ge/common/op/ge_op_utils.cc"
"${GE_CODE_DIR}/ge/common/context/ctx.cc"
"${GE_CODE_DIR}/ge/graph/manager/util/variable_accelerate_ctrl.cc"
"${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc"
"${GE_CODE_DIR}/ge/generator/ge_generator.cc"
@@ -163,7 +164,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/model/ge_root_model.cc"
"${GE_CODE_DIR}/ge/common/model_parser/base.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_server.cc"
@@ -266,8 +267,8 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc"
"${GE_CODE_DIR}/ge/model/ge_model.cc"
"${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc"
@@ -393,14 +394,13 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES
"${GE_CODE_DIR}/ge/graph/manager/util/debug.cc"
"${GE_CODE_DIR}/ge/common/properties_manager.cc"
"${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc"
"${GE_CODE_DIR}/ge/common/model_parser/base.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
"${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc"
"${GE_CODE_DIR}/ge/common/util.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc"
@@ -458,7 +458,7 @@ set(GRAPH_BUILD_COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc"
"${GE_CODE_DIR}/ge/common/thread_pool.cc"
"${GE_CODE_DIR}/ge/common/model_parser/base.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
"${GE_CODE_DIR}/ge/graph/build/run_context.cc"
"${GE_CODE_DIR}/ge/graph/common/local_context.cc"
)
@@ -627,7 +627,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
#"graph/load/new_model_manager_davinci_model_unittest.cc"
"graph/load/model_manager_unittest.cc"
#"graph/load/new_model_manager_task_build_unittest.cc"
"graph/load/new_model_manager_model_manager_aicpu_unittest.cc"
"graph/load/new_model_manager_model_manager_aicpu_unittest.cc"
"graph/load/end_graph_task_unittest.cc"
"graph/load/new_model_manager_event_manager_unittest.cc"
#"graph/load/output_net_output_unittest.cc"
@@ -638,7 +638,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
"graph/load/kernel_task_info_unittest.cc"
"graph/load/memcpy_addr_async_task_info_unittest.cc"
"graph/load/memcpy_async_task_info_unittest.cc"
"graph/load/cpu_queue_schedule_unittest.cc"
"graph/load/cpu_queue_schedule_unittest.cc"
#"graph/graph_load_unittest.cc"
"graph/ge_executor_unittest.cc"
"graph/load/model_helper_unittest.cc"
@@ -671,7 +671,7 @@ set(PASS_TEST_FILES
"graph/passes/trans_op_depth_fusion_pass_unittest.cc"
"graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc"
"graph/passes/constant_folding_pass_unittest.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc"
"graph/passes/stop_gradient_pass_unittest.cc"
"graph/passes/prevent_gradient_pass_unittest.cc"
"graph/passes/identity_pass_unittest.cc"
@@ -752,25 +752,38 @@ set(MULTI_PARTS_TEST_FILES
"graph/build/mem_assigner_unittest.cc"
"graph/preprocess/graph_preprocess_unittest.cc"
"graph/manager/hcom_util_unittest.cc"
"graph/manager/graph_caching_allocator_unittest.cc"
"session/omg_omg_unittest.cc"
)

set(GENERATOR_TEST_FILES
"generator/ge_generator_unittest.cc"
)

set(EXECUTOR_TEST_FILES
"executor/ge_executor_unittest.cc"
)

set(SINGLE_OP_TEST_FILES
#"single_op/single_op_model_unittest.cc"
"single_op/single_op_model_unittest.cc"
"single_op/single_op_manager_unittest.cc"
"single_op/stream_resource_unittest.cc"
"single_op/single_op_task_unittest.cc"
)

set(PROFILING_MNG_TEST_FILES
"profiling/ge_profiling_manager_unittest.cc"
)

set(HYBRID_TEST_FILES
"hybrid/ge_hybrid_unittest.cc"
)

set(OTHERS_TEST_FILES
"plugin_manager/ge_util_unittest.cc"
)

list(APPEND COMMON_SHARED_LIBRARIES
omg_stub
c_sec
slog_stub
cce_ge_stub
@@ -1055,10 +1068,13 @@ target_link_libraries(ut_libge_kernel_utest
# libge_distinct_load_utest
add_executable(ut_libge_distinct_load_utest
${COMMON_TEST_FILES}
${GENERATOR_TEST_FILES}
${EXECUTOR_TEST_FILES}
${DISTINCT_GRAPH_LOAD_TEST_FILES}
${DISTINCT_GRAPH_LOAD_SRC_FILES}
${SINGLE_OP_TEST_FILES}
${PROFILING_MNG_TEST_FILES}
${HYBRID_TEST_FILES}
)

target_compile_options(ut_libge_distinct_load_utest PRIVATE


+ 42
- 0
tests/ut/ge/executor/ge_executor_unittest.cc View File

@@ -0,0 +1,42 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#define private public
#define protected public
#include "executor/ge_executor.h"
#include "graph/utils/tensor_utils.h"

using namespace std;

namespace ge {
class UtestGeExecutor : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

TEST_F(UtestGeExecutor, test_single_op_exec) {
GeExecutor executor;
ModelData model_data;
string model_name = "1234";

EXPECT_EQ(executor.LoadSingleOp(model_name, model_data, nullptr, nullptr), ACL_ERROR_GE_INTERNAL_ERROR);
EXPECT_EQ(executor.LoadDynamicSingleOp(model_name, model_data, nullptr, nullptr), PARAM_INVALID);
}
} // namespace ge

+ 78
- 0
tests/ut/ge/generator/ge_generator_unittest.cc View File

@@ -0,0 +1,78 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#define private public
#define protected public
#include "generator/ge_generator.h"
#include "graph/utils/tensor_utils.h"

using namespace std;

namespace ge {
class UtestGeGenerator : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

TEST_F(UtestGeGenerator, test_build_single_op_offline) {
GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor_desc, 512);

shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add");
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS);

GeTensor tensor(tensor_desc);
const vector<GeTensor> inputs = { tensor, tensor };
const vector<GeTensor> outputs = { tensor };

// Not initialized yet, so impl is null.
GeGenerator generator;
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), PARAM_INVALID);

// const map<string, string> &options
generator.Initialize({});
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED);
}

/*
TEST_F(UtestGeGenerator, test_build_single_op_online) {
GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor_desc, 512);

shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add");
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS);

GeTensor tensor(tensor_desc);
const vector<GeTensor> inputs = { tensor, tensor };
const vector<GeTensor> outputs = { tensor };

// Not initialized yet, so impl is null.
GeGenerator generator;
generator.Initialize({});
ModelBufferData model_buffer;
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_SYS, model_buffer), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED);
}
*/

} // namespace ge

+ 95
- 32
tests/ut/ge/graph/build/mem_assigner_unittest.cc View File

@@ -25,10 +25,12 @@
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "omg/omg_inner_types.h"
#include "../passes/graph_builder_utils.h"

#define protected public
#define private public
#include "graph/build/memory/binary_block_mem_assigner.h"
#include "graph/build/memory/graph_mem_assigner.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/max_block_mem_assigner.h"
#undef protected
@@ -41,7 +43,7 @@ using domi::GetContext;

class UtestMemoryAssignerTest : public testing::Test {
public:
ge::OpDescPtr createOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
auto desc_temp = *desc_temp_ptr;
@@ -55,26 +57,46 @@ class UtestMemoryAssignerTest : public testing::Test {
op_def->SetWorkspaceBytes(workspace_bytes);
return op_def;
}
void make_graph(ge::ComputeGraphPtr graph) {
ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000);
ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
auto desc_temp = *desc_temp_ptr;

TensorUtils::SetSize(desc_temp, 1024);
op_def->AddInputDesc(desc_temp);

auto desc_output_ptr = make_shared<ge::GeTensorDesc>();
auto desc_output = *desc_output_ptr;
TensorUtils::SetSize(desc_output, 6500);
ge::TensorUtils::SetReuseInput(desc_output, true);
ge::TensorUtils::SetReuseInputIndex(desc_output, 0);
op_def->AddOutputDesc(desc_output);

std::vector<int64_t> workspace_bytes;
workspace_bytes.push_back(wsByte);
op_def->SetWorkspaceBytes(workspace_bytes);
return op_def;
}
void MakeGraph(ge::ComputeGraphPtr &graph) {
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
op_def_a->SetStreamId(0);
ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000);
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000);
op_def_b->SetStreamId(0);
ge::OpDescPtr op_def_c = createOpWithWsSize("C", 16000);
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000);
op_def_c->SetStreamId(1);
ge::OpDescPtr op_def_d = createOpWithWsSize("D", 24000);
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000);
op_def_d->SetStreamId(2);
ge::OpDescPtr op_def_e = createOpWithWsSize("E", 24000);
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000);
op_def_e->SetStreamId(3);
ge::OpDescPtr op_def_f = createOpWithWsSize("F", 30000);
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000);
op_def_f->SetStreamId(2);
ge::OpDescPtr op_def_g = createOpWithWsSize("G", 32000);
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000);
op_def_g->SetStreamId(3);
ge::OpDescPtr op_def_h = createOpWithWsSize("H", 48000);
ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000);
op_def_h->SetStreamId(2);
ge::OpDescPtr op_def_i = createOpWithWsSize("I", 60000);
ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000);
op_def_i->SetStreamId(2);
ge::OpDescPtr op_def_j = createOpWithWsSize("J", 256000, NETOUTPUT);
ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT);
op_def_j->SetStreamId(3);

// add node
@@ -108,24 +130,10 @@ class UtestMemoryAssignerTest : public testing::Test {
graph->TopologicalSorting();
}

void make_reuse_graph(ge::ComputeGraphPtr graph) {
ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000);
ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000);

ge::OpDescPtr op_def_c = make_shared<ge::OpDesc>("C", "Some");
auto desc_input_ptr = make_shared<ge::GeTensorDesc>();
auto desc_input = *desc_input_ptr;

TensorUtils::SetSize(desc_input, 1024);
op_def_c->AddInputDesc(desc_input);

auto desc_output_ptr = make_shared<ge::GeTensorDesc>();
auto desc_output = *desc_output_ptr;
TensorUtils::SetSize(desc_output, 6500);
ge::TensorUtils::SetReuseInput(desc_output, true);
ge::TensorUtils::SetReuseInputIndex(desc_output, 0);
op_def_c->AddOutputDesc(desc_output);

void MakeReuseGraph(ge::ComputeGraphPtr graph) {
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000);
ge::OpDescPtr op_def_c = CreateRefOpWithWsSize("C", 120000);
ge::OpDescPtr op_def_d = make_shared<ge::OpDesc>("D", "CONSTANT");

ge::NodePtr node_a = graph->AddNode(op_def_a);
@@ -141,6 +149,47 @@ class UtestMemoryAssignerTest : public testing::Test {
graph->TopologicalSorting();
}

ComputeGraphPtr MakeCascadeContinuousMemoryGraph() {
ge::ut::GraphBuilder builder("graph");
auto data = builder.AddNode("data", "Data", 1, 1);
auto addn1 = builder.AddNode("addn1", "AddN", 1, 1);
auto addn2 = builder.AddNode("addn2", "AddN", 1, 1);
auto addn3 = builder.AddNode("addn3", "AddN", 1, 1);
auto concat1 = builder.AddNode("concat1", "Concat", 2, 1);
auto concat2 = builder.AddNode("concat2", "Concat", 2, 1);
auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0);

ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true);
ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true);

ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true);
ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true);

addn1->GetOpDesc()->SetOutputOffset({100});
addn2->GetOpDesc()->SetOutputOffset({200});
concat1->GetOpDesc()->SetOutputOffset({100});
addn3->GetOpDesc()->SetOutputOffset({700});
concat2->GetOpDesc()->SetOutputOffset({500});

ge::AttrUtils::SetListInt(addn1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
ge::AttrUtils::SetListInt(addn2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
ge::AttrUtils::SetListInt(addn3->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
ge::AttrUtils::SetListInt(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {200});
ge::AttrUtils::SetListInt(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {300});


builder.AddDataEdge(data, 0, addn1, 0);
builder.AddDataEdge(data, 0, addn2, 0);
builder.AddDataEdge(addn1, 0, concat1, 0);
builder.AddDataEdge(addn2, 0, concat1, 1);
builder.AddDataEdge(concat1, 0, concat2, 0);
builder.AddDataEdge(addn3, 0, concat2, 1);

return builder.GetGraph();
}

protected:
void SetUp() {}

@@ -150,7 +199,7 @@ class UtestMemoryAssignerTest : public testing::Test {
/*
TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
ge::NodePtr node_a = graph->AddNode(op_def_a);
MemoryBlock* memory_block = new MemoryBlock(0);
memory_block->Init(1, kOutput, node_a, 0, 1);
@@ -178,7 +227,7 @@ class MockBlockMemAssigner : public BlockMemAssigner {
// when check GetMemoryRanges return fail, Assign return fail
TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
MakeGraph(graph);
std::map<std::string, std::string> anchor_to_symbol;
std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
EXPECT_EQ(GraphUtils::GetRefMapping(graph, symbol_to_anchors, anchor_to_symbol), GRAPH_SUCCESS);
@@ -186,3 +235,17 @@ TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) {
MockBlockMemAssigner mock_assigner(graph, anchor_to_symbol, symbol_to_anchors);
EXPECT_EQ(mock_assigner.Assign(), FAILED);
}

TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) {
ge::ComputeGraphPtr graph = MakeCascadeContinuousMemoryGraph();
auto addn1 = graph->FindNode("addn1");
auto addn2 = graph->FindNode("addn2");
EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 100);
EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 200);
GraphMemoryAssigner memoryAssigner(graph);
MemoryOffset memory_offset(RT_MEMORY_HBM, 0);
memoryAssigner.memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
EXPECT_EQ(memoryAssigner.ReAssignContinuousMemory(false), GRAPH_SUCCESS);
EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500);
EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600);
}

+20 -1  tests/ut/ge/graph/ge_executor_unittest.cc

@@ -34,7 +34,6 @@
#include "common/types.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/task_info/kernel_task_info.h"
#include "graph/load/model_manager/task_info/kernel_ex_task_info.h"
@@ -109,6 +108,26 @@ static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
ge::AttrUtils::SetInt(op_desc, ge::ATTR_NAME_STREAM_SWITCH_COND, 0);
return op_desc;
}

TEST_F(UtestGeExecutor, load_data_from_file) {
GeExecutor ge_executor;
ge_executor.isInit_ = true;

string test_smap = "/tmp/" + std::to_string(getpid()) + "_maps";
string self_smap = "/proc/" + std::to_string(getpid()) + "/maps";
string copy_smap = "cp " + self_smap + " " + test_smap;
EXPECT_EQ(system(copy_smap.c_str()), 0);

ModelData model_data;
EXPECT_EQ(ge_executor.LoadDataFromFile(test_smap, model_data), SUCCESS);

EXPECT_NE(model_data.model_data, nullptr);
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;

ge_executor.isInit_ = false;
}

/*
TEST_F(UtestGeExecutor, fail_UnloadModel_model_manager_stop_unload_error) {
uint32_t model_id = 1;


+0 -1  tests/ut/ge/graph/graph_load_unittest.cc

@@ -24,7 +24,6 @@
#include "common/helper/model_helper.h"
#include "common/op/ge_op_utils.h"
#include "common/types.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/op_desc.h"
#include "graph/types.h"
#include "graph/utils/attr_utils.h"


+7 -0  tests/ut/ge/graph/load/davinci_model_unittest.cc

@@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) {
model.SinkModelProfile();
}

TEST_F(UtestDavinciModel, Sink_time_profile) {
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
DavinciModel model(0, nullptr);
InputData current_data;
model.SinkTimeProfile(current_data);
}

} // namespace ge

+0 -1  tests/ut/ge/graph/load/model_manager_unittest.cc

@@ -25,7 +25,6 @@
#include "common/op/ge_op_utils.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"

using namespace std;
using namespace testing;


+1 -2  tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc

@@ -21,7 +21,7 @@

#include "common/debug/log.h"
#include "common/l2_cache_optimize.h"
#include "common/model_parser/base.h"
#include "common/model_parser/model_parser.h"
#include "common/properties_manager.h"
#include "common/types.h"

@@ -31,7 +31,6 @@
#include "common/op/ge_op_utils.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
//#include "new_op_test_utils.h"
#undef private


+87 -0  tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc

@@ -0,0 +1,87 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <memory>
#include "graph/anchor.h"
#include "graph/attr_value.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "omg/omg_inner_types.h"
#define protected public
#define private public
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_allocator.h"
#undef protected
#undef private
using namespace std;
using namespace testing;
using namespace ge;
using domi::GetContext;
class UtestGraphCachingAllocatorTest : public testing::Test {
protected:
void SetUp() {}
void TearDown() { GetContext().out_nodes_map.clear(); }
};
TEST_F(UtestGraphCachingAllocatorTest, initialize_success) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
MemManager::Instance().Finalize();
}
TEST_F(UtestGraphCachingAllocatorTest, malloc_success) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize);
EXPECT_NE(nullptr, ptr);
MemManager::Instance().Finalize();
}
TEST_F(UtestGraphCachingAllocatorTest, extend_malloc_success) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize);
EXPECT_NE(nullptr, ptr);
ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kBinSizeUnit32*kMByteSize);
EXPECT_NE(nullptr, ptr);
MemManager::Instance().Finalize();
}
TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize);
EXPECT_NE(nullptr, ptr);
uint8_t *ptr1 = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kKByteSize);
EXPECT_NE(nullptr, ptr1);
EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr), SUCCESS);
EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr1), SUCCESS);
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).FreeCachedBlocks();
MemManager::Instance().Finalize();
}

+113 -0  tests/ut/ge/hybrid/ge_hybrid_unittest.cc

@@ -0,0 +1,113 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <vector>

#include "runtime/rt.h"

#define protected public
#define private public
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/model/hybrid_model.h"
#include "model/ge_model.h"
#include "model/ge_root_model.h"

#include "hybrid/node_executor/aicore/aicore_op_task.h"
#include "framework/common/taskdown_common.h"
#include "framework/common/debug/log.h"
#include "graph/ge_context.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/types.h"

#undef private
#undef protected

using namespace std;
using namespace testing;
using namespace ge;

class UtestGeHybrid : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
auto op_desc = std::make_shared<ge::OpDesc>(name, type);
op_desc->SetStreamId(0);
op_desc->SetId(0);

op_desc->SetWorkspace({});
op_desc->SetWorkspaceBytes({});
op_desc->SetInputOffset({});
op_desc->SetOutputOffset({});

ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");
bool support_dynamic = true;
ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic);
return op_desc;
}

TEST_F(UtestGeHybrid, aicore_op_task_init_success) {
// build aicore task
auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
domi::TaskDef task_def;
task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
kernel_with_handle->set_original_kernel_key("");
kernel_with_handle->set_node_info("");
kernel_with_handle->set_block_dim(32);
kernel_with_handle->set_args_size(64);
string args(64, '1');
kernel_with_handle->set_args(args.data(), 64);
domi::KernelContext *context = kernel_with_handle->mutable_context();
context->set_op_index(1);
context->set_kernel_type(2); // ccKernelType::TE
uint16_t args_offset[9] = {0};
context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

OpDescPtr op_desc = CreateOpDesc("Add", "Add");
std::vector<char> kernelBin;
TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
std::string kernel_name("kernel/Add");
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS);
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
char handle[] = "";
aicore_task->handle_ = handle;
aicore_task->tiling_key_ = 1;
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
}

TEST_F(UtestGeHybrid, task_update_tiling_info) {
auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
aicore_task->is_single_op_ = true;
auto graph = make_shared<ComputeGraph>("graph");
OpDescPtr op_desc = CreateOpDesc("Add", "Add");
ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key");
ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json");
auto node = graph->AddNode(op_desc);
optiling::OpRunInfo tiling_info;
ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS);
}

+28 -4  tests/ut/ge/single_op/single_op_model_unittest.cc

@@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test {
void TearDown() {}
};

// rt api stub
rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) {
return RT_ERROR_NONE;
}
/*
TEST_F(UtestSingleOpModel, test_init_model) {
string model_data_str = "123456789";
@@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) {

std::mutex stream_mu_;
rtStream_t stream_ = nullptr;
// SingleOp single_op(&stream_mu_, stream_);
//
// ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS);
}
/*
TEST_F(UtestSingleOpModel, test_build_kernel_task) {
@@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) {
ASSERT_EQ(op_model.Init(), FAILED);
}
*/
/*
TEST_F(UtestSingleOpModel, test_parse_arg_table) {
string model_data_str = "123456789";
SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size());
@@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) {
ASSERT_EQ(op.arg_table_[1].size(), 1);
ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]);
}
*/
TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) {
string name = "relu";
string type = "relu";
auto op_desc = std::make_shared<ge::OpDesc>(name, type);
op_desc->SetStreamId(0);
op_desc->SetId(0);
TbeOpTask task;
task.op_desc_ = op_desc;
task.model_name_ = "resnet_50";
task.model_id_ = 1;
TaskDescInfo task_desc_info;
uint32_t model_id;
task.GetProfilingArgs(task_desc_info, model_id);

ASSERT_EQ(task_desc_info.model_name, "resnet_50");
ASSERT_EQ(model_id, 1);
}



+117 -0  tests/ut/ge/single_op/single_op_task_unittest.cc

@@ -0,0 +1,117 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <vector>

#include "graph/load/model_manager/model_utils.h"
#include "graph/utils/graph_utils.h"
#include "runtime/rt.h"

#define protected public
#define private public
#include "single_op/single_op_model.h"
#include "single_op/task/tbe_task_builder.h"
#include "single_op/task/op_task.h"
#include "single_op/task/tbe_task_builder.h"
#include "external/register/op_tiling_registry.h"
#undef private
#undef protected

using namespace std;
using namespace testing;
using namespace ge;
using namespace optiling;

class UtestSingleOpTask : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

TEST_F(UtestSingleOpTask, test_build_kernel_task) {
string model_data_str = "123456789";
SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());
model.input_offset_list_.push_back(0);
model.input_sizes_.push_back(16);

model.output_offset_list_.push_back(0);
model.output_sizes_.push_back(16);

auto graph = make_shared<ComputeGraph>("graph");
auto op_desc = make_shared<OpDesc>("Add", "Add");
std::vector<char> kernelBin;
TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
std::string kernel_name("kernel/Add");
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);

vector<int64_t> shape{16, 16};
GeShape ge_shape(shape);
GeTensorDesc desc(ge_shape);
op_desc->AddInputDesc(desc);
op_desc->AddOutputDesc(desc);
auto node = graph->AddNode(op_desc);

std::mutex stream_mu_;
rtStream_t stream_ = nullptr;
StreamResource stream_resource(0);
SingleOp single_op(&stream_resource, &stream_mu_, stream_);

domi::TaskDef task_def;
task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
kernel_with_handle->set_original_kernel_key("");
kernel_with_handle->set_node_info("");
kernel_with_handle->set_block_dim(32);
kernel_with_handle->set_args_size(64);
string args(64, '1');
kernel_with_handle->set_args(args.data(), 64);
domi::KernelContext *context = kernel_with_handle->mutable_context();
context->set_op_index(1);
context->set_kernel_type(2); // ccKernelType::TE
uint16_t args_offset[9] = {0};
context->set_args_offset(args_offset, 9 * sizeof(uint16_t));
model.op_list_[1] = node;

TbeOpTask task_tmp;
TbeOpTask *task = &task_tmp;
ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS);
vector<GeTensorDesc> input_desc;
vector<DataBuffer> input_buffers;
vector<GeTensorDesc> output_desc;
vector<DataBuffer> output_buffers;
task->node_ = node;
OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;};
OpTilingRegistryInterf("Add", op_tiling_func);
ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key");
ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json");
char c = '0';
char* buffer = &c;
task->tiling_buffer_ = buffer;
task->max_tiling_size_ = 64;
task->tiling_data_ = "tiling_data";
task->arg_size_ = 64;
uint8_t task_args{0};
task->args_.reset(&task_args);

ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
char handle_tmp = '0';
char *handle = &handle_tmp;
task->SetHandle(handle);
ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
}

+35 -0  third_party/fwkacllib/inc/runtime/kernel.h

@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_FUSION_KERNEL_DUMPFLAG (0x04)
#define RT_KERNEL_CUSTOM_AICPU (0x08)

/**
* @ingroup rt_kernel
* @brief kernel mode
*/
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)

/**
* @ingroup rt_kernel
* @brief kernel L1 Fusion Dump bit flags
@@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData);
*/
RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);

/**
* @ingroup rt_kernel
* @brief register device binary
* @param [in] bin device binary description
* @param [out] handle device binary handle
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle);
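A minimal registration sketch (editorial, not part of kernel.h): it assumes a compiled kernel binary already sits in bin_data/bin_size, both placeholder names, and uses an ELF magic value; pick the magic that matches the actual binary type.

rtDevBinary_t binary = {};
binary.magic = RT_DEV_BINARY_MAGIC_ELF;  // assumed magic; depends on the kernel binary type
binary.version = 0;
binary.data = bin_data;                  // placeholder: pointer to the compiled kernel binary
binary.length = bin_size;                // placeholder: binary size in bytes
void *handle = nullptr;
if (rtRegisterAllKernel(&binary, &handle) != RT_ERROR_NONE) {
  // registration failed; handle stays null
}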

/**
* @ingroup rt_kernel
* @brief register fast memory device binary
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u
RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream);

/**
* @ingroup rt_kernel
* @brief launch kernel with handle to device
* @param [in] handle program
* @param [in] devFunc device function description
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
* @param [in] smDesc shared memory description
* @param [in] stream associated stream
* @param [in] kernelInfo kernel info
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo);
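And a matching launch sketch (again editorial, not from the header) using the handle registered above; the device function key, block dim, and argument buffer below are placeholders for illustration only.

uint8_t args[64] = {0};                  // placeholder argument buffer; layout depends on the kernel's arg table
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
rtError_t ret = rtKernelLaunchWithHandle(handle, "kernel/Add" /* assumed devFunc key */, 32,
                                         args, sizeof(args), nullptr /* smDesc */, stream,
                                         nullptr /* kernelInfo */);
if (ret == RT_ERROR_NONE) {
  rtStreamSynchronize(stream);           // wait for the kernel to finish
}
rtStreamDestroy(stream);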

/**
* @ingroup rt_kernel
* @brief launch kernel to device


+13 -0  third_party/fwkacllib/inc/runtime/rt_model.h

@@ -50,6 +50,7 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
RT_MODEL_TASK_STREAM_LABEL_GOTO,
RT_MODEL_TASK_MODEL_EXIT,
RT_MODEL_TASK_ALL_KERNEL,
} rtModelTaskType_t;

typedef enum tagModelStreamType {
@@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo {
uint16_t *argsOffset;
} rtKernelTaskInfo_t;

typedef struct tagAllKernelTaskInfo {
uint16_t blockDim;
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
const void *dev_func;
void *handle;
uint8_t *smDesc;
uint8_t *args;
uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;
typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
@@ -251,6 +263,7 @@ typedef struct tagTaskInfo {
union {
rtKernelTaskInfoEx_t kernelTaskEx;
rtKernelTaskInfo_t kernelTask;
rtAllKernelTaskInfo_t allkernelTask;
rtEventTaskInfo_t eventTask;
rtStreamSwitchTaskInfo_t streamSwitchTask;
rtStreamActiveTaskInfo_t streamActiveTask;

