Browse Source

Merge branch 'development' of gitee.com:dajunli/graphengine into development

pull/642/head
dajunli 5 years ago
parent
commit
380beb909e
48 changed files with 1062 additions and 876 deletions
  1. +10
    -7
      CMakeLists.txt
  2. +1
    -0
      cmake/external_libs/gflags.cmake
  3. +6
    -2
      cmake/external_libs/gtest.cmake
  4. +12
    -7
      cmake/external_libs/json.cmake
  5. +5
    -1
      cmake/external_libs/onnx.cmake
  6. +1
    -0
      cmake/external_libs/protobuf_shared.cmake
  7. +1
    -0
      cmake/external_libs/protobuf_static.cmake
  8. +116
    -115
      cmake/external_libs/protoc.cmake
  9. +11
    -2
      cmake/external_libs/securec.cmake
  10. +14
    -4
      ge/CMakeLists.txt
  11. +3
    -0
      ge/common/profiling/profiling_manager.cc
  12. +19
    -12
      ge/graph/load/new_model_manager/davinci_model.cc
  13. +11
    -1
      ge/graph/load/new_model_manager/davinci_model.h
  14. +11
    -2
      ge/graph/load/new_model_manager/model_manager.cc
  15. +52
    -66
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  16. +2
    -0
      ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  17. +34
    -22
      ge/hybrid/executor/node_state.cc
  18. +2
    -1
      ge/hybrid/executor/node_state.h
  19. +1
    -8
      ge/hybrid/executor/subgraph_executor.cc
  20. +5
    -4
      ge/hybrid/executor/worker/execution_engine.cc
  21. +103
    -18
      ge/hybrid/executor/worker/shape_inference_engine.cc
  22. +4
    -0
      ge/hybrid/executor/worker/shape_inference_engine.h
  23. +57
    -34
      ge/hybrid/model/node_item.cc
  24. +5
    -0
      ge/hybrid/model/node_item.h
  25. +22
    -0
      ge/hybrid/node_executor/task_context.cc
  26. +2
    -0
      ge/hybrid/node_executor/task_context.h
  27. +1
    -44
      ge/ir_build/ge_ir_build.cc
  28. +46
    -32
      inc/framework/omg/parser/model_parser.h
  29. +1
    -1
      metadef
  30. +1
    -1
      parser
  31. +60
    -0
      third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
  32. +3
    -1
      third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
  33. +14
    -2
      third_party/fwkacllib/inc/cce/aicpu_engine.h
  34. +6
    -2
      third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
  35. +13
    -4
      third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
  36. +29
    -1
      third_party/fwkacllib/inc/hccl/base.h
  37. +92
    -122
      third_party/fwkacllib/inc/hccl/hcom.h
  38. +38
    -20
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
  39. +6
    -0
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
  40. +29
    -303
      third_party/fwkacllib/inc/runtime/base.h
  41. +8
    -15
      third_party/fwkacllib/inc/runtime/config.h
  42. +2
    -2
      third_party/fwkacllib/inc/runtime/dev.h
  43. +1
    -1
      third_party/fwkacllib/inc/runtime/rt.h
  44. +7
    -0
      third_party/fwkacllib/inc/tdt/status.h
  45. +24
    -4
      third_party/fwkacllib/inc/tdt/tsd_client.h
  46. +135
    -0
      third_party/fwkacllib/inc/toolchain/prof_callback.h
  47. +11
    -15
      third_party/fwkacllib/inc/toolchain/prof_reporter.h
  48. +25
    -0
      third_party/fwkacllib/inc/toolchain/slog.h

+ 10
- 7
CMakeLists.txt View File

@@ -16,8 +16,11 @@ endif()

if(DEFINED ENV{D_PKG_SERVER})
set(GE_PB_PKG $ENV{D_PKG_SERVER})
message("Download packages from PKG server")
endif()
message("Download packages from DPKG server")
elseif(DEFINED ENV{MSLIBS_SERVER})
set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081")
message("Download packages from MSPKG server")
endif ()

set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
@@ -105,7 +108,7 @@ if (ENABLE_OPEN_SRC)
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
elseif(PRODUCT STREQUAL "flr1")
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -115,7 +118,7 @@ if (ENABLE_OPEN_SRC)
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
endif()
elseif(PLATFORM STREQUAL "all")
find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
@@ -123,14 +126,14 @@ if (ENABLE_OPEN_SRC)
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR})
find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
else()
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
endif()

if (ENABLE_GE_COV OR ENABLE_GE_UT)
if (ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
endif()



+ 1
- 0
cmake/external_libs/gflags.cmake View File

@@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR>
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install


+ 6
- 2
cmake/external_libs/gtest.cmake View File

@@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz")
set(MD5 "")
elseif (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
set(MD5 "")
else()
@@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-
set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(gtest_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR>
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE


+ 12
- 7
cmake/external_libs/json.cmake View File

@@ -5,19 +5,24 @@ endif()
include(ExternalProject)

set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
#if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#elseif (ENABLE_GITEE)
# set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
# set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
# set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
#else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#endif ()
#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
endif ()
ExternalProject_Add(json_build
URL ${REQ_URL}
#URL /home/txd/workspace/cloud_code/pkg/include.zip
SOURCE_DIR ${JSON_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""


+ 5
- 1
cmake/external_libs/onnx.cmake View File

@@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})

if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz")
set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
elseif (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
set(MD5 "1bdbcecdd68ea8392630467646776e02")
else()
@@ -19,6 +22,7 @@ ExternalProject_Add(onnx
#URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
#URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
#SOURCE_DIR ${ONNX_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
#INSTALL_COMMAND ""


+ 1
- 0
cmake/external_libs/protobuf_shared.cmake View File

@@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protobuf_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-Dprotobuf_WITH_ZLIB=OFF
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}


+ 1
- 0
cmake/external_libs/protobuf_static.cmake View File

@@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}


+ 116
- 115
cmake/external_libs/protoc.cmake View File

@@ -1,115 +1,116 @@
if (HAVE_PROTOC)
return()
endif()
include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()
if(GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
else()
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
else()
set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
set(MD5 "3d9e32700639618a4d2d342c99d4507a")
endif ()
endif()
set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)
set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)
set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)
function(protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
return()
endif()
set(${c_var})
set(${h_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)
endfunction()
function(protobuf_generate_py comp py_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
return()
endif()
set(${py_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
set(${py_var} ${${py_var}} PARENT_SCOPE)
endfunction()
#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
set(HAVE_PROTOC TRUE)
if (HAVE_PROTOC)
return()
endif()

include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)

if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if(GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
else()
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
else()
set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
set(MD5 "3d9e32700639618a4d2d342c99d4507a")
endif ()
endif()

set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)

set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)

set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)

function(protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
return()
endif()
set(${c_var})
set(${h_var})

foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)

if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")

add_custom_command(
OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()

set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)

endfunction()

function(protobuf_generate_py comp py_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
return()
endif()
set(${py_var})

foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)

if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")

add_custom_command(
OUTPUT "${proto_output_path}/${file_name}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
endforeach()

set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
set(${py_var} ${${py_var}} PARENT_SCOPE)

endfunction()

#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
set(HAVE_PROTOC TRUE)

+ 11
- 2
cmake/external_libs/securec.cmake View File

@@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz")
set(MD5 "")
else()
set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz")
set(MD5 "")
endif ()

ExternalProject_Add(c_sec_build
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
URL ${REQ_URL}
#URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}


+ 14
- 4
ge/CMakeLists.txt View File

@@ -605,7 +605,7 @@ set(INFER_SRC_LIST

if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_runner.so ############
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>)
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})

target_compile_definitions(ge_runner PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
@@ -646,11 +646,14 @@ target_include_directories(ge_runner PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_runner
target_link_libraries(ge_runner PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
adump_server
static_mmpa
-Wl,--whole-archive
msprofiler_fwk
-Wl,--no-whole-archive
-Wl,--no-as-needed
graph
ge_common
@@ -710,7 +713,7 @@ target_include_directories(ge_compiler PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_compiler
target_link_libraries(ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
static_mmpa
@@ -764,7 +767,14 @@ target_link_options(opensrc_ascendcl PRIVATE
-Wl,--allow-multiple-definition
-Wl,-z,muldefs
-Wl,-Bsymbolic
-Wl,--exclude-libs,ALL
-Wl,--exclude-libs,libascend_protobuf.a
-Wl,--exclude-libs,libge_executor.a
-Wl,--exclude-libs,libge_common.a
-Wl,--exclude-libs,libgraph.a
-Wl,--exclude-libs,libmmpa.a
-Wl,--exclude-libs,libregister.a
-Wl,--exclude-libs,liberror_manager.a
-Wl,--exclude-libs,libadump_server.a
)
target_link_libraries(opensrc_ascendcl PRIVATE
-Wl,--whole-archive


+ 3
- 0
ge/common/profiling/profiling_manager.cc View File

@@ -143,6 +143,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
}
try {
Json prof_options = Json::parse(options);
if (options.find(kTrainingTrace) == std::string::npos) {
return ge::SUCCESS;
}
const std::string training_trace = prof_options[kTrainingTrace];
if (training_trace.empty()) {
GELOGI("Training trace will not take effect.");


+ 19
- 12
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -2991,19 +2991,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
return SUCCESS;
}

Status DavinciModel::UpdateKnownZeroCopyAddr() {
for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
for (size_t i = 0; i < total_io_addrs.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
if (it_in != knonw_input_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_input_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_input_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
}
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
if (it_out != knonw_output_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_output_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_output_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
}
}
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -3032,7 +3032,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
} else {
total_io_addrs_ = orig_total_io_addrs_;
}
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

if (total_args_size_ == 0) {
GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -3099,7 +3099,14 @@ Status DavinciModel::MallocKnownArgs() {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

// malloc dynamic and static hybrid memory
if (total_hybrid_args_size_ != 0) {
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
// malloc fixed addr memory, eg: rts op
if (total_fixed_addr_size_ != 0) {
GELOGI("Begin to allocate fixed addr.");


+ 11
- 1
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -476,6 +476,14 @@ class DavinciModel {
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
}
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@@ -494,7 +502,7 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr();
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -977,6 +985,8 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;


+ 11
- 2
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -1055,7 +1055,16 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
mmTimespec timespec = mmGetTickCount();

ModelHelper model_helper;
Status ret = model_helper.LoadModel(model);
Status ret = model_helper.LoadRootModel(model);
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u",
model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
}
if (ret != SUCCESS) {
GELOGE(ret, "load model failed.");
return ret;
@@ -1214,7 +1223,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"Invalid model id %u, check weather model has been loaded or not.", model_id);
"Invalid model id %u, check whether model has been loaded or not.", model_id);

if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");


+ 52
- 66
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() {
Status KernelTaskInfo::Distribute() {
GELOGD("KernelTaskInfo Distribute Start.");
if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
if (kernel_type_ == ccKernelType::TE) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
}
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}
rtError_t rt_ret = RT_ERROR_NONE;
@@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() {
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);

vector<void *> io_addrs;
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (kernel_type_ == ccKernelType::TE) {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
davinci_model_->SetTotalIOAddrs(io_addrs);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
auto addrs_size = sizeof(uint64_t) * io_addrs.size();
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
}

Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
domi::KernelDef kernel_def = task_def.kernel();
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);

// get opcontext stored in model
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
// get opdesc
op_desc_ = davinci_model->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > op_desc_->GetOutputsSize()) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc_->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type_ == ccKernelType::TE) {
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(kernel_def.args_size());
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
}
return SUCCESS;
}
@@ -888,7 +872,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}

// copy args to new host memory
std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
@@ -896,8 +880,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return FAILED;
}

const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

if (davinci_model_->IsKnownNode()) {
return SUCCESS;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
vector<void *> io_addrs;
@@ -914,19 +913,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {


+ 2
- 0
ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo {
OpDescPtr op_desc_;
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;
uint32_t hybrid_args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
std::unique_ptr<uint8_t[]> args_addr = nullptr;
bool call_save_dump_ = false;

// aicpu ext_info device mem


+ 34
- 22
ge/hybrid/executor/node_state.cc View File

@@ -18,6 +18,7 @@
#include <chrono>
#include "framework/common/debug/log.h"
#include "graph/compute_graph.h"
#include "graph/utils/tensor_utils.h"
#include "hybrid_execution_context.h"
#include "subgraph_context.h"

@@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
this->num_pending_shapes_);
}

Status ShapeInferenceState::UpdateInputShape(int idx,
const GeShape &ori_shape,
const GeShape &shape) {
Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) {
if (node_item.IsInputShapeStatic(idx)) {
GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
node_item.NodeName().c_str(),
idx,
node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
shape.ToString().c_str());
target.GetShape().ToString().c_str());
return SUCCESS;
}

GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
int64_t tensor_size = -1;
(void) TensorUtils::GetSize(target, tensor_size);
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld",
node_item.NodeName().c_str(),
idx,
shape.ToString().c_str(),
ori_shape.ToString().c_str());
target.GetShape().ToString().c_str(),
target.GetOriginShape().ToString().c_str(),
tensor_size);

std::lock_guard<std::mutex> lk(mu_);
auto tensor_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(tensor_desc);
tensor_desc->SetShape(shape);
tensor_desc->SetOriginShape(ori_shape);
tensor_desc->SetShape(target.GetShape());
tensor_desc->SetOriginShape(target.GetOriginShape());
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
if (--num_pending_shapes_ == 0) {
ready_cv_.notify_all();
}
@@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
for (auto &p : shape_futures) {
auto idx = p.first;
auto &future = p.second;
GeShape shape;
GeShape ori_shape;
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
GE_CHK_STATUS_RET(future.Get(ori_shape, shape),
"[%s] Get shape failed. index = %u",
node_item.NodeName().c_str(),
idx);
auto src_tensor_desc = future.GetTensorDesc();
GE_CHECK_NOTNULL(src_tensor_desc);
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);

GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
node_item.NodeName().c_str(),
idx,
shape.ToString().c_str(),
ori_shape.ToString().c_str());
auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
input_desc->SetShape(std::move(shape));
input_desc->SetOriginShape(ori_shape);
int64_t tensor_size = -1;
(void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu",
node_item.NodeName().c_str(),
idx,
src_tensor_desc->GetShape().ToString().c_str(),
src_tensor_desc->GetOriginShape().ToString().c_str(),
tensor_size);
input_desc->SetShape(src_tensor_desc->GetShape());
input_desc->SetOriginShape(src_tensor_desc->GetOriginShape());
(void) TensorUtils::SetSize(*input_desc, tensor_size);
}

return SUCCESS;
@@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
return SUCCESS;
}

// Block until the producer node has finished, then return its output tensor
// descriptor; returns nullptr if the wait was cancelled.
GeTensorDescPtr ShapeFuture::GetTensorDesc() {
  GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
  if (subgraph_context_->Await(src_node_)) {
    return src_node_->GetOpDesc()->MutableOutputDesc(src_index_);
  }
  GELOGE(INTERNAL_ERROR, "cancelled");
  return nullptr;
}
} // namespace hybrid
} // namespace ge

+ 2
- 1
ge/hybrid/executor/node_state.h View File

@@ -35,6 +35,7 @@ class ShapeFuture {
ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
~ShapeFuture() = default;
Status Get(GeShape &ori_shape, GeShape &shape);
GeTensorDescPtr GetTensorDesc();

private:
NodePtr src_node_;
@@ -45,7 +46,7 @@ class ShapeFuture {
struct ShapeInferenceState {
explicit ShapeInferenceState(const NodeItem &node_item);

Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);
Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc);

void UpdateInputShapeFuture(int idx, ShapeFuture &&future);



+ 1
- 8
ge/hybrid/executor/subgraph_executor.cc View File

@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
GE_CHECK_NOTNULL(tensor_desc);
auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
GE_CHECK_NOTNULL(node_state);
node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
}
}

@@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
} else {
node_state.SetKernelTask(node_item.kernel_task);
}

GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
"[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
return SUCCESS;
}



+ 5
- 4
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -20,12 +20,9 @@
#include "graph/utils/tensor_adapter.h"
#include "graph/debug/ge_attr_define.h"
#include "hybrid/node_executor/node_executor.h"
#include "common/dump/dump_manager.h"
#include "hybrid/executor//worker//shape_inference_engine.h"
#include "common/dump/dump_op.h"
#include "common/types.h"
#include "common/ge_types.h"
#include "common/profiling/profiling_manager.h"
#include "runtime/base.h"

namespace ge {
namespace hybrid {
@@ -348,6 +345,10 @@ Status NodeDoneCallback::OnNodeDone() {
}

GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item));
if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) {
// update output tensor sizes
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item));
}
// PropagateOutputs for type == DEPEND_COMPUTE
if (node_item.shape_inference_type == DEPEND_COMPUTE) {
if (graph_context_->trace_enabled) {


+ 103
- 18
ge/hybrid/executor/worker/shape_inference_engine.cc View File

@@ -17,9 +17,15 @@
#include "hybrid/executor/worker/shape_inference_engine.h"
#include "graph/shape_refiner.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "common/math/math_util.h"
#include "hybrid/node_executor/node_executor.h"

namespace ge {
namespace {
const int kAlignment = 32;
}
namespace hybrid {
ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context)
: execution_context_(execution_context),
@@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
}

if (node_item.fused_subgraph != nullptr) {
return InferShapeForSubgraph(node_item, *node_item.fused_subgraph);
GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph));
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item));
return SUCCESS;
}

// Skip shape inference for node of type DEPEND_COMPUTE
@@ -63,21 +71,15 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
std::lock_guard<std::mutex> lk(mu_);
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
"Invoke InferShapeAndType failed.");
"Invoke InferShapeAndType failed.");
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
}
// Check again to make sure shape is valid after shape inference
if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) {
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape),
"Failed to get shape status. node = %s",
node_item.NodeName().c_str());

GE_CHK_BOOL_RET_STATUS(!is_unknown_shape,
INTERNAL_ERROR,
"[%s] Shape is still unknown after shape inference.",
node_item.NodeName().c_str());
}
// update output tensor sizes after shape inference
// error if shape is still unknown and not of type DEPEND_SHAPE_RANGE
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE));
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");

GELOGD("[%s] [HybridTrace] After shape inference. Node = %s",
node_item.NodeName().c_str(),
@@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
// propagate each output
for (int i = 0; i < node_item.num_outputs; ++i) {
auto output_desc = node_item.op_desc->MutableOutputDesc(i);
const auto &shape = output_desc->MutableShape();
const auto &ori_shape = output_desc->GetOriginShape();
auto &output_nodes = node_item.outputs[i];

// propagate output to all sub-inputs
@@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
std::move(future));
} else {
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
ori_shape,
shape));
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc));
}
}
}
@@ -230,5 +228,92 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
}
return SUCCESS;
}

// Turn a possibly-unknown shape into a fully concrete dim vector.
// When the shape still contains unknown dims after inference, either fail
// (fallback_with_range == false) or substitute each unknown dim with the upper
// bound of the tensor's recorded shape range.
Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
                                               std::vector<int64_t> &shape,
                                               bool fallback_with_range) {
  const auto &tensor_shape = tensor_desc.MutableShape();
  if (!tensor_shape.IsUnknownShape()) {
    return SUCCESS;
  }

  if (!fallback_with_range) {
    GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
           tensor_shape.ToString().c_str());
    return INTERNAL_ERROR;
  }

  GELOGD("Calc output size by range");
  std::vector<std::pair<int64_t, int64_t>> shape_range;
  GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
  if (shape_range.size() != shape.size()) {
    GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
           shape_range.size(),
           shape.size());
    return INTERNAL_ERROR;
  }

  // Replace every unknown dim by the upper bound of its range.
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == ge::UNKNOWN_DIM) {
      shape[i] = shape_range[i].second;
    }
  }

  GELOGD("After canonicalization, shape = [%s], before = [%s]",
         GeShape(shape).ToString().c_str(),
         tensor_shape.ToString().c_str());
  return SUCCESS;
}

// Compute the buffer size in bytes for a tensor of the given data type and
// fully-known shape: element size * product(dims), rounded up to the next
// kAlignment (32-byte) boundary. Dims must be non-negative, i.e. unknown dims
// must have been canonicalized before calling.
Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
                                            const std::vector<int64_t> &shape,
                                            int64_t &tensor_size) {
  GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
  uint32_t type_size;
  if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
    GELOGE(INTERNAL_ERROR, "Failed to get data type size");
    return INTERNAL_ERROR;
  }

  // Accumulate the byte size, guarding every multiplication against int64 overflow.
  tensor_size = type_size;
  for (const auto &dim : shape) {
    GE_CHECK_GE(dim, 0);
    GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
                      "Shape size overflow, shape = [%s]",
                      GeShape(shape).ToString().c_str());
    tensor_size *= dim;
  }

  // Round up to the alignment boundary; verify the add cannot overflow first.
  GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
                    "Tensor size is too large: %ld, shape = [%s]",
                    tensor_size,
                    GeShape(shape).ToString().c_str());
  tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
  return SUCCESS;
}

// Recompute and store the tensor size of every output of the node, based on the
// (possibly range-canonicalized) inferred shape.
Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
  auto op_desc = node_item.GetOpDesc();
  const size_t num_outputs = op_desc->GetOutputsSize();
  for (size_t output_index = 0; output_index < num_outputs; ++output_index) {
    auto tensor_desc = op_desc->MutableOutputDesc(output_index);
    GE_CHECK_NOTNULL(tensor_desc);
    // Canonicalize a copy of the dims; the stored shape itself is left untouched.
    auto dims = tensor_desc->MutableShape().GetDims();
    GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
                      "[%s] Failed to canonicalize shape for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);

    int64_t tensor_size = 0;
    GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
                      "[%s] Failed to calc tensor size for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);
    GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
    (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
  }

  return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 4
- 0
ge/hybrid/executor/worker/shape_inference_engine.h View File

@@ -34,7 +34,11 @@ class ShapeInferenceEngine {

Status PropagateOutputShapes(const NodeItem &node_item);

static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);

private:
static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range);
static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size);
static Status UpdatePeerNodeShape(const Node &node);
Status AwaitDependentNodes(NodeState &node_state);



+ 57
- 34
ge/hybrid/model/node_item.cc View File

@@ -22,6 +22,7 @@
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/node_utils.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor/worker/shape_inference_engine.h"

namespace ge {
namespace hybrid {
@@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
GE_CHECK_NOTNULL(dst_op_desc);
auto in_idx = node_and_anchor.second->GetIdx();
auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx);
fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc);
fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc);
GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx);
}

@@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap
return FAILED;
}

fused_subgraph.output_mapping.emplace(parent_index, op_desc);
fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc);
return SUCCESS;
}

@@ -126,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
return SUCCESS;
}

Status NodeItem::Init() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());

void NodeItem::ResolveOptionalInputs() {
if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
has_optional_inputs = true;
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
@@ -143,7 +139,18 @@ Status NodeItem::Init() {
}
}
}
}

// Cache the node's input/output counts as ints and detect optional inputs.
// The GE_CHECK_LE guards make the narrowing casts below provably safe.
Status NodeItem::InitInputsAndOutputs() {
  GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
  GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
  num_inputs = static_cast<int>(op_desc->GetInputsSize());
  num_outputs = static_cast<int>(op_desc->GetOutputsSize());
  ResolveOptionalInputs();
  return SUCCESS;
}

Status NodeItem::ResolveDynamicState() {
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
@@ -151,38 +158,54 @@ Status NodeItem::Init() {
"[%s] Failed to get shape status.",
node->GetName().c_str());
}
return SUCCESS;
}

if (is_dynamic) {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
}
Status NodeItem::ResolveStaticInputsAndOutputs() {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
}
}

for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
}
for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
}
}

if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}
if (is_output_shape_static) {
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
}
return SUCCESS;
}

// Decide how this node's output shapes become known. Control ops and
// PARTITIONEDCALL must execute before shapes are known (DEPEND_COMPUTE);
// all other nodes read the type recorded on the op attributes.
void NodeItem::ResolveUnknownShapeType() {
  if (IsControlOp() || node_type == PARTITIONEDCALL) {
    shape_inference_type = DEPEND_COMPUTE;
    return;
  }

  int32_t unknown_type = 0;
  (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_type);
  shape_inference_type = static_cast<UnknowShapeOpType>(unknown_type);
}

Status NodeItem::Init() {
GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs());
GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState());
if (is_dynamic) {
ResolveUnknownShapeType();
GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
}



+ 5
- 0
ge/hybrid/model/node_item.h View File

@@ -103,6 +103,11 @@ struct NodeItem {
private:
explicit NodeItem(NodePtr node);
Status Init();
Status InitInputsAndOutputs();
void ResolveOptionalInputs();
Status ResolveDynamicState();
Status ResolveStaticInputsAndOutputs();
void ResolveUnknownShapeType();

std::vector<bool> is_input_shape_static_;
std::vector<uint32_t> input_desc_indices_;


+ 22
- 0
ge/hybrid/node_executor/task_context.cc View File

@@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() {
}

Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
if (callback_fun == nullptr) {
GELOGW("[%s] Callback is NULL", GetNodeName());
return SUCCESS;
}
auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
if (ret != SUCCESS) {
GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
@@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const {
return node_item_->NodeName().c_str();
}

// Destroy every input tensor of this node, then every output tensor.
void TaskContext::ReleaseInputsAndOutputs() {
  for (int idx = 0; idx < node_item_->num_inputs; ++idx) {
    (inputs_start_ + idx)->Destroy();
    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), idx);
  }

  for (int idx = 0; idx < node_item_->num_outputs; ++idx) {
    (outputs_start_ + idx)->Destroy();
    GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), idx);
  }
}

void TaskContext::ReleaseInput(int index) {
auto input_tensor = MutableInput(index);
if (input_tensor != nullptr) {
@@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con
const DumpProperties &TaskContext::GetDumpProperties() const {
return execution_context_->dump_properties;
}

// A completion callback is needed when someone observes this node, when dump
// is enabled, or when profiling is active. Guards preserve the original
// short-circuit evaluation order.
bool TaskContext::NeedCallback() {
  if (node_item_->has_observer) {
    return true;
  }
  if (IsDumpEnabled()) {
    return true;
  }
  return execution_context_->profiling_level > 0;
}
} // namespace hybrid
} // namespace ge

+ 2
- 0
ge/hybrid/node_executor/task_context.h View File

@@ -50,6 +50,8 @@ class TaskContext {
ConstGeTensorDescPtr GetOutputDesc(int index) const;
GeTensorDescPtr MutableInputDesc(int index) const;
GeTensorDescPtr MutableOutputDesc(int index) const;
void ReleaseInputsAndOutputs();
bool NeedCallback();
void ReleaseInput(int index);
const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const;


+ 1
- 44
ge/ir_build/ge_ir_build.cc View File

@@ -227,7 +227,6 @@ class Impl {
~Impl() { (void)generator_.Finalize(); };
graphStatus CheckOptions(const std::map<std::string, std::string> &options);
graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs);
graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape);
graphStatus UpdateDataOpAttr(const Graph &graph);
graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options);
graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options,
@@ -321,42 +320,6 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options
return GRAPH_SUCCESS;
}

// Build the default "input_shape" option string from the shapes recorded on
// the graph's Data ops, in the form "name1:d0,d1;name2:d0,d1,d2".
// @param [in]  graph          IR graph to scan for Data ops
// @param [out] default_shape  accumulated shape string (no trailing ';')
// @return GRAPH_SUCCESS, or an error from the null checks
graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) {
  auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);
  for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(input_node);
    ge::OpDescPtr op = input_node->GetOpDesc();
    GE_CHECK_NOTNULL(op);
    if (op->GetType() == DATA) {
      string data_op_name = op->GetName();
      GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size());
      ge::GeTensorDesc tensor = op->GetInputDesc(0);
      ge::GeShape data_shape = tensor.GetShape();
      GELOGD("Data op get shape from InputDesc in ge ir graph.");

      string tmp_shape_str;
      const std::vector<int64_t> &tmp_shape = data_shape.GetDims();
      if (tmp_shape.empty()) {
        GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str());
      } else {
        tmp_shape_str += data_op_name + ":";
        for (auto tmp_dim : tmp_shape) {
          // std::to_string has a long long overload covering int64_t; the old
          // (long) cast truncated 64-bit dims on LLP64 platforms.
          tmp_shape_str += to_string(tmp_dim) + ",";
        }
        tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1);  // drop trailing ','
        tmp_shape_str += ";";
        default_shape += tmp_shape_str;
      }

      GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str());
    }
  }
  // Drop the trailing ';' left by the last Data op, if any.
  default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1));
  GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str());
  return GRAPH_SUCCESS;
}

graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::string> &options) {
// 1. check options
graphStatus ret = CheckOptions(options);
@@ -378,13 +341,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri
GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID);
options_[ge::ir_option::LOG_LEVEL] = log;

string input_shape;
if (options_.find("input_shape") == options_.end()) {
GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS,
return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!");
} else {
input_shape = options_["input_shape"];
}
string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"];
string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"];
string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"];
string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end()


+ 46
- 32
inc/framework/omg/parser/model_parser.h View File

@@ -36,7 +36,7 @@ using Status = domi::Status;

namespace domi {
using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>(
const google::protobuf::Message *root_proto, const std::string &graph)>;
const google::protobuf::Message *root_proto, const std::string &graph)>;
class ModelParser {
public:
ModelParser() {}
@@ -44,19 +44,20 @@ class ModelParser {
virtual ~ModelParser() {}

/**
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] file Network model file path
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] file Network model file path
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status Parse(const char *file, ge::Graph &graph) = 0;

/**
* @ingroup domi_omg
* @brief Parse relevant data from memory and save it to graph
 * @param [in] data Model file memory data
 * @param [in] size Model file memory size
* @param [in|out] graph A graph for saving the model information after analysis
* @return SUCCESS
* @return FAILED
@@ -64,36 +65,49 @@ class ModelParser {
*/
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;

#ifndef ONLY_COMPILE_OPEN_SRC
/**
* @ingroup domi_omg
* @brief Parse relevant data from memory and save it to graph
 * @param [in] data Model file memory data
 * @param [in] size Model file memory size
* @param [in|out] graph A graph for saving the model information after analysis
* @return SUCCESS
* @return FAILED
* @author
*/
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
#endif

/**
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] proto network model
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] proto network model
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;

/**
* @ingroup domi_omg
* @brief Analyze callback model data in subgraph
* @param [in] proto network model
* @param [in] callback callback of subgraph
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto,
GetGraphCallback callback,
* @ingroup domi_omg
* @brief Analyze callback model data in subgraph
* @param [in] proto network model
* @param [in] callback callback of subgraph
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
ge::ComputeGraphPtr &graph) = 0;
/**
* @ingroup domi_omg
* @brief Convert model files to JSON format
* @param [in] model_file Model file path to be converted
* @param [out] json_file Converted JSON file path
* @return SUCCESS
* @return Others failed
*/
* @ingroup domi_omg
* @brief Convert model files to JSON format
* @param [in] model_file Model file path to be converted
* @param [out] json_file Converted JSON file path
* @return SUCCESS
* @return Others failed
*/
virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; }

/*


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299
Subproject commit 129b50b41f79d0dfeb9fe8987b1c19c9ac51eb8b

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a
Subproject commit e9f7d0197aba57eb5247cb1e029c10e393631c89

+ 60
- 0
third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h View File

@@ -0,0 +1,60 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_

enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};

enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};

#pragma pack(push, 1)
//One byte alignment
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};

struct OpParamInfo {
uint64_t num;
uint64_t dtypeList;
uint64_t formatList;
};

struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};

struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
#endif // AICPU_OP_TYPE_LIST_H_

+ 3
- 1
third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h View File

@@ -21,13 +21,15 @@

namespace aicpu {

#pragma pack(push, 1)
struct AicpuParamHead
{
uint32_t length; // Total length: include cunstom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address
} __attribute__ ((packed));
};
#pragma pack(pop)

} // namespace aicpu



+ 14
- 2
third_party/fwkacllib/inc/cce/aicpu_engine.h View File

@@ -13,10 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_ENGINE_H__
#define AICPU_ENGINE_H__

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif
@@ -36,12 +37,23 @@ typedef enum {
/**
* @ingroup aicpu engine
* @brief aeCallInterface:
* a interface to call a function in a op kernfel lib
* a interface to call a function in a op kernfel lib
* @param [in] addr void *, should be STR_KERNEL * format
* @return aeStatus_t
*/
aeStatus_t aeCallInterface(void *addr);

/**
* @ingroup aicpu engine
* @brief aeBatchLoadKernelSo:
* a interface to load kernel so
* @param [in] loadSoNum load so number
* @param [in] soPaths load so paths
* @param [in] soNames load so names
* @return aeStatus_t
*/
aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]);

#ifdef __cplusplus
}
#endif


+ 6
- 2
third_party/fwkacllib/inc/cce/aicpu_engine_struct.h View File

@@ -33,18 +33,22 @@ typedef enum {
FMK_KERNEL_TYPE_RESERVED
} FwkkernelType_t;

#pragma pack(push, 1)
typedef struct {
uint32_t fwkKernelType; // FwkkernelType_t
union {
::aicpu::FWKAdapter::FWKOperateParam fwk_kernel;
} fwkKernelBase;
} __attribute__((packed)) STR_FWK_OP_KERNEL;
} STR_FWK_OP_KERNEL;
#pragma pack(pop)

#pragma pack(push, 1)
struct SessionInfo {
uint64_t sessionId;
uint64_t kernelId;
bool sessFlag;
} __attribute__((packed));
};
#pragma pack(pop)

#ifdef __cplusplus
}


+ 13
- 4
third_party/fwkacllib/inc/cce/fwk_adpt_struct.h View File

@@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType {
FWK_ADPT_UPDATE_INPUT_OUTPUT
};

#pragma pack(push, 1)
// API Parameter Structure
struct StrFWKKernel {
FWKOperateType opType;
@@ -89,31 +90,39 @@ struct StrFWKKernel {

uint64_t extInfoLen; // extend info total length
uint64_t extInfoAddr; // extend info addr, ExtInfo structure
} __attribute__((packed));
};
#pragma pack(pop)

typedef StrFWKKernel FWKOperateParam;

// Extent info ShapeAndType
const uint32_t kMaxShapeDims = 8;
#pragma pack(push, 1)
struct ShapeAndType {
int32_t type;
int64_t dims[kMaxShapeDims];
} __attribute__((packed));
};
#pragma pack(pop)

// Extend info structure for extInfoAddr
const uint32_t kExtInfoHeadSize = 8;

#pragma pack(push, 1)
struct ExtInfo {
int32_t infoType; // extend type
uint32_t infoLen; // length for infoMsg
char infoMsg[0]; // extend value
} __attribute__((packed));
};
#pragma pack(pop)

#pragma pack(push, 1)
struct ResultSummary {
uint64_t shape_data_ptr; // shape data addr, need convert to void*
uint64_t shape_data_size; // num of dims
uint64_t raw_data_ptr; // raw data addr, need convert to void*
uint64_t raw_data_size; // size of raw data
} __attribute__((packed));
};
#pragma pack(pop)
} // end namespace FWKAdapter
} // namespace aicpu



+ 29
- 1
third_party/fwkacllib/inc/hccl/base.h View File

@@ -22,7 +22,8 @@

#ifndef HCCL_BASE_H_
#define HCCL_BASE_H_

#include <hccl/hccl_types.h>
#include <string>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
@@ -95,6 +96,33 @@ typedef void *rtStream_t;
*/
typedef void *rtModel_t;

struct HcomOperation {
std::string hcclType;
void *inputPtr;
void *outputPtr;
u64 count;
HcclDataType dataType;
HcclReduceOp opType;
u32 root;

HcomOperation()
{
inputPtr = nullptr;
outputPtr = nullptr;
count = 0;
dataType = HCCL_DATA_TYPE_RESERVED;
opType = HCCL_REDUCE_RESERVED;
root = 0;
}
};

struct HcomRemoteAccessAddrInfo {
u32 remotetRankID;
u64 remoteAddr; // host embedding table address
u64 localAddr; // device HBM address
u64 length; // Memory Length in Bytes
};

#ifdef __cplusplus
}
#endif // __cplusplus


+ 92
- 122
third_party/fwkacllib/inc/hccl/hcom.h View File

@@ -24,145 +24,96 @@

#include <hccl/base.h>
#include <hccl/hccl_types.h>
#include <functional>
#include <vector>

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief Initialize HCOM.
*
* @param rank_table A string identifying the rank table file path, include file name.
* @param identify A string identifying the identify for the rank.
* @return HcclResult
* @see hcom_destroy()
*/
extern HcclResult hcom_init(const char *rank_table, const char *identify);

/**
* @brief Destroy HCOM
*
* @return HcclResult
* @see hcom_init()
*/
extern HcclResult hcom_destroy(void);

/**
* @brief Bind the model.
*
* @param model A pointer identifying the model information.
* @param stream A pointer identifying the stream information.
* @return HcclResult
* @see hcom_unbind_model()
*/
extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream);

/**
* @brief Unbind the model.
* @brief Get the rank number in the group.
*
* @param model An pointer identifying the model information.
* @return HcclResult
* @see hcom_unbind_model()
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_unbind_model(rtModel_t model);
HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);

/**
* @brief All-gather operator.
* @brief Get the rank number in the group.
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param inputCount An integer(u64) identifying the number of the input data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount,
HcclDataType dataType, const char *group, rtStream_t stream);
HcclResult HcomGetRankSize(const char *group, u32 *rankSize);

/**
* @brief All-reduce operator.
* @brief Get the rank number of this rank's server within the group.
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count,
HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);

/**
* @brief Broadcast operator.
* @brief Get the rank number of this rank's server within the group.
*
* @param tag A string identifying the tag of the operator.
* @param ptr A pointer identifying the data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param root An integer(u32) identifying the the root rank in the operator.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root,
const char *group, rtStream_t stream);
HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize);

/**
* @brief Reduce-scatter operator.
* @brief Get the rank id of this rank.
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param rankId A pointer identifying the rank id.
* @return HcclResult
*/
extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count,
HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
HcclResult hcom_get_rank_id(const char *group, u32 *rankId);

/**
* @brief Get the rank number in the group.
* @brief Get the rank id of this rank.
*
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @param rankId A pointer identifying the rank id.
* @return HcclResult
*/
HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
HcclResult HcomGetRankId(const char *group, u32 *rankId);

/**
* @brief Get the rank number of this rank's server within the group.
* @brief Get the local rank id of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @param localRankId A pointer identifying the local rank id.
* @return HcclResult
*/
HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);

/**
* @brief Get the rank id of this rank.
* @brief Get the local rank id of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param rankId A pointer identifying the rank id.
* @param localRankId A pointer identifying the local rank id.
* @return HcclResult
*/
HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId);

/**
* @brief Get the local rank id of this rank's server within the group.
* @brief Get the world rank id according to the group rank id.
*
* @param group A string identifying the group name.
* @param localRankId A pointer identifying the local rank id.
* @param groupRank An integer(u32) identifying the group rank id.
* @param worldRank A pointer identifying the world rank id.
* @return HcclResult
*/
HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);

/**
* @brief Get the world rank id according to the group rank id.
@@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
* @param worldRank A pointer identifying the world rank id.
* @return HcclResult
*/
HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);
HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank);

/**
* @brief Get the group rank id according to the world rank id.
@@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank,
*/
HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank);

/**
* @brief Get the group rank id according to the world rank id.
*
* @param worldRank An integer(u32) identifying the world rank id.
* @param group A string identifying the group name.
* @param groupRank A pointer identifying the group rank id.
* @return HcclResult
*/
HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank);

/**
* @brief Create group.
*
@@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group,
HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds);

/**
* @brief Destroy group
* @brief Create group.
*
* @param group A string identifying the group name.
* @param rankNum An integer(u32) identifying the number of ranks in the group.
* @param rankIds A list identifying the ranks in the group.
* @return HcclResult
*/
HcclResult hcom_destroy_group(const char *group);
HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds);

/**
* @brief Send operator.
* @brief Destroy group
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param destRank An integer identifying the destination rank.
* @param srTag An integer identifying the send/recv message tag.
* The message will be send by the receive operator with the same "sr_tag".
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @return HcclResult
*/
HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType,
u32 destRank, u32 srTag, const char *group, rtStream_t stream);
HcclResult hcom_destroy_group(const char *group);

/**
* @brief Receive operator.
* @brief Destroy group
*
* @param tag A string identifying the tag of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param srcRank An integer identifying the source rank.
* @param srTag An integer identifying the send/recv message tag.
* The message will be send by the send operator with the same "sr_tag".
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @return HcclResult
*/
HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType,
u32 srcRank, u32 srTag, const char *group, rtStream_t stream);
HcclResult HcomDestroyGroup(const char *group);

/**
* @brief Get the gradient split strategy with in the group.
* @brief Set the gradient split strategy with in the group, according to gradient index.
*
* @param group A string identifying the group name.
* @param feature A pointer identifying the feature of the model.
* @param maxSegmentNum An integer(u32) identifying the max segments of gradients.
* @param segmentNum A pointer identifying the segments number of gradients.
* @param segmentIdx A list identifying the index of end gradient in each segment.
* @return HcclResult
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param IdxList A list identifying the index of end gradient in each segment.
* @return HcclResult
*/
HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum,
u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE,
OriginalGraphShapeType shapeType = KNOWN_SHAPE);
extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient index.
@@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature
* @param IdxList A list identifying the index of end gradient in each segment.
* @return HcclResult
*/
extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient data size.
@@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen
*/
extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient data size.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param sizeList A list identifying the percent of each segment.
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Register memories and init resources for remote access.
*
@@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment
*/
extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count);

/**
* @brief Register memories and init resources for remote access.
*
* @param addrList memory addresses for remote access.
* @param count number of remote memory addresses.
* @return HcclResult
*/
extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count);

HcclResult HcomExecInitialize();

HcclResult HcomExecFinalize();

HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);

HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
std::function<void(HcclResult status)> callback);

#ifdef __cplusplus
}



+ 38
- 20
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h View File

@@ -215,6 +215,10 @@ typedef struct {
#define S_IWRITE S_IWUSR
#endif

#define mm_no_argument no_argument
#define mm_required_argument required_argument
#define mm_optional_argument optional_argument

#define M_FILE_RDONLY O_RDONLY
#define M_FILE_WRONLY O_WRONLY
#define M_FILE_RDWR O_RDWR
@@ -227,6 +231,7 @@ typedef struct {
#define M_BINARY O_RDONLY
#define M_TRUNC O_TRUNC
#define M_IRWXU S_IRWXU
#define M_APPEND O_APPEND

#define M_IN_CREATE IN_CREATE
#define M_IN_CLOSE_WRITE IN_CLOSE_WRITE
@@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag);
MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd,
VOID *sendMsg,
INT32 sendLen,
UINT32 sendFlag,
const mmSockAddr* addr,
INT32 tolen);
VOID *sendMsg,
INT32 sendLen,
UINT32 sendFlag,
const mmSockAddr* addr,
INT32 tolen);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd,
VOID *recvBuf,
mmSize recvLen,
UINT32 recvFlag,
mmSockAddr* addr,
mmSocklen_t *FromLen);
VOID *recvBuf,
mmSize recvLen,
UINT32 recvFlag,
mmSockAddr* addr,
mmSocklen_t *FromLen);
MMPA_FUNC_VISIBILITY INT32 mmSAStartup();
MMPA_FUNC_VISIBILITY INT32 mmSACleanup();
MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode);
@@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info);
MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName);
MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle);
MMPA_FUNC_VISIBILITY CHAR *mmDlerror();
MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period);
MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle,
mmUserBlock_t *timerBlock,
UINT milliSecond,
UINT period);
MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle);
MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer);
@@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount);
// Poll related interface
MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort();
MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle);
MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP,
pmmPollData polledData, mmPollBack pollBack);
MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds,
INT32 fdCount,
INT32 timeout,
mmCompletionHandle handleIOCP,
pmmPollData polledData,
mmPollBack pollBack);
MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode();
MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size);
MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone);
@@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt);
MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg();
MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg);
MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts);
MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts,
INT32 *longIndex);
MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc,
char *const *argv,
const char *opts,
const mmStructOption *longOpts,
INT32 *longIndex);

MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag);
MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length);
@@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count);
MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count);
MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile,
mmProcess *id);

MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock,
const mmThreadAttr *threadAttr);
MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName,
const mmArgvEnv *env,
const char *stdoutRedirectFile,
mmProcess *id);

MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle,
const mmUserBlock_t *funcBlock,
const mmThreadAttr *threadAttr);
MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode);
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);


+ 6
- 0
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h View File

@@ -237,6 +237,11 @@ typedef struct {
} mmThreadAttr;

typedef VOID (*mmPf)(VOID);

#define mm_no_argument 0
#define mm_required_argument 1
#define mm_optional_argument 2

#define M_FILE_RDONLY GENERIC_READ
#define M_FILE_WRONLY GENERIC_WRITE
#define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE)
@@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID);
#define M_CREAT _O_CREAT
#define M_BINARY _O_BINARY
#define M_TRUNC _O_TRUNC
#define M_APPEND _O_APPEND

#define M_IREAD _S_IREAD
#define M_IRUSR _S_IREAD


+ 29
- 303
third_party/fwkacllib/inc/runtime/base.h View File

@@ -18,6 +18,7 @@
#define __CCE_RUNTIME_BASE_H__

#include <stdint.h>
#include "toolchain/prof_callback.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
@@ -32,309 +33,8 @@ extern "C" {
#endif
#endif

/**
* @ingroup dvrt_base
* @brief runtime error numbers.
*/
typedef enum tagRtError {
RT_ERROR_NONE = 0x0, // success
RT_ERROR_DEVICE_BASE = 0x07010000,
RT_ERROR_DEVICE_NULL,
RT_ERROR_DEVICE_NEW,
RT_ERROR_DEVICE_ID,
RT_ERROR_DEVICE_CHIPTYPE,
RT_ERROR_DEVICE_DEPLOY,
RT_ERROR_DEVICE_RETAIN,
RT_ERROR_DEVICE_PLATFORM,
RT_ERROR_DEVICE_LOADER,
RT_ERROR_DEVICE_LIMIT,
RT_ERROR_DEVICE_PROC_HANG_OUT,
RT_ERROR_DEVICE_POWER_UP_FAIL,
RT_ERROR_DEVICE_POWER_DOWN_FAIL,
RT_ERROR_DEVICE_INVALID,

RT_ERROR_DRV_BASE = 0x07020000,
RT_ERROR_DRV_NULL,
RT_ERROR_DRV_NEW,
RT_ERROR_DRV_MEMORY,
RT_ERROR_DRV_INPUT,
RT_ERROR_DRV_PTRNULL,
RT_ERROR_DRV_OPEN_AICPU,
RT_ERROR_DRV_CLOSE_AICPU,
RT_ERROR_DRV_SYM_AICPU,
RT_ERROR_DRV_OPEN_TSD,
RT_ERROR_DRV_CLOSE_TSD,
RT_ERROR_DRV_SYM_TSD,
RT_ERROR_DRV_SOURCE,
RT_ERROR_DRV_REPORT,
RT_ERROR_DRV_COMMAND,
RT_ERROR_DRV_OCCUPY,
RT_ERROR_DRV_ERR,

RT_ERROR_STREAM_BASE = 0x07030000,
RT_ERROR_STREAM_NULL,
RT_ERROR_STREAM_NEW,
RT_ERROR_STREAM_CONTEXT,
RT_ERROR_STREAM_INVALID,
RT_ERROR_STREAM_MODEL,
RT_ERROR_STREAM_FUSION,
RT_ERROR_STREAM_FULL,
RT_ERROR_STREAM_EMPTY,
RT_ERROR_STREAM_NOT_COMPLETE,
RT_ERROR_STREAM_SYNC,
RT_ERROR_STREAM_NO_CB_REG,
RT_ERROR_STREAM_DUPLICATE,
RT_ERROR_STREAM_NOT_EXIST,
RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE,
RT_ERROR_SQID_FULL,

RT_ERROR_MODEL_BASE = 0x07040000,
RT_ERROR_MODEL_NULL,
RT_ERROR_MODEL_NEW,
RT_ERROR_MODEL_CONTEXT,
RT_ERROR_MODEL_ENDGRAPH,
RT_ERROR_MODEL_STREAM,
RT_ERROR_MODEL_EXCUTOR,
RT_ERROR_MODEL_SETUP,
RT_ERROR_MODEL_ID,
RT_ERROR_MODEL_EXE_FAILED,
RT_ERROR_END_OF_SEQUENCE, // end of sequence
RT_ERROR_MODEL_EXIT,
RT_ERROR_MODEL_EXIT_STREAM_UNBIND,
RT_ERROR_MODEL_EXIT_ID,
RT_ERROR_MODEL_ABORT_NORMAL,

RT_ERROR_EVENT_BASE = 0x07050000,
RT_ERROR_EVENT_NULL,
RT_ERROR_EVENT_NEW,
RT_ERROR_EVENT_RECORDER_NULL,
RT_ERROR_EVENT_TIMESTAMP_INVALID,
RT_ERROR_EVENT_TIMESTAMP_REVERSAL,
RT_ERROR_EVENT_NOT_COMPLETE,

RT_ERROR_NOTIFY_BASE = 0x07060000,
RT_ERROR_NOTIFY_NULL,
RT_ERROR_NOTIFY_NEW,
RT_ERROR_NOTIFY_TYPE,
RT_ERROR_NOTIFY_NOT_COMPLETE,

RT_ERROR_CONTEXT_BASE = 0x07070000,
RT_ERROR_CONTEXT_NULL,
RT_ERROR_CONTEXT_NEW,
RT_ERROR_CONTEXT_DEL,
RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL,
RT_ERROR_CONTEXT_ONLINE_STREAM_NULL,

RT_ERROR_KERNEL_BASE = 0x07080000,
RT_ERROR_KERNEL_NULL,
RT_ERROR_KERNEL_NEW,
RT_ERROR_KERNEL_LOOKUP,
RT_ERROR_KERNEL_NAME,
RT_ERROR_KERNEL_TYPE,
RT_ERROR_KERNEL_OFFSET,
RT_ERROR_KERNEL_DUPLICATE,
RT_ERROR_KERNEL_UNREGISTERING,

RT_ERROR_PROGRAM_BASE = 0x07090000,
RT_ERROR_PROGRAM_NULL,
RT_ERROR_PROGRAM_NEW,
RT_ERROR_PROGRAM_DATA,
RT_ERROR_PROGRAM_SIZE,
RT_ERROR_PROGRAM_MEM_TYPE,
RT_ERROR_PROGRAM_MACHINE_TYPE,
RT_ERROR_PROGRAM_USEOUT,

RT_ERROR_MODULE_BASE = 0x070a0000,
RT_ERROR_MODULE_NULL,
RT_ERROR_MODULE_NEW,

RT_ERROR_INSTANCE_BASE = 0x070b0000,
RT_ERROR_INSTANCE_NULL,
RT_ERROR_INSTANCE_NEW,
RT_ERROR_INSTANCE_VERSION,

RT_ERROR_API_BASE = 0x070c0000,
RT_ERROR_API_NULL,
RT_ERROR_API_NEW,

RT_ERROR_DATADUMP_BASE = 0x070d0000,
RT_ERROR_DATADUMP_NULL,
RT_ERROR_DATADUMP_NEW,
RT_ERROR_DATADUMP_TIME,
RT_ERROR_DATADUMP_FILE,
RT_ERROR_DATADUMP_ADDRESS,
RT_ERROR_DATADUMP_LOAD_FAILED,
RT_ERROR_DUMP_ADDR_SET_FAILED,

RT_ERROR_PROF_BASE = 0x070e0000,
RT_ERROR_PROF_NULL,
RT_ERROR_PROF_NEW,
RT_ERROR_PROF_START,
RT_ERROR_PROF_DEVICE_MEM,
RT_ERROR_PROF_HOST_MEM,
RT_ERROR_PROF_SET_DIR,
RT_ERROR_PROF_OPER,
RT_ERROR_PROF_FULL,
RT_ERROR_PROF_NAME,

RT_ERROR_PCTRACE_BASE = 0x070f0000,
RT_ERROR_PCTRACE_NULL,
RT_ERROR_PCTRACE_NEW,
RT_ERROR_PCTRACE_TIME,
RT_ERROR_PCTRACE_FILE,

RT_ERROR_TASK_BASE = 0x07100000,
RT_ERROR_TASK_NULL,
RT_ERROR_TASK_NEW,
RT_ERROR_TASK_TYPE,
RT_ERROR_TASK_ALLOCATOR,

RT_ERROR_COMMON_BASE = 0x07110000,
RT_ERROR_INVALID_VALUE, // RT_ERROR_INPUT_INVALID
RT_ERROR_MEMORY_ADDRESS_UNALIGNED,
RT_ERROR_SEC_HANDLE,
RT_ERROR_OS_HANDLE,
RT_ERROR_MUTEX_LOCK,
RT_ERROR_MUTEX_UNLOCK,
RT_ERROR_CALLOC,
RT_ERROR_POOL_RESOURCE,
RT_ERROR_TRANS_ARGS,
RT_ERROR_METADATA,
RT_ERROR_LOST_HEARTBEAT,
RT_ERROR_REPORT_TIMEOUT,
RT_ERROR_FEATURE_NOT_SUPPROT,
RT_ERROR_MEMORY_ALLOCATION,
RT_ERROR_MEMORY_FREE,
RT_ERROR_INVALID_MEMORY_TYPE,

RT_ERROR_DEBUG_BASE = 0x07120000,
RT_ERROR_DEBUG_NULL,
RT_ERROR_DEBUG_NEW,
RT_ERROR_DEBUG_SIGNAL,
RT_ERROR_DEBUG_OPEN,
RT_ERROR_DEBUG_WRITE,
RT_ERROR_DEBUG_REGISTER_FAILED,
RT_ERROR_DEBUG_UNREGISTER_FAILED,

RT_ERROR_ENGINE_BASE = 0x07130000,
RT_ERROR_ENGINE_NULL,
RT_ERROR_ENGINE_NEW,
RT_ERROR_ENGINE_THREAD,

RT_ERROR_LABEL_BASE = 0x07140000,
RT_ERROR_LABEL_NULL,
RT_ERROR_LABEL_NEW,
RT_ERROR_LABEL_CONTEXT,
RT_ERROR_LABEL_STREAM,
RT_ERROR_LABEL_MODEL,
RT_ERROR_LABEL_ALLOCATOR,
RT_ERROR_LABEL_FREE,
RT_ERROR_LABEL_SET,
RT_ERROR_LABEL_ID,

RT_ERROR_TSFW_BASE = 0x07150000,
RT_ERROR_TSFW_UNKNOWN,
RT_ERROR_TSFW_NULL_PTR,
RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID,
RT_ERROR_TSFW_ILLEGAL_PARAM,
RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL,
RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY,
RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL,
RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY,
RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED,
RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED,
RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE,
RT_ERROR_TSFW_L2_MALLOC_FAILED,
RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED,
RT_ERROR_TSFW_MEMCPY_OP_FAILED,
RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED,
RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE,
RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL,
RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY,
RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED,
RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE,
RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED,
RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND,
RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED,
RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED,
RT_ERROR_TSFW_SQNODE_NOT_ENOUGH,
RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE,
RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE,
RT_ERROR_TSFW_CQ_REPORT_FAILED,
RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS,
RT_ERROR_TSFW_SYS_DMA_RESET_FAILED,
RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED,
RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED,
RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL,
RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY,
RT_ERROR_TSFW_TIMER_EVENT_FULL,
RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH,
RT_ERROR_TSFW_AICORE_TIMEOUT,
RT_ERROR_TSFW_AICORE_EXCEPTION,
RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION,
RT_ERROR_TSFW_AICPU_TIMEOUT,
RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL,
RT_ERROR_TSFW_AICPU_EXCEPTION,
RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR,
RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR,
RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM,
RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT,
RT_ERROR_TSFW_DEBUG_INVALID_SQCQ,
RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE,
RT_ERROR_TSFW_DEBUG_CMD_PROCESS,
RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS,
RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS,
RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS,
RT_ERROR_TSFW_DEBUG_TASK_EMPTY,
RT_ERROR_TSFW_DEBUG_TASK_FULL,
RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST,
RT_ERROR_TSFW_DEBUG_AI_CORE_FULL,
RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST,
RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION,
RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT,
RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL,
RT_ERROR_TSFW_DEBUG_READ_ERROR,
RT_ERROR_TSFW_DEBUG_WRITE_FAIL,
RT_ERROR_TSFW_QUEUE_FULL,
RT_ERROR_TSFW_QUEUE_EMPTY,
RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL,
RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH,
RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE,
RT_ERROR_TSFW_INVLD_CPY_DIR,
RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES,
RT_ERROR_TSFW_PCIE_DMA_CPY_ERR,
RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY,
RT_ERROR_TSFW_PROFILE_BUFF_FULL,
RT_ERROR_TSFW_PROFILE_MODE_CONFLICT,
RT_ERROR_TSFW_PROFILE_OTHER_PID_ON,
RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED,
RT_ERROR_TSFW_TSCPU_CLOSE_FAILED,
RT_ERROR_TSFW_EXPECT_FAIL,
RT_ERROR_TSFW_REPEAT_MODEL_STREAM,
RT_ERROR_TSFW_STREAM_MODEL_UNBIND,
RT_ERROR_TSFW_MODEL_EXE_FAILED,
RT_ERROR_TSFW_IPC_SEND_FAILED,
RT_ERROR_TSFW_IPC_PROC_REG_FAILED,
RT_ERROR_TSFW_STREAM_FULL,
RT_ERROR_TSFW_END_OF_SEQUENCE,
RT_ERROR_TSFW_SWITCH_STREAM_LABEL,
RT_ERROR_TSFW_TRANS_SQE_FAIL,
RT_ERROR_TSFW_RESERVED,

RT_ERROR_SUBSCRIBE_BASE = 0x07160000,
RT_ERROR_SUBSCRIBE_NULL,
RT_ERROR_SUBSCRIBE_NEW,
RT_ERROR_SUBSCRIBE_STREAM,
RT_ERROR_SUBSCRIBE_THREAD,
RT_ERROR_SUBSCRIBE_GROUP,

RT_ERROR_GROUP_BASE = 0x07170000,
RT_ERROR_GROUP_NOT_SET,
RT_ERROR_GROUP_NOT_CREATE,

RT_ERROR_RESERVED = 0x07ff0000,
}rtError_t;
typedef int32_t rtError_t;
static const int32_t RT_ERROR_NONE = 0; // success

/**
* @ingroup dvrt_base
@@ -387,10 +87,20 @@ typedef struct rtExceptionInfo {
uint32_t deviceid;
} rtExceptionInfo;

// Extended exception info for a failed task, delivered to the per-module
// fail callback (rtTaskFailCallbackByModule). Superset of rtExceptionInfo,
// adding the task's return code.
typedef struct rtTaskFailInfo {
uint32_t taskid;    // id of the failed task
uint32_t streamid;  // id of the stream the task belongs to
uint32_t tid;       // thread id -- presumably the submitting thread; TODO confirm
uint32_t deviceid;  // id of the device the task ran on
uint32_t retcode;   // error code reported for the failed task
} rtTaskFailInfo;

// Callback invoked when the runtime reports an exception of the given type.
typedef void (*rtErrorCallback)(rtExceptionType);

// Callback invoked with basic exception info (task/stream/thread/device ids)
// when a task fails.
typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);

// Per-module variant of the task-fail callback; receives the extended
// rtTaskFailInfo, which also carries the task return code.
typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo);

// Callback invoked on device state change: isOpen == true when the device
// is opened, false when it is closed.
typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen);

/**
@@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t*
*/
RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);

/**
 * @ingroup profiling_base
 * @brief ts set profiling reporter callback.
 * @param [in] callback reporter callback of type MsprofReporterCallback
 * @return RT_ERROR_NONE for ok
 */
RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback);

/**
* @ingroup dvrt_base
* @brief Returns the last error from a runtime call.
@@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback);
*/
RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback);

/**
 * @ingroup dvrt_base
 * @brief register callback for fail task
 * @param [in] moduleName unique register name, can't be null
 * @param [in] callback fail task callback function
 * @param [out] NA
 * @return RT_ERROR_NONE for ok
 */
RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback);

/**
* @ingroup dvrt_base
* @brief notify handle.


+ 8
- 15
third_party/fwkacllib/inc/runtime/config.h View File

@@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig {

typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t;

/**
* @ingroup
* @brief get platform
* @param [in] platForm
* @return platForm
*/
RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm);

/**
* @ingroup
* @brief get AI core count
@@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate
*/
RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);

/**
* @ingroup
* @brief set platform in gen ctx
* @param [in] platForm
* @return RT_ERROR_NONE for ok, errno for failed
*/
RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);

/**
* @ingroup
@@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
*/
RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);

/**
* @ingroup
* @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be represented by 9020.
* @param [out] runtimeVersion
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif


+ 2
- 2
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3
FEATURE_TYPE_MEMCPY = 0,
FEATURE_TYPE_RSV,
} rtFeatureType_t;
* @param [in] infoType info type
* @param [in] featureInfo info type
/**
 * Capability query keys for the MEMCPY feature type
 * (second argument of rtGetRtCapability).
 */
typedef enum tagMemcpyInfo {
MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,  // whether zero-copy memcpy is supported
MEMCPY_INFO_RSV,                   // reserved (fixed: identifier contained a stray space)
} rtMemcpyInfo_t;
* @param [out] value the capability info
* @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value);


+ 1
- 1
third_party/fwkacllib/inc/runtime/rt.h View File

@@ -28,4 +28,4 @@
#include "rt_model.h"
#include "stream.h"

#endif // __CCE_RUNTIME_RT_H__
#endif // __CCE_RUNTIME_RT_H__

+ 7
- 0
third_party/fwkacllib/inc/tdt/status.h View File

@@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t;
typedef uint32_t TDT_StatusT;
#endif

// Target-OS discriminators matched against the build system's
// TARGET_SYSTEM_NAME definition.
#define LINUX 0
#define WINDOWS 1

// Export annotation for the tdt client library: __declspec(dllexport) when
// building for Windows, default ELF symbol visibility otherwise.
#ifndef TDT_LIB_EXPORT
#if(TARGET_SYSTEM_NAME == WINDOWS)
#define TDT_LIB_EXPORT __declspec(dllexport)
#else
#define TDT_LIB_EXPORT __attribute__((visibility("default")))
#endif
#endif
/**
* @ingroup tdt status.
*


+ 24
- 4
third_party/fwkacllib/inc/tdt/tsd_client.h View File

@@ -23,6 +23,7 @@
#include <mutex>
#include "tdt/status.h"
#include "tdt/data_common.h"
#include "toolchain/prof_callback.h"

#ifdef __cplusplus
extern "C" {
@@ -37,7 +38,7 @@ extern "C" {
* Used for the Framework process to communicate with the TSDDaemon process,
* and notify TSD to complete the initialization of other processes
*
* @param phyDeviceId [IN] type #unsigned int. Physical device ID
* @param logicDeviceId [IN] type #unsigned int. Logic device ID
* @param rankSize [IN] type #unsigned int. The rankSize of the training.
* The default value is 1. When rankSize is greater than 1,
* HCCP will be pulled to perform set communication related operations.
@@ -49,7 +50,7 @@ extern "C" {
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize);
TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);

/**
* @ingroup Close
@@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);

/**
* @ingroup UpdateProfilingMode
@@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag);
TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);

/**
 * @ingroup TsdSetMsprofReporterCallback
 * @brief set the aicpu profiling reporter callback for inference scenarios
 *
 * @par Function
 * Sets the profiling reporter callback of the aicpu_sd process in offline mode
 *
 * @param callback [IN] type #MsprofReporterCallback. callback function
 * @retval TDT_OK Success
 * @retval OtherValues Failure
 *
 * @par Dependency
 * @li libtsdclient.so: Library to which the interface belongs.
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 * @li prof_callback.h: Header file where 'MsprofReporterCallback' defined
 */
TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);

/**
* @ingroup CreateCmdParameterObj


+ 135
- 0
third_party/fwkacllib/inc/toolchain/prof_callback.h View File

@@ -0,0 +1,135 @@
/**
* Copyright 2020-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @file prof_callback.h
* @brief declaraion of profiling callbacks
*/

#ifndef MSPROFILER_PROF_CALLBACK_H_
#define MSPROFILER_PROF_CALLBACK_H_

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus


#include "stddef.h"
#include "stdint.h"

/**
 * @name MsprofErrorCode
 * @brief error codes returned by the profiling callbacks
 */
enum MsprofErrorCode {
MSPROF_ERROR_NONE = 0,        // success
MSPROF_ERROR_MEM_NOT_ENOUGH,  // out of memory
MSPROF_ERROR_GET_ENV,         // failed to read an environment variable
MSPROF_ERROR_CONFIG_INVALID,  // invalid profiling configuration
MSPROF_ERROR_ACL_JSON_OFF,    // profiling disabled in acl.json
MSPROF_ERROR,                 // generic failure
};

// Maximum length of a reporter tag string (excluding the NUL terminator).
#define MSPROF_ENGINE_MAX_TAG_LEN (31)

/**
 * @name ReporterData
 * @brief struct of data to report
 */
struct ReporterData {
char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module; data with different tags is written separately
int deviceId; // the index of device
size_t dataLen; // the length of send data
unsigned char *data; // the data content
};

/**
 * @name MsprofReporterModuleId
 * @brief id of the module reporting data, first argument of MsprofReporterCallback
 */
enum MsprofReporterModuleId {
MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS
MSPROF_MODULE_HCCL, // HCCL
MSPROF_MODULE_ACL, // AclModule
MSPROF_MODULE_FRAMEWORK, // Framework
MSPROF_MODULE_RUNTIME // runtime
};

/**
 * @name MsprofReporterCallbackType
 * @brief reporter callback request type, second argument of MsprofReporterCallback
 */
enum MsprofReporterCallbackType {
MSPROF_REPORTER_REPORT = 0, // report data
MSPROF_REPORTER_INIT, // init reporter
MSPROF_REPORTER_UNINIT, // uninit reporter
};

/**
 * @name MsprofReporterCallback
 * @brief callback to start reporter/stop reporter/report data
 * @param moduleId [IN] enum MsprofReporterModuleId
 * @param type [IN] enum MsprofReporterCallbackType
 * @param data [IN] callback data (nullptr on INIT/UNINIT)
 * @param len [IN] callback data size (0 on INIT/UNINIT)
 * @return enum MsprofErrorCode
 */
typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len);


// Maximum length of the jobId / options strings in MsprofGeOptions.
#define MSPROF_OPTIONS_DEF_LEN_MAX (2048)

/**
 * @name MsprofGeOptions
 * @brief payload struct of the MSPROF_CTRL_INIT_GE_OPTIONS ctrl callback
 */
struct MsprofGeOptions {
char jobId[MSPROF_OPTIONS_DEF_LEN_MAX];   // job identifier
char options[MSPROF_OPTIONS_DEF_LEN_MAX]; // GE profiling options string
};

/**
 * @name MsprofCtrlCallbackType
 * @brief ctrl callback request type, first argument of MsprofCtrlCallback
 */
enum MsprofCtrlCallbackType {
MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env
MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options
MSPROF_CTRL_FINALIZE // stop profiling
};

/**
 * @name MsprofCtrlCallback
 * @brief callback to start/stop profiling
 * @param type [IN] enum MsprofCtrlCallbackType
 * @param data [IN] callback data (struct MsprofGeOptions for MSPROF_CTRL_INIT_GE_OPTIONS)
 * @param len [IN] callback data size
 * @return enum MsprofErrorCode
 */
typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len);

/**
 * @name MsprofSetDeviceCallback
 * @brief callback to notify set/reset device
 * @param devId [IN] device id
 * @param isOpenDevice [IN] true: set device, false: reset device
 */
// NOTE(review): uses 'bool' inside an extern "C" header that includes only
// stddef.h/stdint.h -- pure C consumers need <stdbool.h>; confirm with owners.
typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice);

#ifdef __cplusplus
}
#endif

#endif // MSPROFILER_PROF_CALLBACK_H_

+ 11
- 15
third_party/fwkacllib/inc/toolchain/prof_reporter.h View File

@@ -16,7 +16,17 @@

#ifndef MSPROF_ENGINE_PROF_REPORTER_H_
#define MSPROF_ENGINE_PROF_REPORTER_H_
// Default OS_TYPE when the build system does not define it.
#ifndef OS_TYPE
#define OS_TYPE 0
#endif // OS_TYPE

// Export annotation for the msprof engine API: __declspec(dllexport) on
// non-Linux (Windows) builds, default ELF symbol visibility on Linux.
// NOTE(review): compares against LINUX, which this header does not define
// itself -- presumably defined elsewhere (e.g. tdt/status.h); confirm.
#if (OS_TYPE != LINUX)
#define MSVP_PROF_API __declspec(dllexport)
#else
#define MSVP_PROF_API __attribute__((visibility("default")))
#endif

#include "prof_callback.h"

/**
* @file prof_reporter.h
@@ -25,20 +35,6 @@
*/
namespace Msprof {
namespace Engine {
/// the max tag length
#define MSPROF_ENGINE_MAX_TAG_LEN (31)
/**
* @ingroup reporter
* @brief struct ReporterData
* the sturct of the data send to libmsprof
*/
struct ReporterData {
char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen
int deviceId; ///< the physical id of device
size_t dataLen; ///< the length of send data
unsigned char *data; ///< the data content
};

/**
* @ingroup reporter
* @brief class Reporter
@@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter {
} // namespace Engine
} // namespace Msprof

#endif // MSPROF_ENGINE_PROF_REPORTER_H_
#endif // MSPROF_ENGINE_PROF_REPORTER_H_

+ 25
- 0
third_party/fwkacllib/inc/toolchain/slog.h View File

@@ -18,7 +18,9 @@
#define D_SYSLOG_H_

#ifdef __cplusplus
#ifndef LOG_CPP
extern "C" {
#endif
#endif // __cplusplus

#ifndef LINUX
@@ -105,6 +107,7 @@ extern "C" {
#define SECURITY_LOG_MASK (0x00100000)
#define RUN_LOG_MASK (0x01000000)
#define OPERATION_LOG_MASK (0x10000000)
#define RESERVERD_LENGTH 52

typedef struct tagDCODE {
const char *cName;
@@ -116,6 +119,18 @@ typedef struct tagKV {
char *value;
} KeyValue;

// Whether the logging process is a user application or a system service.
typedef enum {
APPLICATION = 0,
SYSTEM
} ProcessType;

// Log attributes configured via DlogSetAttr (defaults per its doc:
// pid 0, device id 0, process type APPLICATION).
typedef struct {
ProcessType type;      // process type
unsigned int pid;      // process id
unsigned int deviceId; // device (chip) id
char reserved[RESERVERD_LENGTH]; // reserved padding; "RESERVERD" spelling is part of the public API, keep as-is
} LogAttr;

/**
* @ingroup slog
*
@@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent);
*/
DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel);

/**
* @ingroup slog
* @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION
* @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID)
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogSetAttr(LogAttr logAttr);

/**
* @ingroup slog
* @brief dlog_error: print error log
@@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...);
void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);

#ifdef __cplusplus
#ifndef LOG_CPP
}
#endif // LOG_CPP
#endif // __cplusplus
#endif // D_SYSLOG_H_

Loading…
Cancel
Save