Browse Source

Merge branch 'development' of gitee.com:dajunli/graphengine into development

pull/642/head
dajunli 5 years ago
parent
commit
380beb909e
48 changed files with 1062 additions and 876 deletions
  1. +10
    -7
      CMakeLists.txt
  2. +1
    -0
      cmake/external_libs/gflags.cmake
  3. +6
    -2
      cmake/external_libs/gtest.cmake
  4. +12
    -7
      cmake/external_libs/json.cmake
  5. +5
    -1
      cmake/external_libs/onnx.cmake
  6. +1
    -0
      cmake/external_libs/protobuf_shared.cmake
  7. +1
    -0
      cmake/external_libs/protobuf_static.cmake
  8. +116
    -115
      cmake/external_libs/protoc.cmake
  9. +11
    -2
      cmake/external_libs/securec.cmake
  10. +14
    -4
      ge/CMakeLists.txt
  11. +3
    -0
      ge/common/profiling/profiling_manager.cc
  12. +19
    -12
      ge/graph/load/new_model_manager/davinci_model.cc
  13. +11
    -1
      ge/graph/load/new_model_manager/davinci_model.h
  14. +11
    -2
      ge/graph/load/new_model_manager/model_manager.cc
  15. +52
    -66
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  16. +2
    -0
      ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  17. +34
    -22
      ge/hybrid/executor/node_state.cc
  18. +2
    -1
      ge/hybrid/executor/node_state.h
  19. +1
    -8
      ge/hybrid/executor/subgraph_executor.cc
  20. +5
    -4
      ge/hybrid/executor/worker/execution_engine.cc
  21. +103
    -18
      ge/hybrid/executor/worker/shape_inference_engine.cc
  22. +4
    -0
      ge/hybrid/executor/worker/shape_inference_engine.h
  23. +57
    -34
      ge/hybrid/model/node_item.cc
  24. +5
    -0
      ge/hybrid/model/node_item.h
  25. +22
    -0
      ge/hybrid/node_executor/task_context.cc
  26. +2
    -0
      ge/hybrid/node_executor/task_context.h
  27. +1
    -44
      ge/ir_build/ge_ir_build.cc
  28. +46
    -32
      inc/framework/omg/parser/model_parser.h
  29. +1
    -1
      metadef
  30. +1
    -1
      parser
  31. +60
    -0
      third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
  32. +3
    -1
      third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
  33. +14
    -2
      third_party/fwkacllib/inc/cce/aicpu_engine.h
  34. +6
    -2
      third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
  35. +13
    -4
      third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
  36. +29
    -1
      third_party/fwkacllib/inc/hccl/base.h
  37. +92
    -122
      third_party/fwkacllib/inc/hccl/hcom.h
  38. +38
    -20
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
  39. +6
    -0
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
  40. +29
    -303
      third_party/fwkacllib/inc/runtime/base.h
  41. +8
    -15
      third_party/fwkacllib/inc/runtime/config.h
  42. +2
    -2
      third_party/fwkacllib/inc/runtime/dev.h
  43. +1
    -1
      third_party/fwkacllib/inc/runtime/rt.h
  44. +7
    -0
      third_party/fwkacllib/inc/tdt/status.h
  45. +24
    -4
      third_party/fwkacllib/inc/tdt/tsd_client.h
  46. +135
    -0
      third_party/fwkacllib/inc/toolchain/prof_callback.h
  47. +11
    -15
      third_party/fwkacllib/inc/toolchain/prof_reporter.h
  48. +25
    -0
      third_party/fwkacllib/inc/toolchain/slog.h

+ 10
- 7
CMakeLists.txt View File

@@ -16,8 +16,11 @@ endif()

if(DEFINED ENV{D_PKG_SERVER})
set(GE_PB_PKG $ENV{D_PKG_SERVER})
message("Download packages from PKG server")
endif()
message("Download packages from DPKG server")
elseif(DEFINED ENV{MSLIBS_SERVER})
set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081")
message("Download packages from MSPKG server")
endif ()

set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
@@ -105,7 +108,7 @@ if (ENABLE_OPEN_SRC)
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
elseif(PRODUCT STREQUAL "flr1")
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -115,7 +118,7 @@ if (ENABLE_OPEN_SRC)
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
endif()
elseif(PLATFORM STREQUAL "all")
find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
@@ -123,14 +126,14 @@ if (ENABLE_OPEN_SRC)
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR})
find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
else()
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
endif()

if (ENABLE_GE_COV OR ENABLE_GE_UT)
if (ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
endif()



+ 1
- 0
cmake/external_libs/gflags.cmake View File

@@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR>
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install


+ 6
- 2
cmake/external_libs/gtest.cmake View File

@@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz")
set(MD5 "")
elseif (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
set(MD5 "")
else()
@@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-
set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(gtest_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR>
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE


+ 12
- 7
cmake/external_libs/json.cmake View File

@@ -5,19 +5,24 @@ endif()
include(ExternalProject)

set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
#if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#elseif (ENABLE_GITEE)
# set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
# set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
# set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
#else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#endif ()
#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
endif ()
ExternalProject_Add(json_build
URL ${REQ_URL}
#URL /home/txd/workspace/cloud_code/pkg/include.zip
SOURCE_DIR ${JSON_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""


+ 5
- 1
cmake/external_libs/onnx.cmake View File

@@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})

if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz")
set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
elseif (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
set(MD5 "1bdbcecdd68ea8392630467646776e02")
else()
@@ -19,6 +22,7 @@ ExternalProject_Add(onnx
#URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
#URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
#SOURCE_DIR ${ONNX_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
#INSTALL_COMMAND ""


+ 1
- 0
cmake/external_libs/protobuf_shared.cmake View File

@@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protobuf_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-Dprotobuf_WITH_ZLIB=OFF
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}


+ 1
- 0
cmake/external_libs/protobuf_static.cmake View File

@@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}


+ 116
- 115
cmake/external_libs/protoc.cmake View File

@@ -1,115 +1,116 @@
if (HAVE_PROTOC)
return()
endif()
include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()
if(GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
else()
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
else()
set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
set(MD5 "3d9e32700639618a4d2d342c99d4507a")
endif ()
endif()
set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)
set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)
set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)
function(protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
return()
endif()
set(${c_var})
set(${h_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)
endfunction()
function(protobuf_generate_py comp py_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
return()
endif()
set(${py_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
set(${py_var} ${${py_var}} PARENT_SCOPE)
endfunction()
#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
set(HAVE_PROTOC TRUE)
if (HAVE_PROTOC)
return()
endif()

include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)

if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if(GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
else()
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
else()
set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
set(MD5 "3d9e32700639618a4d2d342c99d4507a")
endif ()
endif()

set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)

set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)

set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)

function(protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
return()
endif()
set(${c_var})
set(${h_var})

foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)

if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")

add_custom_command(
OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()

set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)

endfunction()

function(protobuf_generate_py comp py_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
return()
endif()
set(${py_var})

foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)

if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")

add_custom_command(
OUTPUT "${proto_output_path}/${file_name}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
endforeach()

set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
set(${py_var} ${${py_var}} PARENT_SCOPE)

endfunction()

#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
set(HAVE_PROTOC TRUE)

+ 11
- 2
cmake/external_libs/securec.cmake View File

@@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz")
set(MD5 "")
else()
set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz")
set(MD5 "")
endif ()

ExternalProject_Add(c_sec_build
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
URL ${REQ_URL}
#URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}


+ 14
- 4
ge/CMakeLists.txt View File

@@ -605,7 +605,7 @@ set(INFER_SRC_LIST

if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_runner.so ############
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>)
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})

target_compile_definitions(ge_runner PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
@@ -646,11 +646,14 @@ target_include_directories(ge_runner PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_runner
target_link_libraries(ge_runner PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
adump_server
static_mmpa
-Wl,--whole-archive
msprofiler_fwk
-Wl,--no-whole-archive
-Wl,--no-as-needed
graph
ge_common
@@ -710,7 +713,7 @@ target_include_directories(ge_compiler PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_compiler
target_link_libraries(ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
static_mmpa
@@ -764,7 +767,14 @@ target_link_options(opensrc_ascendcl PRIVATE
-Wl,--allow-multiple-definition
-Wl,-z,muldefs
-Wl,-Bsymbolic
-Wl,--exclude-libs,ALL
-Wl,--exclude-libs,libascend_protobuf.a
-Wl,--exclude-libs,libge_executor.a
-Wl,--exclude-libs,libge_common.a
-Wl,--exclude-libs,libgraph.a
-Wl,--exclude-libs,libmmpa.a
-Wl,--exclude-libs,libregister.a
-Wl,--exclude-libs,liberror_manager.a
-Wl,--exclude-libs,libadump_server.a
)
target_link_libraries(opensrc_ascendcl PRIVATE
-Wl,--whole-archive


+ 3
- 0
ge/common/profiling/profiling_manager.cc View File

@@ -143,6 +143,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
}
try {
Json prof_options = Json::parse(options);
if (options.find(kTrainingTrace) == std::string::npos) {
return ge::SUCCESS;
}
const std::string training_trace = prof_options[kTrainingTrace];
if (training_trace.empty()) {
GELOGI("Training trace will not take effect.");


+ 19
- 12
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -2991,19 +2991,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
return SUCCESS;
}

Status DavinciModel::UpdateKnownZeroCopyAddr() {
for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
for (size_t i = 0; i < total_io_addrs.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
if (it_in != knonw_input_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_input_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_input_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
}
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
if (it_out != knonw_output_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_output_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_output_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
}
}
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -3032,7 +3032,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
} else {
total_io_addrs_ = orig_total_io_addrs_;
}
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

if (total_args_size_ == 0) {
GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -3099,7 +3099,14 @@ Status DavinciModel::MallocKnownArgs() {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

// malloc dynamic and static hybrid memory
if (total_hybrid_args_size_ != 0) {
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
// malloc fixed addr memory, eg: rts op
if (total_fixed_addr_size_ != 0) {
GELOGI("Begin to allocate fixed addr.");


+ 11
- 1
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -476,6 +476,14 @@ class DavinciModel {
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
}
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@@ -494,7 +502,7 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr();
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -977,6 +985,8 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;


+ 11
- 2
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -1055,7 +1055,16 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
mmTimespec timespec = mmGetTickCount();

ModelHelper model_helper;
Status ret = model_helper.LoadModel(model);
Status ret = model_helper.LoadRootModel(model);
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u",
model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
}
if (ret != SUCCESS) {
GELOGE(ret, "load model failed.");
return ret;
@@ -1214,7 +1223,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"Invalid model id %u, check weather model has been loaded or not.", model_id);
"Invalid model id %u, check whether model has been loaded or not.", model_id);

if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");


+ 52
- 66
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() {
Status KernelTaskInfo::Distribute() {
GELOGD("KernelTaskInfo Distribute Start.");
if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
if (kernel_type_ == ccKernelType::TE) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
}
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}
rtError_t rt_ret = RT_ERROR_NONE;
@@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() {
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);

vector<void *> io_addrs;
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (kernel_type_ == ccKernelType::TE) {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
davinci_model_->SetTotalIOAddrs(io_addrs);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
auto addrs_size = sizeof(uint64_t) * io_addrs.size();
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
}

Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
domi::KernelDef kernel_def = task_def.kernel();
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);

// get opcontext stored in model
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
// get opdesc
op_desc_ = davinci_model->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > op_desc_->GetOutputsSize()) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc_->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type_ == ccKernelType::TE) {
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(kernel_def.args_size());
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
}
return SUCCESS;
}
@@ -888,7 +872,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}

// copy args to new host memory
std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
@@ -896,8 +880,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return FAILED;
}

const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

if (davinci_model_->IsKnownNode()) {
return SUCCESS;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
vector<void *> io_addrs;
@@ -914,19 +913,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {


+ 2
- 0
ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo {
OpDescPtr op_desc_;
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;
uint32_t hybrid_args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
std::unique_ptr<uint8_t[]> args_addr = nullptr;
bool call_save_dump_ = false;

// aicpu ext_info device mem


+ 34
- 22
ge/hybrid/executor/node_state.cc View File

@@ -18,6 +18,7 @@
#include <chrono>
#include "framework/common/debug/log.h"
#include "graph/compute_graph.h"
#include "graph/utils/tensor_utils.h"
#include "hybrid_execution_context.h"
#include "subgraph_context.h"

@@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
this->num_pending_shapes_);
}

Status ShapeInferenceState::UpdateInputShape(int idx,
const GeShape &ori_shape,
const GeShape &shape) {
Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) {
if (node_item.IsInputShapeStatic(idx)) {
GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
node_item.NodeName().c_str(),
idx,
node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
shape.ToString().c_str());
target.GetShape().ToString().c_str());
return SUCCESS;
}

GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
int64_t tensor_size = -1;
(void) TensorUtils::GetSize(target, tensor_size);
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld",
node_item.NodeName().c_str(),
idx,
shape.ToString().c_str(),
ori_shape.ToString().c_str());
target.GetShape().ToString().c_str(),
target.GetOriginShape().ToString().c_str(),
tensor_size);

std::lock_guard<std::mutex> lk(mu_);
auto tensor_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(tensor_desc);
tensor_desc->SetShape(shape);
tensor_desc->SetOriginShape(ori_shape);
tensor_desc->SetShape(target.GetShape());
tensor_desc->SetOriginShape(target.GetOriginShape());
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
if (--num_pending_shapes_ == 0) {
ready_cv_.notify_all();
}
@@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
for (auto &p : shape_futures) {
auto idx = p.first;
auto &future = p.second;
GeShape shape;
GeShape ori_shape;
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
GE_CHK_STATUS_RET(future.Get(ori_shape, shape),
"[%s] Get shape failed. index = %u",
node_item.NodeName().c_str(),
idx);
auto src_tensor_desc = future.GetTensorDesc();
GE_CHECK_NOTNULL(src_tensor_desc);
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);

GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
node_item.NodeName().c_str(),
idx,
shape.ToString().c_str(),
ori_shape.ToString().c_str());
auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
input_desc->SetShape(std::move(shape));
input_desc->SetOriginShape(ori_shape);
int64_t tensor_size = -1;
(void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu",
node_item.NodeName().c_str(),
idx,
src_tensor_desc->GetShape().ToString().c_str(),
src_tensor_desc->GetOriginShape().ToString().c_str(),
tensor_size);
input_desc->SetShape(src_tensor_desc->GetShape());
input_desc->SetOriginShape(src_tensor_desc->GetOriginShape());
(void) TensorUtils::SetSize(*input_desc, tensor_size);
}

return SUCCESS;
@@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
return SUCCESS;
}

// Block until the producer node has finished, then return its output tensor
// descriptor; returns nullptr if the wait was cancelled.
GeTensorDescPtr ShapeFuture::GetTensorDesc() {
  GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
  if (subgraph_context_->Await(src_node_)) {
    return src_node_->GetOpDesc()->MutableOutputDesc(src_index_);
  }
  GELOGE(INTERNAL_ERROR, "cancelled");
  return nullptr;
}
} // namespace hybrid
} // namespace ge

+ 2
- 1
ge/hybrid/executor/node_state.h View File

@@ -35,6 +35,7 @@ class ShapeFuture {
ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
~ShapeFuture() = default;
Status Get(GeShape &ori_shape, GeShape &shape);
GeTensorDescPtr GetTensorDesc();

private:
NodePtr src_node_;
@@ -45,7 +46,7 @@ class ShapeFuture {
struct ShapeInferenceState {
explicit ShapeInferenceState(const NodeItem &node_item);

Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);
Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc);

void UpdateInputShapeFuture(int idx, ShapeFuture &&future);



+ 1
- 8
ge/hybrid/executor/subgraph_executor.cc View File

@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
GE_CHECK_NOTNULL(tensor_desc);
auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
GE_CHECK_NOTNULL(node_state);
node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
}
}

@@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
} else {
node_state.SetKernelTask(node_item.kernel_task);
}

GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
"[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
return SUCCESS;
}



+ 5
- 4
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -20,12 +20,9 @@
#include "graph/utils/tensor_adapter.h"
#include "graph/debug/ge_attr_define.h"
#include "hybrid/node_executor/node_executor.h"
#include "common/dump/dump_manager.h"
#include "hybrid/executor//worker//shape_inference_engine.h"
#include "common/dump/dump_op.h"
#include "common/types.h"
#include "common/ge_types.h"
#include "common/profiling/profiling_manager.h"
#include "runtime/base.h"

namespace ge {
namespace hybrid {
@@ -348,6 +345,10 @@ Status NodeDoneCallback::OnNodeDone() {
}

GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item));
if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) {
// update output tensor sizes
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item));
}
// PropagateOutputs for type == DEPEND_COMPUTE
if (node_item.shape_inference_type == DEPEND_COMPUTE) {
if (graph_context_->trace_enabled) {


+ 103
- 18
ge/hybrid/executor/worker/shape_inference_engine.cc View File

@@ -17,9 +17,15 @@
#include "hybrid/executor/worker/shape_inference_engine.h"
#include "graph/shape_refiner.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "common/math/math_util.h"
#include "hybrid/node_executor/node_executor.h"

namespace ge {
namespace {
const int kAlignment = 32;
}
namespace hybrid {
ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context)
: execution_context_(execution_context),
@@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
}

if (node_item.fused_subgraph != nullptr) {
return InferShapeForSubgraph(node_item, *node_item.fused_subgraph);
GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph));
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item));
return SUCCESS;
}

// Skip shape inference for node of type DEPEND_COMPUTE
@@ -63,21 +71,15 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
std::lock_guard<std::mutex> lk(mu_);
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
"Invoke InferShapeAndType failed.");
"Invoke InferShapeAndType failed.");
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
}
// Check again to make sure shape is valid after shape inference
if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) {
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape),
"Failed to get shape status. node = %s",
node_item.NodeName().c_str());

GE_CHK_BOOL_RET_STATUS(!is_unknown_shape,
INTERNAL_ERROR,
"[%s] Shape is still unknown after shape inference.",
node_item.NodeName().c_str());
}
// update output tensor sizes after shape inference
// error if shape is still unknown and not of type DEPEND_SHAPE_RANGE
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE));
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");

GELOGD("[%s] [HybridTrace] After shape inference. Node = %s",
node_item.NodeName().c_str(),
@@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
// propagate each output
for (int i = 0; i < node_item.num_outputs; ++i) {
auto output_desc = node_item.op_desc->MutableOutputDesc(i);
const auto &shape = output_desc->MutableShape();
const auto &ori_shape = output_desc->GetOriginShape();
auto &output_nodes = node_item.outputs[i];

// propagate output to all sub-inputs
@@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
std::move(future));
} else {
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
ori_shape,
shape));
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc));
}
}
}
@@ -230,5 +228,92 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
}
return SUCCESS;
}

// Turn a possibly-unknown shape into a fully concrete dim vector.
// When the shape still contains unknown dims after inference, either fail
// (fallback_with_range == false) or substitute each unknown dim with the upper
// bound of the tensor's recorded shape range.
Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
                                               std::vector<int64_t> &shape,
                                               bool fallback_with_range) {
  const auto &tensor_shape = tensor_desc.MutableShape();
  if (!tensor_shape.IsUnknownShape()) {
    return SUCCESS;
  }

  if (!fallback_with_range) {
    GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
           tensor_shape.ToString().c_str());
    return INTERNAL_ERROR;
  }

  GELOGD("Calc output size by range");
  std::vector<std::pair<int64_t, int64_t>> shape_range;
  GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
  if (shape_range.size() != shape.size()) {
    GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
           shape_range.size(),
           shape.size());
    return INTERNAL_ERROR;
  }

  // Replace every unknown dim by the upper bound of its range.
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == ge::UNKNOWN_DIM) {
      shape[i] = shape_range[i].second;
    }
  }

  GELOGD("After canonicalization, shape = [%s], before = [%s]",
         GeShape(shape).ToString().c_str(),
         tensor_shape.ToString().c_str());
  return SUCCESS;
}

// Compute the buffer size in bytes for a tensor of the given data type and
// fully-known shape: element size * product(dims), rounded up to the next
// kAlignment (32-byte) boundary. Dims must be non-negative, i.e. unknown dims
// must have been canonicalized before calling.
Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
                                            const std::vector<int64_t> &shape,
                                            int64_t &tensor_size) {
  GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
  uint32_t type_size;
  if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
    GELOGE(INTERNAL_ERROR, "Failed to get data type size");
    return INTERNAL_ERROR;
  }

  // Accumulate the byte size, guarding every multiplication against int64 overflow.
  tensor_size = type_size;
  for (const auto &dim : shape) {
    GE_CHECK_GE(dim, 0);
    GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
                      "Shape size overflow, shape = [%s]",
                      GeShape(shape).ToString().c_str());
    tensor_size *= dim;
  }

  // Round up to the alignment boundary; verify the add cannot overflow first.
  GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
                    "Tensor size is too large: %ld, shape = [%s]",
                    tensor_size,
                    GeShape(shape).ToString().c_str());
  tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
  return SUCCESS;
}

// Recompute and store the tensor size of every output of the node, based on the
// (possibly range-canonicalized) inferred shape.
Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
  auto op_desc = node_item.GetOpDesc();
  const size_t num_outputs = op_desc->GetOutputsSize();
  for (size_t output_index = 0; output_index < num_outputs; ++output_index) {
    auto tensor_desc = op_desc->MutableOutputDesc(output_index);
    GE_CHECK_NOTNULL(tensor_desc);
    // Canonicalize a copy of the dims; the stored shape itself is left untouched.
    auto dims = tensor_desc->MutableShape().GetDims();
    GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
                      "[%s] Failed to canonicalize shape for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);

    int64_t tensor_size = 0;
    GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
                      "[%s] Failed to calc tensor size for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);
    GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
    (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
  }

  return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 4
- 0
ge/hybrid/executor/worker/shape_inference_engine.h View File

@@ -34,7 +34,11 @@ class ShapeInferenceEngine {

Status PropagateOutputShapes(const NodeItem &node_item);

static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);

private:
static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range);
static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size);
static Status UpdatePeerNodeShape(const Node &node);
Status AwaitDependentNodes(NodeState &node_state);



+ 57
- 34
ge/hybrid/model/node_item.cc View File

@@ -22,6 +22,7 @@
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/node_utils.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor/worker/shape_inference_engine.h"

namespace ge {
namespace hybrid {
@@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
GE_CHECK_NOTNULL(dst_op_desc);
auto in_idx = node_and_anchor.second->GetIdx();
auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx);
fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc);
fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc);
GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx);
}

@@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap
return FAILED;
}

fused_subgraph.output_mapping.emplace(parent_index, op_desc);
fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc);
return SUCCESS;
}

@@ -126,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
return SUCCESS;
}

Status NodeItem::Init() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());

void NodeItem::ResolveOptionalInputs() {
if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
has_optional_inputs = true;
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
@@ -143,7 +139,18 @@ Status NodeItem::Init() {
}
}
}
}

// Cache the node's input/output counts as ints and detect optional inputs.
// The GE_CHECK_LE guards make the narrowing casts below provably safe.
Status NodeItem::InitInputsAndOutputs() {
  GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
  GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
  num_inputs = static_cast<int>(op_desc->GetInputsSize());
  num_outputs = static_cast<int>(op_desc->GetOutputsSize());
  ResolveOptionalInputs();
  return SUCCESS;
}

Status NodeItem::ResolveDynamicState() {
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
@@ -151,38 +158,54 @@ Status NodeItem::Init() {
"[%s] Failed to get shape status.",
node->GetName().c_str());
}
return SUCCESS;
}

if (is_dynamic) {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
}
Status NodeItem::ResolveStaticInputsAndOutputs() {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
}
}

for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
}
for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
}
}

if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}
if (is_output_shape_static) {
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
}
return SUCCESS;
}

// Decide how this node's output shapes become known. Control ops and
// PARTITIONEDCALL must execute before shapes are known (DEPEND_COMPUTE);
// all other nodes read the type recorded on the op attributes.
void NodeItem::ResolveUnknownShapeType() {
  if (IsControlOp() || node_type == PARTITIONEDCALL) {
    shape_inference_type = DEPEND_COMPUTE;
    return;
  }

  int32_t unknown_type = 0;
  (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_type);
  shape_inference_type = static_cast<UnknowShapeOpType>(unknown_type);
}

Status NodeItem::Init() {
GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs());
GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState());
if (is_dynamic) {
ResolveUnknownShapeType();
GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
}



+ 5
- 0
ge/hybrid/model/node_item.h View File

@@ -103,6 +103,11 @@ struct NodeItem {
private:
explicit NodeItem(NodePtr node);
Status Init();
Status InitInputsAndOutputs();
void ResolveOptionalInputs();
Status ResolveDynamicState();
Status ResolveStaticInputsAndOutputs();
void ResolveUnknownShapeType();

std::vector<bool> is_input_shape_static_;
std::vector<uint32_t> input_desc_indices_;


+ 22
- 0
ge/hybrid/node_executor/task_context.cc View File

@@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() {
}

Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
if (callback_fun == nullptr) {
GELOGW("[%s] Callback is NULL", GetNodeName());
return SUCCESS;
}
auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
if (ret != SUCCESS) {
GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
@@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const {
return node_item_->NodeName().c_str();
}

// Destroy every input tensor of this node, then every output tensor.
void TaskContext::ReleaseInputsAndOutputs() {
  for (int idx = 0; idx < node_item_->num_inputs; ++idx) {
    (inputs_start_ + idx)->Destroy();
    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), idx);
  }

  for (int idx = 0; idx < node_item_->num_outputs; ++idx) {
    (outputs_start_ + idx)->Destroy();
    GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), idx);
  }
}

void TaskContext::ReleaseInput(int index) {
auto input_tensor = MutableInput(index);
if (input_tensor != nullptr) {
@@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con
const DumpProperties &TaskContext::GetDumpProperties() const {
return execution_context_->dump_properties;
}

// A completion callback is needed when someone observes this node, when dump
// is enabled, or when profiling is active. Guards preserve the original
// short-circuit evaluation order.
bool TaskContext::NeedCallback() {
  if (node_item_->has_observer) {
    return true;
  }
  if (IsDumpEnabled()) {
    return true;
  }
  return execution_context_->profiling_level > 0;
}
} // namespace hybrid
} // namespace ge

+ 2
- 0
ge/hybrid/node_executor/task_context.h View File

@@ -50,6 +50,8 @@ class TaskContext {
ConstGeTensorDescPtr GetOutputDesc(int index) const;
GeTensorDescPtr MutableInputDesc(int index) const;
GeTensorDescPtr MutableOutputDesc(int index) const;
void ReleaseInputsAndOutputs();
bool NeedCallback();
void ReleaseInput(int index);
const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const;


+ 1
- 44
ge/ir_build/ge_ir_build.cc View File

@@ -227,7 +227,6 @@ class Impl {
~Impl() { (void)generator_.Finalize(); };
graphStatus CheckOptions(const std::map<std::string, std::string> &options);
graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs);
graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape);
graphStatus UpdateDataOpAttr(const Graph &graph);
graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options);
graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options,
@@ -321,42 +320,6 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options
return GRAPH_SUCCESS;
}

// Build the default "input_shape" option string from the shapes recorded on
// the graph's Data ops, in the form "name1:d0,d1;name2:d0,d1,d2".
// @param [in]  graph          IR graph to scan for Data ops
// @param [out] default_shape  accumulated shape string (no trailing ';')
// @return GRAPH_SUCCESS, or an error from the null checks
graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) {
  auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);
  for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(input_node);
    ge::OpDescPtr op = input_node->GetOpDesc();
    GE_CHECK_NOTNULL(op);
    if (op->GetType() == DATA) {
      string data_op_name = op->GetName();
      GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size());
      ge::GeTensorDesc tensor = op->GetInputDesc(0);
      ge::GeShape data_shape = tensor.GetShape();
      GELOGD("Data op get shape from InputDesc in ge ir graph.");

      string tmp_shape_str;
      const std::vector<int64_t> &tmp_shape = data_shape.GetDims();
      if (tmp_shape.empty()) {
        GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str());
      } else {
        tmp_shape_str += data_op_name + ":";
        for (auto tmp_dim : tmp_shape) {
          // std::to_string has a long long overload covering int64_t; the old
          // (long) cast truncated 64-bit dims on LLP64 platforms.
          tmp_shape_str += to_string(tmp_dim) + ",";
        }
        tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1);  // drop trailing ','
        tmp_shape_str += ";";
        default_shape += tmp_shape_str;
      }

      GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str());
    }
  }
  // Drop the trailing ';' left by the last Data op, if any.
  default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1));
  GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str());
  return GRAPH_SUCCESS;
}

graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::string> &options) {
// 1. check options
graphStatus ret = CheckOptions(options);
@@ -378,13 +341,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri
GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID);
options_[ge::ir_option::LOG_LEVEL] = log;

string input_shape;
if (options_.find("input_shape") == options_.end()) {
GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS,
return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!");
} else {
input_shape = options_["input_shape"];
}
string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"];
string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"];
string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"];
string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end()


+ 46
- 32
inc/framework/omg/parser/model_parser.h View File

@@ -36,7 +36,7 @@ using Status = domi::Status;

namespace domi {
using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>(
const google::protobuf::Message *root_proto, const std::string &graph)>;
const google::protobuf::Message *root_proto, const std::string &graph)>;
class ModelParser {
public:
ModelParser() {}
@@ -44,19 +44,20 @@ class ModelParser {
virtual ~ModelParser() {}

/**
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] file Network model file path
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] file Network model file path
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status Parse(const char *file, ge::Graph &graph) = 0;

/**
* @ingroup domi_omg
* @brief Parse relevant data from memory and save it to graph
 * @param [in] data Model file memory data
 * @param [in] size Model file memory size
* @param [in|out] graph A graph for saving the model information after analysis
* @return SUCCESS
* @return FAILED
@@ -64,36 +65,49 @@ class ModelParser {
*/
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;

#ifndef ONLY_COMPILE_OPEN_SRC
/**
* @ingroup domi_omg
* @brief Parse relevant data from memory and save it to graph
 * @param [in] data Model file memory data
 * @param [in] size Model file memory size
* @param [in|out] graph A graph for saving the model information after analysis
* @return SUCCESS
* @return FAILED
* @author
*/
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
#endif

/**
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] proto network model
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] proto network model
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;

/**
* @ingroup domi_omg
* @brief Analyze callback model data in subgraph
* @param [in] proto network model
* @param [in] callback callback of subgraph
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto,
GetGraphCallback callback,
* @ingroup domi_omg
* @brief Analyze callback model data in subgraph
* @param [in] proto network model
* @param [in] callback callback of subgraph
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
ge::ComputeGraphPtr &graph) = 0;
/**
* @ingroup domi_omg
* @brief Convert model files to JSON format
* @param [in] model_file Model file path to be converted
* @param [out] json_file Converted JSON file path
* @return SUCCESS
* @return Others failed
*/
* @ingroup domi_omg
* @brief Convert model files to JSON format
* @param [in] model_file Model file path to be converted
* @param [out] json_file Converted JSON file path
* @return SUCCESS
* @return Others failed
*/
virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; }

/*


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299
Subproject commit 129b50b41f79d0dfeb9fe8987b1c19c9ac51eb8b

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a
Subproject commit e9f7d0197aba57eb5247cb1e029c10e393631c89

+ 60
- 0
third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h View File

@@ -0,0 +1,60 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_

enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};

enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};

#pragma pack(push, 1)
//One byte alignment
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};

struct OpParamInfo {
uint64_t num;
uint64_t dtypeList;
uint64_t formatList;
};

struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};

struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
#endif // AICPU_OP_TYPE_LIST_H_

+ 3
- 1
third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h View File

@@ -21,13 +21,15 @@

namespace aicpu {

#pragma pack(push, 1)
struct AicpuParamHead
{
uint32_t length; // Total length: include cunstom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address
} __attribute__ ((packed));
};
#pragma pack(pop)

} // namespace aicpu



+ 14
- 2
third_party/fwkacllib/inc/cce/aicpu_engine.h View File

@@ -13,10 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_ENGINE_H__
#define AICPU_ENGINE_H__

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif
@@ -36,12 +37,23 @@ typedef enum {
/**
* @ingroup aicpu engine
* @brief aeCallInterface:
* a interface to call a function in a op kernfel lib
* a interface to call a function in a op kernfel lib
* @param [in] addr void *, should be STR_KERNEL * format
* @return aeStatus_t
*/
aeStatus_t aeCallInterface(void *addr);

/**
* @ingroup aicpu engine
* @brief aeBatchLoadKernelSo:
* a interface to load kernel so
* @param [in] loadSoNum load so number
* @param [in] soPaths load so paths
* @param [in] soNames load so names
* @return aeStatus_t
*/
aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]);

#ifdef __cplusplus
}
#endif


+ 6
- 2
third_party/fwkacllib/inc/cce/aicpu_engine_struct.h View File

@@ -33,18 +33,22 @@ typedef enum {
FMK_KERNEL_TYPE_RESERVED
} FwkkernelType_t;

#pragma pack(push, 1)
typedef struct {
uint32_t fwkKernelType; // FwkkernelType_t
union {
::aicpu::FWKAdapter::FWKOperateParam fwk_kernel;
} fwkKernelBase;
} __attribute__((packed)) STR_FWK_OP_KERNEL;
} STR_FWK_OP_KERNEL;
#pragma pack(pop)

#pragma pack(push, 1)
struct SessionInfo {
uint64_t sessionId;
uint64_t kernelId;
bool sessFlag;
} __attribute__((packed));
};
#pragma pack(pop)

#ifdef __cplusplus
}


+ 13
- 4
third_party/fwkacllib/inc/cce/fwk_adpt_struct.h View File

@@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType {
FWK_ADPT_UPDATE_INPUT_OUTPUT
};

#pragma pack(push, 1)
// API Parameter Structure
struct StrFWKKernel {
FWKOperateType opType;
@@ -89,31 +90,39 @@ struct StrFWKKernel {

uint64_t extInfoLen; // extend info total length
uint64_t extInfoAddr; // extend info addr, ExtInfo structure
} __attribute__((packed));
};
#pragma pack(pop)

typedef StrFWKKernel FWKOperateParam;

// Extent info ShapeAndType
const uint32_t kMaxShapeDims = 8;
#pragma pack(push, 1)
struct ShapeAndType {
int32_t type;
int64_t dims[kMaxShapeDims];
} __attribute__((packed));
};
#pragma pack(pop)

// Extend info structure for extInfoAddr
const uint32_t kExtInfoHeadSize = 8;

#pragma pack(push, 1)
struct ExtInfo {
int32_t infoType; // extend type
uint32_t infoLen; // length for infoMsg
char infoMsg[0]; // extend value
} __attribute__((packed));
};
#pragma pack(pop)

#pragma pack(push, 1)
struct ResultSummary {
uint64_t shape_data_ptr; // shape data addr, need convert to void*
uint64_t shape_data_size; // num of dims
uint64_t raw_data_ptr; // raw data addr, need convert to void*
uint64_t raw_data_size; // size of raw data
} __attribute__((packed));
};
#pragma pack(pop)
} // end namespace FWKAdapter
} // namespace aicpu



+ 29
- 1
third_party/fwkacllib/inc/hccl/base.h View File

@@ -22,7 +22,8 @@

#ifndef HCCL_BASE_H_
#define HCCL_BASE_H_

#include <hccl/hccl_types.h>
#include <string>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
@@ -95,6 +96,33 @@ typedef void *rtStream_t;
*/
typedef void *rtModel_t;

struct HcomOperation {
std::string hcclType;
void *inputPtr;
void *outputPtr;
u64 count;
HcclDataType dataType;
HcclReduceOp opType;
u32 root;

HcomOperation()
{
inputPtr = nullptr;
outputPtr = nullptr;
count = 0;
dataType = HCCL_DATA_TYPE_RESERVED;
opType = HCCL_REDUCE_RESERVED;
root = 0;
}
};

struct HcomRemoteAccessAddrInfo {
u32 remotetRankID;
u64 remoteAddr; // host embedding table address
u64 localAddr; // device HBM address
u64 length; // Memory Length in Bytes
};

#ifdef __cplusplus
}
#endif // __cplusplus


+ 92
- 122
third_party/fwkacllib/inc/hccl/hcom.h View File

@@ -24,145 +24,96 @@

#include <hccl/base.h>
#include <hccl/hccl_types.h>
#include <functional>
#include <vector>

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief Initialize HCOM.
*
* @param rank_table A string identifying the rank table file path, include file name.
* @param identify A string identifying the identify for the rank.
* @return HcclResult
* @see hcom_destroy()
*/
extern HcclResult hcom_init(const char *rank_table, const char *identify);

/**
* @brief Destroy HCOM
*
* @return HcclResult
* @see hcom_init()
*/
extern HcclResult hcom_destroy(void);

/**
* @brief Bind the model.
*
* @param model A pointer identifying the model information.
* @param stream A pointer identifying the stream information.
* @return HcclResult
* @see hcom_unbind_model()
*/
extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream);

/**
* @brief Unbind the model.
* @brief Get the rank number in the group.
*
* @param model An pointer identifying the model information.
* @return HcclResult
* @see hcom_unbind_model()
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_unbind_model(rtModel_t model);
HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);

/**
* @brief All-gather operator.
* @brief Get the rank number in the group.
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param inputCount An integer(u64) identifying the number of the input data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount,
HcclDataType dataType, const char *group, rtStream_t stream);
HcclResult HcomGetRankSize(const char *group, u32 *rankSize);

/**
* @brief All-reduce operator.
* @brief Get the rank number of this rank's server within the group.
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count,
HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);

/**
* @brief Broadcast operator.
* @brief Get the rank number of this rank's server within the group.
*
* @param tag A string identifying the tag of the operator.
* @param ptr A pointer identifying the data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param root An integer(u32) identifying the the root rank in the operator.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @return HcclResult
*/
extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root,
const char *group, rtStream_t stream);
HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize);

/**
* @brief Reduce-scatter operator.
* @brief Get the rank id of this rank.
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @param rankId A pointer identifying the rank id.
* @return HcclResult
*/
extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count,
HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
HcclResult hcom_get_rank_id(const char *group, u32 *rankId);

/**
* @brief Get the rank number in the group.
* @brief Get the rank id of this rank.
*
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @param rankId A pointer identifying the rank id.
* @return HcclResult
*/
HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
HcclResult HcomGetRankId(const char *group, u32 *rankId);

/**
* @brief Get the rank number of this rank's server within the group.
* @brief Get the local rank id of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @param localRankId A pointer identifying the local rank id.
* @return HcclResult
*/
HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);

/**
* @brief Get the rank id of this rank.
* @brief Get the local rank id of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param rankId A pointer identifying the rank id.
* @param localRankId A pointer identifying the local rank id.
* @return HcclResult
*/
HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId);

/**
* @brief Get the local rank id of this rank's server within the group.
* @brief Get the world rank id according to the group rank id.
*
* @param group A string identifying the group name.
* @param localRankId A pointer identifying the local rank id.
* @param groupRank An integer(u32) identifying the group rank id.
* @param worldRank A pointer identifying the world rank id.
* @return HcclResult
*/
HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);

/**
* @brief Get the world rank id according to the group rank id.
@@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
* @param worldRank A pointer identifying the world rank id.
* @return HcclResult
*/
HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);
HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank);

/**
* @brief Get the group rank id according to the world rank id.
@@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank,
*/
HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank);

/**
* @brief Get the group rank id according to the world rank id.
*
* @param worldRank An integer(u32) identifying the world rank id.
* @param group A string identifying the group name.
* @param groupRank A pointer identifying the group rank id.
* @return HcclResult
*/
HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank);

/**
* @brief Create group.
*
@@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group,
HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds);

/**
* @brief Destroy group
* @brief Create group.
*
* @param group A string identifying the group name.
* @param rankNum An integer(u32) identifying the number of ranks in the group.
* @param rankIds A list identifying the ranks in the group.
* @return HcclResult
*/
HcclResult hcom_destroy_group(const char *group);
HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds);

/**
* @brief Send operator.
* @brief Destroy group
*
* @param tag A string identifying the tag of the operator.
* @param inputPtr A pointer identifying the input data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param destRank An integer identifying the destination rank.
* @param srTag An integer identifying the send/recv message tag.
* The message will be send by the receive operator with the same "sr_tag".
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @return HcclResult
*/
HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType,
u32 destRank, u32 srTag, const char *group, rtStream_t stream);
HcclResult hcom_destroy_group(const char *group);

/**
* @brief Receive operator.
* @brief Destroy group
*
* @param tag A string identifying the tag of the operator.
* @param outputPtr A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param srcRank An integer identifying the source rank.
* @param srTag An integer identifying the send/recv message tag.
* The message will be send by the send operator with the same "sr_tag".
* @param group A string identifying the group name of ranks participating in the operator.
* @param stream A pointer identifying the stream information.
* @param group A string identifying the group name.
* @return HcclResult
*/
HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType,
u32 srcRank, u32 srTag, const char *group, rtStream_t stream);
HcclResult HcomDestroyGroup(const char *group);

/**
* @brief Get the gradient split strategy with in the group.
* @brief Set the gradient split strategy with in the group, according to gradient index.
*
* @param group A string identifying the group name.
* @param feature A pointer identifying the feature of the model.
* @param maxSegmentNum An integer(u32) identifying the max segments of gradients.
* @param segmentNum A pointer identifying the segments number of gradients.
* @param segmentIdx A list identifying the index of end gradient in each segment.
* @return HcclResult
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param IdxList A list identifying the index of end gradient in each segment.
* @return HcclResult
*/
HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum,
u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE,
OriginalGraphShapeType shapeType = KNOWN_SHAPE);
extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient index.
@@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature
* @param IdxList A list identifying the index of end gradient in each segment.
* @return HcclResult
*/
extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient data size.
@@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen
*/
extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient data size.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param sizeList A list identifying the percent of each segment.
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Register memories and init resources for remote access.
*
@@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment
*/
extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count);

/**
* @brief Register memories and init resources for remote access.
*
* @param addrList memory addresses for remote access.
* @param count number of remote memory addresses.
* @return HcclResult
*/
extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count);

HcclResult HcomExecInitialize();

HcclResult HcomExecFinalize();

HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);

HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
std::function<void(HcclResult status)> callback);

#ifdef __cplusplus
}



+ 38
- 20
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h View File

@@ -215,6 +215,10 @@ typedef struct {
#define S_IWRITE S_IWUSR
#endif

#define mm_no_argument no_argument
#define mm_required_argument required_argument
#define mm_optional_argument optional_argument

#define M_FILE_RDONLY O_RDONLY
#define M_FILE_WRONLY O_WRONLY
#define M_FILE_RDWR O_RDWR
@@ -227,6 +231,7 @@ typedef struct {
#define M_BINARY O_RDONLY
#define M_TRUNC O_TRUNC
#define M_IRWXU S_IRWXU
#define M_APPEND O_APPEND

#define M_IN_CREATE IN_CREATE
#define M_IN_CLOSE_WRITE IN_CLOSE_WRITE
@@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag);
MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd,
VOID *sendMsg,
INT32 sendLen,
UINT32 sendFlag,
const mmSockAddr* addr,
INT32 tolen);
VOID *sendMsg,
INT32 sendLen,
UINT32 sendFlag,
const mmSockAddr* addr,
INT32 tolen);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd,
VOID *recvBuf,
mmSize recvLen,
UINT32 recvFlag,
mmSockAddr* addr,
mmSocklen_t *FromLen);
VOID *recvBuf,
mmSize recvLen,
UINT32 recvFlag,
mmSockAddr* addr,
mmSocklen_t *FromLen);
MMPA_FUNC_VISIBILITY INT32 mmSAStartup();
MMPA_FUNC_VISIBILITY INT32 mmSACleanup();
MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode);
@@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info);
MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName);
MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle);
MMPA_FUNC_VISIBILITY CHAR *mmDlerror();
MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period);
MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle,
mmUserBlock_t *timerBlock,
UINT milliSecond,
UINT period);
MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle);
MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer);
@@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount);
// Poll related interface
MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort();
MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle);
MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP,
pmmPollData polledData, mmPollBack pollBack);
MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds,
INT32 fdCount,
INT32 timeout,
mmCompletionHandle handleIOCP,
pmmPollData polledData,
mmPollBack pollBack);
MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode();
MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size);
MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone);
@@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt);
MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg();
MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg);
MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts);
MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts,
INT32 *longIndex);
MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc,
char *const *argv,
const char *opts,
const mmStructOption *longOpts,
INT32 *longIndex);

MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag);
MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length);
@@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count);
MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count);
MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile,
mmProcess *id);

MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock,
const mmThreadAttr *threadAttr);
MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName,
const mmArgvEnv *env,
const char *stdoutRedirectFile,
mmProcess *id);

MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle,
const mmUserBlock_t *funcBlock,
const mmThreadAttr *threadAttr);
MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode);
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);


+ 6
- 0
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h View File

@@ -237,6 +237,11 @@ typedef struct {
} mmThreadAttr;

typedef VOID (*mmPf)(VOID);

#define mm_no_argument 0
#define mm_required_argument 1
#define mm_optional_argument 2

#define M_FILE_RDONLY GENERIC_READ
#define M_FILE_WRONLY GENERIC_WRITE
#define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE)
@@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID);
#define M_CREAT _O_CREAT
#define M_BINARY _O_BINARY
#define M_TRUNC _O_TRUNC
#define M_APPEND _O_APPEND

#define M_IREAD _S_IREAD
#define M_IRUSR _S_IREAD


+ 29
- 303
third_party/fwkacllib/inc/runtime/base.h View File

@@ -18,6 +18,7 @@
#define __CCE_RUNTIME_BASE_H__

#include <stdint.h>
#include "toolchain/prof_callback.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
@@ -32,309 +33,8 @@ extern "C" {
#endif
#endif

/**
* @ingroup dvrt_base
* @brief runtime error numbers.
*/
typedef enum tagRtError {
RT_ERROR_NONE = 0x0, // success
RT_ERROR_DEVICE_BASE = 0x07010000,
RT_ERROR_DEVICE_NULL,
RT_ERROR_DEVICE_NEW,
RT_ERROR_DEVICE_ID,
RT_ERROR_DEVICE_CHIPTYPE,
RT_ERROR_DEVICE_DEPLOY,
RT_ERROR_DEVICE_RETAIN,
RT_ERROR_DEVICE_PLATFORM,
RT_ERROR_DEVICE_LOADER,
RT_ERROR_DEVICE_LIMIT,
RT_ERROR_DEVICE_PROC_HANG_OUT,
RT_ERROR_DEVICE_POWER_UP_FAIL,
RT_ERROR_DEVICE_POWER_DOWN_FAIL,
RT_ERROR_DEVICE_INVALID,

RT_ERROR_DRV_BASE = 0x07020000,
RT_ERROR_DRV_NULL,
RT_ERROR_DRV_NEW,
RT_ERROR_DRV_MEMORY,
RT_ERROR_DRV_INPUT,
RT_ERROR_DRV_PTRNULL,
RT_ERROR_DRV_OPEN_AICPU,
RT_ERROR_DRV_CLOSE_AICPU,
RT_ERROR_DRV_SYM_AICPU,
RT_ERROR_DRV_OPEN_TSD,
RT_ERROR_DRV_CLOSE_TSD,
RT_ERROR_DRV_SYM_TSD,
RT_ERROR_DRV_SOURCE,
RT_ERROR_DRV_REPORT,
RT_ERROR_DRV_COMMAND,
RT_ERROR_DRV_OCCUPY,
RT_ERROR_DRV_ERR,

RT_ERROR_STREAM_BASE = 0x07030000,
RT_ERROR_STREAM_NULL,
RT_ERROR_STREAM_NEW,
RT_ERROR_STREAM_CONTEXT,
RT_ERROR_STREAM_INVALID,
RT_ERROR_STREAM_MODEL,
RT_ERROR_STREAM_FUSION,
RT_ERROR_STREAM_FULL,
RT_ERROR_STREAM_EMPTY,
RT_ERROR_STREAM_NOT_COMPLETE,
RT_ERROR_STREAM_SYNC,
RT_ERROR_STREAM_NO_CB_REG,
RT_ERROR_STREAM_DUPLICATE,
RT_ERROR_STREAM_NOT_EXIST,
RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE,
RT_ERROR_SQID_FULL,

RT_ERROR_MODEL_BASE = 0x07040000,
RT_ERROR_MODEL_NULL,
RT_ERROR_MODEL_NEW,
RT_ERROR_MODEL_CONTEXT,
RT_ERROR_MODEL_ENDGRAPH,
RT_ERROR_MODEL_STREAM,
RT_ERROR_MODEL_EXCUTOR,
RT_ERROR_MODEL_SETUP,
RT_ERROR_MODEL_ID,
RT_ERROR_MODEL_EXE_FAILED,
RT_ERROR_END_OF_SEQUENCE, // end of sequence
RT_ERROR_MODEL_EXIT,
RT_ERROR_MODEL_EXIT_STREAM_UNBIND,
RT_ERROR_MODEL_EXIT_ID,
RT_ERROR_MODEL_ABORT_NORMAL,

RT_ERROR_EVENT_BASE = 0x07050000,
RT_ERROR_EVENT_NULL,
RT_ERROR_EVENT_NEW,
RT_ERROR_EVENT_RECORDER_NULL,
RT_ERROR_EVENT_TIMESTAMP_INVALID,
RT_ERROR_EVENT_TIMESTAMP_REVERSAL,
RT_ERROR_EVENT_NOT_COMPLETE,

RT_ERROR_NOTIFY_BASE = 0x07060000,
RT_ERROR_NOTIFY_NULL,
RT_ERROR_NOTIFY_NEW,
RT_ERROR_NOTIFY_TYPE,
RT_ERROR_NOTIFY_NOT_COMPLETE,

RT_ERROR_CONTEXT_BASE = 0x07070000,
RT_ERROR_CONTEXT_NULL,
RT_ERROR_CONTEXT_NEW,
RT_ERROR_CONTEXT_DEL,
RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL,
RT_ERROR_CONTEXT_ONLINE_STREAM_NULL,

RT_ERROR_KERNEL_BASE = 0x07080000,
RT_ERROR_KERNEL_NULL,
RT_ERROR_KERNEL_NEW,
RT_ERROR_KERNEL_LOOKUP,
RT_ERROR_KERNEL_NAME,
RT_ERROR_KERNEL_TYPE,
RT_ERROR_KERNEL_OFFSET,
RT_ERROR_KERNEL_DUPLICATE,
RT_ERROR_KERNEL_UNREGISTERING,

RT_ERROR_PROGRAM_BASE = 0x07090000,
RT_ERROR_PROGRAM_NULL,
RT_ERROR_PROGRAM_NEW,
RT_ERROR_PROGRAM_DATA,
RT_ERROR_PROGRAM_SIZE,
RT_ERROR_PROGRAM_MEM_TYPE,
RT_ERROR_PROGRAM_MACHINE_TYPE,
RT_ERROR_PROGRAM_USEOUT,

RT_ERROR_MODULE_BASE = 0x070a0000,
RT_ERROR_MODULE_NULL,
RT_ERROR_MODULE_NEW,

RT_ERROR_INSTANCE_BASE = 0x070b0000,
RT_ERROR_INSTANCE_NULL,
RT_ERROR_INSTANCE_NEW,
RT_ERROR_INSTANCE_VERSION,

RT_ERROR_API_BASE = 0x070c0000,
RT_ERROR_API_NULL,
RT_ERROR_API_NEW,

RT_ERROR_DATADUMP_BASE = 0x070d0000,
RT_ERROR_DATADUMP_NULL,
RT_ERROR_DATADUMP_NEW,
RT_ERROR_DATADUMP_TIME,
RT_ERROR_DATADUMP_FILE,
RT_ERROR_DATADUMP_ADDRESS,
RT_ERROR_DATADUMP_LOAD_FAILED,
RT_ERROR_DUMP_ADDR_SET_FAILED,

RT_ERROR_PROF_BASE = 0x070e0000,
RT_ERROR_PROF_NULL,
RT_ERROR_PROF_NEW,
RT_ERROR_PROF_START,
RT_ERROR_PROF_DEVICE_MEM,
RT_ERROR_PROF_HOST_MEM,
RT_ERROR_PROF_SET_DIR,
RT_ERROR_PROF_OPER,
RT_ERROR_PROF_FULL,
RT_ERROR_PROF_NAME,

RT_ERROR_PCTRACE_BASE = 0x070f0000,
RT_ERROR_PCTRACE_NULL,
RT_ERROR_PCTRACE_NEW,
RT_ERROR_PCTRACE_TIME,
RT_ERROR_PCTRACE_FILE,

RT_ERROR_TASK_BASE = 0x07100000,
RT_ERROR_TASK_NULL,
RT_ERROR_TASK_NEW,
RT_ERROR_TASK_TYPE,
RT_ERROR_TASK_ALLOCATOR,

RT_ERROR_COMMON_BASE = 0x07110000,
RT_ERROR_INVALID_VALUE, // RT_ERROR_INPUT_INVALID
RT_ERROR_MEMORY_ADDRESS_UNALIGNED,
RT_ERROR_SEC_HANDLE,
RT_ERROR_OS_HANDLE,
RT_ERROR_MUTEX_LOCK,
RT_ERROR_MUTEX_UNLOCK,
RT_ERROR_CALLOC,
RT_ERROR_POOL_RESOURCE,
RT_ERROR_TRANS_ARGS,
RT_ERROR_METADATA,
RT_ERROR_LOST_HEARTBEAT,
RT_ERROR_REPORT_TIMEOUT,
RT_ERROR_FEATURE_NOT_SUPPROT,
RT_ERROR_MEMORY_ALLOCATION,
RT_ERROR_MEMORY_FREE,
RT_ERROR_INVALID_MEMORY_TYPE,

RT_ERROR_DEBUG_BASE = 0x07120000,
RT_ERROR_DEBUG_NULL,
RT_ERROR_DEBUG_NEW,
RT_ERROR_DEBUG_SIGNAL,
RT_ERROR_DEBUG_OPEN,
RT_ERROR_DEBUG_WRITE,
RT_ERROR_DEBUG_REGISTER_FAILED,
RT_ERROR_DEBUG_UNREGISTER_FAILED,

RT_ERROR_ENGINE_BASE = 0x07130000,
RT_ERROR_ENGINE_NULL,
RT_ERROR_ENGINE_NEW,
RT_ERROR_ENGINE_THREAD,

RT_ERROR_LABEL_BASE = 0x07140000,
RT_ERROR_LABEL_NULL,
RT_ERROR_LABEL_NEW,
RT_ERROR_LABEL_CONTEXT,
RT_ERROR_LABEL_STREAM,
RT_ERROR_LABEL_MODEL,
RT_ERROR_LABEL_ALLOCATOR,
RT_ERROR_LABEL_FREE,
RT_ERROR_LABEL_SET,
RT_ERROR_LABEL_ID,

RT_ERROR_TSFW_BASE = 0x07150000,
RT_ERROR_TSFW_UNKNOWN,
RT_ERROR_TSFW_NULL_PTR,
RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID,
RT_ERROR_TSFW_ILLEGAL_PARAM,
RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL,
RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY,
RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL,
RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY,
RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED,
RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED,
RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE,
RT_ERROR_TSFW_L2_MALLOC_FAILED,
RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED,
RT_ERROR_TSFW_MEMCPY_OP_FAILED,
RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED,
RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE,
RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL,
RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY,
RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED,
RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE,
RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED,
RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND,
RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED,
RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED,
RT_ERROR_TSFW_SQNODE_NOT_ENOUGH,
RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE,
RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE,
RT_ERROR_TSFW_CQ_REPORT_FAILED,
RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS,
RT_ERROR_TSFW_SYS_DMA_RESET_FAILED,
RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED,
RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED,
RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL,
RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY,
RT_ERROR_TSFW_TIMER_EVENT_FULL,
RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH,
RT_ERROR_TSFW_AICORE_TIMEOUT,
RT_ERROR_TSFW_AICORE_EXCEPTION,
RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION,
RT_ERROR_TSFW_AICPU_TIMEOUT,
RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL,
RT_ERROR_TSFW_AICPU_EXCEPTION,
RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR,
RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR,
RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM,
RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT,
RT_ERROR_TSFW_DEBUG_INVALID_SQCQ,
RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE,
RT_ERROR_TSFW_DEBUG_CMD_PROCESS,
RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS,
RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS,
RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS,
RT_ERROR_TSFW_DEBUG_TASK_EMPTY,
RT_ERROR_TSFW_DEBUG_TASK_FULL,
RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST,
RT_ERROR_TSFW_DEBUG_AI_CORE_FULL,
RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST,
RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION,
RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT,
RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL,
RT_ERROR_TSFW_DEBUG_READ_ERROR,
RT_ERROR_TSFW_DEBUG_WRITE_FAIL,
RT_ERROR_TSFW_QUEUE_FULL,
RT_ERROR_TSFW_QUEUE_EMPTY,
RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL,
RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH,
RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE,
RT_ERROR_TSFW_INVLD_CPY_DIR,
RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES,
RT_ERROR_TSFW_PCIE_DMA_CPY_ERR,
RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY,
RT_ERROR_TSFW_PROFILE_BUFF_FULL,
RT_ERROR_TSFW_PROFILE_MODE_CONFLICT,
RT_ERROR_TSFW_PROFILE_OTHER_PID_ON,
RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED,
RT_ERROR_TSFW_TSCPU_CLOSE_FAILED,
RT_ERROR_TSFW_EXPECT_FAIL,
RT_ERROR_TSFW_REPEAT_MODEL_STREAM,
RT_ERROR_TSFW_STREAM_MODEL_UNBIND,
RT_ERROR_TSFW_MODEL_EXE_FAILED,
RT_ERROR_TSFW_IPC_SEND_FAILED,
RT_ERROR_TSFW_IPC_PROC_REG_FAILED,
RT_ERROR_TSFW_STREAM_FULL,
RT_ERROR_TSFW_END_OF_SEQUENCE,
RT_ERROR_TSFW_SWITCH_STREAM_LABEL,
RT_ERROR_TSFW_TRANS_SQE_FAIL,
RT_ERROR_TSFW_RESERVED,

RT_ERROR_SUBSCRIBE_BASE = 0x07160000,
RT_ERROR_SUBSCRIBE_NULL,
RT_ERROR_SUBSCRIBE_NEW,
RT_ERROR_SUBSCRIBE_STREAM,
RT_ERROR_SUBSCRIBE_THREAD,
RT_ERROR_SUBSCRIBE_GROUP,

RT_ERROR_GROUP_BASE = 0x07170000,
RT_ERROR_GROUP_NOT_SET,
RT_ERROR_GROUP_NOT_CREATE,

RT_ERROR_RESERVED = 0x07ff0000,
}rtError_t;
typedef int32_t rtError_t;
static const int32_t RT_ERROR_NONE = 0; // success

/**
* @ingroup dvrt_base
@@ -387,10 +87,20 @@ typedef struct rtExceptionInfo {
uint32_t deviceid;
} rtExceptionInfo;

// Extended exception info for a failed task, delivered to the per-module
// fail callback (rtTaskFailCallbackByModule). Superset of rtExceptionInfo,
// adding the task's return code.
typedef struct rtTaskFailInfo {
uint32_t taskid;    // id of the failed task
uint32_t streamid;  // id of the stream the task belongs to
uint32_t tid;       // thread id -- presumably the submitting thread; TODO confirm
uint32_t deviceid;  // id of the device the task ran on
uint32_t retcode;   // error code reported for the failed task
} rtTaskFailInfo;

// Callback invoked when the runtime reports an exception of the given type.
typedef void (*rtErrorCallback)(rtExceptionType);

// Callback invoked with basic exception info (task/stream/thread/device ids)
// when a task fails.
typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);

// Per-module variant of the task-fail callback; receives the extended
// rtTaskFailInfo, which also carries the task return code.
typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo);

// Callback invoked on device state change: isOpen == true when the device
// is opened, false when it is closed.
typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen);

/**
@@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t*
*/
RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);

/**
 * @ingroup profiling_base
 * @brief ts set profiling reporter callback.
 * @param [in] callback reporter callback of type MsprofReporterCallback
 * @return RT_ERROR_NONE for ok
 */
RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback);

/**
* @ingroup dvrt_base
* @brief Returns the last error from a runtime call.
@@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback);
*/
RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback);

/**
 * @ingroup dvrt_base
 * @brief register callback for fail task
 * @param [in] moduleName unique register name, can't be null
 * @param [in] callback fail task callback function
 * @param [out] NA
 * @return RT_ERROR_NONE for ok
 */
RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback);

/**
* @ingroup dvrt_base
* @brief notify handle.


+ 8
- 15
third_party/fwkacllib/inc/runtime/config.h View File

@@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig {

typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t;

/**
* @ingroup
* @brief get platform
* @param [in] platForm
* @return platForm
*/
RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm);

/**
* @ingroup
* @brief get AI core count
@@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate
*/
RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);

/**
* @ingroup
* @brief set platform in gen ctx
* @param [in] platForm
* @return RT_ERROR_NONE for ok, errno for failed
*/
RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);

/**
* @ingroup
@@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
*/
RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);

/**
* @ingroup
* @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be represented by 9020.
* @param [out] runtimeVersion
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif


+ 2
- 2
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3
FEATURE_TYPE_MEMCPY = 0,
FEATURE_TYPE_RSV,
} rtFeatureType_t;
* @param [in] infoType info type
* @param [in] featureInfo info type
/**
 * Capability query keys for the MEMCPY feature type
 * (second argument of rtGetRtCapability).
 */
typedef enum tagMemcpyInfo {
MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,  // whether zero-copy memcpy is supported
MEMCPY_INFO_RSV,                   // reserved (fixed: identifier contained a stray space)
} rtMemcpyInfo_t;
* @param [out] value the capability info
* @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value);


+ 1
- 1
third_party/fwkacllib/inc/runtime/rt.h View File

@@ -28,4 +28,4 @@
#include "rt_model.h"
#include "stream.h"

#endif // __CCE_RUNTIME_RT_H__
#endif // __CCE_RUNTIME_RT_H__

+ 7
- 0
third_party/fwkacllib/inc/tdt/status.h View File

@@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t;
typedef uint32_t TDT_StatusT;
#endif

// Target-OS discriminators matched against the build system's
// TARGET_SYSTEM_NAME definition.
#define LINUX 0
#define WINDOWS 1

// Export annotation for the tdt client library: __declspec(dllexport) when
// building for Windows, default ELF symbol visibility otherwise.
#ifndef TDT_LIB_EXPORT
#if(TARGET_SYSTEM_NAME == WINDOWS)
#define TDT_LIB_EXPORT __declspec(dllexport)
#else
#define TDT_LIB_EXPORT __attribute__((visibility("default")))
#endif
#endif
/**
* @ingroup tdt status.
*


+ 24
- 4
third_party/fwkacllib/inc/tdt/tsd_client.h View File

@@ -23,6 +23,7 @@
#include <mutex>
#include "tdt/status.h"
#include "tdt/data_common.h"
#include "toolchain/prof_callback.h"

#ifdef __cplusplus
extern "C" {
@@ -37,7 +38,7 @@ extern "C" {
* Used for the Framework process to communicate with the TSDDaemon process,
* and notify TSD to complete the initialization of other processes
*
* @param phyDeviceId [IN] type #unsigned int. Physical device ID
* @param logicDeviceId [IN] type #unsigned int. Logic device ID
* @param rankSize [IN] type #unsigned int. The rankSize of the training.
* The default value is 1. When rankSize is greater than 1,
* HCCP will be pulled to perform set communication related operations.
@@ -49,7 +50,7 @@ extern "C" {
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize);
TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);

/**
* @ingroup Close
@@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);

/**
* @ingroup UpdateProfilingMode
@@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag);
TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);

/**
 * @ingroup TsdSetMsprofReporterCallback
 * @brief set the aicpu profiling reporter callback for inference scenarios
 *
 * @par Function
 * Sets the profiling reporter callback of the aicpu_sd process in offline mode
 *
 * @param callback [IN] type #MsprofReporterCallback. callback function
 * @retval TDT_OK Success
 * @retval OtherValues Failure
 *
 * @par Dependency
 * @li libtsdclient.so: Library to which the interface belongs.
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 * @li prof_callback.h: Header file where 'MsprofReporterCallback' defined
 */
TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);

/**
* @ingroup CreateCmdParameterObj


+ 135
- 0
third_party/fwkacllib/inc/toolchain/prof_callback.h View File

@@ -0,0 +1,135 @@
/**
* Copyright 2020-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @file prof_callback.h
* @brief declaraion of profiling callbacks
*/

#ifndef MSPROFILER_PROF_CALLBACK_H_
#define MSPROFILER_PROF_CALLBACK_H_

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus


#include "stddef.h"
#include "stdint.h"

/**
 * @name MsprofErrorCode
 * @brief error codes returned by the profiling callbacks
 */
enum MsprofErrorCode {
MSPROF_ERROR_NONE = 0,        // success
MSPROF_ERROR_MEM_NOT_ENOUGH,  // out of memory
MSPROF_ERROR_GET_ENV,         // failed to read an environment variable
MSPROF_ERROR_CONFIG_INVALID,  // invalid profiling configuration
MSPROF_ERROR_ACL_JSON_OFF,    // profiling disabled in acl.json
MSPROF_ERROR,                 // generic failure
};

// Maximum length of a reporter tag string (excluding the NUL terminator).
#define MSPROF_ENGINE_MAX_TAG_LEN (31)

/**
 * @name ReporterData
 * @brief struct of data to report
 */
struct ReporterData {
char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module; data with different tags is written separately
int deviceId; // the index of device
size_t dataLen; // the length of send data
unsigned char *data; // the data content
};

/**
 * @name MsprofReporterModuleId
 * @brief id of the module reporting data, first argument of MsprofReporterCallback
 */
enum MsprofReporterModuleId {
MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS
MSPROF_MODULE_HCCL, // HCCL
MSPROF_MODULE_ACL, // AclModule
MSPROF_MODULE_FRAMEWORK, // Framework
MSPROF_MODULE_RUNTIME // runtime
};

/**
 * @name MsprofReporterCallbackType
 * @brief reporter callback request type, second argument of MsprofReporterCallback
 */
enum MsprofReporterCallbackType {
MSPROF_REPORTER_REPORT = 0, // report data
MSPROF_REPORTER_INIT, // init reporter
MSPROF_REPORTER_UNINIT, // uninit reporter
};

/**
 * @name MsprofReporterCallback
 * @brief callback to start reporter/stop reporter/report data
 * @param moduleId [IN] enum MsprofReporterModuleId
 * @param type [IN] enum MsprofReporterCallbackType
 * @param data [IN] callback data (nullptr on INIT/UNINIT)
 * @param len [IN] callback data size (0 on INIT/UNINIT)
 * @return enum MsprofErrorCode
 */
typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len);


// Maximum length of the jobId / options strings in MsprofGeOptions.
#define MSPROF_OPTIONS_DEF_LEN_MAX (2048)

/**
 * @name MsprofGeOptions
 * @brief payload struct of the MSPROF_CTRL_INIT_GE_OPTIONS ctrl callback
 */
struct MsprofGeOptions {
char jobId[MSPROF_OPTIONS_DEF_LEN_MAX];   // job identifier
char options[MSPROF_OPTIONS_DEF_LEN_MAX]; // GE profiling options string
};

/**
 * @name MsprofCtrlCallbackType
 * @brief ctrl callback request type, first argument of MsprofCtrlCallback
 */
enum MsprofCtrlCallbackType {
MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env
MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options
MSPROF_CTRL_FINALIZE // stop profiling
};

/**
 * @name MsprofCtrlCallback
 * @brief callback to start/stop profiling
 * @param type [IN] enum MsprofCtrlCallbackType
 * @param data [IN] callback data (struct MsprofGeOptions for MSPROF_CTRL_INIT_GE_OPTIONS)
 * @param len [IN] callback data size
 * @return enum MsprofErrorCode
 */
typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len);

/**
 * @name MsprofSetDeviceCallback
 * @brief callback to notify set/reset device
 * @param devId [IN] device id
 * @param isOpenDevice [IN] true: set device, false: reset device
 */
// NOTE(review): uses 'bool' inside an extern "C" header that includes only
// stddef.h/stdint.h -- pure C consumers need <stdbool.h>; confirm with owners.
typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice);

#ifdef __cplusplus
}
#endif

#endif // MSPROFILER_PROF_CALLBACK_H_

+ 11
- 15
third_party/fwkacllib/inc/toolchain/prof_reporter.h View File

@@ -16,7 +16,17 @@

#ifndef MSPROF_ENGINE_PROF_REPORTER_H_
#define MSPROF_ENGINE_PROF_REPORTER_H_
// Default OS_TYPE when the build system does not define it.
#ifndef OS_TYPE
#define OS_TYPE 0
#endif // OS_TYPE

// Export annotation for the msprof engine API: __declspec(dllexport) on
// non-Linux (Windows) builds, default ELF symbol visibility on Linux.
// NOTE(review): compares against LINUX, which this header does not define
// itself -- presumably defined elsewhere (e.g. tdt/status.h); confirm.
#if (OS_TYPE != LINUX)
#define MSVP_PROF_API __declspec(dllexport)
#else
#define MSVP_PROF_API __attribute__((visibility("default")))
#endif

#include "prof_callback.h"

/**
* @file prof_reporter.h
@@ -25,20 +35,6 @@
*/
namespace Msprof {
namespace Engine {
/// the max tag length
#define MSPROF_ENGINE_MAX_TAG_LEN (31)
/**
* @ingroup reporter
* @brief struct ReporterData
* the sturct of the data send to libmsprof
*/
struct ReporterData {
char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen
int deviceId; ///< the physical id of device
size_t dataLen; ///< the length of send data
unsigned char *data; ///< the data content
};

/**
* @ingroup reporter
* @brief class Reporter
@@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter {
} // namespace Engine
} // namespace Msprof

#endif // MSPROF_ENGINE_PROF_REPORTER_H_
#endif // MSPROF_ENGINE_PROF_REPORTER_H_

+ 25
- 0
third_party/fwkacllib/inc/toolchain/slog.h View File

@@ -18,7 +18,9 @@
#define D_SYSLOG_H_

#ifdef __cplusplus
#ifndef LOG_CPP
extern "C" {
#endif
#endif // __cplusplus

#ifndef LINUX
@@ -105,6 +107,7 @@ extern "C" {
#define SECURITY_LOG_MASK (0x00100000)
#define RUN_LOG_MASK (0x01000000)
#define OPERATION_LOG_MASK (0x10000000)
#define RESERVERD_LENGTH 52

typedef struct tagDCODE {
const char *cName;
@@ -116,6 +119,18 @@ typedef struct tagKV {
char *value;
} KeyValue;

// Whether the logging process is a user application or a system service.
typedef enum {
APPLICATION = 0,
SYSTEM
} ProcessType;

// Log attributes configured via DlogSetAttr (defaults per its doc:
// pid 0, device id 0, process type APPLICATION).
typedef struct {
ProcessType type;      // process type
unsigned int pid;      // process id
unsigned int deviceId; // device (chip) id
char reserved[RESERVERD_LENGTH]; // reserved padding; "RESERVERD" spelling is part of the public API, keep as-is
} LogAttr;

/**
* @ingroup slog
*
@@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent);
*/
DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel);

/**
* @ingroup slog
* @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION
* @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID)
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogSetAttr(LogAttr logAttr);

/**
* @ingroup slog
* @brief dlog_error: print error log
@@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...);
void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);

#ifdef __cplusplus
#ifndef LOG_CPP
}
#endif // LOG_CPP
#endif // __cplusplus
#endif // D_SYSLOG_H_

Loading…
Cancel
Save