| @@ -16,8 +16,11 @@ endif() | |||
| if(DEFINED ENV{D_PKG_SERVER}) | |||
| set(GE_PB_PKG $ENV{D_PKG_SERVER}) | |||
| message("Download packages from PKG server") | |||
| endif() | |||
| message("Download packages from DPKG server") | |||
| elseif(DEFINED ENV{MSLIBS_SERVER}) | |||
| set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081") | |||
| message("Download packages from MSPKG server") | |||
| endif () | |||
| set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64) | |||
| set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common) | |||
| @@ -105,7 +108,7 @@ if (ENABLE_OPEN_SRC) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
| if(PRODUCT STREQUAL "flr3") | |||
| elseif(PRODUCT STREQUAL "flr1") | |||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
| @@ -115,7 +118,7 @@ if (ENABLE_OPEN_SRC) | |||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | |||
| endif() | |||
| elseif(PLATFORM STREQUAL "all") | |||
| find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR}) | |||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
| find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||
| find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||
| find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||
| @@ -123,14 +126,14 @@ if (ENABLE_OPEN_SRC) | |||
| find_module(resource libresource.so ${ASCEND_ATC_DIR}) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
| find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR}) | |||
| find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
| else() | |||
| message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||
| message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||
| endif() | |||
| if (ENABLE_GE_COV OR ENABLE_GE_UT) | |||
| if (ENABLE_GE_COV OR ENABLE_GE_UT) | |||
| add_subdirectory(tests) | |||
| endif() | |||
| @@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build | |||
| URL ${REQ_URL} | |||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||
| #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2 | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR> | |||
| BUILD_COMMAND $(MAKE) | |||
| INSTALL_COMMAND $(MAKE) install | |||
| @@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||
| message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | |||
| endif() | |||
| if (ENABLE_GITEE) | |||
| if (GE_PB_PKG) | |||
| set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz") | |||
| set(MD5 "") | |||
| elseif (ENABLE_GITEE) | |||
| set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz") | |||
| set(MD5 "") | |||
| else() | |||
| @@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack- | |||
| set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") | |||
| ExternalProject_Add(gtest_build | |||
| URL ${REQ_URL} | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR> | |||
| -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON | |||
| -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON | |||
| BUILD_COMMAND $(MAKE) | |||
| INSTALL_COMMAND $(MAKE) install | |||
| EXCLUDE_FROM_ALL TRUE | |||
| @@ -5,19 +5,24 @@ endif() | |||
| include(ExternalProject) | |||
| set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include) | |||
| #if (ENABLE_GITEE) | |||
| if (GE_PB_PKG) | |||
| set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip") | |||
| set(MD5 "0dc903888211db3a0f170304cd9f3a89") | |||
| set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) | |||
| #elseif (ENABLE_GITEE) | |||
| # set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") | |||
| # set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") | |||
| # set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") | |||
| #else() | |||
| set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") | |||
| set(MD5 "0dc903888211db3a0f170304cd9f3a89") | |||
| set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) | |||
| #endif () | |||
| #set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") | |||
| else() | |||
| set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") | |||
| set(MD5 "0dc903888211db3a0f170304cd9f3a89") | |||
| set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) | |||
| endif () | |||
| ExternalProject_Add(json_build | |||
| URL ${REQ_URL} | |||
| #URL /home/txd/workspace/cloud_code/pkg/include.zip | |||
| SOURCE_DIR ${JSON_SRC_DIR} | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND "" | |||
| BUILD_COMMAND "" | |||
| INSTALL_COMMAND "" | |||
| @@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx) | |||
| set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto) | |||
| file(MAKE_DIRECTORY ${ONNX_PROTO_DIR}) | |||
| if (ENABLE_GITEE) | |||
| if (GE_PB_PKG) | |||
| set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz") | |||
| set(MD5 "512f2779d6215d4a36f366b6b9acdf1e") | |||
| elseif (ENABLE_GITEE) | |||
| set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz") | |||
| set(MD5 "1bdbcecdd68ea8392630467646776e02") | |||
| else() | |||
| @@ -19,6 +22,7 @@ ExternalProject_Add(onnx | |||
| #URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz | |||
| #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345 | |||
| #SOURCE_DIR ${ONNX_SRC_DIR} | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND "" | |||
| BUILD_COMMAND "" | |||
| #INSTALL_COMMAND "" | |||
| @@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst | |||
| set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") | |||
| ExternalProject_Add(protobuf_build | |||
| URL ${REQ_URL} | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} | |||
| -Dprotobuf_WITH_ZLIB=OFF | |||
| -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} | |||
| @@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build | |||
| URL ${REQ_URL} | |||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||
| #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0 | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} | |||
| -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} | |||
| -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} | |||
| @@ -1,115 +1,116 @@ | |||
| # Skip the rest of this file when HAVE_PROTOC is already set (it is set at the end of this file). | |||
| if (HAVE_PROTOC) | |||
|   return() | |||
| endif() | |||
| include(ExternalProject) | |||
| include(GNUInstallDirs) | |||
| #set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) | |||
| # When the install prefix still has one of these two values, redirect it to ${GE_CODE_DIR}/output. | |||
| if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||
|     (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) | |||
|   set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) | |||
|   message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | |||
| endif() | |||
| # Pick the protobuf 3.8.0 archive: GE_PB_PKG server first, then the gitee mirror, then github. | |||
| # NOTE(review): MD5 is assigned for the two public mirrors only and is not referenced by the | |||
| # ExternalProject_Add call below - confirm whether a hash check was intended. | |||
| if (GE_PB_PKG) | |||
|   set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") | |||
| elseif (ENABLE_GITEE) | |||
|   set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") | |||
|   set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") | |||
| else() | |||
|   set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") | |||
|   set(MD5 "3d9e32700639618a4d2d342c99d4507a") | |||
| endif() | |||
| set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") | |||
| set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") | |||
| # Download, configure, build and install protoc into ${CMAKE_INSTALL_PREFIX}/protoc. | |||
| # NOTE(review): CMAKE_CXX_LDFLAGS is not a variable CMake documents; presumably | |||
| # CMAKE_EXE_LINKER_FLAGS was meant - confirm before changing, the flags are GNU-ld specific. | |||
| ExternalProject_Add(protoc_build | |||
|   URL ${REQ_URL} | |||
|   #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||
|   #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 | |||
|   CONFIGURE_COMMAND ${CMAKE_COMMAND} | |||
|     -Dprotobuf_WITH_ZLIB=OFF | |||
|     -Dprotobuf_BUILD_TESTS=OFF | |||
|     -DBUILD_SHARED_LIBS=OFF | |||
|     -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} | |||
|     -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} | |||
|     -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc | |||
|     <SOURCE_DIR>/cmake | |||
|   BUILD_COMMAND $(MAKE) | |||
|   INSTALL_COMMAND $(MAKE) install | |||
|   EXCLUDE_FROM_ALL TRUE | |||
| ) | |||
| # Install location of the package and of the protoc binary used by the generator functions. | |||
| set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) | |||
| set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) | |||
| # protobuf_generate(<comp> <c_var> <h_var> <proto>...) | |||
| # Adds one protoc --cpp_out rule per .proto file and returns the generated .pb.cc paths in | |||
| # <c_var> and the .pb.h paths in <h_var> (both set in the caller's scope). | |||
| # Outputs go to ${CMAKE_BINARY_DIR}/proto/<comp>/proto, plus a sub-directory named after the | |||
| # proto file's parent directory unless that directory is itself called "proto". | |||
| function(protobuf_generate comp c_var h_var) | |||
|   if(NOT ARGN) | |||
|     message(SEND_ERROR "Error: protobuf_generate() called without any proto files") | |||
|     return() | |||
|   endif() | |||
|   set(gen_srcs "") | |||
|   set(gen_hdrs "") | |||
|   foreach(proto ${ARGN}) | |||
|     get_filename_component(proto_abs ${proto} ABSOLUTE) | |||
|     get_filename_component(proto_stem ${proto} NAME_WE) | |||
|     get_filename_component(proto_dir ${proto_abs} PATH) | |||
|     get_filename_component(proto_dir_name ${proto_dir} NAME) | |||
|     if(NOT "${proto_dir_name}" STREQUAL "proto") | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${proto_dir_name}) | |||
|     else() | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||
|     endif() | |||
|     set(cc_out "${out_dir}/${proto_stem}.pb.cc") | |||
|     set(h_out "${out_dir}/${proto_stem}.pb.h") | |||
|     list(APPEND gen_srcs "${cc_out}") | |||
|     list(APPEND gen_hdrs "${h_out}") | |||
|     # The rule depends on protoc_build so the compiler exists before it is invoked. | |||
|     add_custom_command( | |||
|       OUTPUT "${cc_out}" "${h_out}" | |||
|       WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||
|       COMMAND ${CMAKE_COMMAND} -E make_directory "${out_dir}" | |||
|       COMMAND ${protoc_EXECUTABLE} -I${proto_dir} --cpp_out=${out_dir} ${proto_abs} | |||
|       DEPENDS protoc_build ${proto_abs} | |||
|       COMMENT "Running C++ protocol buffer compiler on ${proto}" | |||
|       VERBATIM | |||
|     ) | |||
|   endforeach() | |||
|   set_source_files_properties(${gen_srcs} ${gen_hdrs} PROPERTIES GENERATED TRUE) | |||
|   set(${c_var} ${gen_srcs} PARENT_SCOPE) | |||
|   set(${h_var} ${gen_hdrs} PARENT_SCOPE) | |||
| endfunction() | |||
| # protobuf_generate_py(<comp> <py_var> <proto>...) | |||
| # Adds one protoc --python_out rule per .proto file and returns the generated *_pb2.py paths | |||
| # in <py_var> (set in the caller's scope). | |||
| # Outputs go to ${CMAKE_BINARY_DIR}/proto/<comp>/proto, plus a sub-directory named after the | |||
| # proto file's parent directory unless that directory is itself called "proto". | |||
| function(protobuf_generate_py comp py_var) | |||
|   if(NOT ARGN) | |||
|     message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") | |||
|     return() | |||
|   endif() | |||
|   set(gen_py "") | |||
|   foreach(proto ${ARGN}) | |||
|     get_filename_component(proto_abs ${proto} ABSOLUTE) | |||
|     get_filename_component(proto_stem ${proto} NAME_WE) | |||
|     get_filename_component(proto_dir ${proto_abs} PATH) | |||
|     get_filename_component(proto_dir_name ${proto_dir} NAME) | |||
|     if(NOT "${proto_dir_name}" STREQUAL "proto") | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${proto_dir_name}) | |||
|     else() | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||
|     endif() | |||
|     set(py_out "${out_dir}/${proto_stem}_pb2.py") | |||
|     list(APPEND gen_py "${py_out}") | |||
|     # The rule depends on protoc_build so the compiler exists before it is invoked. | |||
|     add_custom_command( | |||
|       OUTPUT "${py_out}" | |||
|       WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||
|       COMMAND ${CMAKE_COMMAND} -E make_directory "${out_dir}" | |||
|       COMMAND ${protoc_EXECUTABLE} -I${proto_dir} --python_out=${out_dir} ${proto_abs} | |||
|       DEPENDS protoc_build ${proto_abs} | |||
|       COMMENT "Running PYTHON protocol buffer compiler on ${proto}" | |||
|       VERBATIM | |||
|     ) | |||
|   endforeach() | |||
|   set_source_files_properties(${gen_py} PROPERTIES GENERATED TRUE) | |||
|   set(${py_var} ${gen_py} PARENT_SCOPE) | |||
| endfunction() | |||
| #set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") | |||
| # Mark this file as processed; the HAVE_PROTOC check at the top of the file returns early when it is set. | |||
| set(HAVE_PROTOC TRUE) | |||
| # Skip the rest of this file when HAVE_PROTOC is already set (it is set at the end of this file). | |||
| if (HAVE_PROTOC) | |||
|   return() | |||
| endif() | |||
| include(ExternalProject) | |||
| include(GNUInstallDirs) | |||
| #set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) | |||
| # When the install prefix still has one of these two values, redirect it to ${GE_CODE_DIR}/output. | |||
| if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||
|     (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) | |||
|   set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) | |||
|   message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | |||
| endif() | |||
| # Pick the protobuf 3.8.0 archive: GE_PB_PKG server first, then the gitee mirror, then github. | |||
| # NOTE(review): MD5 is assigned for the two public mirrors only and is not referenced by the | |||
| # ExternalProject_Add call below - confirm whether a hash check was intended. | |||
| if (GE_PB_PKG) | |||
|   set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") | |||
| elseif (ENABLE_GITEE) | |||
|   set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") | |||
|   set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") | |||
| else() | |||
|   set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") | |||
|   set(MD5 "3d9e32700639618a4d2d342c99d4507a") | |||
| endif() | |||
| set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") | |||
| set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") | |||
| # Download, configure, build and install protoc into ${CMAKE_INSTALL_PREFIX}/protoc. | |||
| # NOTE(review): TLS_VERIFY OFF turns off server certificate verification for the download and | |||
| # no URL hash is supplied - confirm this is acceptable for the public mirrors. | |||
| # NOTE(review): CMAKE_CXX_LDFLAGS is not a variable CMake documents; presumably | |||
| # CMAKE_EXE_LINKER_FLAGS was meant - confirm before changing, the flags are GNU-ld specific. | |||
| ExternalProject_Add(protoc_build | |||
|   URL ${REQ_URL} | |||
|   #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||
|   #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 | |||
|   TLS_VERIFY OFF | |||
|   CONFIGURE_COMMAND ${CMAKE_COMMAND} | |||
|     -Dprotobuf_WITH_ZLIB=OFF | |||
|     -Dprotobuf_BUILD_TESTS=OFF | |||
|     -DBUILD_SHARED_LIBS=OFF | |||
|     -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} | |||
|     -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} | |||
|     -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc | |||
|     <SOURCE_DIR>/cmake | |||
|   BUILD_COMMAND $(MAKE) | |||
|   INSTALL_COMMAND $(MAKE) install | |||
|   EXCLUDE_FROM_ALL TRUE | |||
| ) | |||
| # Install location of the package and of the protoc binary used by the generator functions. | |||
| set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) | |||
| set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) | |||
| # protobuf_generate(<comp> <c_var> <h_var> <proto>...) | |||
| # Adds one protoc --cpp_out rule per .proto file and returns the generated .pb.cc paths in | |||
| # <c_var> and the .pb.h paths in <h_var> (both set in the caller's scope). | |||
| # Outputs go to ${CMAKE_BINARY_DIR}/proto/<comp>/proto, plus a sub-directory named after the | |||
| # proto file's parent directory unless that directory is itself called "proto". | |||
| function(protobuf_generate comp c_var h_var) | |||
|   if(NOT ARGN) | |||
|     message(SEND_ERROR "Error: protobuf_generate() called without any proto files") | |||
|     return() | |||
|   endif() | |||
|   set(gen_srcs "") | |||
|   set(gen_hdrs "") | |||
|   foreach(proto ${ARGN}) | |||
|     get_filename_component(proto_abs ${proto} ABSOLUTE) | |||
|     get_filename_component(proto_stem ${proto} NAME_WE) | |||
|     get_filename_component(proto_dir ${proto_abs} PATH) | |||
|     get_filename_component(proto_dir_name ${proto_dir} NAME) | |||
|     if(NOT "${proto_dir_name}" STREQUAL "proto") | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${proto_dir_name}) | |||
|     else() | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||
|     endif() | |||
|     set(cc_out "${out_dir}/${proto_stem}.pb.cc") | |||
|     set(h_out "${out_dir}/${proto_stem}.pb.h") | |||
|     list(APPEND gen_srcs "${cc_out}") | |||
|     list(APPEND gen_hdrs "${h_out}") | |||
|     # The rule depends on protoc_build so the compiler exists before it is invoked. | |||
|     add_custom_command( | |||
|       OUTPUT "${cc_out}" "${h_out}" | |||
|       WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||
|       COMMAND ${CMAKE_COMMAND} -E make_directory "${out_dir}" | |||
|       COMMAND ${protoc_EXECUTABLE} -I${proto_dir} --cpp_out=${out_dir} ${proto_abs} | |||
|       DEPENDS protoc_build ${proto_abs} | |||
|       COMMENT "Running C++ protocol buffer compiler on ${proto}" | |||
|       VERBATIM | |||
|     ) | |||
|   endforeach() | |||
|   set_source_files_properties(${gen_srcs} ${gen_hdrs} PROPERTIES GENERATED TRUE) | |||
|   set(${c_var} ${gen_srcs} PARENT_SCOPE) | |||
|   set(${h_var} ${gen_hdrs} PARENT_SCOPE) | |||
| endfunction() | |||
| # protobuf_generate_py(<comp> <py_var> <proto>...) | |||
| # Adds one protoc --python_out rule per .proto file and returns the generated *_pb2.py paths | |||
| # in <py_var> (set in the caller's scope). | |||
| # Outputs go to ${CMAKE_BINARY_DIR}/proto/<comp>/proto, plus a sub-directory named after the | |||
| # proto file's parent directory unless that directory is itself called "proto". | |||
| function(protobuf_generate_py comp py_var) | |||
|   if(NOT ARGN) | |||
|     message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") | |||
|     return() | |||
|   endif() | |||
|   set(gen_py "") | |||
|   foreach(proto ${ARGN}) | |||
|     get_filename_component(proto_abs ${proto} ABSOLUTE) | |||
|     get_filename_component(proto_stem ${proto} NAME_WE) | |||
|     get_filename_component(proto_dir ${proto_abs} PATH) | |||
|     get_filename_component(proto_dir_name ${proto_dir} NAME) | |||
|     if(NOT "${proto_dir_name}" STREQUAL "proto") | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${proto_dir_name}) | |||
|     else() | |||
|       set(out_dir ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||
|     endif() | |||
|     set(py_out "${out_dir}/${proto_stem}_pb2.py") | |||
|     list(APPEND gen_py "${py_out}") | |||
|     # The rule depends on protoc_build so the compiler exists before it is invoked. | |||
|     add_custom_command( | |||
|       OUTPUT "${py_out}" | |||
|       WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||
|       COMMAND ${CMAKE_COMMAND} -E make_directory "${out_dir}" | |||
|       COMMAND ${protoc_EXECUTABLE} -I${proto_dir} --python_out=${out_dir} ${proto_abs} | |||
|       DEPENDS protoc_build ${proto_abs} | |||
|       COMMENT "Running PYTHON protocol buffer compiler on ${proto}" | |||
|       VERBATIM | |||
|     ) | |||
|   endforeach() | |||
|   set_source_files_properties(${gen_py} PROPERTIES GENERATED TRUE) | |||
|   set(${py_var} ${gen_py} PARENT_SCOPE) | |||
| endfunction() | |||
| #set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") | |||
| # Mark this file as processed; the HAVE_PROTOC check at the top of the file returns early when it is set. | |||
| set(HAVE_PROTOC TRUE) | |||
| @@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||
| message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | |||
| endif() | |||
| if (GE_PB_PKG) | |||
| set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz") | |||
| set(MD5 "") | |||
| else() | |||
| set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz") | |||
| set(MD5 "") | |||
| endif () | |||
| ExternalProject_Add(c_sec_build | |||
| URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz | |||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||
| URL ${REQ_URL} | |||
| #URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz | |||
| #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec | |||
| PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} | |||
| -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} | |||
| -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} | |||
| @@ -605,7 +605,7 @@ set(INFER_SRC_LIST | |||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||
| ############ libge_runner.so ############ | |||
| add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>) | |||
| add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) | |||
| target_compile_definitions(ge_runner PRIVATE | |||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
| @@ -646,11 +646,14 @@ target_include_directories(ge_runner PRIVATE | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
| ) | |||
| target_link_libraries(ge_runner | |||
| target_link_libraries(ge_runner PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| ge_memory | |||
| adump_server | |||
| static_mmpa | |||
| -Wl,--whole-archive | |||
| msprofiler_fwk | |||
| -Wl,--no-whole-archive | |||
| -Wl,--no-as-needed | |||
| graph | |||
| ge_common | |||
| @@ -710,7 +713,7 @@ target_include_directories(ge_compiler PRIVATE | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
| ) | |||
| target_link_libraries(ge_compiler | |||
| target_link_libraries(ge_compiler PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| ge_memory | |||
| static_mmpa | |||
| @@ -764,7 +767,14 @@ target_link_options(opensrc_ascendcl PRIVATE | |||
| -Wl,--allow-multiple-definition | |||
| -Wl,-z,muldefs | |||
| -Wl,-Bsymbolic | |||
| -Wl,--exclude-libs,ALL | |||
| -Wl,--exclude-libs,libascend_protobuf.a | |||
| -Wl,--exclude-libs,libge_executor.a | |||
| -Wl,--exclude-libs,libge_common.a | |||
| -Wl,--exclude-libs,libgraph.a | |||
| -Wl,--exclude-libs,libmmpa.a | |||
| -Wl,--exclude-libs,libregister.a | |||
| -Wl,--exclude-libs,liberror_manager.a | |||
| -Wl,--exclude-libs,libadump_server.a | |||
| ) | |||
| target_link_libraries(opensrc_ascendcl PRIVATE | |||
| -Wl,--whole-archive | |||
| @@ -143,6 +143,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { | |||
| } | |||
| try { | |||
| Json prof_options = Json::parse(options); | |||
| if (options.find(kTrainingTrace) == std::string::npos) { | |||
| return ge::SUCCESS; | |||
| } | |||
| const std::string training_trace = prof_options[kTrainingTrace]; | |||
| if (training_trace.empty()) { | |||
| GELOGI("Training trace will not take effect."); | |||
| @@ -2991,19 +2991,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||
| return SUCCESS; | |||
| } | |||
| Status DavinciModel::UpdateKnownZeroCopyAddr() { | |||
| for (size_t i = 0; i < total_io_addrs_.size(); ++i) { | |||
| auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); | |||
| Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) { | |||
| for (size_t i = 0; i < total_io_addrs.size(); ++i) { | |||
| auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); | |||
| if (it_in != knonw_input_data_info_.end()) { | |||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||
| knonw_input_data_info_.at(total_io_addrs_[i])); | |||
| total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); | |||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||
| knonw_input_data_info_.at(total_io_addrs[i])); | |||
| total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); | |||
| } | |||
| auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); | |||
| auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); | |||
| if (it_out != knonw_output_data_info_.end()) { | |||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||
| knonw_output_data_info_.at(total_io_addrs_[i])); | |||
| total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); | |||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||
| knonw_output_data_info_.at(total_io_addrs[i])); | |||
| total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); | |||
| } | |||
| } | |||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | |||
| @@ -3032,7 +3032,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||
| } else { | |||
| total_io_addrs_ = orig_total_io_addrs_; | |||
| } | |||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||
| if (total_args_size_ == 0) { | |||
| GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | |||
| @@ -3099,7 +3099,14 @@ Status DavinciModel::MallocKnownArgs() { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| // malloc dynamic and static hybrid memory | |||
| if (total_hybrid_args_size_ != 0) { | |||
| rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| } | |||
| // malloc fixed addr memory, eg: rts op | |||
| if (total_fixed_addr_size_ != 0) { | |||
| GELOGI("Begin to allocate fixed addr."); | |||
| @@ -476,6 +476,14 @@ class DavinciModel { | |||
| // Appends every address in io_addrs to the end of total_io_addrs_. | |||
| // Despite the "Set" name this accumulates; earlier entries are kept. | |||
| // The argument is only read, so it is taken by const reference. | |||
| void SetTotalIOAddrs(const vector<void *> &io_addrs) { | |||
|   total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | |||
| } | |||
| // Adds args_size to the running total total_hybrid_args_size_. | |||
| // Despite the "Set" name this accumulates rather than overwrites. | |||
| void SetHybridArgsSize(uint32_t args_size) { | |||
|   total_hybrid_args_size_ = total_hybrid_args_size_ + args_size; | |||
| } | |||
| // Returns the hybrid args size accumulated so far through SetHybridArgsSize(). | |||
| // Read-only accessor, hence const. | |||
| uint32_t GetHybridArgsSize() const { | |||
|   return total_hybrid_args_size_; | |||
| } | |||
| // Returns hybrid_addrs_ advanced by offset bytes (arithmetic is done on char *). | |||
| // Does not modify the model, so it is const like GetCurrentFixedAddr(). | |||
| // NOTE(review): no check that hybrid_addrs_ is non-null or that offset is within the | |||
| // allocated size - presumably callers only use offsets handed out earlier; confirm. | |||
| void *GetCurrentHybridArgsAddr(uint32_t offset) const { | |||
|   return static_cast<char *>(hybrid_addrs_) + offset; | |||
| } | |||
| void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | |||
| int64_t GetFixedAddrsSize(string tensor_name); | |||
| void *GetCurrentFixedAddr(int64_t offset) const { | |||
| @@ -494,7 +502,7 @@ class DavinciModel { | |||
| Status MallocKnownArgs(); | |||
| Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | |||
| Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | |||
| Status UpdateKnownZeroCopyAddr(); | |||
| Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs); | |||
| void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | |||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | |||
| @@ -977,6 +985,8 @@ class DavinciModel { | |||
| void *args_ = nullptr; | |||
| void *args_host_ = nullptr; | |||
| void *fixed_addrs_ = nullptr; | |||
| void *hybrid_addrs_ = nullptr; | |||
| uint32_t total_hybrid_args_size_ = 0; | |||
| int64_t total_fixed_addr_size_ = 0; | |||
| std::map<const void *, void *> knonw_input_data_info_; | |||
| std::map<const void *, void *> knonw_output_data_info_; | |||
| @@ -1055,7 +1055,16 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
| mmTimespec timespec = mmGetTickCount(); | |||
| ModelHelper model_helper; | |||
| Status ret = model_helper.LoadModel(model); | |||
| Status ret = model_helper.LoadRootModel(model); | |||
| if (model_helper.GetModelType()) { | |||
| bool is_shape_unknown = false; | |||
| GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), | |||
| "CheckIsUnknownShape failed, model id:%u", | |||
| model_id); | |||
| if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||
| return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); | |||
| } | |||
| } | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "load model failed."); | |||
| return ret; | |||
| @@ -1214,7 +1223,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "Invalid model id %u, check weather model has been loaded or not.", model_id); | |||
| "Invalid model id %u, check whether model has been loaded or not.", model_id); | |||
| if (davinci_model->NeedDestroyAicpuKernel()) { | |||
| GELOGI("Start to destroy specified aicpu kernel."); | |||
| @@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||
| Status KernelTaskInfo::Distribute() { | |||
| GELOGD("KernelTaskInfo Distribute Start."); | |||
| if (davinci_model_->IsKnownNode()) { | |||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); | |||
| } | |||
| GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | |||
| } | |||
| rtError_t rt_ret = RT_ERROR_NONE; | |||
| @@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() { | |||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
| vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | |||
| vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | |||
| vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||
| vector<void *> io_addrs; | |||
| if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
| io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
| io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||
| io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
| } else { | |||
| string peer_input_name; | |||
| if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { | |||
| uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); | |||
| if (output_index > output_data_addrs.size()) { | |||
| GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", | |||
| output_data_addrs.size(), output_index); | |||
| return FAILED; | |||
| } | |||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
| for (size_t i = 0; i < output_data_addrs.size(); ++i) { | |||
| if (i == output_index) { | |||
| void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||
| io_addrs.emplace_back(fixed_addr); | |||
| continue; | |||
| } | |||
| io_addrs.emplace_back(output_data_addrs[i]); | |||
| } | |||
| io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
| davinci_model_->SetTotalIOAddrs(io_addrs); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||
| uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||
| auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||
| errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| // copy args to device | |||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| } | |||
| davinci_model_->SetTotalIOAddrs(io_addrs); | |||
| GELOGI("KernelTaskInfo::UpdateArgs success."); | |||
| return SUCCESS; | |||
| } | |||
| @@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
| } | |||
| Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| domi::KernelDef kernel_def = task_def.kernel(); | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||
| // get opcontext stored in model | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const domi::KernelContext &context = kernel_def.context(); | |||
| // get opdesc | |||
| op_desc_ = davinci_model->GetOpByIndex(context.op_index()); | |||
| GE_CHECK_NOTNULL(op_desc_); | |||
| // alloc fixed addr | |||
| string peer_input_name; | |||
| if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { | |||
| uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); | |||
| if (output_index > op_desc_->GetOutputsSize()) { | |||
| GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), | |||
| output_index); | |||
| return FAILED; | |||
| } | |||
| fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); | |||
| auto tensor_desc = op_desc_->GetOutputDesc(output_index); | |||
| int64_t tensor_size = 0; | |||
| GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
| davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); | |||
| GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, | |||
| fixed_addr_offset_); | |||
| kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||
| davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||
| GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -888,7 +872,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| } | |||
| // copy args to new host memory | |||
| std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]); | |||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
| GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | |||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| @@ -896,8 +880,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| return FAILED; | |||
| } | |||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||
| if (init_ret != SUCCESS) { | |||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||
| return init_ret; | |||
| } | |||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||
| if (davinci_model_->IsKnownNode()) { | |||
| return SUCCESS; | |||
| } | |||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
| vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | |||
| vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | |||
| vector<void *> io_addrs; | |||
| @@ -914,19 +913,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| } | |||
| } | |||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||
| if (init_ret != SUCCESS) { | |||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||
| return init_ret; | |||
| } | |||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||
| // malloc device memory for args | |||
| rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| @@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo { | |||
| OpDescPtr op_desc_; | |||
| DavinciModel *davinci_model_; | |||
| uint32_t args_offset_ = 0; | |||
| uint32_t hybrid_args_offset_ = 0; | |||
| int64_t fixed_addr_offset_ = 0; | |||
| std::unique_ptr<uint8_t[]> args_addr = nullptr; | |||
| bool call_save_dump_ = false; | |||
| // aicpu ext_info device mem | |||
| @@ -18,6 +18,7 @@ | |||
| #include <chrono> | |||
| #include "framework/common/debug/log.h" | |||
| #include "graph/compute_graph.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "hybrid_execution_context.h" | |||
| #include "subgraph_context.h" | |||
| @@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( | |||
| this->num_pending_shapes_); | |||
| } | |||
| Status ShapeInferenceState::UpdateInputShape(int idx, | |||
| const GeShape &ori_shape, | |||
| const GeShape &shape) { | |||
| Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { | |||
| if (node_item.IsInputShapeStatic(idx)) { | |||
| GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", | |||
| node_item.NodeName().c_str(), | |||
| idx, | |||
| node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), | |||
| shape.ToString().c_str()); | |||
| target.GetShape().ToString().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]", | |||
| int64_t tensor_size = -1; | |||
| (void) TensorUtils::GetSize(target, tensor_size); | |||
| GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", | |||
| node_item.NodeName().c_str(), | |||
| idx, | |||
| shape.ToString().c_str(), | |||
| ori_shape.ToString().c_str()); | |||
| target.GetShape().ToString().c_str(), | |||
| target.GetOriginShape().ToString().c_str(), | |||
| tensor_size); | |||
| std::lock_guard<std::mutex> lk(mu_); | |||
| auto tensor_desc = node_item.MutableInputDesc(idx); | |||
| GE_CHECK_NOTNULL(tensor_desc); | |||
| tensor_desc->SetShape(shape); | |||
| tensor_desc->SetOriginShape(ori_shape); | |||
| tensor_desc->SetShape(target.GetShape()); | |||
| tensor_desc->SetOriginShape(target.GetOriginShape()); | |||
| (void) TensorUtils::SetSize(*tensor_desc, tensor_size); | |||
| if (--num_pending_shapes_ == 0) { | |||
| ready_cv_.notify_all(); | |||
| } | |||
| @@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex | |||
| for (auto &p : shape_futures) { | |||
| auto idx = p.first; | |||
| auto &future = p.second; | |||
| GeShape shape; | |||
| GeShape ori_shape; | |||
| RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); | |||
| GE_CHK_STATUS_RET(future.Get(ori_shape, shape), | |||
| "[%s] Get shape failed. index = %u", | |||
| node_item.NodeName().c_str(), | |||
| idx); | |||
| auto src_tensor_desc = future.GetTensorDesc(); | |||
| GE_CHECK_NOTNULL(src_tensor_desc); | |||
| RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); | |||
| GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", | |||
| node_item.NodeName().c_str(), | |||
| idx, | |||
| shape.ToString().c_str(), | |||
| ori_shape.ToString().c_str()); | |||
| auto input_desc = node_item.MutableInputDesc(idx); | |||
| GE_CHECK_NOTNULL(input_desc); | |||
| input_desc->SetShape(std::move(shape)); | |||
| input_desc->SetOriginShape(ori_shape); | |||
| // Carry the producer's tensor size along with its shapes; -1 is kept if the size cannot be read. | |||
| int64_t tensor_size = -1; | |||
| (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); | |||
| // tensor_size is an int64_t byte count, so it is logged as "size = %ld" (same as UpdateInputShape). | |||
| GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], size = %ld", | |||
| node_item.NodeName().c_str(), | |||
| idx, | |||
| src_tensor_desc->GetShape().ToString().c_str(), | |||
| src_tensor_desc->GetOriginShape().ToString().c_str(), | |||
| tensor_size); | |||
| // Mirror shape, origin shape and size from the producer's output desc onto this node's input desc. | |||
| input_desc->SetShape(src_tensor_desc->GetShape()); | |||
| input_desc->SetOriginShape(src_tensor_desc->GetOriginShape()); | |||
| (void) TensorUtils::SetSize(*input_desc, tensor_size); | |||
| } | |||
| return SUCCESS; | |||
| @@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { | |||
| GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| // Awaits src_node_ through the subgraph context, then returns the descriptor of output src_index_ of | |||
| // that node. Returns nullptr when the await fails (logged as "cancelled") or when the node has no | |||
| // OpDesc, so callers must check the result before use. | |||
| GeTensorDescPtr ShapeFuture::GetTensorDesc() { | |||
| GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); | |||
| if (!subgraph_context_->Await(src_node_)) { | |||
| GELOGE(INTERNAL_ERROR, "cancelled"); | |||
| return nullptr; | |||
| } | |||
| // Guard the OpDesc pointer before dereferencing it instead of chaining the calls. | |||
| const auto op_desc = src_node_->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "[%s] OpDesc is null", src_node_->GetName().c_str()); | |||
| return nullptr; | |||
| } | |||
| return op_desc->MutableOutputDesc(src_index_); | |||
| } | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -35,6 +35,7 @@ class ShapeFuture { | |||
| ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); | |||
| ~ShapeFuture() = default; | |||
| Status Get(GeShape &ori_shape, GeShape &shape); | |||
| GeTensorDescPtr GetTensorDesc(); | |||
| private: | |||
| NodePtr src_node_; | |||
| @@ -45,7 +46,7 @@ class ShapeFuture { | |||
| struct ShapeInferenceState { | |||
| explicit ShapeInferenceState(const NodeItem &node_item); | |||
| Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape); | |||
| Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc); | |||
| void UpdateInputShapeFuture(int idx, ShapeFuture &&future); | |||
| @@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue | |||
| GE_CHECK_NOTNULL(tensor_desc); | |||
| auto node_state = subgraph_context_->GetOrCreateNodeState(input_node); | |||
| GE_CHECK_NOTNULL(node_state); | |||
| node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); | |||
| node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); | |||
| } | |||
| } | |||
| @@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta | |||
| } else { | |||
| node_state.SetKernelTask(node_item.kernel_task); | |||
| } | |||
| GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); | |||
| RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); | |||
| GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node), | |||
| "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); | |||
| RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); | |||
| GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -20,12 +20,9 @@ | |||
| #include "graph/utils/tensor_adapter.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "hybrid/node_executor/node_executor.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "hybrid/executor//worker//shape_inference_engine.h" | |||
| #include "common/dump/dump_op.h" | |||
| #include "common/types.h" | |||
| #include "common/ge_types.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "runtime/base.h" | |||
| namespace ge { | |||
| namespace hybrid { | |||
| @@ -348,6 +345,10 @@ Status NodeDoneCallback::OnNodeDone() { | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); | |||
| if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) { | |||
| // update output tensor sizes | |||
| GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item)); | |||
| } | |||
| // PropagateOutputs for type == DEPEND_COMPUTE | |||
| if (node_item.shape_inference_type == DEPEND_COMPUTE) { | |||
| if (graph_context_->trace_enabled) { | |||
| @@ -17,9 +17,15 @@ | |||
| #include "hybrid/executor/worker/shape_inference_engine.h" | |||
| #include "graph/shape_refiner.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "common/math/math_util.h" | |||
| #include "hybrid/node_executor/node_executor.h" | |||
| namespace ge { | |||
| namespace { | |||
| const int kAlignment = 32; | |||
| } | |||
| namespace hybrid { | |||
| ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) | |||
| : execution_context_(execution_context), | |||
| @@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||
| } | |||
| if (node_item.fused_subgraph != nullptr) { | |||
| return InferShapeForSubgraph(node_item, *node_item.fused_subgraph); | |||
| GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph)); | |||
| GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item)); | |||
| return SUCCESS; | |||
| } | |||
| // Skip shape inference for node of type DEPEND_COMPUTE | |||
| @@ -63,21 +71,15 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||
| std::lock_guard<std::mutex> lk(mu_); | |||
| RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); | |||
| GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), | |||
| "Invoke InferShapeAndType failed."); | |||
| "Invoke InferShapeAndType failed."); | |||
| RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); | |||
| } | |||
| // Check again to make sure shape is valid after shape inference | |||
| if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { | |||
| bool is_unknown_shape = false; | |||
| GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), | |||
| "Failed to get shape status. node = %s", | |||
| node_item.NodeName().c_str()); | |||
| GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, | |||
| INTERNAL_ERROR, | |||
| "[%s] Shape is still unknown after shape inference.", | |||
| node_item.NodeName().c_str()); | |||
| } | |||
| // update output tensor sizes after shape inference | |||
| // error if shape is still unknown and not of type DEPEND_SHAPE_RANGE | |||
| RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); | |||
| GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE)); | |||
| RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); | |||
| GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", | |||
| node_item.NodeName().c_str(), | |||
| @@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { | |||
| // propagate each output | |||
| for (int i = 0; i < node_item.num_outputs; ++i) { | |||
| auto output_desc = node_item.op_desc->MutableOutputDesc(i); | |||
| const auto &shape = output_desc->MutableShape(); | |||
| const auto &ori_shape = output_desc->GetOriginShape(); | |||
| auto &output_nodes = node_item.outputs[i]; | |||
| // propagate output to all sub-inputs | |||
| @@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { | |||
| infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, | |||
| std::move(future)); | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, | |||
| ori_shape, | |||
| shape)); | |||
| GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc)); | |||
| } | |||
| } | |||
| } | |||
| @@ -230,5 +228,92 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Rewrites `shape` in place so that every dim equal to ge::UNKNOWN_DIM takes the upper bound of the | |||
| // matching entry in the tensor desc's shape range. If the tensor desc's shape is not unknown, `shape` | |||
| // is left untouched. If it is unknown and `fallback_with_range` is false, INTERNAL_ERROR is returned; | |||
| // the same error is returned when the number of ranges differs from the number of dims. | |||
| Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc, | |||
| std::vector<int64_t> &shape, | |||
| bool fallback_with_range) { | |||
| const auto &tensor_shape = tensor_desc.MutableShape(); | |||
| if (!tensor_shape.IsUnknownShape()) { | |||
| // nothing to canonicalize | |||
| return SUCCESS; | |||
| } | |||
| if (!fallback_with_range) { | |||
| GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]", | |||
| tensor_shape.ToString().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GELOGD("Calc output size by range"); | |||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | |||
| GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range"); | |||
| const size_t num_dims = shape.size(); | |||
| if (shape_range.size() != num_dims) { | |||
| GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)", | |||
| shape_range.size(), | |||
| num_dims); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // substitute the range's upper bound for each unknown dim | |||
| for (size_t dim_index = 0; dim_index < num_dims; ++dim_index) { | |||
| auto &dim = shape[dim_index]; | |||
| if (dim == ge::UNKNOWN_DIM) { | |||
| dim = shape_range[dim_index].second; | |||
| } | |||
| } | |||
| GELOGD("After canonicalization, shape = [%s], before = [%s]", | |||
| GeShape(shape).ToString().c_str(), | |||
| tensor_shape.ToString().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| // Computes the size of a tensor with the given data type and dims and stores it in `tensor_size`. | |||
| // The size is the data type length multiplied by every dim, rounded up to a multiple of kAlignment. | |||
| // Returns INTERNAL_ERROR when the data type length cannot be obtained; the check macros below return | |||
| // early when a dim is negative or when the multiplication / alignment addition would overflow. | |||
| Status ShapeInferenceEngine::CalcTensorSize(DataType data_type, | |||
| const std::vector<int64_t> &shape, | |||
| int64_t &tensor_size) { | |||
| GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str()); | |||
| uint32_t type_size; | |||
| if (!TypeUtils::GetDataTypeLength(data_type, type_size)) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data type size"); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // start from the element size; an empty dim list therefore yields one element | |||
| tensor_size = type_size; | |||
| for (const auto &dim : shape) { | |||
| // a negative dim here means the shape was not fully resolved by the caller | |||
| GE_CHECK_GE(dim, 0); | |||
| GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), | |||
| "Shape size overflow, shape = [%s]", | |||
| GeShape(shape).ToString().c_str()); | |||
| tensor_size *= dim; | |||
| } | |||
| // make sure adding (kAlignment - 1) for the round-up below cannot overflow | |||
| GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1), | |||
| "Tensor size is too large: %ld, shape = [%s]", | |||
| tensor_size, | |||
| GeShape(shape).ToString().c_str()); | |||
| // round up to the next multiple of kAlignment | |||
| tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment; | |||
| return SUCCESS; | |||
| } | |||
| // For every output tensor desc of `node_item`: canonicalizes a copy of its dims, computes the tensor | |||
| // size from the data type and those dims, and stores the result on the desc via TensorUtils::SetSize. | |||
| // `fallback_with_range` is forwarded to CanonicalizeShape. The first failing step aborts the loop and | |||
| // its status is returned. | |||
| Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) { | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| const size_t num_outputs = op_desc->GetOutputsSize(); | |||
| for (size_t output_index = 0; output_index < num_outputs; ++output_index) { | |||
| auto tensor_desc = op_desc->MutableOutputDesc(output_index); | |||
| GE_CHECK_NOTNULL(tensor_desc); | |||
| // work on a copy of the dims so the desc's own shape stays as inferred | |||
| std::vector<int64_t> dims = tensor_desc->MutableShape().GetDims(); | |||
| GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range), | |||
| "[%s] Failed to canonicalize shape for output %zu", | |||
| node_item.NodeName().c_str(), | |||
| output_index); | |||
| int64_t tensor_size; | |||
| GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size), | |||
| "[%s] Failed to calc tensor size for output %zu", | |||
| node_item.NodeName().c_str(), | |||
| output_index); | |||
| GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size); | |||
| // the result of SetSize is deliberately ignored | |||
| (void) TensorUtils::SetSize(*tensor_desc, tensor_size); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -34,7 +34,11 @@ class ShapeInferenceEngine { | |||
| Status PropagateOutputShapes(const NodeItem &node_item); | |||
| static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false); | |||
| private: | |||
| static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range); | |||
| static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size); | |||
| static Status UpdatePeerNodeShape(const Node &node); | |||
| Status AwaitDependentNodes(NodeState &node_state); | |||
| @@ -22,6 +22,7 @@ | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "hybrid/node_executor/node_executor.h" | |||
| #include "hybrid/executor/worker/shape_inference_engine.h" | |||
| namespace ge { | |||
| namespace hybrid { | |||
| @@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr | |||
| GE_CHECK_NOTNULL(dst_op_desc); | |||
| auto in_idx = node_and_anchor.second->GetIdx(); | |||
| auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); | |||
| fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc); | |||
| fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc); | |||
| GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); | |||
| } | |||
| @@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap | |||
| return FAILED; | |||
| } | |||
| fused_subgraph.output_mapping.emplace(parent_index, op_desc); | |||
| fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc); | |||
| return SUCCESS; | |||
| } | |||
| @@ -126,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite | |||
| return SUCCESS; | |||
| } | |||
| Status NodeItem::Init() { | |||
| GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); | |||
| GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); | |||
| num_inputs = static_cast<int>(op_desc->GetInputsSize()); | |||
| num_outputs = static_cast<int>(op_desc->GetOutputsSize()); | |||
| void NodeItem::ResolveOptionalInputs() { | |||
| if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { | |||
| has_optional_inputs = true; | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| @@ -143,7 +139,18 @@ Status NodeItem::Init() { | |||
| } | |||
| } | |||
| } | |||
| } | |||
| // Caches the input and output counts of op_desc in num_inputs / num_outputs and then resolves | |||
| // optional inputs. Returns SUCCESS unless one of the range checks below fails. | |||
| Status NodeItem::InitInputsAndOutputs() { | |||
| // the counts are stored as int, so make sure the narrowing casts below are safe | |||
| GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); | |||
| GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); | |||
| num_inputs = static_cast<int>(op_desc->GetInputsSize()); | |||
| num_outputs = static_cast<int>(op_desc->GetOutputsSize()); | |||
| ResolveOptionalInputs(); | |||
| return SUCCESS; | |||
| } | |||
| Status NodeItem::ResolveDynamicState() { | |||
| (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | |||
| GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); | |||
| if (!is_dynamic) { | |||
| @@ -151,38 +158,54 @@ Status NodeItem::Init() { | |||
| "[%s] Failed to get shape status.", | |||
| node->GetName().c_str()); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| if (is_dynamic) { | |||
| for (int i = 0; i < num_inputs; ++i) { | |||
| const auto &input_desc = MutableInputDesc(i); | |||
| GE_CHECK_NOTNULL(input_desc); | |||
| if (input_desc->MutableShape().IsUnknownShape()) { | |||
| is_input_shape_static_.push_back(false); | |||
| } else { | |||
| num_static_input_shapes++; | |||
| is_input_shape_static_.push_back(true); | |||
| GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", | |||
| NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); | |||
| } | |||
| Status NodeItem::ResolveStaticInputsAndOutputs() { | |||
| for (int i = 0; i < num_inputs; ++i) { | |||
| const auto &input_desc = MutableInputDesc(i); | |||
| GE_CHECK_NOTNULL(input_desc); | |||
| if (input_desc->MutableShape().IsUnknownShape()) { | |||
| is_input_shape_static_.push_back(false); | |||
| } else { | |||
| num_static_input_shapes++; | |||
| is_input_shape_static_.push_back(true); | |||
| GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", | |||
| NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); | |||
| } | |||
| } | |||
| for (int i = 0; i < num_outputs; ++i) { | |||
| const auto &output_desc = op_desc->MutableOutputDesc(i); | |||
| GE_CHECK_NOTNULL(output_desc); | |||
| if (output_desc->MutableShape().IsUnknownShape()) { | |||
| is_output_shape_static = false; | |||
| break; | |||
| } | |||
| for (int i = 0; i < num_outputs; ++i) { | |||
| const auto &output_desc = op_desc->MutableOutputDesc(i); | |||
| GE_CHECK_NOTNULL(output_desc); | |||
| if (output_desc->MutableShape().IsUnknownShape()) { | |||
| is_output_shape_static = false; | |||
| break; | |||
| } | |||
| } | |||
| if (IsControlOp() || node_type == PARTITIONEDCALL) { | |||
| shape_inference_type = DEPEND_COMPUTE; | |||
| } else { | |||
| int32_t unknown_shape_type_val = 0; | |||
| (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||
| shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
| } | |||
| if (is_output_shape_static) { | |||
| GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Sets shape_inference_type. Control ops and PARTITIONEDCALL nodes are always DEPEND_COMPUTE; every | |||
| // other node takes the integer read from ATTR_NAME_UNKNOWN_SHAPE_TYPE (the local starts at 0 and the | |||
| // result of GetInt is ignored). | |||
| void NodeItem::ResolveUnknownShapeType() { | |||
| const bool depends_on_compute = IsControlOp() || (node_type == PARTITIONEDCALL); | |||
| if (depends_on_compute) { | |||
| shape_inference_type = DEPEND_COMPUTE; | |||
| return; | |||
| } | |||
| int32_t unknown_shape_type_val = 0; | |||
| (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||
| shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
| } | |||
| Status NodeItem::Init() { | |||
| GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs()); | |||
| GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState()); | |||
| if (is_dynamic) { | |||
| ResolveUnknownShapeType(); | |||
| GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs()); | |||
| GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); | |||
| } | |||
| @@ -103,6 +103,11 @@ struct NodeItem { | |||
| private: | |||
| explicit NodeItem(NodePtr node); | |||
| Status Init(); | |||
| Status InitInputsAndOutputs(); | |||
| void ResolveOptionalInputs(); | |||
| Status ResolveDynamicState(); | |||
| Status ResolveStaticInputsAndOutputs(); | |||
| void ResolveUnknownShapeType(); | |||
| std::vector<bool> is_input_shape_static_; | |||
| std::vector<uint32_t> input_desc_indices_; | |||
| @@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() { | |||
| } | |||
| Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const { | |||
| if (callback_fun == nullptr) { | |||
| GELOGW("[%s] Callback is NULL", GetNodeName()); | |||
| return SUCCESS; | |||
| } | |||
| auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); | |||
| @@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const { | |||
| return node_item_->NodeName().c_str(); | |||
| } | |||
| // Calls Destroy() on every input tensor slot and then on every output tensor slot of this task, | |||
| // logging each release. The slot counts come from node_item_. | |||
| void TaskContext::ReleaseInputsAndOutputs() { | |||
| for (int input_idx = 0; input_idx < node_item_->num_inputs; ++input_idx) { | |||
| (inputs_start_ + input_idx)->Destroy(); | |||
| GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), input_idx); | |||
| } | |||
| for (int output_idx = 0; output_idx < node_item_->num_outputs; ++output_idx) { | |||
| (outputs_start_ + output_idx)->Destroy(); | |||
| GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), output_idx); | |||
| } | |||
| } | |||
| void TaskContext::ReleaseInput(int index) { | |||
| auto input_tensor = MutableInput(index); | |||
| if (input_tensor != nullptr) { | |||
| @@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con | |||
| const DumpProperties &TaskContext::GetDumpProperties() const { | |||
| return execution_context_->dump_properties; | |||
| } | |||
| // Returns true when the node has an observer, when dump is enabled, or when the execution context's | |||
| // profiling level is above zero. The conditions are evaluated in that order and stop at the first hit. | |||
| bool TaskContext::NeedCallback() { | |||
| if (node_item_->has_observer) { | |||
| return true; | |||
| } | |||
| if (IsDumpEnabled()) { | |||
| return true; | |||
| } | |||
| return execution_context_->profiling_level > 0; | |||
| } | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -50,6 +50,8 @@ class TaskContext { | |||
| ConstGeTensorDescPtr GetOutputDesc(int index) const; | |||
| GeTensorDescPtr MutableInputDesc(int index) const; | |||
| GeTensorDescPtr MutableOutputDesc(int index) const; | |||
| void ReleaseInputsAndOutputs(); | |||
| bool NeedCallback(); | |||
| void ReleaseInput(int index); | |||
| const TensorValue *GetInput(int index) const; | |||
| const TensorValue *GetOutput(int index) const; | |||
| @@ -227,7 +227,6 @@ class Impl { | |||
| ~Impl() { (void)generator_.Finalize(); }; | |||
| graphStatus CheckOptions(const std::map<std::string, std::string> &options); | |||
| graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs); | |||
| graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); | |||
| graphStatus UpdateDataOpAttr(const Graph &graph); | |||
| graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options); | |||
| graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options, | |||
| @@ -321,42 +320,6 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { | |||
| auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { | |||
| GE_CHECK_NOTNULL(input_node); | |||
| ge::OpDescPtr op = input_node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op); | |||
| if (op->GetType() == DATA) { | |||
| string data_op_name = op->GetName(); | |||
| GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); | |||
| ge::GeTensorDesc tensor = op->GetInputDesc(0); | |||
| ge::GeShape data_shape = tensor.GetShape(); | |||
| GELOGD("Data op get shape from InputDesc in ge ir graph."); | |||
| string tmp_shape_str; | |||
| const std::vector<int64_t> &tmp_shape = data_shape.GetDims(); | |||
| if (tmp_shape.empty()) { | |||
| GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); | |||
| } else { | |||
| tmp_shape_str += data_op_name + ":"; | |||
| for (auto tmp_dim : tmp_shape) { | |||
| tmp_shape_str += to_string((long)tmp_dim) + ","; | |||
| } | |||
| tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); | |||
| tmp_shape_str += ";"; | |||
| default_shape += tmp_shape_str; | |||
| } | |||
| GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str()); | |||
| } | |||
| } | |||
| default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1)); | |||
| GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str()); | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::string> &options) { | |||
| // 1. check options | |||
| graphStatus ret = CheckOptions(options); | |||
| @@ -378,13 +341,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri | |||
| GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); | |||
| options_[ge::ir_option::LOG_LEVEL] = log; | |||
| string input_shape; | |||
| if (options_.find("input_shape") == options_.end()) { | |||
| GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS, | |||
| return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!"); | |||
| } else { | |||
| input_shape = options_["input_shape"]; | |||
| } | |||
| string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"]; | |||
| string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; | |||
| string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; | |||
| string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() | |||
| @@ -36,7 +36,7 @@ using Status = domi::Status; | |||
| namespace domi { | |||
| using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( | |||
| const google::protobuf::Message *root_proto, const std::string &graph)>; | |||
| const google::protobuf::Message *root_proto, const std::string &graph)>; | |||
| class ModelParser { | |||
| public: | |||
| ModelParser() {} | |||
| @@ -44,19 +44,20 @@ class ModelParser { | |||
| virtual ~ModelParser() {} | |||
| /** | |||
| * @ingroup domi_omg | |||
| * @brief Analyze network model data | |||
| * @param [in] file Network model file path | |||
| * @param [in|out] graph Save the network information after analysis | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| * @ingroup domi_omg | |||
| * @brief Analyze network model data | |||
| * @param [in] file Network model file path | |||
| * @param [in|out] graph Save the network information after analysis | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| virtual Status Parse(const char *file, ge::Graph &graph) = 0; | |||
| /** | |||
| * @ingroup domi_omg | |||
| * @brief Parse relevant data from memory and save it to graph | |||
| * @param [in] input Model file memory data | |||
| * @param [in] input Model file memory size | |||
| * @param [in|out] graph A graph for saving the model information after analysis | |||
| * @return SUCCESS | |||
| * @return FAILED | |||
| @@ -64,36 +65,49 @@ class ModelParser { | |||
| */ | |||
| virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | |||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||
| /** | |||
| * @ingroup domi_omg | |||
| * @brief Parse relevant data from memory and save it to graph | |||
| * @param [in] input Model file memory data | |||
| * @param [in] input Model file memory size | |||
| * @param [in|out] graph A graph for saving the model information after analysis | |||
| * @return SUCCESS | |||
| * @return FAILED | |||
| * @author | |||
| */ | |||
| virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; | |||
| #endif | |||
| /** | |||
| * @ingroup domi_omg | |||
| * @brief Analyze network model data | |||
| * @param [in] proto network model | |||
| * @param [in|out] graph Save the network information after analysis | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| * @ingroup domi_omg | |||
| * @brief Analyze network model data | |||
| * @param [in] proto network model | |||
| * @param [in|out] graph Save the network information after analysis | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; | |||
| /** | |||
| * @ingroup domi_omg | |||
| * @brief Analyze callback model data in subgraph | |||
| * @param [in] proto network model | |||
| * @param [in] callback callback of subgraph | |||
| * @param [in|out] graph Save the network information after analysis | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, | |||
| GetGraphCallback callback, | |||
| * @ingroup domi_omg | |||
| * @brief Analyze callback model data in subgraph | |||
| * @param [in] proto network model | |||
| * @param [in] callback callback of subgraph | |||
| * @param [in|out] graph Save the network information after analysis | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, | |||
| ge::ComputeGraphPtr &graph) = 0; | |||
| /** | |||
| * @ingroup domi_omg | |||
| * @brief Convert model files to JSON format | |||
| * @param [in] model_file Model file path to be converted | |||
| * @param [out] json_file Converted JSON file path | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| * @ingroup domi_omg | |||
| * @brief Convert model files to JSON format | |||
| * @param [in] model_file Model file path to be converted | |||
| * @param [out] json_file Converted JSON file path | |||
| * @return SUCCESS | |||
| * @return Others failed | |||
| */ | |||
| virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } | |||
| /* | |||
| @@ -1 +1 @@ | |||
| Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299 | |||
| Subproject commit 129b50b41f79d0dfeb9fe8987b1c19c9ac51eb8b | |||
| @@ -1 +1 @@ | |||
| Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a | |||
| Subproject commit e9f7d0197aba57eb5247cb1e029c10e393631c89 | |||
| @@ -0,0 +1,60 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OP_TYPE_LIST_H_ | |||
| #define AICPU_OP_TYPE_LIST_H_ | |||
| enum OpKernelType { /* Kind of kernel behind an op; this is the type of SysOpInfo::kernelsType. */ | |||
| TF_KERNEL, /* value 0. NOTE(review): presumably a TensorFlow-backed kernel -- confirm */ | |||
| CPU_KERNEL /* value 1. NOTE(review): presumably a native AI CPU kernel -- confirm */ | |||
| }; | |||
| enum ReturnCode { /* Reasons an op check can report "not supported"; values are 0, 1, 2 in declaration order. */ | |||
| OP_TYPE_NOT_SUPPORT, /* the op type is not supported */ | |||
| FORMAT_NOT_SUPPORT, /* the data format is not supported */ | |||
| DTYPE_NOT_SUPPORT /* the data type is not supported */ | |||
| }; | |||
| #pragma pack(push, 1) | |||
| //One byte alignment | |||
| struct SysOpInfo { /* Describes one op; declared inside #pragma pack(push, 1), so members are byte-packed. */ | |||
| uint64_t opLen; /* NOTE(review): presumably the length of the op type string -- confirm */ | |||
| uint64_t opType; /* NOTE(review): presumably an address/offset of the op type string, not an enum -- confirm */ | |||
| OpKernelType kernelsType; /* kernel kind, see OpKernelType */ | |||
| }; | |||
| struct OpParamInfo { /* Per-op parameter description; byte-packed by the enclosing #pragma pack(push, 1). */ | |||
| uint64_t num; /* NOTE(review): presumably the number of entries in the two lists below -- confirm */ | |||
| uint64_t dtypeList; /* NOTE(review): presumably an address of a dtype list stored as an integer -- confirm */ | |||
| uint64_t formatList; /* NOTE(review): presumably an address of a format list stored as an integer -- confirm */ | |||
| }; | |||
| struct SysOpCheckInfo { /* Op-check request record; byte-packed by the enclosing #pragma pack(push, 1). */ | |||
| uint64_t opListNum; /* NOTE(review): presumably the number of ops to check -- confirm */ | |||
| uint64_t offSetLen; /* NOTE(review): meaning not visible in this header -- confirm with the producer */ | |||
| uint64_t sysOpInfoList; /* NOTE(review): presumably an address of a SysOpInfo array stored as an integer -- confirm */ | |||
| uint64_t opParamInfoList; /* NOTE(review): presumably an address of an OpParamInfo array stored as an integer -- confirm */ | |||
| }; | |||
| struct SysOpCheckResp { /* Op-check response record; byte-packed, so no padding follows the bool member. */ | |||
| uint64_t opListNum; /* NOTE(review): presumably the number of ops reported -- confirm */ | |||
| bool isWithoutJson; /* NOTE(review): semantics not visible in this header -- confirm */ | |||
| uint64_t returnCodeList; /* NOTE(review): presumably an address of a ReturnCode list stored as an integer -- confirm */ | |||
| uint64_t sysOpInfoList; /* NOTE(review): presumably an address of a SysOpInfo array stored as an integer -- confirm */ | |||
| uint64_t opParamInfoList; /* NOTE(review): presumably an address of an OpParamInfo array stored as an integer -- confirm */ | |||
| }; | |||
| #pragma pack(pop) | |||
| #endif // AICPU_OP_TYPE_LIST_H_ | |||
| @@ -21,13 +21,15 @@ | |||
| namespace aicpu { | |||
| #pragma pack(push, 1) | |||
| struct AicpuParamHead | |||
| { | |||
| uint32_t length; // Total length: includes custom message | |||
| uint32_t ioAddrNum; // Input and output address number | |||
| uint32_t extInfoLength; // extInfo struct Length | |||
| uint64_t extInfoAddr; // extInfo address | |||
| } __attribute__ ((packed)); | |||
| }; | |||
| #pragma pack(pop) | |||
| } // namespace aicpu | |||
| @@ -13,10 +13,11 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_ENGINE_H__ | |||
| #define AICPU_ENGINE_H__ | |||
| #include <stdint.h> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| @@ -36,12 +37,23 @@ typedef enum { | |||
| /** | |||
| * @ingroup aicpu engine | |||
| * @brief aeCallInterface: | |||
| * an interface to call a function in an op kernel lib | |||
| * an interface to call a function in an op kernel lib | |||
| * @param [in] addr void *, should be STR_KERNEL * format | |||
| * @return aeStatus_t | |||
| */ | |||
| aeStatus_t aeCallInterface(void *addr); | |||
| /** | |||
| * @ingroup aicpu engine | |||
| * @brief aeBatchLoadKernelSo: | |||
| * an interface to load kernel so | |||
| * @param [in] loadSoNum load so number | |||
| * @param [in] soPaths load so paths | |||
| * @param [in] soNames load so names | |||
| * @return aeStatus_t | |||
| */ | |||
| aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -33,18 +33,22 @@ typedef enum { | |||
| FMK_KERNEL_TYPE_RESERVED | |||
| } FwkkernelType_t; | |||
| #pragma pack(push, 1) | |||
| typedef struct { | |||
| uint32_t fwkKernelType; // FwkkernelType_t | |||
| union { | |||
| ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; | |||
| } fwkKernelBase; | |||
| } __attribute__((packed)) STR_FWK_OP_KERNEL; | |||
| } STR_FWK_OP_KERNEL; | |||
| #pragma pack(pop) | |||
| #pragma pack(push, 1) | |||
| struct SessionInfo { | |||
| uint64_t sessionId; | |||
| uint64_t kernelId; | |||
| bool sessFlag; | |||
| } __attribute__((packed)); | |||
| }; | |||
| #pragma pack(pop) | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType { | |||
| FWK_ADPT_UPDATE_INPUT_OUTPUT | |||
| }; | |||
| #pragma pack(push, 1) | |||
| // API Parameter Structure | |||
| struct StrFWKKernel { | |||
| FWKOperateType opType; | |||
| @@ -89,31 +90,39 @@ struct StrFWKKernel { | |||
| uint64_t extInfoLen; // extend info total length | |||
| uint64_t extInfoAddr; // extend info addr, ExtInfo structure | |||
| } __attribute__((packed)); | |||
| }; | |||
| #pragma pack(pop) | |||
| typedef StrFWKKernel FWKOperateParam; | |||
| // Extent info ShapeAndType | |||
| const uint32_t kMaxShapeDims = 8; | |||
| #pragma pack(push, 1) | |||
| struct ShapeAndType { | |||
| int32_t type; | |||
| int64_t dims[kMaxShapeDims]; | |||
| } __attribute__((packed)); | |||
| }; | |||
| #pragma pack(pop) | |||
| // Extend info structure for extInfoAddr | |||
| const uint32_t kExtInfoHeadSize = 8; | |||
| #pragma pack(push, 1) | |||
| struct ExtInfo { | |||
| int32_t infoType; // extend type | |||
| uint32_t infoLen; // length for infoMsg | |||
| char infoMsg[0]; // extend value | |||
| } __attribute__((packed)); | |||
| }; | |||
| #pragma pack(pop) | |||
| #pragma pack(push, 1) | |||
| struct ResultSummary { | |||
| uint64_t shape_data_ptr; // shape data addr, need convert to void* | |||
| uint64_t shape_data_size; // num of dims | |||
| uint64_t raw_data_ptr; // raw data addr, need convert to void* | |||
| uint64_t raw_data_size; // size of raw data | |||
| } __attribute__((packed)); | |||
| }; | |||
| #pragma pack(pop) | |||
| } // end namespace FWKAdapter | |||
| } // namespace aicpu | |||
| @@ -22,7 +22,8 @@ | |||
| #ifndef HCCL_BASE_H_ | |||
| #define HCCL_BASE_H_ | |||
| #include <hccl/hccl_types.h> | |||
| #include <string> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| @@ -95,6 +96,33 @@ typedef void *rtStream_t; | |||
| */ | |||
| typedef void *rtModel_t; | |||
| struct HcomOperation { /* Parameter bundle for one HCOM operation; passed by value to HcomExecEnqueueOperation. */ | |||
| std::string hcclType; /* default-constructed (empty) by the constructor below */ | |||
| void *inputPtr; | |||
| void *outputPtr; | |||
| u64 count; | |||
| HcclDataType dataType; | |||
| HcclReduceOp opType; | |||
| u32 root; | |||
| HcomOperation() /* Defaults: null pointers, zero count/root, RESERVED data type and reduce op. */ | |||
| { | |||
| inputPtr = nullptr; | |||
| outputPtr = nullptr; | |||
| count = 0; | |||
| dataType = HCCL_DATA_TYPE_RESERVED; | |||
| opType = HCCL_REDUCE_RESERVED; | |||
| root = 0; | |||
| } | |||
| }; | |||
| struct HcomRemoteAccessAddrInfo { /* One remote-access entry: a remote rank, a remote/local address pair and a byte length. */ | |||
| u32 remotetRankID; /* NOTE(review): name looks like a typo for remoteRankID; left as is because callers may use it */ | |||
| u64 remoteAddr; // host embedding table address | |||
| u64 localAddr; // device HBM address | |||
| u64 length; // Memory Length in Bytes | |||
| }; | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif // __cplusplus | |||
| @@ -24,145 +24,96 @@ | |||
| #include <hccl/base.h> | |||
| #include <hccl/hccl_types.h> | |||
| #include <functional> | |||
| #include <vector> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| /** | |||
| * @brief Initialize HCOM. | |||
| * | |||
| * @param rank_table A string identifying the rank table file path, include file name. | |||
| * @param identify A string giving the identity of the rank. | |||
| * @return HcclResult | |||
| * @see hcom_destroy() | |||
| */ | |||
| extern HcclResult hcom_init(const char *rank_table, const char *identify); | |||
| /** | |||
| * @brief Destroy HCOM | |||
| * | |||
| * @return HcclResult | |||
| * @see hcom_init() | |||
| */ | |||
| extern HcclResult hcom_destroy(void); | |||
| /** | |||
| * @brief Bind the model. | |||
| * | |||
| * @param model A pointer identifying the model information. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @return HcclResult | |||
| * @see hcom_unbind_model() | |||
| */ | |||
| extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream); | |||
| /** | |||
| * @brief Unbind the model. | |||
| * @brief Get the rank number in the group. | |||
| * | |||
| * @param model A pointer identifying the model information. | |||
| * @return HcclResult | |||
| * @see hcom_bind_model() | |||
| * @param group A string identifying the group name. | |||
| * @param rankSize A pointer identifying the rank number. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult hcom_unbind_model(rtModel_t model); | |||
| HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); | |||
| /** | |||
| * @brief All-gather operator. | |||
| * @brief Get the rank number in the group. | |||
| * | |||
| * @param tag A string identifying the tag of the operator. | |||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||
| * @param inputCount An integer(u64) identifying the number of the input data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param group A string identifying the group name of ranks participating in the operator. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @param group A string identifying the group name. | |||
| * @param rankSize A pointer identifying the rank number. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount, | |||
| HcclDataType dataType, const char *group, rtStream_t stream); | |||
| HcclResult HcomGetRankSize(const char *group, u32 *rankSize); | |||
| /** | |||
| * @brief All-reduce operator. | |||
| * @brief Get the rank number of this rank's server within the group. | |||
| * | |||
| * @param tag A string identifying the tag of the operator. | |||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the output data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
| * @param group A string identifying the group name of ranks participating in the operator. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @param group A string identifying the group name. | |||
| * @param localRankSize A pointer identifying the rank number. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count, | |||
| HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); | |||
| HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); | |||
| /** | |||
| * @brief Broadcast operator. | |||
| * @brief Get the rank number of this rank's server within the group. | |||
| * | |||
| * @param tag A string identifying the tag of the operator. | |||
| * @param ptr A pointer identifying the data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param root An integer(u32) identifying the root rank in the operator. | |||
| * @param group A string identifying the group name of ranks participating in the operator. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @param group A string identifying the group name. | |||
| * @param localRankSize A pointer identifying the rank number. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root, | |||
| const char *group, rtStream_t stream); | |||
| HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); | |||
| /** | |||
| * @brief Reduce-scatter operator. | |||
| * @brief Get the rank id of this rank. | |||
| * | |||
| * @param tag A string identifying the tag of the operator. | |||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
| * @param group A string identifying the group name of ranks participating in the operator. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @param group A string identifying the group name. | |||
| * @param rankId A pointer identifying the rank id. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count, | |||
| HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); | |||
| HcclResult hcom_get_rank_id(const char *group, u32 *rankId); | |||
| /** | |||
| * @brief Get the rank number in the group. | |||
| * @brief Get the rank id of this rank. | |||
| * | |||
| * @param group A string identifying the group name. | |||
| * @param rankSize A pointer identifying the rank number. | |||
| * @param rankId A pointer identifying the rank id. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); | |||
| HcclResult HcomGetRankId(const char *group, u32 *rankId); | |||
| /** | |||
| * @brief Get the rank number of this rank's server within the group. | |||
| * @brief Get the local rank id of this rank's server within the group. | |||
| * | |||
| * @param group A string identifying the group name. | |||
| * @param localRankSize A pointer identifying the rank number. | |||
| * @param localRankId A pointer identifying the local rank id. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); | |||
| HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||
| /** | |||
| * @brief Get the rank id of this rank. | |||
| * @brief Get the local rank id of this rank's server within the group. | |||
| * | |||
| * @param group A string identifying the group name. | |||
| * @param rankId A pointer identifying the rank id. | |||
| * @param localRankId A pointer identifying the local rank id. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_get_rank_id(const char *group, u32 *rankId); | |||
| HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); | |||
| /** | |||
| * @brief Get the local rank id of this rank's server within the group. | |||
| * @brief Get the world rank id according to the group rank id. | |||
| * | |||
| * @param group A string identifying the group name. | |||
| * @param localRankId A pointer identifying the local rank id. | |||
| * @param groupRank An integer(u32) identifying the group rank id. | |||
| * @param worldRank A pointer identifying the world rank id. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||
| HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); | |||
| /** | |||
| * @brief Get the world rank id according to the group rank id. | |||
| @@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||
| * @param worldRank A pointer identifying the world rank id. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); | |||
| HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); | |||
| /** | |||
| * @brief Get the group rank id according to the world rank id. | |||
| @@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, | |||
| */ | |||
| HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); | |||
| /** | |||
| * @brief Get the group rank id according to the world rank id. | |||
| * | |||
| * @param worldRank An integer(u32) identifying the world rank id. | |||
| * @param group A string identifying the group name. | |||
| * @param groupRank A pointer identifying the group rank id. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); | |||
| /** | |||
| * @brief Create group. | |||
| * | |||
| @@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, | |||
| HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); | |||
| /** | |||
| * @brief Destroy group | |||
| * @brief Create group. | |||
| * | |||
| * @param group A string identifying the group name. | |||
| * @param rankNum An integer(u32) identifying the number of ranks in the group. | |||
| * @param rankIds A list identifying the ranks in the group. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_destroy_group(const char *group); | |||
| HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); | |||
| /** | |||
| * @brief Send operator. | |||
| * @brief Destroy group | |||
| * | |||
| * @param tag A string identifying the tag of the operator. | |||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param destRank An integer identifying the destination rank. | |||
| * @param srTag An integer identifying the send/recv message tag. | |||
| * The message will be received by the receive operator with the same "sr_tag". | |||
| * @param group A string identifying the group name of ranks participating in the operator. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @param group A string identifying the group name. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType, | |||
| u32 destRank, u32 srTag, const char *group, rtStream_t stream); | |||
| HcclResult hcom_destroy_group(const char *group); | |||
| /** | |||
| * @brief Receive operator. | |||
| * @brief Destroy group | |||
| * | |||
| * @param tag A string identifying the tag of the operator. | |||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param srcRank An integer identifying the source rank. | |||
| * @param srTag An integer identifying the send/recv message tag. | |||
| * The message will be sent by the send operator with the same "sr_tag". | |||
| * @param group A string identifying the group name of ranks participating in the operator. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @param group A string identifying the group name. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType, | |||
| u32 srcRank, u32 srTag, const char *group, rtStream_t stream); | |||
| HcclResult HcomDestroyGroup(const char *group); | |||
| /** | |||
| * @brief Get the gradient split strategy within the group. | |||
| * @brief Set the gradient split strategy within the group, according to gradient index. | |||
| * | |||
| * @param group A string identifying the group name. | |||
| * @param feature A pointer identifying the feature of the model. | |||
| * @param maxSegmentNum An integer(u32) identifying the max segments of gradients. | |||
| * @param segmentNum A pointer identifying the segments number of gradients. | |||
| * @param segmentIdx A list identifying the index of end gradient in each segment. | |||
| * @return HcclResult | |||
| * @param segmentNum An integer(u32) identifying the segments number of gradients. | |||
| * @param IdxList A list identifying the index of end gradient in each segment. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum, | |||
| u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE, | |||
| OriginalGraphShapeType shapeType = KNOWN_SHAPE); | |||
| extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); | |||
| /** | |||
| * @brief Set the gradient split strategy within the group, according to gradient index. | |||
| @@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature | |||
| * @param IdxList A list identifying the index of end gradient in each segment. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); | |||
| extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); | |||
| /** | |||
| * @brief Set the gradient split strategy with in the group, according to gradient data size. | |||
| @@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen | |||
| */ | |||
| extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); | |||
| /** | |||
| * @brief Set the gradient split strategy within the group, according to gradient data size. | |||
| * | |||
| * @param group A string identifying the group name. | |||
| * @param segmentNum An integer(u32) identifying the segments number of gradients. | |||
| * @param sizeList A list identifying the percent of each segment. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||
| /** | |||
| * @brief Register memories and init resources for remote access. | |||
| * | |||
| @@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment | |||
| */ | |||
| extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); | |||
| /** | |||
| * @brief Register memories and init resources for remote access. | |||
| * | |||
| * @param addrList memory addresses for remote access. | |||
| * @param count number of remote memory addresses. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); | |||
| HcclResult HcomExecInitialize(); | |||
| HcclResult HcomExecFinalize(); | |||
| HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); | |||
| HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||
| const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | |||
| std::function<void(HcclResult status)> callback); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -215,6 +215,10 @@ typedef struct { | |||
| #define S_IWRITE S_IWUSR | |||
| #endif | |||
| #define mm_no_argument no_argument | |||
| #define mm_required_argument required_argument | |||
| #define mm_optional_argument optional_argument | |||
| #define M_FILE_RDONLY O_RDONLY | |||
| #define M_FILE_WRONLY O_WRONLY | |||
| #define M_FILE_RDWR O_RDWR | |||
| @@ -227,6 +231,7 @@ typedef struct { | |||
| #define M_BINARY O_RDONLY | |||
| #define M_TRUNC O_TRUNC | |||
| #define M_IRWXU S_IRWXU | |||
| #define M_APPEND O_APPEND | |||
| #define M_IN_CREATE IN_CREATE | |||
| #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE | |||
| @@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); | |||
| MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); | |||
| MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); | |||
| MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, | |||
| VOID *sendMsg, | |||
| INT32 sendLen, | |||
| UINT32 sendFlag, | |||
| const mmSockAddr* addr, | |||
| INT32 tolen); | |||
| VOID *sendMsg, | |||
| INT32 sendLen, | |||
| UINT32 sendFlag, | |||
| const mmSockAddr* addr, | |||
| INT32 tolen); | |||
| MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, | |||
| VOID *recvBuf, | |||
| mmSize recvLen, | |||
| UINT32 recvFlag, | |||
| mmSockAddr* addr, | |||
| mmSocklen_t *FromLen); | |||
| VOID *recvBuf, | |||
| mmSize recvLen, | |||
| UINT32 recvFlag, | |||
| mmSockAddr* addr, | |||
| mmSocklen_t *FromLen); | |||
| MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); | |||
| MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); | |||
| MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); | |||
| @@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); | |||
| MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); | |||
| MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); | |||
| MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); | |||
| MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); | |||
| MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, | |||
| mmUserBlock_t *timerBlock, | |||
| UINT milliSecond, | |||
| UINT period); | |||
| MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); | |||
| MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); | |||
| MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); | |||
| @@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); | |||
| // Poll related interface | |||
| MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); | |||
| MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); | |||
| MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, | |||
| pmmPollData polledData, mmPollBack pollBack); | |||
| MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, | |||
| INT32 fdCount, | |||
| INT32 timeout, | |||
| mmCompletionHandle handleIOCP, | |||
| pmmPollData polledData, | |||
| mmPollBack pollBack); | |||
| MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); | |||
| MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); | |||
| MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); | |||
| @@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); | |||
| MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); | |||
| MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); | |||
| MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); | |||
| MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts, | |||
| INT32 *longIndex); | |||
| MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, | |||
| char *const *argv, | |||
| const char *opts, | |||
| const mmStructOption *longOpts, | |||
| INT32 *longIndex); | |||
| MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); | |||
| MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); | |||
| @@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); | |||
| MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); | |||
| MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); | |||
| MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); | |||
| MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, | |||
| mmProcess *id); | |||
| MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, | |||
| const mmThreadAttr *threadAttr); | |||
| MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, | |||
| const mmArgvEnv *env, | |||
| const char *stdoutRedirectFile, | |||
| mmProcess *id); | |||
| MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, | |||
| const mmUserBlock_t *funcBlock, | |||
| const mmThreadAttr *threadAttr); | |||
| MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); | |||
| MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); | |||
| MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); | |||
| @@ -237,6 +237,11 @@ typedef struct { | |||
| } mmThreadAttr; | |||
| typedef VOID (*mmPf)(VOID); | |||
| #define mm_no_argument 0 | |||
| #define mm_required_argument 1 | |||
| #define mm_optional_argument 2 | |||
| #define M_FILE_RDONLY GENERIC_READ | |||
| #define M_FILE_WRONLY GENERIC_WRITE | |||
| #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) | |||
| @@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID); | |||
| #define M_CREAT _O_CREAT | |||
| #define M_BINARY _O_BINARY | |||
| #define M_TRUNC _O_TRUNC | |||
| #define M_APPEND _O_APPEND | |||
| #define M_IREAD _S_IREAD | |||
| #define M_IRUSR _S_IREAD | |||
| @@ -18,6 +18,7 @@ | |||
| #define __CCE_RUNTIME_BASE_H__ | |||
| #include <stdint.h> | |||
| #include "toolchain/prof_callback.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| extern "C" { | |||
| @@ -32,309 +33,8 @@ extern "C" { | |||
| #endif | |||
| #endif | |||
| /** | |||
| * @ingroup dvrt_base | |||
| * @brief runtime error numbers. | |||
| */ | |||
| typedef enum tagRtError { | |||
| RT_ERROR_NONE = 0x0, // success | |||
| RT_ERROR_DEVICE_BASE = 0x07010000, | |||
| RT_ERROR_DEVICE_NULL, | |||
| RT_ERROR_DEVICE_NEW, | |||
| RT_ERROR_DEVICE_ID, | |||
| RT_ERROR_DEVICE_CHIPTYPE, | |||
| RT_ERROR_DEVICE_DEPLOY, | |||
| RT_ERROR_DEVICE_RETAIN, | |||
| RT_ERROR_DEVICE_PLATFORM, | |||
| RT_ERROR_DEVICE_LOADER, | |||
| RT_ERROR_DEVICE_LIMIT, | |||
| RT_ERROR_DEVICE_PROC_HANG_OUT, | |||
| RT_ERROR_DEVICE_POWER_UP_FAIL, | |||
| RT_ERROR_DEVICE_POWER_DOWN_FAIL, | |||
| RT_ERROR_DEVICE_INVALID, | |||
| RT_ERROR_DRV_BASE = 0x07020000, | |||
| RT_ERROR_DRV_NULL, | |||
| RT_ERROR_DRV_NEW, | |||
| RT_ERROR_DRV_MEMORY, | |||
| RT_ERROR_DRV_INPUT, | |||
| RT_ERROR_DRV_PTRNULL, | |||
| RT_ERROR_DRV_OPEN_AICPU, | |||
| RT_ERROR_DRV_CLOSE_AICPU, | |||
| RT_ERROR_DRV_SYM_AICPU, | |||
| RT_ERROR_DRV_OPEN_TSD, | |||
| RT_ERROR_DRV_CLOSE_TSD, | |||
| RT_ERROR_DRV_SYM_TSD, | |||
| RT_ERROR_DRV_SOURCE, | |||
| RT_ERROR_DRV_REPORT, | |||
| RT_ERROR_DRV_COMMAND, | |||
| RT_ERROR_DRV_OCCUPY, | |||
| RT_ERROR_DRV_ERR, | |||
| RT_ERROR_STREAM_BASE = 0x07030000, | |||
| RT_ERROR_STREAM_NULL, | |||
| RT_ERROR_STREAM_NEW, | |||
| RT_ERROR_STREAM_CONTEXT, | |||
| RT_ERROR_STREAM_INVALID, | |||
| RT_ERROR_STREAM_MODEL, | |||
| RT_ERROR_STREAM_FUSION, | |||
| RT_ERROR_STREAM_FULL, | |||
| RT_ERROR_STREAM_EMPTY, | |||
| RT_ERROR_STREAM_NOT_COMPLETE, | |||
| RT_ERROR_STREAM_SYNC, | |||
| RT_ERROR_STREAM_NO_CB_REG, | |||
| RT_ERROR_STREAM_DUPLICATE, | |||
| RT_ERROR_STREAM_NOT_EXIST, | |||
| RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE, | |||
| RT_ERROR_SQID_FULL, | |||
| RT_ERROR_MODEL_BASE = 0x07040000, | |||
| RT_ERROR_MODEL_NULL, | |||
| RT_ERROR_MODEL_NEW, | |||
| RT_ERROR_MODEL_CONTEXT, | |||
| RT_ERROR_MODEL_ENDGRAPH, | |||
| RT_ERROR_MODEL_STREAM, | |||
| RT_ERROR_MODEL_EXCUTOR, | |||
| RT_ERROR_MODEL_SETUP, | |||
| RT_ERROR_MODEL_ID, | |||
| RT_ERROR_MODEL_EXE_FAILED, | |||
| RT_ERROR_END_OF_SEQUENCE, // end of sequence | |||
| RT_ERROR_MODEL_EXIT, | |||
| RT_ERROR_MODEL_EXIT_STREAM_UNBIND, | |||
| RT_ERROR_MODEL_EXIT_ID, | |||
| RT_ERROR_MODEL_ABORT_NORMAL, | |||
| RT_ERROR_EVENT_BASE = 0x07050000, | |||
| RT_ERROR_EVENT_NULL, | |||
| RT_ERROR_EVENT_NEW, | |||
| RT_ERROR_EVENT_RECORDER_NULL, | |||
| RT_ERROR_EVENT_TIMESTAMP_INVALID, | |||
| RT_ERROR_EVENT_TIMESTAMP_REVERSAL, | |||
| RT_ERROR_EVENT_NOT_COMPLETE, | |||
| RT_ERROR_NOTIFY_BASE = 0x07060000, | |||
| RT_ERROR_NOTIFY_NULL, | |||
| RT_ERROR_NOTIFY_NEW, | |||
| RT_ERROR_NOTIFY_TYPE, | |||
| RT_ERROR_NOTIFY_NOT_COMPLETE, | |||
| RT_ERROR_CONTEXT_BASE = 0x07070000, | |||
| RT_ERROR_CONTEXT_NULL, | |||
| RT_ERROR_CONTEXT_NEW, | |||
| RT_ERROR_CONTEXT_DEL, | |||
| RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL, | |||
| RT_ERROR_CONTEXT_ONLINE_STREAM_NULL, | |||
| RT_ERROR_KERNEL_BASE = 0x07080000, | |||
| RT_ERROR_KERNEL_NULL, | |||
| RT_ERROR_KERNEL_NEW, | |||
| RT_ERROR_KERNEL_LOOKUP, | |||
| RT_ERROR_KERNEL_NAME, | |||
| RT_ERROR_KERNEL_TYPE, | |||
| RT_ERROR_KERNEL_OFFSET, | |||
| RT_ERROR_KERNEL_DUPLICATE, | |||
| RT_ERROR_KERNEL_UNREGISTERING, | |||
| RT_ERROR_PROGRAM_BASE = 0x07090000, | |||
| RT_ERROR_PROGRAM_NULL, | |||
| RT_ERROR_PROGRAM_NEW, | |||
| RT_ERROR_PROGRAM_DATA, | |||
| RT_ERROR_PROGRAM_SIZE, | |||
| RT_ERROR_PROGRAM_MEM_TYPE, | |||
| RT_ERROR_PROGRAM_MACHINE_TYPE, | |||
| RT_ERROR_PROGRAM_USEOUT, | |||
| RT_ERROR_MODULE_BASE = 0x070a0000, | |||
| RT_ERROR_MODULE_NULL, | |||
| RT_ERROR_MODULE_NEW, | |||
| RT_ERROR_INSTANCE_BASE = 0x070b0000, | |||
| RT_ERROR_INSTANCE_NULL, | |||
| RT_ERROR_INSTANCE_NEW, | |||
| RT_ERROR_INSTANCE_VERSION, | |||
| RT_ERROR_API_BASE = 0x070c0000, | |||
| RT_ERROR_API_NULL, | |||
| RT_ERROR_API_NEW, | |||
| RT_ERROR_DATADUMP_BASE = 0x070d0000, | |||
| RT_ERROR_DATADUMP_NULL, | |||
| RT_ERROR_DATADUMP_NEW, | |||
| RT_ERROR_DATADUMP_TIME, | |||
| RT_ERROR_DATADUMP_FILE, | |||
| RT_ERROR_DATADUMP_ADDRESS, | |||
| RT_ERROR_DATADUMP_LOAD_FAILED, | |||
| RT_ERROR_DUMP_ADDR_SET_FAILED, | |||
| RT_ERROR_PROF_BASE = 0x070e0000, | |||
| RT_ERROR_PROF_NULL, | |||
| RT_ERROR_PROF_NEW, | |||
| RT_ERROR_PROF_START, | |||
| RT_ERROR_PROF_DEVICE_MEM, | |||
| RT_ERROR_PROF_HOST_MEM, | |||
| RT_ERROR_PROF_SET_DIR, | |||
| RT_ERROR_PROF_OPER, | |||
| RT_ERROR_PROF_FULL, | |||
| RT_ERROR_PROF_NAME, | |||
| RT_ERROR_PCTRACE_BASE = 0x070f0000, | |||
| RT_ERROR_PCTRACE_NULL, | |||
| RT_ERROR_PCTRACE_NEW, | |||
| RT_ERROR_PCTRACE_TIME, | |||
| RT_ERROR_PCTRACE_FILE, | |||
| RT_ERROR_TASK_BASE = 0x07100000, | |||
| RT_ERROR_TASK_NULL, | |||
| RT_ERROR_TASK_NEW, | |||
| RT_ERROR_TASK_TYPE, | |||
| RT_ERROR_TASK_ALLOCATOR, | |||
| RT_ERROR_COMMON_BASE = 0x07110000, | |||
| RT_ERROR_INVALID_VALUE, // RT_ERROR_INPUT_INVALID | |||
| RT_ERROR_MEMORY_ADDRESS_UNALIGNED, | |||
| RT_ERROR_SEC_HANDLE, | |||
| RT_ERROR_OS_HANDLE, | |||
| RT_ERROR_MUTEX_LOCK, | |||
| RT_ERROR_MUTEX_UNLOCK, | |||
| RT_ERROR_CALLOC, | |||
| RT_ERROR_POOL_RESOURCE, | |||
| RT_ERROR_TRANS_ARGS, | |||
| RT_ERROR_METADATA, | |||
| RT_ERROR_LOST_HEARTBEAT, | |||
| RT_ERROR_REPORT_TIMEOUT, | |||
| RT_ERROR_FEATURE_NOT_SUPPROT, | |||
| RT_ERROR_MEMORY_ALLOCATION, | |||
| RT_ERROR_MEMORY_FREE, | |||
| RT_ERROR_INVALID_MEMORY_TYPE, | |||
| RT_ERROR_DEBUG_BASE = 0x07120000, | |||
| RT_ERROR_DEBUG_NULL, | |||
| RT_ERROR_DEBUG_NEW, | |||
| RT_ERROR_DEBUG_SIGNAL, | |||
| RT_ERROR_DEBUG_OPEN, | |||
| RT_ERROR_DEBUG_WRITE, | |||
| RT_ERROR_DEBUG_REGISTER_FAILED, | |||
| RT_ERROR_DEBUG_UNREGISTER_FAILED, | |||
| RT_ERROR_ENGINE_BASE = 0x07130000, | |||
| RT_ERROR_ENGINE_NULL, | |||
| RT_ERROR_ENGINE_NEW, | |||
| RT_ERROR_ENGINE_THREAD, | |||
| RT_ERROR_LABEL_BASE = 0x07140000, | |||
| RT_ERROR_LABEL_NULL, | |||
| RT_ERROR_LABEL_NEW, | |||
| RT_ERROR_LABEL_CONTEXT, | |||
| RT_ERROR_LABEL_STREAM, | |||
| RT_ERROR_LABEL_MODEL, | |||
| RT_ERROR_LABEL_ALLOCATOR, | |||
| RT_ERROR_LABEL_FREE, | |||
| RT_ERROR_LABEL_SET, | |||
| RT_ERROR_LABEL_ID, | |||
| RT_ERROR_TSFW_BASE = 0x07150000, | |||
| RT_ERROR_TSFW_UNKNOWN, | |||
| RT_ERROR_TSFW_NULL_PTR, | |||
| RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID, | |||
| RT_ERROR_TSFW_ILLEGAL_PARAM, | |||
| RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL, | |||
| RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY, | |||
| RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL, | |||
| RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY, | |||
| RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED, | |||
| RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED, | |||
| RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE, | |||
| RT_ERROR_TSFW_L2_MALLOC_FAILED, | |||
| RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED, | |||
| RT_ERROR_TSFW_MEMCPY_OP_FAILED, | |||
| RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED, | |||
| RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE, | |||
| RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL, | |||
| RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY, | |||
| RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED, | |||
| RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE, | |||
| RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED, | |||
| RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND, | |||
| RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED, | |||
| RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED, | |||
| RT_ERROR_TSFW_SQNODE_NOT_ENOUGH, | |||
| RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE, | |||
| RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE, | |||
| RT_ERROR_TSFW_CQ_REPORT_FAILED, | |||
| RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS, | |||
| RT_ERROR_TSFW_SYS_DMA_RESET_FAILED, | |||
| RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED, | |||
| RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED, | |||
| RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL, | |||
| RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY, | |||
| RT_ERROR_TSFW_TIMER_EVENT_FULL, | |||
| RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH, | |||
| RT_ERROR_TSFW_AICORE_TIMEOUT, | |||
| RT_ERROR_TSFW_AICORE_EXCEPTION, | |||
| RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION, | |||
| RT_ERROR_TSFW_AICPU_TIMEOUT, | |||
| RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL, | |||
| RT_ERROR_TSFW_AICPU_EXCEPTION, | |||
| RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR, | |||
| RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR, | |||
| RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM, | |||
| RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT, | |||
| RT_ERROR_TSFW_DEBUG_INVALID_SQCQ, | |||
| RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE, | |||
| RT_ERROR_TSFW_DEBUG_CMD_PROCESS, | |||
| RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS, | |||
| RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS, | |||
| RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS, | |||
| RT_ERROR_TSFW_DEBUG_TASK_EMPTY, | |||
| RT_ERROR_TSFW_DEBUG_TASK_FULL, | |||
| RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST, | |||
| RT_ERROR_TSFW_DEBUG_AI_CORE_FULL, | |||
| RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST, | |||
| RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION, | |||
| RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT, | |||
| RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL, | |||
| RT_ERROR_TSFW_DEBUG_READ_ERROR, | |||
| RT_ERROR_TSFW_DEBUG_WRITE_FAIL, | |||
| RT_ERROR_TSFW_QUEUE_FULL, | |||
| RT_ERROR_TSFW_QUEUE_EMPTY, | |||
| RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL, | |||
| RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH, | |||
| RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE, | |||
| RT_ERROR_TSFW_INVLD_CPY_DIR, | |||
| RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES, | |||
| RT_ERROR_TSFW_PCIE_DMA_CPY_ERR, | |||
| RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY, | |||
| RT_ERROR_TSFW_PROFILE_BUFF_FULL, | |||
| RT_ERROR_TSFW_PROFILE_MODE_CONFLICT, | |||
| RT_ERROR_TSFW_PROFILE_OTHER_PID_ON, | |||
| RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED, | |||
| RT_ERROR_TSFW_TSCPU_CLOSE_FAILED, | |||
| RT_ERROR_TSFW_EXPECT_FAIL, | |||
| RT_ERROR_TSFW_REPEAT_MODEL_STREAM, | |||
| RT_ERROR_TSFW_STREAM_MODEL_UNBIND, | |||
| RT_ERROR_TSFW_MODEL_EXE_FAILED, | |||
| RT_ERROR_TSFW_IPC_SEND_FAILED, | |||
| RT_ERROR_TSFW_IPC_PROC_REG_FAILED, | |||
| RT_ERROR_TSFW_STREAM_FULL, | |||
| RT_ERROR_TSFW_END_OF_SEQUENCE, | |||
| RT_ERROR_TSFW_SWITCH_STREAM_LABEL, | |||
| RT_ERROR_TSFW_TRANS_SQE_FAIL, | |||
| RT_ERROR_TSFW_RESERVED, | |||
| RT_ERROR_SUBSCRIBE_BASE = 0x07160000, | |||
| RT_ERROR_SUBSCRIBE_NULL, | |||
| RT_ERROR_SUBSCRIBE_NEW, | |||
| RT_ERROR_SUBSCRIBE_STREAM, | |||
| RT_ERROR_SUBSCRIBE_THREAD, | |||
| RT_ERROR_SUBSCRIBE_GROUP, | |||
| RT_ERROR_GROUP_BASE = 0x07170000, | |||
| RT_ERROR_GROUP_NOT_SET, | |||
| RT_ERROR_GROUP_NOT_CREATE, | |||
| RT_ERROR_RESERVED = 0x07ff0000, | |||
| }rtError_t; | |||
| typedef int32_t rtError_t; | |||
| static const int32_t RT_ERROR_NONE = 0; // success | |||
| /** | |||
| * @ingroup dvrt_base | |||
| @@ -387,10 +87,20 @@ typedef struct rtExceptionInfo { | |||
| uint32_t deviceid; | |||
| } rtExceptionInfo; | |||
| typedef struct rtTaskFailInfo { | |||
| uint32_t taskid; | |||
| uint32_t streamid; | |||
| uint32_t tid; | |||
| uint32_t deviceid; | |||
| uint32_t retcode; | |||
| } rtTaskFailInfo; | |||
| typedef void (*rtErrorCallback)(rtExceptionType); | |||
| typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | |||
| typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); | |||
| typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | |||
| /** | |||
| @@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* | |||
| */ | |||
| RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); | |||
| /** | |||
| * @ingroup profiling_base | |||
| * @brief ts set profiling reporter callback. | |||
| */ | |||
| RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); | |||
| /** | |||
| * @ingroup dvrt_base | |||
| * @brief Returns the last error from a runtime call. | |||
| @@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); | |||
| */ | |||
| RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); | |||
| /** | |||
| * @ingroup dvrt_base | |||
| * @brief register callback for fail task | |||
| * @param [in] moduleName unique register name, can't be null | |||
| * @param [in] callback fail task callback function | |||
| * @param [out] NA | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); | |||
| /** | |||
| * @ingroup dvrt_base | |||
| * @brief notify handle. | |||
| @@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig { | |||
| typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; | |||
| /** | |||
| * @ingroup | |||
| * @brief get platform | |||
| * @param [in] platForm | |||
| * @return platForm | |||
| */ | |||
| RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm); | |||
| /** | |||
| * @ingroup | |||
| * @brief get AI core count | |||
| @@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate | |||
| */ | |||
| RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); | |||
| /** | |||
| * @ingroup | |||
| * @brief set platform in gen ctx | |||
| * @param [in] platForm | |||
| * @return RT_ERROR_NONE for ok, errno for failed | |||
| */ | |||
| RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); | |||
| /** | |||
| * @ingroup | |||
| @@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); | |||
| */ | |||
| RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | |||
| /** | |||
| * @ingroup | |||
| * @brief get runtime version. The version is returned as (1000 * major + 10 * minor). For example, RUNTIME 9.2 would be represented by 9020. | |||
| * @param [out] runtimeVersion | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| } | |||
| #endif | |||
| @@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 | |||
| FEATURE_TYPE_MEMCPY = 0, | |||
| FEATURE_TYPE_RSV, | |||
| } rtFeatureType_t; | |||
| * @param [in] infoType info type | |||
| * @param [in] featureInfo info type | |||
| typedef enum tagMemcpyInfo { | |||
| MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | |||
| MEMCPY_INFO_RSV, | |||
| } rtMemcpyInfo_t; | |||
| * @param [out] value the capability info | |||
| * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); | |||
| @@ -28,4 +28,4 @@ | |||
| #include "rt_model.h" | |||
| #include "stream.h" | |||
| #endif // __CCE_RUNTIME_RT_H__ | |||
| #endif // __CCE_RUNTIME_RT_H__ | |||
| @@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t; | |||
| typedef uint32_t TDT_StatusT; | |||
| #endif | |||
| #define LINUX 0 | |||
| #define WINDOWS 1 | |||
| #ifndef TDT_LIB_EXPORT | |||
| #if(TARGET_SYSTEM_NAME == WINDOWS) | |||
| #define TDT_LIB_EXPORT __declspec(dllexport) | |||
| #else | |||
| #define TDT_LIB_EXPORT __attribute__((visibility("default"))) | |||
| #endif | |||
| #endif | |||
| /** | |||
| * @ingroup tdt status. | |||
| * | |||
| @@ -23,6 +23,7 @@ | |||
| #include <mutex> | |||
| #include "tdt/status.h" | |||
| #include "tdt/data_common.h" | |||
| #include "toolchain/prof_callback.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| @@ -37,7 +38,7 @@ extern "C" { | |||
| * Used for the Framework process to communicate with the TSDDaemon process, | |||
| * and notify TSD to complete the initialization of other processes | |||
| * | |||
| * @param phyDeviceId [IN] type #unsigned int. Physical device ID | |||
| * @param logicDeviceId [IN] type #unsigned int. Logic device ID | |||
| * @param rankSize [IN] type #unsigned int. The rankSize of the training. | |||
| * The default value is 1. When rankSize is greater than 1, | |||
| * HCCP will be pulled to perform set communication related operations. | |||
| @@ -49,7 +50,7 @@ extern "C" { | |||
| * @li tsd_client.h: Header file where the interface declaration is located. | |||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | |||
| */ | |||
| TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); | |||
| TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); | |||
| /** | |||
| * @ingroup Close | |||
| @@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra | |||
| * @li tsd_client.h: Header file where the interface declaration is located. | |||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | |||
| */ | |||
| TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); | |||
| TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); | |||
| /** | |||
| * @ingroup UpdateProfilingMode | |||
| @@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); | |||
| * @li tsd_client.h: Header file where the interface declaration is located. | |||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | |||
| */ | |||
| TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag); | |||
| TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); | |||
| /** | |||
| * @ingroup TsdSetMsprofReporterCallback | |||
| * @brief Sets the aicpu profiling callback function in inference scenarios | |||
| * | |||
| * @par Function | |||
| * Sets the profiling callback function of the aicpu_sd process in offline mode | |||
| * | |||
| * @param callback [IN] type #MsprofReporterCallback. The callback function | |||
| * @retval TDT_OK Success | |||
| * @retval OtherValues Failure | |||
| * | |||
| * @par Dependency | |||
| * @li libtsdclient.so: Library to which the interface belongs. | |||
| * @li tsd_client.h: Header file where the interface declaration is located. | |||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | |||
| * @li prof_callback.h: Header file where 'MsprofReporterCallback' defined | |||
| */ | |||
| TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); | |||
| /** | |||
| * @ingroup CreateCmdParameterObj | |||
| @@ -0,0 +1,135 @@ | |||
| /** | |||
| * Copyright 2020-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| * | |||
| * @file prof_callback.h | |||
| * @brief declaration of profiling callbacks | |||
| */ | |||
| #ifndef MSPROFILER_PROF_CALLBACK_H_ | |||
| #define MSPROFILER_PROF_CALLBACK_H_ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| #include "stddef.h" | |||
| #include "stdint.h" | |||
| /** | |||
| * @name MsprofErrorCode | |||
| * @brief error code | |||
| */ | |||
| enum MsprofErrorCode { | |||
| MSPROF_ERROR_NONE = 0, | |||
| MSPROF_ERROR_MEM_NOT_ENOUGH, | |||
| MSPROF_ERROR_GET_ENV, | |||
| MSPROF_ERROR_CONFIG_INVALID, | |||
| MSPROF_ERROR_ACL_JSON_OFF, | |||
| MSPROF_ERROR, | |||
| }; | |||
| #define MSPROF_ENGINE_MAX_TAG_LEN (31) | |||
| /** | |||
| * @name ReporterData | |||
| * @brief struct of data to report | |||
| */ | |||
| struct ReporterData { | |||
| char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be written | |||
| int deviceId; // the index of device | |||
| size_t dataLen; // the length of send data | |||
| unsigned char *data; // the data content | |||
| }; | |||
| /** | |||
| * @name MsprofReporterModuleId | |||
| * @brief module id of data to report | |||
| */ | |||
| enum MsprofReporterModuleId { | |||
| MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS | |||
| MSPROF_MODULE_HCCL, // HCCL | |||
| MSPROF_MODULE_ACL, // AclModule | |||
| MSPROF_MODULE_FRAMEWORK, // Framework | |||
| MSPROF_MODULE_RUNTIME // runtime | |||
| }; | |||
| /** | |||
| * @name MsprofReporterCallbackType | |||
| * @brief reporter callback request type | |||
| */ | |||
| enum MsprofReporterCallbackType { | |||
| MSPROF_REPORTER_REPORT = 0, // report data | |||
| MSPROF_REPORTER_INIT, // init reporter | |||
| MSPROF_REPORTER_UNINIT, // uninit reporter | |||
| }; | |||
| /** | |||
| * @name MsprofReporterCallback | |||
| * @brief callback to start reporter/stop reporter/report data | |||
| * @param moduleId [IN] enum MsprofReporterModuleId | |||
| * @param type [IN] enum MsprofReporterCallbackType | |||
| * @param data [IN] callback data (nullptr on INIT/UNINIT) | |||
| * @param len [IN] callback data size (0 on INIT/UNINIT) | |||
| * @return enum MsprofErrorCode | |||
| */ | |||
| typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); | |||
| #define MSPROF_OPTIONS_DEF_LEN_MAX (2048) | |||
| /** | |||
| * @name MsprofGeOptions | |||
| * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS | |||
| */ | |||
| struct MsprofGeOptions { | |||
| char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; | |||
| char options[MSPROF_OPTIONS_DEF_LEN_MAX]; | |||
| }; | |||
| /** | |||
| * @name MsprofCtrlCallbackType | |||
| * @brief ctrl callback request type | |||
| */ | |||
| enum MsprofCtrlCallbackType { | |||
| MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env | |||
| MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json | |||
| MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options | |||
| MSPROF_CTRL_FINALIZE // stop profiling | |||
| }; | |||
| /** | |||
| * @name MsprofCtrlCallback | |||
| * @brief callback to start/stop profiling | |||
| * @param type [IN] enum MsprofCtrlCallbackType | |||
| * @param data [IN] callback data | |||
| * @param len [IN] callback data size | |||
| * @return enum MsprofErrorCode | |||
| */ | |||
| typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); | |||
| /** | |||
| * @name MsprofSetDeviceCallback | |||
| * @brief callback to notify set/reset device | |||
| * @param devId [IN] device id | |||
| * @param isOpenDevice [IN] true: set device, false: reset device | |||
| */ | |||
| typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MSPROFILER_PROF_CALLBACK_H_ | |||
| @@ -16,7 +16,17 @@ | |||
| #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ | |||
| #define MSPROF_ENGINE_PROF_REPORTER_H_ | |||
| #ifndef OS_TYPE | |||
| #define OS_TYPE 0 | |||
| #endif // OS_TYPE | |||
| #if (OS_TYPE != LINUX) | |||
| #define MSVP_PROF_API __declspec(dllexport) | |||
| #else | |||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | |||
| #endif | |||
| #include "prof_callback.h" | |||
| /** | |||
| * @file prof_reporter.h | |||
| @@ -25,20 +35,6 @@ | |||
| */ | |||
| namespace Msprof { | |||
| namespace Engine { | |||
| /// the max tag length | |||
| #define MSPROF_ENGINE_MAX_TAG_LEN (31) | |||
| /** | |||
| * @ingroup reporter | |||
| * @brief struct ReporterData | |||
| * the sturct of the data send to libmsprof | |||
| */ | |||
| struct ReporterData { | |||
| char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen | |||
| int deviceId; ///< the physical id of device | |||
| size_t dataLen; ///< the length of send data | |||
| unsigned char *data; ///< the data content | |||
| }; | |||
| /** | |||
| * @ingroup reporter | |||
| * @brief class Reporter | |||
| @@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter { | |||
| } // namespace Engine | |||
| } // namespace Msprof | |||
| #endif // MSPROF_ENGINE_PROF_REPORTER_H_ | |||
| #endif // MSPROF_ENGINE_PROF_REPORTER_H_ | |||
| @@ -18,7 +18,9 @@ | |||
| #define D_SYSLOG_H_ | |||
| #ifdef __cplusplus | |||
| #ifndef LOG_CPP | |||
| extern "C" { | |||
| #endif | |||
| #endif // __cplusplus | |||
| #ifndef LINUX | |||
| @@ -105,6 +107,7 @@ extern "C" { | |||
| #define SECURITY_LOG_MASK (0x00100000) | |||
| #define RUN_LOG_MASK (0x01000000) | |||
| #define OPERATION_LOG_MASK (0x10000000) | |||
| #define RESERVERD_LENGTH 52 | |||
| typedef struct tagDCODE { | |||
| const char *cName; | |||
| @@ -116,6 +119,18 @@ typedef struct tagKV { | |||
| char *value; | |||
| } KeyValue; | |||
| typedef enum { | |||
| APPLICATION = 0, | |||
| SYSTEM | |||
| } ProcessType; | |||
| typedef struct { | |||
| ProcessType type; | |||
| unsigned int pid; | |||
| unsigned int deviceId; | |||
| char reserved[RESERVERD_LENGTH]; | |||
| } LogAttr; | |||
| /** | |||
| * @ingroup slog | |||
| * | |||
| @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); | |||
| */ | |||
| DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); | |||
| /** | |||
| * @ingroup slog | |||
| * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION | |||
| * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) | |||
| * @return: 0: SUCCEED, others: FAILED | |||
| */ | |||
| DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||
| /** | |||
| * @ingroup slog | |||
| * @brief dlog_error: print error log | |||
| @@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); | |||
| void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | |||
| #ifdef __cplusplus | |||
| #ifndef LOG_CPP | |||
| } | |||
| #endif // LOG_CPP | |||
| #endif // __cplusplus | |||
| #endif // D_SYSLOG_H_ | |||