| @@ -16,8 +16,11 @@ endif() | |||||
| if(DEFINED ENV{D_PKG_SERVER}) | if(DEFINED ENV{D_PKG_SERVER}) | ||||
| set(GE_PB_PKG $ENV{D_PKG_SERVER}) | set(GE_PB_PKG $ENV{D_PKG_SERVER}) | ||||
| message("Download packages from PKG server") | |||||
| endif() | |||||
| message("Download packages from DPKG server") | |||||
| elseif(DEFINED ENV{MSLIBS_SERVER}) | |||||
| set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081") | |||||
| message("Download packages from MSPKG server") | |||||
| endif () | |||||
| set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64) | set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64) | ||||
| set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common) | set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common) | ||||
| @@ -105,7 +108,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | ||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| if(PRODUCT STREQUAL "flr3") | if(PRODUCT STREQUAL "flr3") | ||||
| elseif(PRODUCT STREQUAL "flr1") | elseif(PRODUCT STREQUAL "flr1") | ||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
| @@ -115,7 +118,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | ||||
| endif() | endif() | ||||
| elseif(PLATFORM STREQUAL "all") | elseif(PLATFORM STREQUAL "all") | ||||
| find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR}) | |||||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
| find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | ||||
| find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | ||||
| @@ -123,14 +126,14 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(resource libresource.so ${ASCEND_ATC_DIR}) | find_module(resource libresource.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
| find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR}) | |||||
| find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | ||||
| else() | else() | ||||
| message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||||
| message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||||
| endif() | endif() | ||||
| if (ENABLE_GE_COV OR ENABLE_GE_UT) | |||||
| if (ENABLE_GE_COV OR ENABLE_GE_UT) | |||||
| add_subdirectory(tests) | add_subdirectory(tests) | ||||
| endif() | endif() | ||||
| @@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build | |||||
| URL ${REQ_URL} | URL ${REQ_URL} | ||||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | ||||
| #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2 | #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2 | ||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR> | CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR> | ||||
| BUILD_COMMAND $(MAKE) | BUILD_COMMAND $(MAKE) | ||||
| INSTALL_COMMAND $(MAKE) install | INSTALL_COMMAND $(MAKE) install | ||||
| @@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||||
| message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | ||||
| endif() | endif() | ||||
| if (ENABLE_GITEE) | |||||
| if (GE_PB_PKG) | |||||
| set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz") | |||||
| set(MD5 "") | |||||
| elseif (ENABLE_GITEE) | |||||
| set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz") | set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz") | ||||
| set(MD5 "") | set(MD5 "") | ||||
| else() | else() | ||||
| @@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack- | |||||
| set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") | set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") | ||||
| ExternalProject_Add(gtest_build | ExternalProject_Add(gtest_build | ||||
| URL ${REQ_URL} | URL ${REQ_URL} | ||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR> | CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR> | ||||
| -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON | |||||
| -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON | |||||
| BUILD_COMMAND $(MAKE) | BUILD_COMMAND $(MAKE) | ||||
| INSTALL_COMMAND $(MAKE) install | INSTALL_COMMAND $(MAKE) install | ||||
| EXCLUDE_FROM_ALL TRUE | EXCLUDE_FROM_ALL TRUE | ||||
| @@ -5,19 +5,24 @@ endif() | |||||
| include(ExternalProject) | include(ExternalProject) | ||||
| set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include) | set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include) | ||||
| #if (ENABLE_GITEE) | |||||
| if (GE_PB_PKG) | |||||
| set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip") | |||||
| set(MD5 "0dc903888211db3a0f170304cd9f3a89") | |||||
| set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) | |||||
| #elseif (ENABLE_GITEE) | |||||
| # set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") | # set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") | ||||
| # set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") | # set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") | ||||
| # set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") | |||||
| #else() | |||||
| set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") | |||||
| set(MD5 "0dc903888211db3a0f170304cd9f3a89") | |||||
| set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) | |||||
| #endif () | |||||
| #set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") | |||||
| else() | |||||
| set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") | |||||
| set(MD5 "0dc903888211db3a0f170304cd9f3a89") | |||||
| set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) | |||||
| endif () | |||||
| ExternalProject_Add(json_build | ExternalProject_Add(json_build | ||||
| URL ${REQ_URL} | URL ${REQ_URL} | ||||
| #URL /home/txd/workspace/cloud_code/pkg/include.zip | #URL /home/txd/workspace/cloud_code/pkg/include.zip | ||||
| SOURCE_DIR ${JSON_SRC_DIR} | SOURCE_DIR ${JSON_SRC_DIR} | ||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND "" | CONFIGURE_COMMAND "" | ||||
| BUILD_COMMAND "" | BUILD_COMMAND "" | ||||
| INSTALL_COMMAND "" | INSTALL_COMMAND "" | ||||
| @@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx) | |||||
| set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto) | set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto) | ||||
| file(MAKE_DIRECTORY ${ONNX_PROTO_DIR}) | file(MAKE_DIRECTORY ${ONNX_PROTO_DIR}) | ||||
| if (ENABLE_GITEE) | |||||
| if (GE_PB_PKG) | |||||
| set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz") | |||||
| set(MD5 "512f2779d6215d4a36f366b6b9acdf1e") | |||||
| elseif (ENABLE_GITEE) | |||||
| set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz") | set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz") | ||||
| set(MD5 "1bdbcecdd68ea8392630467646776e02") | set(MD5 "1bdbcecdd68ea8392630467646776e02") | ||||
| else() | else() | ||||
| @@ -19,6 +22,7 @@ ExternalProject_Add(onnx | |||||
| #URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz | #URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz | ||||
| #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345 | #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345 | ||||
| #SOURCE_DIR ${ONNX_SRC_DIR} | #SOURCE_DIR ${ONNX_SRC_DIR} | ||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND "" | CONFIGURE_COMMAND "" | ||||
| BUILD_COMMAND "" | BUILD_COMMAND "" | ||||
| #INSTALL_COMMAND "" | #INSTALL_COMMAND "" | ||||
| @@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst | |||||
| set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") | set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") | ||||
| ExternalProject_Add(protobuf_build | ExternalProject_Add(protobuf_build | ||||
| URL ${REQ_URL} | URL ${REQ_URL} | ||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} | CONFIGURE_COMMAND ${CMAKE_COMMAND} | ||||
| -Dprotobuf_WITH_ZLIB=OFF | -Dprotobuf_WITH_ZLIB=OFF | ||||
| -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} | -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} | ||||
| @@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build | |||||
| URL ${REQ_URL} | URL ${REQ_URL} | ||||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | ||||
| #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0 | #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0 | ||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} | CONFIGURE_COMMAND ${CMAKE_COMMAND} | ||||
| -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} | -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} | ||||
| -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} | ||||
| @@ -1,115 +1,116 @@ | |||||
| if (HAVE_PROTOC) | |||||
| return() | |||||
| endif() | |||||
| include(ExternalProject) | |||||
| include(GNUInstallDirs) | |||||
| #set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) | |||||
| if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||||
| (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) | |||||
| set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) | |||||
| message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | |||||
| endif() | |||||
| if(GE_PB_PKG) | |||||
| set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") | |||||
| else() | |||||
| if (ENABLE_GITEE) | |||||
| set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") | |||||
| set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") | |||||
| else() | |||||
| set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") | |||||
| set(MD5 "3d9e32700639618a4d2d342c99d4507a") | |||||
| endif () | |||||
| endif() | |||||
| set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") | |||||
| set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") | |||||
| ExternalProject_Add(protoc_build | |||||
| URL ${REQ_URL} | |||||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||||
| #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 | |||||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake | |||||
| BUILD_COMMAND $(MAKE) | |||||
| INSTALL_COMMAND $(MAKE) install | |||||
| EXCLUDE_FROM_ALL TRUE | |||||
| ) | |||||
| set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) | |||||
| set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) | |||||
| function(protobuf_generate comp c_var h_var) | |||||
| if(NOT ARGN) | |||||
| message(SEND_ERROR "Error: protobuf_generate() called without any proto files") | |||||
| return() | |||||
| endif() | |||||
| set(${c_var}) | |||||
| set(${h_var}) | |||||
| foreach(file ${ARGN}) | |||||
| get_filename_component(abs_file ${file} ABSOLUTE) | |||||
| get_filename_component(file_name ${file} NAME_WE) | |||||
| get_filename_component(file_dir ${abs_file} PATH) | |||||
| get_filename_component(parent_subdir ${file_dir} NAME) | |||||
| if("${parent_subdir}" STREQUAL "proto") | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||||
| else() | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) | |||||
| endif() | |||||
| list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") | |||||
| list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") | |||||
| add_custom_command( | |||||
| OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" | |||||
| WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" | |||||
| COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} | |||||
| DEPENDS protoc_build ${abs_file} | |||||
| COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) | |||||
| endforeach() | |||||
| set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) | |||||
| set(${c_var} ${${c_var}} PARENT_SCOPE) | |||||
| set(${h_var} ${${h_var}} PARENT_SCOPE) | |||||
| endfunction() | |||||
| function(protobuf_generate_py comp py_var) | |||||
| if(NOT ARGN) | |||||
| message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") | |||||
| return() | |||||
| endif() | |||||
| set(${py_var}) | |||||
| foreach(file ${ARGN}) | |||||
| get_filename_component(abs_file ${file} ABSOLUTE) | |||||
| get_filename_component(file_name ${file} NAME_WE) | |||||
| get_filename_component(file_dir ${abs_file} PATH) | |||||
| get_filename_component(parent_subdir ${file_dir} NAME) | |||||
| if("${parent_subdir}" STREQUAL "proto") | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||||
| else() | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) | |||||
| endif() | |||||
| list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") | |||||
| add_custom_command( | |||||
| OUTPUT "${proto_output_path}/${file_name}_pb2.py" | |||||
| WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" | |||||
| COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} | |||||
| DEPENDS protoc_build ${abs_file} | |||||
| COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) | |||||
| endforeach() | |||||
| set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) | |||||
| set(${py_var} ${${py_var}} PARENT_SCOPE) | |||||
| endfunction() | |||||
| #set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") | |||||
| set(HAVE_PROTOC TRUE) | |||||
| if (HAVE_PROTOC) | |||||
| return() | |||||
| endif() | |||||
| include(ExternalProject) | |||||
| include(GNUInstallDirs) | |||||
| #set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) | |||||
| if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||||
| (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) | |||||
| set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) | |||||
| message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | |||||
| endif() | |||||
| if(GE_PB_PKG) | |||||
| set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") | |||||
| else() | |||||
| if (ENABLE_GITEE) | |||||
| set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") | |||||
| set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") | |||||
| else() | |||||
| set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") | |||||
| set(MD5 "3d9e32700639618a4d2d342c99d4507a") | |||||
| endif () | |||||
| endif() | |||||
| set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") | |||||
| set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") | |||||
| ExternalProject_Add(protoc_build | |||||
| URL ${REQ_URL} | |||||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||||
| #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 | |||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake | |||||
| BUILD_COMMAND $(MAKE) | |||||
| INSTALL_COMMAND $(MAKE) install | |||||
| EXCLUDE_FROM_ALL TRUE | |||||
| ) | |||||
| set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) | |||||
| set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) | |||||
| function(protobuf_generate comp c_var h_var) | |||||
| if(NOT ARGN) | |||||
| message(SEND_ERROR "Error: protobuf_generate() called without any proto files") | |||||
| return() | |||||
| endif() | |||||
| set(${c_var}) | |||||
| set(${h_var}) | |||||
| foreach(file ${ARGN}) | |||||
| get_filename_component(abs_file ${file} ABSOLUTE) | |||||
| get_filename_component(file_name ${file} NAME_WE) | |||||
| get_filename_component(file_dir ${abs_file} PATH) | |||||
| get_filename_component(parent_subdir ${file_dir} NAME) | |||||
| if("${parent_subdir}" STREQUAL "proto") | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||||
| else() | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) | |||||
| endif() | |||||
| list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") | |||||
| list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") | |||||
| add_custom_command( | |||||
| OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" | |||||
| WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" | |||||
| COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} | |||||
| DEPENDS protoc_build ${abs_file} | |||||
| COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) | |||||
| endforeach() | |||||
| set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) | |||||
| set(${c_var} ${${c_var}} PARENT_SCOPE) | |||||
| set(${h_var} ${${h_var}} PARENT_SCOPE) | |||||
| endfunction() | |||||
| function(protobuf_generate_py comp py_var) | |||||
| if(NOT ARGN) | |||||
| message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") | |||||
| return() | |||||
| endif() | |||||
| set(${py_var}) | |||||
| foreach(file ${ARGN}) | |||||
| get_filename_component(abs_file ${file} ABSOLUTE) | |||||
| get_filename_component(file_name ${file} NAME_WE) | |||||
| get_filename_component(file_dir ${abs_file} PATH) | |||||
| get_filename_component(parent_subdir ${file_dir} NAME) | |||||
| if("${parent_subdir}" STREQUAL "proto") | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) | |||||
| else() | |||||
| set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) | |||||
| endif() | |||||
| list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") | |||||
| add_custom_command( | |||||
| OUTPUT "${proto_output_path}/${file_name}_pb2.py" | |||||
| WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} | |||||
| COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" | |||||
| COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} | |||||
| DEPENDS protoc_build ${abs_file} | |||||
| COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) | |||||
| endforeach() | |||||
| set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) | |||||
| set(${py_var} ${${py_var}} PARENT_SCOPE) | |||||
| endfunction() | |||||
| #set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") | |||||
| set(HAVE_PROTOC TRUE) | |||||
| @@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR | |||||
| message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") | ||||
| endif() | endif() | ||||
| if (GE_PB_PKG) | |||||
| set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz") | |||||
| set(MD5 "") | |||||
| else() | |||||
| set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz") | |||||
| set(MD5 "") | |||||
| endif () | |||||
| ExternalProject_Add(c_sec_build | ExternalProject_Add(c_sec_build | ||||
| URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz | |||||
| #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz | |||||
| URL ${REQ_URL} | |||||
| #URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz | |||||
| #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec | #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec | ||||
| PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch | PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch | ||||
| TLS_VERIFY OFF | |||||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} | CONFIGURE_COMMAND ${CMAKE_COMMAND} | ||||
| -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} | -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} | ||||
| -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} | ||||
| @@ -605,7 +605,7 @@ set(INFER_SRC_LIST | |||||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | ||||
| ############ libge_runner.so ############ | ############ libge_runner.so ############ | ||||
| add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>) | |||||
| add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) | |||||
| target_compile_definitions(ge_runner PRIVATE | target_compile_definitions(ge_runner PRIVATE | ||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| @@ -646,11 +646,14 @@ target_include_directories(ge_runner PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_libraries(ge_runner | |||||
| target_link_libraries(ge_runner PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ge_memory | ge_memory | ||||
| adump_server | adump_server | ||||
| static_mmpa | static_mmpa | ||||
| -Wl,--whole-archive | |||||
| msprofiler_fwk | |||||
| -Wl,--no-whole-archive | |||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| graph | graph | ||||
| ge_common | ge_common | ||||
| @@ -710,7 +713,7 @@ target_include_directories(ge_compiler PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_libraries(ge_compiler | |||||
| target_link_libraries(ge_compiler PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ge_memory | ge_memory | ||||
| static_mmpa | static_mmpa | ||||
| @@ -764,7 +767,14 @@ target_link_options(opensrc_ascendcl PRIVATE | |||||
| -Wl,--allow-multiple-definition | -Wl,--allow-multiple-definition | ||||
| -Wl,-z,muldefs | -Wl,-z,muldefs | ||||
| -Wl,-Bsymbolic | -Wl,-Bsymbolic | ||||
| -Wl,--exclude-libs,ALL | |||||
| -Wl,--exclude-libs,libascend_protobuf.a | |||||
| -Wl,--exclude-libs,libge_executor.a | |||||
| -Wl,--exclude-libs,libge_common.a | |||||
| -Wl,--exclude-libs,libgraph.a | |||||
| -Wl,--exclude-libs,libmmpa.a | |||||
| -Wl,--exclude-libs,libregister.a | |||||
| -Wl,--exclude-libs,liberror_manager.a | |||||
| -Wl,--exclude-libs,libadump_server.a | |||||
| ) | ) | ||||
| target_link_libraries(opensrc_ascendcl PRIVATE | target_link_libraries(opensrc_ascendcl PRIVATE | ||||
| -Wl,--whole-archive | -Wl,--whole-archive | ||||
| @@ -143,6 +143,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { | |||||
| } | } | ||||
| try { | try { | ||||
| Json prof_options = Json::parse(options); | Json prof_options = Json::parse(options); | ||||
| if (options.find(kTrainingTrace) == std::string::npos) { | |||||
| return ge::SUCCESS; | |||||
| } | |||||
| const std::string training_trace = prof_options[kTrainingTrace]; | const std::string training_trace = prof_options[kTrainingTrace]; | ||||
| if (training_trace.empty()) { | if (training_trace.empty()) { | ||||
| GELOGI("Training trace will not take effect."); | GELOGI("Training trace will not take effect."); | ||||
| @@ -2991,19 +2991,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::UpdateKnownZeroCopyAddr() { | |||||
| for (size_t i = 0; i < total_io_addrs_.size(); ++i) { | |||||
| auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); | |||||
| Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) { | |||||
| for (size_t i = 0; i < total_io_addrs.size(); ++i) { | |||||
| auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); | |||||
| if (it_in != knonw_input_data_info_.end()) { | if (it_in != knonw_input_data_info_.end()) { | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| knonw_input_data_info_.at(total_io_addrs_[i])); | |||||
| total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); | |||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||||
| knonw_input_data_info_.at(total_io_addrs[i])); | |||||
| total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); | |||||
| } | } | ||||
| auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); | |||||
| auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); | |||||
| if (it_out != knonw_output_data_info_.end()) { | if (it_out != knonw_output_data_info_.end()) { | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| knonw_output_data_info_.at(total_io_addrs_[i])); | |||||
| total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); | |||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||||
| knonw_output_data_info_.at(total_io_addrs[i])); | |||||
| total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); | |||||
| } | } | ||||
| } | } | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | ||||
| @@ -3032,7 +3032,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
| } else { | } else { | ||||
| total_io_addrs_ = orig_total_io_addrs_; | total_io_addrs_ = orig_total_io_addrs_; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
| if (total_args_size_ == 0) { | if (total_args_size_ == 0) { | ||||
| GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | ||||
| @@ -3099,7 +3099,14 @@ Status DavinciModel::MallocKnownArgs() { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| // malloc dynamic and static hybrid memory | |||||
| if (total_hybrid_args_size_ != 0) { | |||||
| rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| } | |||||
| // malloc fixed addr memory, eg: rts op | // malloc fixed addr memory, eg: rts op | ||||
| if (total_fixed_addr_size_ != 0) { | if (total_fixed_addr_size_ != 0) { | ||||
| GELOGI("Begin to allocate fixed addr."); | GELOGI("Begin to allocate fixed addr."); | ||||
| @@ -476,6 +476,14 @@ class DavinciModel { | |||||
| void SetTotalIOAddrs(vector<void *> &io_addrs) { | void SetTotalIOAddrs(vector<void *> &io_addrs) { | ||||
| total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | ||||
| } | } | ||||
| void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; } | |||||
| uint32_t GetHybridArgsSize() { | |||||
| return total_hybrid_args_size_; | |||||
| } | |||||
| void *GetCurrentHybridArgsAddr(uint32_t offset) { | |||||
| void *cur_args = static_cast<char *>(hybrid_addrs_) + offset; | |||||
| return cur_args; | |||||
| } | |||||
| void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | ||||
| int64_t GetFixedAddrsSize(string tensor_name); | int64_t GetFixedAddrsSize(string tensor_name); | ||||
| void *GetCurrentFixedAddr(int64_t offset) const { | void *GetCurrentFixedAddr(int64_t offset) const { | ||||
| @@ -494,7 +502,7 @@ class DavinciModel { | |||||
| Status MallocKnownArgs(); | Status MallocKnownArgs(); | ||||
| Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status UpdateKnownZeroCopyAddr(); | |||||
| Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs); | |||||
| void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | ||||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | ||||
| @@ -977,6 +985,8 @@ class DavinciModel { | |||||
| void *args_ = nullptr; | void *args_ = nullptr; | ||||
| void *args_host_ = nullptr; | void *args_host_ = nullptr; | ||||
| void *fixed_addrs_ = nullptr; | void *fixed_addrs_ = nullptr; | ||||
| void *hybrid_addrs_ = nullptr; | |||||
| uint32_t total_hybrid_args_size_ = 0; | |||||
| int64_t total_fixed_addr_size_ = 0; | int64_t total_fixed_addr_size_ = 0; | ||||
| std::map<const void *, void *> knonw_input_data_info_; | std::map<const void *, void *> knonw_input_data_info_; | ||||
| std::map<const void *, void *> knonw_output_data_info_; | std::map<const void *, void *> knonw_output_data_info_; | ||||
| @@ -1055,7 +1055,16 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
| mmTimespec timespec = mmGetTickCount(); | mmTimespec timespec = mmGetTickCount(); | ||||
| ModelHelper model_helper; | ModelHelper model_helper; | ||||
| Status ret = model_helper.LoadModel(model); | |||||
| Status ret = model_helper.LoadRootModel(model); | |||||
| if (model_helper.GetModelType()) { | |||||
| bool is_shape_unknown = false; | |||||
| GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), | |||||
| "CheckIsUnknownShape failed, model id:%u", | |||||
| model_id); | |||||
| if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||||
| return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); | |||||
| } | |||||
| } | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "load model failed."); | GELOGE(ret, "load model failed."); | ||||
| return ret; | return ret; | ||||
| @@ -1214,7 +1223,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
| "Invalid model id %u, check weather model has been loaded or not.", model_id); | |||||
| "Invalid model id %u, check whether model has been loaded or not.", model_id); | |||||
| if (davinci_model->NeedDestroyAicpuKernel()) { | if (davinci_model->NeedDestroyAicpuKernel()) { | ||||
| GELOGI("Start to destroy specified aicpu kernel."); | GELOGI("Start to destroy specified aicpu kernel."); | ||||
| @@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||||
| Status KernelTaskInfo::Distribute() { | Status KernelTaskInfo::Distribute() { | ||||
| GELOGD("KernelTaskInfo Distribute Start."); | GELOGD("KernelTaskInfo Distribute Start."); | ||||
| if (davinci_model_->IsKnownNode()) { | if (davinci_model_->IsKnownNode()) { | ||||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| if (kernel_type_ == ccKernelType::TE) { | |||||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
| args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); | |||||
| } | |||||
| GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | ||||
| } | } | ||||
| rtError_t rt_ret = RT_ERROR_NONE; | rtError_t rt_ret = RT_ERROR_NONE; | ||||
| @@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() { | |||||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | ||||
| vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | ||||
| vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | ||||
| vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||||
| vector<void *> io_addrs; | vector<void *> io_addrs; | ||||
| if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
| io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
| io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||||
| if (kernel_type_ == ccKernelType::TE) { | |||||
| vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||||
| io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | ||||
| } else { | |||||
| string peer_input_name; | |||||
| if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { | |||||
| uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); | |||||
| if (output_index > output_data_addrs.size()) { | |||||
| GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", | |||||
| output_data_addrs.size(), output_index); | |||||
| return FAILED; | |||||
| } | |||||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
| for (size_t i = 0; i < output_data_addrs.size(); ++i) { | |||||
| if (i == output_index) { | |||||
| void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||||
| io_addrs.emplace_back(fixed_addr); | |||||
| continue; | |||||
| } | |||||
| io_addrs.emplace_back(output_data_addrs[i]); | |||||
| } | |||||
| io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||||
| davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||||
| uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||||
| auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||||
| errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| // copy args to device | |||||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| } | } | ||||
| davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
| GELOGI("KernelTaskInfo::UpdateArgs success."); | GELOGI("KernelTaskInfo::UpdateArgs success."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||||
| } | } | ||||
| Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
| domi::KernelDef kernel_def = task_def.kernel(); | |||||
| uint32_t args_size = kernel_def.args_size(); | |||||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
| davinci_model->SetTotalArgsSize(args_size); | |||||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||||
| // get opcontext stored in model | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| const domi::KernelContext &context = kernel_def.context(); | const domi::KernelContext &context = kernel_def.context(); | ||||
| // get opdesc | |||||
| op_desc_ = davinci_model->GetOpByIndex(context.op_index()); | |||||
| GE_CHECK_NOTNULL(op_desc_); | |||||
| // alloc fixed addr | |||||
| string peer_input_name; | |||||
| if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { | |||||
| uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); | |||||
| if (output_index > op_desc_->GetOutputsSize()) { | |||||
| GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), | |||||
| output_index); | |||||
| return FAILED; | |||||
| } | |||||
| fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); | |||||
| auto tensor_desc = op_desc_->GetOutputDesc(output_index); | |||||
| int64_t tensor_size = 0; | |||||
| GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||||
| davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); | |||||
| GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, | |||||
| fixed_addr_offset_); | |||||
| kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | |||||
| if (kernel_type_ == ccKernelType::TE) { | |||||
| uint32_t args_size = kernel_def.args_size(); | |||||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
| davinci_model->SetTotalArgsSize(args_size); | |||||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||||
| davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||||
| GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -888,7 +872,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| } | } | ||||
| // copy args to new host memory | // copy args to new host memory | ||||
| std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]); | |||||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||||
| GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | ||||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | ||||
| if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
| @@ -896,8 +880,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||||
| if (init_ret != SUCCESS) { | |||||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
| return init_ret; | |||||
| } | |||||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||||
| if (davinci_model_->IsKnownNode()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||||
| vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | ||||
| vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | ||||
| vector<void *> io_addrs; | vector<void *> io_addrs; | ||||
| @@ -914,19 +913,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| } | } | ||||
| } | } | ||||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||||
| if (init_ret != SUCCESS) { | |||||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
| return init_ret; | |||||
| } | |||||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||||
| // malloc device memory for args | // malloc device memory for args | ||||
| rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo { | |||||
| OpDescPtr op_desc_; | OpDescPtr op_desc_; | ||||
| DavinciModel *davinci_model_; | DavinciModel *davinci_model_; | ||||
| uint32_t args_offset_ = 0; | uint32_t args_offset_ = 0; | ||||
| uint32_t hybrid_args_offset_ = 0; | |||||
| int64_t fixed_addr_offset_ = 0; | int64_t fixed_addr_offset_ = 0; | ||||
| std::unique_ptr<uint8_t[]> args_addr = nullptr; | |||||
| bool call_save_dump_ = false; | bool call_save_dump_ = false; | ||||
| // aicpu ext_info device mem | // aicpu ext_info device mem | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include <chrono> | #include <chrono> | ||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| #include "graph/compute_graph.h" | #include "graph/compute_graph.h" | ||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "hybrid_execution_context.h" | #include "hybrid_execution_context.h" | ||||
| #include "subgraph_context.h" | #include "subgraph_context.h" | ||||
| @@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( | |||||
| this->num_pending_shapes_); | this->num_pending_shapes_); | ||||
| } | } | ||||
| Status ShapeInferenceState::UpdateInputShape(int idx, | |||||
| const GeShape &ori_shape, | |||||
| const GeShape &shape) { | |||||
| Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { | |||||
| if (node_item.IsInputShapeStatic(idx)) { | if (node_item.IsInputShapeStatic(idx)) { | ||||
| GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", | GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", | ||||
| node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
| idx, | idx, | ||||
| node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), | node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), | ||||
| shape.ToString().c_str()); | |||||
| target.GetShape().ToString().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]", | |||||
| int64_t tensor_size = -1; | |||||
| (void) TensorUtils::GetSize(target, tensor_size); | |||||
| GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", | |||||
| node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
| idx, | idx, | ||||
| shape.ToString().c_str(), | |||||
| ori_shape.ToString().c_str()); | |||||
| target.GetShape().ToString().c_str(), | |||||
| target.GetOriginShape().ToString().c_str(), | |||||
| tensor_size); | |||||
| std::lock_guard<std::mutex> lk(mu_); | std::lock_guard<std::mutex> lk(mu_); | ||||
| auto tensor_desc = node_item.MutableInputDesc(idx); | auto tensor_desc = node_item.MutableInputDesc(idx); | ||||
| GE_CHECK_NOTNULL(tensor_desc); | GE_CHECK_NOTNULL(tensor_desc); | ||||
| tensor_desc->SetShape(shape); | |||||
| tensor_desc->SetOriginShape(ori_shape); | |||||
| tensor_desc->SetShape(target.GetShape()); | |||||
| tensor_desc->SetOriginShape(target.GetOriginShape()); | |||||
| (void) TensorUtils::SetSize(*tensor_desc, tensor_size); | |||||
| if (--num_pending_shapes_ == 0) { | if (--num_pending_shapes_ == 0) { | ||||
| ready_cv_.notify_all(); | ready_cv_.notify_all(); | ||||
| } | } | ||||
| @@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex | |||||
| for (auto &p : shape_futures) { | for (auto &p : shape_futures) { | ||||
| auto idx = p.first; | auto idx = p.first; | ||||
| auto &future = p.second; | auto &future = p.second; | ||||
| GeShape shape; | |||||
| GeShape ori_shape; | |||||
| RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); | RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); | ||||
| GE_CHK_STATUS_RET(future.Get(ori_shape, shape), | |||||
| "[%s] Get shape failed. index = %u", | |||||
| node_item.NodeName().c_str(), | |||||
| idx); | |||||
| auto src_tensor_desc = future.GetTensorDesc(); | |||||
| GE_CHECK_NOTNULL(src_tensor_desc); | |||||
| RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); | RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); | ||||
| GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", | |||||
| node_item.NodeName().c_str(), | |||||
| idx, | |||||
| shape.ToString().c_str(), | |||||
| ori_shape.ToString().c_str()); | |||||
| auto input_desc = node_item.MutableInputDesc(idx); | auto input_desc = node_item.MutableInputDesc(idx); | ||||
| GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
| input_desc->SetShape(std::move(shape)); | |||||
| input_desc->SetOriginShape(ori_shape); | |||||
| int64_t tensor_size = -1; | |||||
| (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); | |||||
| GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu", | |||||
| node_item.NodeName().c_str(), | |||||
| idx, | |||||
| src_tensor_desc->GetShape().ToString().c_str(), | |||||
| src_tensor_desc->GetOriginShape().ToString().c_str(), | |||||
| tensor_size); | |||||
| input_desc->SetShape(src_tensor_desc->GetShape()); | |||||
| input_desc->SetOriginShape(src_tensor_desc->GetOriginShape()); | |||||
| (void) TensorUtils::SetSize(*input_desc, tensor_size); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { | |||||
| GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); | GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| GeTensorDescPtr ShapeFuture::GetTensorDesc() { | |||||
| GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); | |||||
| if (!subgraph_context_->Await(src_node_)) { | |||||
| GELOGE(INTERNAL_ERROR, "cancelled"); | |||||
| return nullptr; | |||||
| } | |||||
| return src_node_->GetOpDesc()->MutableOutputDesc(src_index_); | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -35,6 +35,7 @@ class ShapeFuture { | |||||
| ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); | ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); | ||||
| ~ShapeFuture() = default; | ~ShapeFuture() = default; | ||||
| Status Get(GeShape &ori_shape, GeShape &shape); | Status Get(GeShape &ori_shape, GeShape &shape); | ||||
| GeTensorDescPtr GetTensorDesc(); | |||||
| private: | private: | ||||
| NodePtr src_node_; | NodePtr src_node_; | ||||
| @@ -45,7 +46,7 @@ class ShapeFuture { | |||||
| struct ShapeInferenceState { | struct ShapeInferenceState { | ||||
| explicit ShapeInferenceState(const NodeItem &node_item); | explicit ShapeInferenceState(const NodeItem &node_item); | ||||
| Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape); | |||||
| Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc); | |||||
| void UpdateInputShapeFuture(int idx, ShapeFuture &&future); | void UpdateInputShapeFuture(int idx, ShapeFuture &&future); | ||||
| @@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue | |||||
| GE_CHECK_NOTNULL(tensor_desc); | GE_CHECK_NOTNULL(tensor_desc); | ||||
| auto node_state = subgraph_context_->GetOrCreateNodeState(input_node); | auto node_state = subgraph_context_->GetOrCreateNodeState(input_node); | ||||
| GE_CHECK_NOTNULL(node_state); | GE_CHECK_NOTNULL(node_state); | ||||
| node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); | |||||
| node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); | |||||
| } | } | ||||
| } | } | ||||
| @@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta | |||||
| } else { | } else { | ||||
| node_state.SetKernelTask(node_item.kernel_task); | node_state.SetKernelTask(node_item.kernel_task); | ||||
| } | } | ||||
| GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); | |||||
| RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); | |||||
| GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node), | |||||
| "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); | |||||
| RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); | |||||
| GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -20,12 +20,9 @@ | |||||
| #include "graph/utils/tensor_adapter.h" | #include "graph/utils/tensor_adapter.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "common/dump/dump_manager.h" | |||||
| #include "hybrid/executor//worker//shape_inference_engine.h" | |||||
| #include "common/dump/dump_op.h" | #include "common/dump/dump_op.h" | ||||
| #include "common/types.h" | |||||
| #include "common/ge_types.h" | |||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include "runtime/base.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -348,6 +345,10 @@ Status NodeDoneCallback::OnNodeDone() { | |||||
| } | } | ||||
| GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); | GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); | ||||
| if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) { | |||||
| // update output tensor sizes | |||||
| GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item)); | |||||
| } | |||||
| // PropagateOutputs for type == DEPEND_COMPUTE | // PropagateOutputs for type == DEPEND_COMPUTE | ||||
| if (node_item.shape_inference_type == DEPEND_COMPUTE) { | if (node_item.shape_inference_type == DEPEND_COMPUTE) { | ||||
| if (graph_context_->trace_enabled) { | if (graph_context_->trace_enabled) { | ||||
| @@ -17,9 +17,15 @@ | |||||
| #include "hybrid/executor/worker/shape_inference_engine.h" | #include "hybrid/executor/worker/shape_inference_engine.h" | ||||
| #include "graph/shape_refiner.h" | #include "graph/shape_refiner.h" | ||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| #include "common/math/math_util.h" | |||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace { | |||||
| const int kAlignment = 32; | |||||
| } | |||||
| namespace hybrid { | namespace hybrid { | ||||
| ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) | ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) | ||||
| : execution_context_(execution_context), | : execution_context_(execution_context), | ||||
| @@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||||
| } | } | ||||
| if (node_item.fused_subgraph != nullptr) { | if (node_item.fused_subgraph != nullptr) { | ||||
| return InferShapeForSubgraph(node_item, *node_item.fused_subgraph); | |||||
| GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph)); | |||||
| GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item)); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| // Skip shape inference for node of type DEPEND_COMPUTE | // Skip shape inference for node of type DEPEND_COMPUTE | ||||
| @@ -63,21 +71,15 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||||
| std::lock_guard<std::mutex> lk(mu_); | std::lock_guard<std::mutex> lk(mu_); | ||||
| RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); | RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); | ||||
| GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), | GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), | ||||
| "Invoke InferShapeAndType failed."); | |||||
| "Invoke InferShapeAndType failed."); | |||||
| RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); | RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); | ||||
| } | } | ||||
| // Check again to make sure shape is valid after shape inference | |||||
| if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { | |||||
| bool is_unknown_shape = false; | |||||
| GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), | |||||
| "Failed to get shape status. node = %s", | |||||
| node_item.NodeName().c_str()); | |||||
| GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, | |||||
| INTERNAL_ERROR, | |||||
| "[%s] Shape is still unknown after shape inference.", | |||||
| node_item.NodeName().c_str()); | |||||
| } | |||||
| // update output tensor sizes after shape inference | |||||
| // error if shape is still unknown and not of type DEPEND_SHAPE_RANGE | |||||
| RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); | |||||
| GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE)); | |||||
| RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); | |||||
| GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", | GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", | ||||
| node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
| @@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { | |||||
| // propagate each output | // propagate each output | ||||
| for (int i = 0; i < node_item.num_outputs; ++i) { | for (int i = 0; i < node_item.num_outputs; ++i) { | ||||
| auto output_desc = node_item.op_desc->MutableOutputDesc(i); | auto output_desc = node_item.op_desc->MutableOutputDesc(i); | ||||
| const auto &shape = output_desc->MutableShape(); | |||||
| const auto &ori_shape = output_desc->GetOriginShape(); | |||||
| auto &output_nodes = node_item.outputs[i]; | auto &output_nodes = node_item.outputs[i]; | ||||
| // propagate output to all sub-inputs | // propagate output to all sub-inputs | ||||
| @@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { | |||||
| infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, | infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, | ||||
| std::move(future)); | std::move(future)); | ||||
| } else { | } else { | ||||
| GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, | |||||
| ori_shape, | |||||
| shape)); | |||||
| GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc)); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -230,5 +228,92 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc, | |||||
| std::vector<int64_t> &shape, | |||||
| bool fallback_with_range) { | |||||
| const auto &tensor_shape = tensor_desc.MutableShape(); | |||||
| if (tensor_shape.IsUnknownShape()) { | |||||
| if (!fallback_with_range) { | |||||
| GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]", | |||||
| tensor_shape.ToString().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| GELOGD("Calc output size by range"); | |||||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | |||||
| GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range"); | |||||
| if (shape_range.size() != shape.size()) { | |||||
| GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)", | |||||
| shape_range.size(), | |||||
| shape.size()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) { | |||||
| if (shape[dim_index] == ge::UNKNOWN_DIM) { | |||||
| shape[dim_index] = shape_range[dim_index].second; | |||||
| } | |||||
| } | |||||
| GELOGD("After canonicalization, shape = [%s], before = [%s]", | |||||
| GeShape(shape).ToString().c_str(), | |||||
| tensor_shape.ToString().c_str()); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ShapeInferenceEngine::CalcTensorSize(DataType data_type, | |||||
| const std::vector<int64_t> &shape, | |||||
| int64_t &tensor_size) { | |||||
| GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str()); | |||||
| uint32_t type_size; | |||||
| if (!TypeUtils::GetDataTypeLength(data_type, type_size)) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to get data type size"); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| tensor_size = type_size; | |||||
| for (const auto &dim : shape) { | |||||
| GE_CHECK_GE(dim, 0); | |||||
| GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), | |||||
| "Shape size overflow, shape = [%s]", | |||||
| GeShape(shape).ToString().c_str()); | |||||
| tensor_size *= dim; | |||||
| } | |||||
| GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1), | |||||
| "Tensor size is too large: %ld, shape = [%s]", | |||||
| tensor_size, | |||||
| GeShape(shape).ToString().c_str()); | |||||
| tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) { | |||||
| auto op_desc = node_item.GetOpDesc(); | |||||
| for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) { | |||||
| auto tensor_desc = op_desc->MutableOutputDesc(output_index); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| const auto &shape = tensor_desc->MutableShape(); | |||||
| // modify on copy | |||||
| auto dims = shape.GetDims(); | |||||
| GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range), | |||||
| "[%s] Failed to canonicalize shape for output %zu", | |||||
| node_item.NodeName().c_str(), | |||||
| output_index); | |||||
| int64_t tensor_size; | |||||
| GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size), | |||||
| "[%s] Failed to calc tensor size for output %zu", | |||||
| node_item.NodeName().c_str(), | |||||
| output_index); | |||||
| GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size); | |||||
| (void) TensorUtils::SetSize(*tensor_desc, tensor_size); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -34,7 +34,11 @@ class ShapeInferenceEngine { | |||||
| Status PropagateOutputShapes(const NodeItem &node_item); | Status PropagateOutputShapes(const NodeItem &node_item); | ||||
| static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false); | |||||
| private: | private: | ||||
| static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range); | |||||
| static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size); | |||||
| static Status UpdatePeerNodeShape(const Node &node); | static Status UpdatePeerNodeShape(const Node &node); | ||||
| Status AwaitDependentNodes(NodeState &node_state); | Status AwaitDependentNodes(NodeState &node_state); | ||||
| @@ -22,6 +22,7 @@ | |||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "hybrid/executor/worker/shape_inference_engine.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr | |||||
| GE_CHECK_NOTNULL(dst_op_desc); | GE_CHECK_NOTNULL(dst_op_desc); | ||||
| auto in_idx = node_and_anchor.second->GetIdx(); | auto in_idx = node_and_anchor.second->GetIdx(); | ||||
| auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); | auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); | ||||
| fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc); | |||||
| fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc); | |||||
| GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); | GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); | ||||
| } | } | ||||
| @@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| fused_subgraph.output_mapping.emplace(parent_index, op_desc); | |||||
| fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -126,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status NodeItem::Init() { | |||||
| GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); | |||||
| GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); | |||||
| num_inputs = static_cast<int>(op_desc->GetInputsSize()); | |||||
| num_outputs = static_cast<int>(op_desc->GetOutputsSize()); | |||||
| void NodeItem::ResolveOptionalInputs() { | |||||
| if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { | if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { | ||||
| has_optional_inputs = true; | has_optional_inputs = true; | ||||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | ||||
| @@ -143,7 +139,18 @@ Status NodeItem::Init() { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | |||||
| Status NodeItem::InitInputsAndOutputs() { | |||||
| GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); | |||||
| GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); | |||||
| num_inputs = static_cast<int>(op_desc->GetInputsSize()); | |||||
| num_outputs = static_cast<int>(op_desc->GetOutputsSize()); | |||||
| ResolveOptionalInputs(); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status NodeItem::ResolveDynamicState() { | |||||
| (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | ||||
| GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); | GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); | ||||
| if (!is_dynamic) { | if (!is_dynamic) { | ||||
| @@ -151,38 +158,54 @@ Status NodeItem::Init() { | |||||
| "[%s] Failed to get shape status.", | "[%s] Failed to get shape status.", | ||||
| node->GetName().c_str()); | node->GetName().c_str()); | ||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| if (is_dynamic) { | |||||
| for (int i = 0; i < num_inputs; ++i) { | |||||
| const auto &input_desc = MutableInputDesc(i); | |||||
| GE_CHECK_NOTNULL(input_desc); | |||||
| if (input_desc->MutableShape().IsUnknownShape()) { | |||||
| is_input_shape_static_.push_back(false); | |||||
| } else { | |||||
| num_static_input_shapes++; | |||||
| is_input_shape_static_.push_back(true); | |||||
| GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", | |||||
| NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); | |||||
| } | |||||
| Status NodeItem::ResolveStaticInputsAndOutputs() { | |||||
| for (int i = 0; i < num_inputs; ++i) { | |||||
| const auto &input_desc = MutableInputDesc(i); | |||||
| GE_CHECK_NOTNULL(input_desc); | |||||
| if (input_desc->MutableShape().IsUnknownShape()) { | |||||
| is_input_shape_static_.push_back(false); | |||||
| } else { | |||||
| num_static_input_shapes++; | |||||
| is_input_shape_static_.push_back(true); | |||||
| GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", | |||||
| NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); | |||||
| } | } | ||||
| } | |||||
| for (int i = 0; i < num_outputs; ++i) { | |||||
| const auto &output_desc = op_desc->MutableOutputDesc(i); | |||||
| GE_CHECK_NOTNULL(output_desc); | |||||
| if (output_desc->MutableShape().IsUnknownShape()) { | |||||
| is_output_shape_static = false; | |||||
| break; | |||||
| } | |||||
| for (int i = 0; i < num_outputs; ++i) { | |||||
| const auto &output_desc = op_desc->MutableOutputDesc(i); | |||||
| GE_CHECK_NOTNULL(output_desc); | |||||
| if (output_desc->MutableShape().IsUnknownShape()) { | |||||
| is_output_shape_static = false; | |||||
| break; | |||||
| } | } | ||||
| } | |||||
| if (IsControlOp() || node_type == PARTITIONEDCALL) { | |||||
| shape_inference_type = DEPEND_COMPUTE; | |||||
| } else { | |||||
| int32_t unknown_shape_type_val = 0; | |||||
| (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||||
| shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||||
| } | |||||
| if (is_output_shape_static) { | |||||
| GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void NodeItem::ResolveUnknownShapeType() { | |||||
| if (IsControlOp() || node_type == PARTITIONEDCALL) { | |||||
| shape_inference_type = DEPEND_COMPUTE; | |||||
| } else { | |||||
| int32_t unknown_shape_type_val = 0; | |||||
| (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||||
| shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||||
| } | |||||
| } | |||||
| Status NodeItem::Init() { | |||||
| GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs()); | |||||
| GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState()); | |||||
| if (is_dynamic) { | |||||
| ResolveUnknownShapeType(); | |||||
| GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs()); | |||||
| GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); | GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); | ||||
| } | } | ||||
| @@ -103,6 +103,11 @@ struct NodeItem { | |||||
| private: | private: | ||||
| explicit NodeItem(NodePtr node); | explicit NodeItem(NodePtr node); | ||||
| Status Init(); | Status Init(); | ||||
| Status InitInputsAndOutputs(); | |||||
| void ResolveOptionalInputs(); | |||||
| Status ResolveDynamicState(); | |||||
| Status ResolveStaticInputsAndOutputs(); | |||||
| void ResolveUnknownShapeType(); | |||||
| std::vector<bool> is_input_shape_static_; | std::vector<bool> is_input_shape_static_; | ||||
| std::vector<uint32_t> input_desc_indices_; | std::vector<uint32_t> input_desc_indices_; | ||||
| @@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() { | |||||
| } | } | ||||
| Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const { | Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const { | ||||
| if (callback_fun == nullptr) { | |||||
| GELOGW("[%s] Callback is NULL", GetNodeName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); | auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); | GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); | ||||
| @@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const { | |||||
| return node_item_->NodeName().c_str(); | return node_item_->NodeName().c_str(); | ||||
| } | } | ||||
| void TaskContext::ReleaseInputsAndOutputs() { | |||||
| for (int i = 0; i < node_item_->num_inputs; ++i) { | |||||
| auto tensor = inputs_start_ + i; | |||||
| tensor->Destroy(); | |||||
| GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i); | |||||
| } | |||||
| for (int i = 0; i < node_item_->num_outputs; ++i) { | |||||
| auto tensor = outputs_start_ + i; | |||||
| tensor->Destroy(); | |||||
| GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i); | |||||
| } | |||||
| } | |||||
| void TaskContext::ReleaseInput(int index) { | void TaskContext::ReleaseInput(int index) { | ||||
| auto input_tensor = MutableInput(index); | auto input_tensor = MutableInput(index); | ||||
| if (input_tensor != nullptr) { | if (input_tensor != nullptr) { | ||||
| @@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con | |||||
| const DumpProperties &TaskContext::GetDumpProperties() const { | const DumpProperties &TaskContext::GetDumpProperties() const { | ||||
| return execution_context_->dump_properties; | return execution_context_->dump_properties; | ||||
| } | } | ||||
| bool TaskContext::NeedCallback() { | |||||
| return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -50,6 +50,8 @@ class TaskContext { | |||||
| ConstGeTensorDescPtr GetOutputDesc(int index) const; | ConstGeTensorDescPtr GetOutputDesc(int index) const; | ||||
| GeTensorDescPtr MutableInputDesc(int index) const; | GeTensorDescPtr MutableInputDesc(int index) const; | ||||
| GeTensorDescPtr MutableOutputDesc(int index) const; | GeTensorDescPtr MutableOutputDesc(int index) const; | ||||
| void ReleaseInputsAndOutputs(); | |||||
| bool NeedCallback(); | |||||
| void ReleaseInput(int index); | void ReleaseInput(int index); | ||||
| const TensorValue *GetInput(int index) const; | const TensorValue *GetInput(int index) const; | ||||
| const TensorValue *GetOutput(int index) const; | const TensorValue *GetOutput(int index) const; | ||||
| @@ -227,7 +227,6 @@ class Impl { | |||||
| ~Impl() { (void)generator_.Finalize(); }; | ~Impl() { (void)generator_.Finalize(); }; | ||||
| graphStatus CheckOptions(const std::map<std::string, std::string> &options); | graphStatus CheckOptions(const std::map<std::string, std::string> &options); | ||||
| graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs); | graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs); | ||||
| graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); | |||||
| graphStatus UpdateDataOpAttr(const Graph &graph); | graphStatus UpdateDataOpAttr(const Graph &graph); | ||||
| graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options); | graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options); | ||||
| graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options, | graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options, | ||||
| @@ -321,42 +320,6 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options | |||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { | |||||
| auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); | |||||
| GE_CHECK_NOTNULL(compute_graph); | |||||
| for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(input_node); | |||||
| ge::OpDescPtr op = input_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op); | |||||
| if (op->GetType() == DATA) { | |||||
| string data_op_name = op->GetName(); | |||||
| GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); | |||||
| ge::GeTensorDesc tensor = op->GetInputDesc(0); | |||||
| ge::GeShape data_shape = tensor.GetShape(); | |||||
| GELOGD("Data op get shape from InputDesc in ge ir graph."); | |||||
| string tmp_shape_str; | |||||
| const std::vector<int64_t> &tmp_shape = data_shape.GetDims(); | |||||
| if (tmp_shape.empty()) { | |||||
| GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); | |||||
| } else { | |||||
| tmp_shape_str += data_op_name + ":"; | |||||
| for (auto tmp_dim : tmp_shape) { | |||||
| tmp_shape_str += to_string((long)tmp_dim) + ","; | |||||
| } | |||||
| tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); | |||||
| tmp_shape_str += ";"; | |||||
| default_shape += tmp_shape_str; | |||||
| } | |||||
| GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str()); | |||||
| } | |||||
| } | |||||
| default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1)); | |||||
| GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str()); | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::string> &options) { | graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::string> &options) { | ||||
| // 1. check options | // 1. check options | ||||
| graphStatus ret = CheckOptions(options); | graphStatus ret = CheckOptions(options); | ||||
| @@ -378,13 +341,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri | |||||
| GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); | GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); | ||||
| options_[ge::ir_option::LOG_LEVEL] = log; | options_[ge::ir_option::LOG_LEVEL] = log; | ||||
| string input_shape; | |||||
| if (options_.find("input_shape") == options_.end()) { | |||||
| GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS, | |||||
| return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!"); | |||||
| } else { | |||||
| input_shape = options_["input_shape"]; | |||||
| } | |||||
| string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"]; | |||||
| string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; | string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; | ||||
| string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; | string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; | ||||
| string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() | string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() | ||||
| @@ -36,7 +36,7 @@ using Status = domi::Status; | |||||
| namespace domi { | namespace domi { | ||||
| using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( | using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( | ||||
| const google::protobuf::Message *root_proto, const std::string &graph)>; | |||||
| const google::protobuf::Message *root_proto, const std::string &graph)>; | |||||
| class ModelParser { | class ModelParser { | ||||
| public: | public: | ||||
| ModelParser() {} | ModelParser() {} | ||||
| @@ -44,19 +44,20 @@ class ModelParser { | |||||
| virtual ~ModelParser() {} | virtual ~ModelParser() {} | ||||
| /** | /** | ||||
| * @ingroup domi_omg | |||||
| * @brief Analyze network model data | |||||
| * @param [in] file Network model file path | |||||
| * @param [in|out] graph Save the network information after analysis | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| * @ingroup domi_omg | |||||
| * @brief Analyze network model data | |||||
| * @param [in] file Network model file path | |||||
| * @param [in|out] graph Save the network information after analysis | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| virtual Status Parse(const char *file, ge::Graph &graph) = 0; | virtual Status Parse(const char *file, ge::Graph &graph) = 0; | ||||
| /** | /** | ||||
| * @ingroup domi_omg | * @ingroup domi_omg | ||||
| * @brief Parse relevant data from memory and save it to graph | * @brief Parse relevant data from memory and save it to graph | ||||
| * @param [in] input Model file memory data | * @param [in] input Model file memory data | ||||
| * @param [in] size Model file memory size | |||||
| * @param [in|out] graph A graph for saving the model information after analysis | * @param [in|out] graph A graph for saving the model information after analysis | ||||
| * @return SUCCESS | * @return SUCCESS | ||||
| * @return FAILED | * @return FAILED | ||||
| @@ -64,36 +65,49 @@ class ModelParser { | |||||
| */ | */ | ||||
| virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| /** | |||||
| * @ingroup domi_omg | |||||
| * @brief Parse relevant data from memory and save it to graph | |||||
| * @param [in] data Model file memory data | |||||
| * @param [in] size Model file memory size | |||||
| * @param [in|out] graph A graph for saving the model information after analysis | |||||
| * @return SUCCESS | |||||
| * @return FAILED | |||||
| * @author | |||||
| */ | |||||
| virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; | |||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup domi_omg | |||||
| * @brief Analyze network model data | |||||
| * @param [in] proto network model | |||||
| * @param [in|out] graph Save the network information after analysis | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| * @ingroup domi_omg | |||||
| * @brief Analyze network model data | |||||
| * @param [in] proto network model | |||||
| * @param [in|out] graph Save the network information after analysis | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; | virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; | ||||
| /** | /** | ||||
| * @ingroup domi_omg | |||||
| * @brief Analyze callback model data in subgraph | |||||
| * @param [in] proto network model | |||||
| * @param [in] callback callback of subgraph | |||||
| * @param [in|out] graph Save the network information after analysis | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, | |||||
| GetGraphCallback callback, | |||||
| * @ingroup domi_omg | |||||
| * @brief Analyze callback model data in subgraph | |||||
| * @param [in] proto network model | |||||
| * @param [in] callback callback of subgraph | |||||
| * @param [in|out] graph Save the network information after analysis | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, | |||||
| ge::ComputeGraphPtr &graph) = 0; | ge::ComputeGraphPtr &graph) = 0; | ||||
| /** | /** | ||||
| * @ingroup domi_omg | |||||
| * @brief Convert model files to JSON format | |||||
| * @param [in] model_file Model file path to be converted | |||||
| * @param [out] json_file Converted JSON file path | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| * @ingroup domi_omg | |||||
| * @brief Convert model files to JSON format | |||||
| * @param [in] model_file Model file path to be converted | |||||
| * @param [out] json_file Converted JSON file path | |||||
| * @return SUCCESS | |||||
| * @return Others failed | |||||
| */ | |||||
| virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } | virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } | ||||
| /* | /* | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299 | |||||
| Subproject commit 129b50b41f79d0dfeb9fe8987b1c19c9ac51eb8b | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a | |||||
| Subproject commit e9f7d0197aba57eb5247cb1e029c10e393631c89 | |||||
| @@ -0,0 +1,60 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_

// Fixed-width integer types used by every structure below. Included here so that this header
// compiles on its own, independent of what the including file pulled in before it.
#include <stdint.h>

// Kind of kernel an operator is implemented with.
enum OpKernelType {
    TF_KERNEL,
    CPU_KERNEL
};

// Reason codes for an operator check.
enum ReturnCode {
    OP_TYPE_NOT_SUPPORT,
    FORMAT_NOT_SUPPORT,
    DTYPE_NOT_SUPPORT
};

// One-byte alignment: the structures between push and pop are laid out without padding,
// so their size is exactly the sum of their members' sizes.
#pragma pack(push, 1)

// Description of a single operator.
// NOTE(review): opType is a uint64_t paired with a length field, so it presumably carries the
// address of the op type string and opLen its length -- confirm against the producer.
struct SysOpInfo {
    uint64_t opLen;
    uint64_t opType;
    OpKernelType kernelsType;
};

// Parameter description of an operator.
// NOTE(review): dtypeList/formatList presumably carry addresses of `num` entries -- confirm.
struct OpParamInfo {
    uint64_t num;
    uint64_t dtypeList;
    uint64_t formatList;
};

// Request for an operator check.
// NOTE(review): the *List members presumably carry addresses of opListNum entries -- confirm.
struct SysOpCheckInfo {
    uint64_t opListNum;
    uint64_t offSetLen;
    uint64_t sysOpInfoList;
    uint64_t opParamInfoList;
};

// Response to an operator check.
// NOTE(review): the *List members presumably carry addresses of opListNum entries -- confirm.
struct SysOpCheckResp {
    uint64_t opListNum;
    bool isWithoutJson;
    uint64_t returnCodeList;
    uint64_t sysOpInfoList;
    uint64_t opParamInfoList;
};

#pragma pack(pop)
#endif  // AICPU_OP_TYPE_LIST_H_
| @@ -21,13 +21,15 @@ | |||||
| namespace aicpu { | namespace aicpu { | ||||
| #pragma pack(push, 1) | |||||
| struct AicpuParamHead | struct AicpuParamHead | ||||
| { | { | ||||
| uint32_t length; // Total length: include cunstom message | uint32_t length; // Total length: include cunstom message | ||||
| uint32_t ioAddrNum; // Input and output address number | uint32_t ioAddrNum; // Input and output address number | ||||
| uint32_t extInfoLength; // extInfo struct Length | uint32_t extInfoLength; // extInfo struct Length | ||||
| uint64_t extInfoAddr; // extInfo address | uint64_t extInfoAddr; // extInfo address | ||||
| } __attribute__ ((packed)); | |||||
| }; | |||||
| #pragma pack(pop) | |||||
| } // namespace aicpu | } // namespace aicpu | ||||
| @@ -13,10 +13,11 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef AICPU_ENGINE_H__ | #ifndef AICPU_ENGINE_H__ | ||||
| #define AICPU_ENGINE_H__ | #define AICPU_ENGINE_H__ | ||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| @@ -36,12 +37,23 @@ typedef enum { | |||||
| /** | /** | ||||
| * @ingroup aicpu engine | * @ingroup aicpu engine | ||||
| * @brief aeCallInterface: | * @brief aeCallInterface: | ||||
| * a interface to call a function in a op kernfel lib | |||||
| * an interface to call a function in an op kernel lib | |||||
| * @param [in] addr void *, should be STR_KERNEL * format | * @param [in] addr void *, should be STR_KERNEL * format | ||||
| * @return aeStatus_t | * @return aeStatus_t | ||||
| */ | */ | ||||
| aeStatus_t aeCallInterface(void *addr); | aeStatus_t aeCallInterface(void *addr); | ||||
| /** | |||||
| * @ingroup aicpu engine | |||||
| * @brief aeBatchLoadKernelSo: | |||||
| * an interface to load kernel .so libraries | |||||
| * @param [in] loadSoNum load so number | |||||
| * @param [in] soPaths load so paths | |||||
| * @param [in] soNames load so names | |||||
| * @return aeStatus_t | |||||
| */ | |||||
| aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -33,18 +33,22 @@ typedef enum { | |||||
| FMK_KERNEL_TYPE_RESERVED | FMK_KERNEL_TYPE_RESERVED | ||||
| } FwkkernelType_t; | } FwkkernelType_t; | ||||
| #pragma pack(push, 1) | |||||
| typedef struct { | typedef struct { | ||||
| uint32_t fwkKernelType; // FwkkernelType_t | uint32_t fwkKernelType; // FwkkernelType_t | ||||
| union { | union { | ||||
| ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; | ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; | ||||
| } fwkKernelBase; | } fwkKernelBase; | ||||
| } __attribute__((packed)) STR_FWK_OP_KERNEL; | |||||
| } STR_FWK_OP_KERNEL; | |||||
| #pragma pack(pop) | |||||
| #pragma pack(push, 1) | |||||
| struct SessionInfo { | struct SessionInfo { | ||||
| uint64_t sessionId; | uint64_t sessionId; | ||||
| uint64_t kernelId; | uint64_t kernelId; | ||||
| bool sessFlag; | bool sessFlag; | ||||
| } __attribute__((packed)); | |||||
| }; | |||||
| #pragma pack(pop) | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType { | |||||
| FWK_ADPT_UPDATE_INPUT_OUTPUT | FWK_ADPT_UPDATE_INPUT_OUTPUT | ||||
| }; | }; | ||||
| #pragma pack(push, 1) | |||||
| // API Parameter Structure | // API Parameter Structure | ||||
| struct StrFWKKernel { | struct StrFWKKernel { | ||||
| FWKOperateType opType; | FWKOperateType opType; | ||||
| @@ -89,31 +90,39 @@ struct StrFWKKernel { | |||||
| uint64_t extInfoLen; // extend info total length | uint64_t extInfoLen; // extend info total length | ||||
| uint64_t extInfoAddr; // extend info addr, ExtInfo structure | uint64_t extInfoAddr; // extend info addr, ExtInfo structure | ||||
| } __attribute__((packed)); | |||||
| }; | |||||
| #pragma pack(pop) | |||||
| typedef StrFWKKernel FWKOperateParam; | typedef StrFWKKernel FWKOperateParam; | ||||
| // Extent info ShapeAndType | // Extent info ShapeAndType | ||||
| const uint32_t kMaxShapeDims = 8; | const uint32_t kMaxShapeDims = 8; | ||||
| #pragma pack(push, 1) | |||||
| struct ShapeAndType { | struct ShapeAndType { | ||||
| int32_t type; | int32_t type; | ||||
| int64_t dims[kMaxShapeDims]; | int64_t dims[kMaxShapeDims]; | ||||
| } __attribute__((packed)); | |||||
| }; | |||||
| #pragma pack(pop) | |||||
| // Extend info structure for extInfoAddr | // Extend info structure for extInfoAddr | ||||
| const uint32_t kExtInfoHeadSize = 8; | const uint32_t kExtInfoHeadSize = 8; | ||||
| #pragma pack(push, 1) | |||||
| struct ExtInfo { | struct ExtInfo { | ||||
| int32_t infoType; // extend type | int32_t infoType; // extend type | ||||
| uint32_t infoLen; // length for infoMsg | uint32_t infoLen; // length for infoMsg | ||||
| char infoMsg[0]; // extend value | char infoMsg[0]; // extend value | ||||
| } __attribute__((packed)); | |||||
| }; | |||||
| #pragma pack(pop) | |||||
| #pragma pack(push, 1) | |||||
| struct ResultSummary { | struct ResultSummary { | ||||
| uint64_t shape_data_ptr; // shape data addr, need convert to void* | uint64_t shape_data_ptr; // shape data addr, need convert to void* | ||||
| uint64_t shape_data_size; // num of dims | uint64_t shape_data_size; // num of dims | ||||
| uint64_t raw_data_ptr; // raw data addr, need convert to void* | uint64_t raw_data_ptr; // raw data addr, need convert to void* | ||||
| uint64_t raw_data_size; // size of raw data | uint64_t raw_data_size; // size of raw data | ||||
| } __attribute__((packed)); | |||||
| }; | |||||
| #pragma pack(pop) | |||||
| } // end namespace FWKAdapter | } // end namespace FWKAdapter | ||||
| } // namespace aicpu | } // namespace aicpu | ||||
| @@ -22,7 +22,8 @@ | |||||
| #ifndef HCCL_BASE_H_ | #ifndef HCCL_BASE_H_ | ||||
| #define HCCL_BASE_H_ | #define HCCL_BASE_H_ | ||||
| #include <hccl/hccl_types.h> | |||||
| #include <string> | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| @@ -95,6 +96,33 @@ typedef void *rtStream_t; | |||||
| */ | */ | ||||
| typedef void *rtModel_t; | typedef void *rtModel_t; | ||||
| struct HcomOperation { | |||||
| std::string hcclType; | |||||
| void *inputPtr; | |||||
| void *outputPtr; | |||||
| u64 count; | |||||
| HcclDataType dataType; | |||||
| HcclReduceOp opType; | |||||
| u32 root; | |||||
| HcomOperation() | |||||
| { | |||||
| inputPtr = nullptr; | |||||
| outputPtr = nullptr; | |||||
| count = 0; | |||||
| dataType = HCCL_DATA_TYPE_RESERVED; | |||||
| opType = HCCL_REDUCE_RESERVED; | |||||
| root = 0; | |||||
| } | |||||
| }; | |||||
| struct HcomRemoteAccessAddrInfo { | |||||
| u32 remotetRankID; | |||||
| u64 remoteAddr; // host embedding table address | |||||
| u64 localAddr; // device HBM address | |||||
| u64 length; // Memory Length in Bytes | |||||
| }; | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| @@ -24,145 +24,96 @@ | |||||
| #include <hccl/base.h> | #include <hccl/base.h> | ||||
| #include <hccl/hccl_types.h> | #include <hccl/hccl_types.h> | ||||
| #include <functional> | |||||
| #include <vector> | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| /** | |||||
| * @brief Initialize HCOM. | |||||
| * | |||||
| * @param rank_table A string identifying the rank table file path, include file name. | |||||
| * @param identify A string identifying the identify for the rank. | |||||
| * @return HcclResult | |||||
| * @see hcom_destroy() | |||||
| */ | |||||
| extern HcclResult hcom_init(const char *rank_table, const char *identify); | |||||
| /** | |||||
| * @brief Destroy HCOM | |||||
| * | |||||
| * @return HcclResult | |||||
| * @see hcom_init() | |||||
| */ | |||||
| extern HcclResult hcom_destroy(void); | |||||
| /** | |||||
| * @brief Bind the model. | |||||
| * | |||||
| * @param model A pointer identifying the model information. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| * @see hcom_unbind_model() | |||||
| */ | |||||
| extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream); | |||||
| /** | /** | ||||
| * @brief Unbind the model. | |||||
| * @brief Get the rank number in the group. | |||||
| * | * | ||||
| * @param model An pointer identifying the model information. | |||||
| * @return HcclResult | |||||
| * @see hcom_unbind_model() | |||||
| * @param group A string identifying the group name. | |||||
| * @param rankSize A pointer identifying the rank number. | |||||
| * @return HcclResult | |||||
| */ | */ | ||||
| extern HcclResult hcom_unbind_model(rtModel_t model); | |||||
| HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); | |||||
| /** | /** | ||||
| * @brief All-gather operator. | |||||
| * @brief Get the rank number in the group. | |||||
| * | * | ||||
| * @param tag A string identifying the tag of the operator. | |||||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||||
| * @param inputCount An integer(u64) identifying the number of the input data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param group A string identifying the group name of ranks participating in the operator. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @param group A string identifying the group name. | |||||
| * @param rankSize A pointer identifying the rank number. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount, | |||||
| HcclDataType dataType, const char *group, rtStream_t stream); | |||||
| HcclResult HcomGetRankSize(const char *group, u32 *rankSize); | |||||
| /** | /** | ||||
| * @brief All-reduce operator. | |||||
| * @brief Get the rank number of this rank's server within the group. | |||||
| * | * | ||||
| * @param tag A string identifying the tag of the operator. | |||||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the output data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param group A string identifying the group name of ranks participating in the operator. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @param group A string identifying the group name. | |||||
| * @param localRankSize A pointer identifying the rank number. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count, | |||||
| HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); | |||||
| HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); | |||||
| /** | /** | ||||
| * @brief Broadcast operator. | |||||
| * @brief Get the rank number of this rank's server within the group. | |||||
| * | * | ||||
| * @param tag A string identifying the tag of the operator. | |||||
| * @param ptr A pointer identifying the data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param root An integer(u32) identifying the the root rank in the operator. | |||||
| * @param group A string identifying the group name of ranks participating in the operator. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @param group A string identifying the group name. | |||||
| * @param localRankSize A pointer identifying the rank number. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root, | |||||
| const char *group, rtStream_t stream); | |||||
| HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); | |||||
| /** | /** | ||||
| * @brief Reduce-scatter operator. | |||||
| * @brief Get the rank id of this rank. | |||||
| * | * | ||||
| * @param tag A string identifying the tag of the operator. | |||||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param group A string identifying the group name of ranks participating in the operator. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @param group A string identifying the group name. | |||||
| * @param rankId A pointer identifying the rank id. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count, | |||||
| HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); | |||||
| HcclResult hcom_get_rank_id(const char *group, u32 *rankId); | |||||
| /** | /** | ||||
| * @brief Get the rank number in the group. | |||||
| * @brief Get the rank id of this rank. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | * @param group A string identifying the group name. | ||||
| * @param rankSize A pointer identifying the rank number. | |||||
| * @param rankId A pointer identifying the rank id. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); | |||||
| HcclResult HcomGetRankId(const char *group, u32 *rankId); | |||||
| /** | /** | ||||
| * @brief Get the rank number of this rank's server within the group. | |||||
| * @brief Get the local rank id of this rank's server within the group. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | * @param group A string identifying the group name. | ||||
| * @param localRankSize A pointer identifying the rank number. | |||||
| * @param localRankId A pointer identifying the local rank id. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); | |||||
| HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||||
| /** | /** | ||||
| * @brief Get the rank id of this rank. | |||||
| * @brief Get the local rank id of this rank's server within the group. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | * @param group A string identifying the group name. | ||||
| * @param rankId A pointer identifying the rank id. | |||||
| * @param localRankId A pointer identifying the local rank id. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_get_rank_id(const char *group, u32 *rankId); | |||||
| HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); | |||||
| /** | /** | ||||
| * @brief Get the local rank id of this rank's server within the group. | |||||
| * @brief Get the world rank id according to the group rank id. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | * @param group A string identifying the group name. | ||||
| * @param localRankId A pointer identifying the local rank id. | |||||
| * @param groupRank An integer(u32) identifying the group rank id. | |||||
| * @param worldRank A pointer identifying the world rank id. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||||
| HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); | |||||
| /** | /** | ||||
| * @brief Get the world rank id according to the group rank id. | * @brief Get the world rank id according to the group rank id. | ||||
| @@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||||
| * @param worldRank A pointer identifying the world rank id. | * @param worldRank A pointer identifying the world rank id. | ||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); | |||||
| HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); | |||||
| /** | /** | ||||
| * @brief Get the group rank id according to the world rank id. | * @brief Get the group rank id according to the world rank id. | ||||
| @@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, | |||||
| */ | */ | ||||
| HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); | HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); | ||||
| /** | |||||
| * @brief Get the group rank id according to the world rank id. | |||||
| * | |||||
| * @param worldRank An integer(u32) identifying the world rank id. | |||||
| * @param group A string identifying the group name. | |||||
| * @param groupRank A pointer identifying the group rank id. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); | |||||
| /** | /** | ||||
| * @brief Create group. | * @brief Create group. | ||||
| * | * | ||||
| @@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, | |||||
| HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); | HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); | ||||
| /** | /** | ||||
| * @brief Destroy group | |||||
| * @brief Create group. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | * @param group A string identifying the group name. | ||||
| * @param rankNum An integer(u32) identifying the number of ranks in the group. | |||||
| * @param rankIds A list identifying the ranks in the group. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_destroy_group(const char *group); | |||||
| HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); | |||||
| /** | /** | ||||
| * @brief Send operator. | |||||
| * @brief Destroy group | |||||
| * | * | ||||
| * @param tag A string identifying the tag of the operator. | |||||
| * @param inputPtr A pointer identifying the input data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param destRank An integer identifying the destination rank. | |||||
| * @param srTag An integer identifying the send/recv message tag. | |||||
| * The message will be send by the receive operator with the same "sr_tag". | |||||
| * @param group A string identifying the group name of ranks participating in the operator. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @param group A string identifying the group name. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType, | |||||
| u32 destRank, u32 srTag, const char *group, rtStream_t stream); | |||||
| HcclResult hcom_destroy_group(const char *group); | |||||
| /** | /** | ||||
| * @brief Receive operator. | |||||
| * @brief Destroy group | |||||
| * | * | ||||
| * @param tag A string identifying the tag of the operator. | |||||
| * @param outputPtr A pointer identifying the output data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param srcRank An integer identifying the source rank. | |||||
| * @param srTag An integer identifying the send/recv message tag. | |||||
| * The message will be send by the send operator with the same "sr_tag". | |||||
| * @param group A string identifying the group name of ranks participating in the operator. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @param group A string identifying the group name. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType, | |||||
| u32 srcRank, u32 srTag, const char *group, rtStream_t stream); | |||||
| HcclResult HcomDestroyGroup(const char *group); | |||||
| /** | /** | ||||
| * @brief Get the gradient split strategy with in the group. | |||||
| * @brief Set the gradient split strategy within the group, according to gradient index. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | * @param group A string identifying the group name. | ||||
| * @param feature A pointer identifying the feature of the model. | |||||
| * @param maxSegmentNum An integer(u32) identifying the max segments of gradients. | |||||
| * @param segmentNum A pointer identifying the segments number of gradients. | |||||
| * @param segmentIdx A list identifying the index of end gradient in each segment. | |||||
| * @return HcclResult | |||||
| * @param segmentNum An integer(u32) identifying the segments number of gradients. | |||||
| * @param IdxList A list identifying the index of end gradient in each segment. | |||||
| * @return HcclResult | |||||
| */ | */ | ||||
| HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum, | |||||
| u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE, | |||||
| OriginalGraphShapeType shapeType = KNOWN_SHAPE); | |||||
| extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); | |||||
| /** | /** | ||||
| * @brief Set the gradient split strategy within the group, according to gradient index. | * @brief Set the gradient split strategy within the group, according to gradient index. | ||||
| @@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature | |||||
| * @param IdxList A list identifying the index of end gradient in each segment. | * @param IdxList A list identifying the index of end gradient in each segment. | ||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); | |||||
| extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); | |||||
| /** | /** | ||||
| * @brief Set the gradient split strategy within the group, according to gradient data size. | * @brief Set the gradient split strategy within the group, according to gradient data size. | ||||
| @@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen | |||||
| */ | */ | ||||
| extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); | extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); | ||||
| /** | |||||
| * @brief Set the gradient split strategy within the group, according to gradient data size. | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @param segmentNum An integer(u32) identifying the segments number of gradients. | |||||
| * @param sizeList A list identifying the percent of each segment. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||||
| /** | /** | ||||
| * @brief Register memories and init resources for remote access. | * @brief Register memories and init resources for remote access. | ||||
| * | * | ||||
| @@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment | |||||
| */ | */ | ||||
| extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); | extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); | ||||
| /** | |||||
| * @brief Register memories and init resources for remote access. | |||||
| * | |||||
| * @param addrList memory addresses for remote access. | |||||
| * @param count number of remote memory addresses. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); | |||||
| HcclResult HcomExecInitialize(); | |||||
| HcclResult HcomExecFinalize(); | |||||
| HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); | |||||
| HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||||
| const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | |||||
| std::function<void(HcclResult status)> callback); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -215,6 +215,10 @@ typedef struct { | |||||
| #define S_IWRITE S_IWUSR | #define S_IWRITE S_IWUSR | ||||
| #endif | #endif | ||||
| #define mm_no_argument no_argument | |||||
| #define mm_required_argument required_argument | |||||
| #define mm_optional_argument optional_argument | |||||
| #define M_FILE_RDONLY O_RDONLY | #define M_FILE_RDONLY O_RDONLY | ||||
| #define M_FILE_WRONLY O_WRONLY | #define M_FILE_WRONLY O_WRONLY | ||||
| #define M_FILE_RDWR O_RDWR | #define M_FILE_RDWR O_RDWR | ||||
| @@ -227,6 +231,7 @@ typedef struct { | |||||
| #define M_BINARY O_RDONLY | #define M_BINARY O_RDONLY | ||||
| #define M_TRUNC O_TRUNC | #define M_TRUNC O_TRUNC | ||||
| #define M_IRWXU S_IRWXU | #define M_IRWXU S_IRWXU | ||||
| #define M_APPEND O_APPEND | |||||
| #define M_IN_CREATE IN_CREATE | #define M_IN_CREATE IN_CREATE | ||||
| #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE | #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE | ||||
| @@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); | |||||
| MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); | MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); | ||||
| MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); | MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, | MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, | ||||
| VOID *sendMsg, | |||||
| INT32 sendLen, | |||||
| UINT32 sendFlag, | |||||
| const mmSockAddr* addr, | |||||
| INT32 tolen); | |||||
| VOID *sendMsg, | |||||
| INT32 sendLen, | |||||
| UINT32 sendFlag, | |||||
| const mmSockAddr* addr, | |||||
| INT32 tolen); | |||||
| MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, | MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, | ||||
| VOID *recvBuf, | |||||
| mmSize recvLen, | |||||
| UINT32 recvFlag, | |||||
| mmSockAddr* addr, | |||||
| mmSocklen_t *FromLen); | |||||
| VOID *recvBuf, | |||||
| mmSize recvLen, | |||||
| UINT32 recvFlag, | |||||
| mmSockAddr* addr, | |||||
| mmSocklen_t *FromLen); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); | MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); | MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); | ||||
| MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); | MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); | ||||
| @@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); | |||||
| MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); | MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); | MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); | ||||
| MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); | MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, | |||||
| mmUserBlock_t *timerBlock, | |||||
| UINT milliSecond, | |||||
| UINT period); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); | MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); | MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); | MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); | ||||
| @@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); | |||||
| // Poll related interface | // Poll related interface | ||||
| MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); | MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); | ||||
| MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); | MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, | |||||
| pmmPollData polledData, mmPollBack pollBack); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, | |||||
| INT32 fdCount, | |||||
| INT32 timeout, | |||||
| mmCompletionHandle handleIOCP, | |||||
| pmmPollData polledData, | |||||
| mmPollBack pollBack); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); | MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); | ||||
| MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); | MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); | MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); | ||||
| @@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); | |||||
| MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); | MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); | ||||
| MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); | MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); | MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts, | |||||
| INT32 *longIndex); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, | |||||
| char *const *argv, | |||||
| const char *opts, | |||||
| const mmStructOption *longOpts, | |||||
| INT32 *longIndex); | |||||
| MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); | MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); | MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); | ||||
| @@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); | MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); | MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); | MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, | |||||
| mmProcess *id); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, | |||||
| const mmThreadAttr *threadAttr); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, | |||||
| const mmArgvEnv *env, | |||||
| const char *stdoutRedirectFile, | |||||
| mmProcess *id); | |||||
| MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, | |||||
| const mmUserBlock_t *funcBlock, | |||||
| const mmThreadAttr *threadAttr); | |||||
| MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); | MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); | ||||
| MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); | MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); | ||||
| MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); | MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); | ||||
| @@ -237,6 +237,11 @@ typedef struct { | |||||
| } mmThreadAttr; | } mmThreadAttr; | ||||
| typedef VOID (*mmPf)(VOID); | typedef VOID (*mmPf)(VOID); | ||||
| #define mm_no_argument 0 | |||||
| #define mm_required_argument 1 | |||||
| #define mm_optional_argument 2 | |||||
| #define M_FILE_RDONLY GENERIC_READ | #define M_FILE_RDONLY GENERIC_READ | ||||
| #define M_FILE_WRONLY GENERIC_WRITE | #define M_FILE_WRONLY GENERIC_WRITE | ||||
| #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) | #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) | ||||
| @@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID); | |||||
| #define M_CREAT _O_CREAT | #define M_CREAT _O_CREAT | ||||
| #define M_BINARY _O_BINARY | #define M_BINARY _O_BINARY | ||||
| #define M_TRUNC _O_TRUNC | #define M_TRUNC _O_TRUNC | ||||
| #define M_APPEND _O_APPEND | |||||
| #define M_IREAD _S_IREAD | #define M_IREAD _S_IREAD | ||||
| #define M_IRUSR _S_IREAD | #define M_IRUSR _S_IREAD | ||||
| @@ -18,6 +18,7 @@ | |||||
| #define __CCE_RUNTIME_BASE_H__ | #define __CCE_RUNTIME_BASE_H__ | ||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "toolchain/prof_callback.h" | |||||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | ||||
| extern "C" { | extern "C" { | ||||
| @@ -32,309 +33,8 @@ extern "C" { | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| /** | |||||
| * @ingroup dvrt_base | |||||
| * @brief runtime error numbers. | |||||
| */ | |||||
| typedef enum tagRtError { | |||||
| RT_ERROR_NONE = 0x0, // success | |||||
| RT_ERROR_DEVICE_BASE = 0x07010000, | |||||
| RT_ERROR_DEVICE_NULL, | |||||
| RT_ERROR_DEVICE_NEW, | |||||
| RT_ERROR_DEVICE_ID, | |||||
| RT_ERROR_DEVICE_CHIPTYPE, | |||||
| RT_ERROR_DEVICE_DEPLOY, | |||||
| RT_ERROR_DEVICE_RETAIN, | |||||
| RT_ERROR_DEVICE_PLATFORM, | |||||
| RT_ERROR_DEVICE_LOADER, | |||||
| RT_ERROR_DEVICE_LIMIT, | |||||
| RT_ERROR_DEVICE_PROC_HANG_OUT, | |||||
| RT_ERROR_DEVICE_POWER_UP_FAIL, | |||||
| RT_ERROR_DEVICE_POWER_DOWN_FAIL, | |||||
| RT_ERROR_DEVICE_INVALID, | |||||
| RT_ERROR_DRV_BASE = 0x07020000, | |||||
| RT_ERROR_DRV_NULL, | |||||
| RT_ERROR_DRV_NEW, | |||||
| RT_ERROR_DRV_MEMORY, | |||||
| RT_ERROR_DRV_INPUT, | |||||
| RT_ERROR_DRV_PTRNULL, | |||||
| RT_ERROR_DRV_OPEN_AICPU, | |||||
| RT_ERROR_DRV_CLOSE_AICPU, | |||||
| RT_ERROR_DRV_SYM_AICPU, | |||||
| RT_ERROR_DRV_OPEN_TSD, | |||||
| RT_ERROR_DRV_CLOSE_TSD, | |||||
| RT_ERROR_DRV_SYM_TSD, | |||||
| RT_ERROR_DRV_SOURCE, | |||||
| RT_ERROR_DRV_REPORT, | |||||
| RT_ERROR_DRV_COMMAND, | |||||
| RT_ERROR_DRV_OCCUPY, | |||||
| RT_ERROR_DRV_ERR, | |||||
| RT_ERROR_STREAM_BASE = 0x07030000, | |||||
| RT_ERROR_STREAM_NULL, | |||||
| RT_ERROR_STREAM_NEW, | |||||
| RT_ERROR_STREAM_CONTEXT, | |||||
| RT_ERROR_STREAM_INVALID, | |||||
| RT_ERROR_STREAM_MODEL, | |||||
| RT_ERROR_STREAM_FUSION, | |||||
| RT_ERROR_STREAM_FULL, | |||||
| RT_ERROR_STREAM_EMPTY, | |||||
| RT_ERROR_STREAM_NOT_COMPLETE, | |||||
| RT_ERROR_STREAM_SYNC, | |||||
| RT_ERROR_STREAM_NO_CB_REG, | |||||
| RT_ERROR_STREAM_DUPLICATE, | |||||
| RT_ERROR_STREAM_NOT_EXIST, | |||||
| RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE, | |||||
| RT_ERROR_SQID_FULL, | |||||
| RT_ERROR_MODEL_BASE = 0x07040000, | |||||
| RT_ERROR_MODEL_NULL, | |||||
| RT_ERROR_MODEL_NEW, | |||||
| RT_ERROR_MODEL_CONTEXT, | |||||
| RT_ERROR_MODEL_ENDGRAPH, | |||||
| RT_ERROR_MODEL_STREAM, | |||||
| RT_ERROR_MODEL_EXCUTOR, | |||||
| RT_ERROR_MODEL_SETUP, | |||||
| RT_ERROR_MODEL_ID, | |||||
| RT_ERROR_MODEL_EXE_FAILED, | |||||
| RT_ERROR_END_OF_SEQUENCE, // end of sequence | |||||
| RT_ERROR_MODEL_EXIT, | |||||
| RT_ERROR_MODEL_EXIT_STREAM_UNBIND, | |||||
| RT_ERROR_MODEL_EXIT_ID, | |||||
| RT_ERROR_MODEL_ABORT_NORMAL, | |||||
| RT_ERROR_EVENT_BASE = 0x07050000, | |||||
| RT_ERROR_EVENT_NULL, | |||||
| RT_ERROR_EVENT_NEW, | |||||
| RT_ERROR_EVENT_RECORDER_NULL, | |||||
| RT_ERROR_EVENT_TIMESTAMP_INVALID, | |||||
| RT_ERROR_EVENT_TIMESTAMP_REVERSAL, | |||||
| RT_ERROR_EVENT_NOT_COMPLETE, | |||||
| RT_ERROR_NOTIFY_BASE = 0x07060000, | |||||
| RT_ERROR_NOTIFY_NULL, | |||||
| RT_ERROR_NOTIFY_NEW, | |||||
| RT_ERROR_NOTIFY_TYPE, | |||||
| RT_ERROR_NOTIFY_NOT_COMPLETE, | |||||
| RT_ERROR_CONTEXT_BASE = 0x07070000, | |||||
| RT_ERROR_CONTEXT_NULL, | |||||
| RT_ERROR_CONTEXT_NEW, | |||||
| RT_ERROR_CONTEXT_DEL, | |||||
| RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL, | |||||
| RT_ERROR_CONTEXT_ONLINE_STREAM_NULL, | |||||
| RT_ERROR_KERNEL_BASE = 0x07080000, | |||||
| RT_ERROR_KERNEL_NULL, | |||||
| RT_ERROR_KERNEL_NEW, | |||||
| RT_ERROR_KERNEL_LOOKUP, | |||||
| RT_ERROR_KERNEL_NAME, | |||||
| RT_ERROR_KERNEL_TYPE, | |||||
| RT_ERROR_KERNEL_OFFSET, | |||||
| RT_ERROR_KERNEL_DUPLICATE, | |||||
| RT_ERROR_KERNEL_UNREGISTERING, | |||||
| RT_ERROR_PROGRAM_BASE = 0x07090000, | |||||
| RT_ERROR_PROGRAM_NULL, | |||||
| RT_ERROR_PROGRAM_NEW, | |||||
| RT_ERROR_PROGRAM_DATA, | |||||
| RT_ERROR_PROGRAM_SIZE, | |||||
| RT_ERROR_PROGRAM_MEM_TYPE, | |||||
| RT_ERROR_PROGRAM_MACHINE_TYPE, | |||||
| RT_ERROR_PROGRAM_USEOUT, | |||||
| RT_ERROR_MODULE_BASE = 0x070a0000, | |||||
| RT_ERROR_MODULE_NULL, | |||||
| RT_ERROR_MODULE_NEW, | |||||
| RT_ERROR_INSTANCE_BASE = 0x070b0000, | |||||
| RT_ERROR_INSTANCE_NULL, | |||||
| RT_ERROR_INSTANCE_NEW, | |||||
| RT_ERROR_INSTANCE_VERSION, | |||||
| RT_ERROR_API_BASE = 0x070c0000, | |||||
| RT_ERROR_API_NULL, | |||||
| RT_ERROR_API_NEW, | |||||
| RT_ERROR_DATADUMP_BASE = 0x070d0000, | |||||
| RT_ERROR_DATADUMP_NULL, | |||||
| RT_ERROR_DATADUMP_NEW, | |||||
| RT_ERROR_DATADUMP_TIME, | |||||
| RT_ERROR_DATADUMP_FILE, | |||||
| RT_ERROR_DATADUMP_ADDRESS, | |||||
| RT_ERROR_DATADUMP_LOAD_FAILED, | |||||
| RT_ERROR_DUMP_ADDR_SET_FAILED, | |||||
| RT_ERROR_PROF_BASE = 0x070e0000, | |||||
| RT_ERROR_PROF_NULL, | |||||
| RT_ERROR_PROF_NEW, | |||||
| RT_ERROR_PROF_START, | |||||
| RT_ERROR_PROF_DEVICE_MEM, | |||||
| RT_ERROR_PROF_HOST_MEM, | |||||
| RT_ERROR_PROF_SET_DIR, | |||||
| RT_ERROR_PROF_OPER, | |||||
| RT_ERROR_PROF_FULL, | |||||
| RT_ERROR_PROF_NAME, | |||||
| RT_ERROR_PCTRACE_BASE = 0x070f0000, | |||||
| RT_ERROR_PCTRACE_NULL, | |||||
| RT_ERROR_PCTRACE_NEW, | |||||
| RT_ERROR_PCTRACE_TIME, | |||||
| RT_ERROR_PCTRACE_FILE, | |||||
| RT_ERROR_TASK_BASE = 0x07100000, | |||||
| RT_ERROR_TASK_NULL, | |||||
| RT_ERROR_TASK_NEW, | |||||
| RT_ERROR_TASK_TYPE, | |||||
| RT_ERROR_TASK_ALLOCATOR, | |||||
| RT_ERROR_COMMON_BASE = 0x07110000, | |||||
| RT_ERROR_INVALID_VALUE, // RT_ERROR_INPUT_INVALID | |||||
| RT_ERROR_MEMORY_ADDRESS_UNALIGNED, | |||||
| RT_ERROR_SEC_HANDLE, | |||||
| RT_ERROR_OS_HANDLE, | |||||
| RT_ERROR_MUTEX_LOCK, | |||||
| RT_ERROR_MUTEX_UNLOCK, | |||||
| RT_ERROR_CALLOC, | |||||
| RT_ERROR_POOL_RESOURCE, | |||||
| RT_ERROR_TRANS_ARGS, | |||||
| RT_ERROR_METADATA, | |||||
| RT_ERROR_LOST_HEARTBEAT, | |||||
| RT_ERROR_REPORT_TIMEOUT, | |||||
| RT_ERROR_FEATURE_NOT_SUPPROT, | |||||
| RT_ERROR_MEMORY_ALLOCATION, | |||||
| RT_ERROR_MEMORY_FREE, | |||||
| RT_ERROR_INVALID_MEMORY_TYPE, | |||||
| RT_ERROR_DEBUG_BASE = 0x07120000, | |||||
| RT_ERROR_DEBUG_NULL, | |||||
| RT_ERROR_DEBUG_NEW, | |||||
| RT_ERROR_DEBUG_SIGNAL, | |||||
| RT_ERROR_DEBUG_OPEN, | |||||
| RT_ERROR_DEBUG_WRITE, | |||||
| RT_ERROR_DEBUG_REGISTER_FAILED, | |||||
| RT_ERROR_DEBUG_UNREGISTER_FAILED, | |||||
| RT_ERROR_ENGINE_BASE = 0x07130000, | |||||
| RT_ERROR_ENGINE_NULL, | |||||
| RT_ERROR_ENGINE_NEW, | |||||
| RT_ERROR_ENGINE_THREAD, | |||||
| RT_ERROR_LABEL_BASE = 0x07140000, | |||||
| RT_ERROR_LABEL_NULL, | |||||
| RT_ERROR_LABEL_NEW, | |||||
| RT_ERROR_LABEL_CONTEXT, | |||||
| RT_ERROR_LABEL_STREAM, | |||||
| RT_ERROR_LABEL_MODEL, | |||||
| RT_ERROR_LABEL_ALLOCATOR, | |||||
| RT_ERROR_LABEL_FREE, | |||||
| RT_ERROR_LABEL_SET, | |||||
| RT_ERROR_LABEL_ID, | |||||
| RT_ERROR_TSFW_BASE = 0x07150000, | |||||
| RT_ERROR_TSFW_UNKNOWN, | |||||
| RT_ERROR_TSFW_NULL_PTR, | |||||
| RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID, | |||||
| RT_ERROR_TSFW_ILLEGAL_PARAM, | |||||
| RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL, | |||||
| RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY, | |||||
| RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL, | |||||
| RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY, | |||||
| RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED, | |||||
| RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED, | |||||
| RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE, | |||||
| RT_ERROR_TSFW_L2_MALLOC_FAILED, | |||||
| RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED, | |||||
| RT_ERROR_TSFW_MEMCPY_OP_FAILED, | |||||
| RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED, | |||||
| RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE, | |||||
| RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL, | |||||
| RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY, | |||||
| RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED, | |||||
| RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE, | |||||
| RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED, | |||||
| RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND, | |||||
| RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED, | |||||
| RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED, | |||||
| RT_ERROR_TSFW_SQNODE_NOT_ENOUGH, | |||||
| RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE, | |||||
| RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE, | |||||
| RT_ERROR_TSFW_CQ_REPORT_FAILED, | |||||
| RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS, | |||||
| RT_ERROR_TSFW_SYS_DMA_RESET_FAILED, | |||||
| RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED, | |||||
| RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED, | |||||
| RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL, | |||||
| RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY, | |||||
| RT_ERROR_TSFW_TIMER_EVENT_FULL, | |||||
| RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH, | |||||
| RT_ERROR_TSFW_AICORE_TIMEOUT, | |||||
| RT_ERROR_TSFW_AICORE_EXCEPTION, | |||||
| RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION, | |||||
| RT_ERROR_TSFW_AICPU_TIMEOUT, | |||||
| RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL, | |||||
| RT_ERROR_TSFW_AICPU_EXCEPTION, | |||||
| RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR, | |||||
| RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR, | |||||
| RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM, | |||||
| RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT, | |||||
| RT_ERROR_TSFW_DEBUG_INVALID_SQCQ, | |||||
| RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE, | |||||
| RT_ERROR_TSFW_DEBUG_CMD_PROCESS, | |||||
| RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS, | |||||
| RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS, | |||||
| RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS, | |||||
| RT_ERROR_TSFW_DEBUG_TASK_EMPTY, | |||||
| RT_ERROR_TSFW_DEBUG_TASK_FULL, | |||||
| RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST, | |||||
| RT_ERROR_TSFW_DEBUG_AI_CORE_FULL, | |||||
| RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST, | |||||
| RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION, | |||||
| RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT, | |||||
| RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL, | |||||
| RT_ERROR_TSFW_DEBUG_READ_ERROR, | |||||
| RT_ERROR_TSFW_DEBUG_WRITE_FAIL, | |||||
| RT_ERROR_TSFW_QUEUE_FULL, | |||||
| RT_ERROR_TSFW_QUEUE_EMPTY, | |||||
| RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL, | |||||
| RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH, | |||||
| RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE, | |||||
| RT_ERROR_TSFW_INVLD_CPY_DIR, | |||||
| RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES, | |||||
| RT_ERROR_TSFW_PCIE_DMA_CPY_ERR, | |||||
| RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY, | |||||
| RT_ERROR_TSFW_PROFILE_BUFF_FULL, | |||||
| RT_ERROR_TSFW_PROFILE_MODE_CONFLICT, | |||||
| RT_ERROR_TSFW_PROFILE_OTHER_PID_ON, | |||||
| RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED, | |||||
| RT_ERROR_TSFW_TSCPU_CLOSE_FAILED, | |||||
| RT_ERROR_TSFW_EXPECT_FAIL, | |||||
| RT_ERROR_TSFW_REPEAT_MODEL_STREAM, | |||||
| RT_ERROR_TSFW_STREAM_MODEL_UNBIND, | |||||
| RT_ERROR_TSFW_MODEL_EXE_FAILED, | |||||
| RT_ERROR_TSFW_IPC_SEND_FAILED, | |||||
| RT_ERROR_TSFW_IPC_PROC_REG_FAILED, | |||||
| RT_ERROR_TSFW_STREAM_FULL, | |||||
| RT_ERROR_TSFW_END_OF_SEQUENCE, | |||||
| RT_ERROR_TSFW_SWITCH_STREAM_LABEL, | |||||
| RT_ERROR_TSFW_TRANS_SQE_FAIL, | |||||
| RT_ERROR_TSFW_RESERVED, | |||||
| RT_ERROR_SUBSCRIBE_BASE = 0x07160000, | |||||
| RT_ERROR_SUBSCRIBE_NULL, | |||||
| RT_ERROR_SUBSCRIBE_NEW, | |||||
| RT_ERROR_SUBSCRIBE_STREAM, | |||||
| RT_ERROR_SUBSCRIBE_THREAD, | |||||
| RT_ERROR_SUBSCRIBE_GROUP, | |||||
| RT_ERROR_GROUP_BASE = 0x07170000, | |||||
| RT_ERROR_GROUP_NOT_SET, | |||||
| RT_ERROR_GROUP_NOT_CREATE, | |||||
| RT_ERROR_RESERVED = 0x07ff0000, | |||||
| }rtError_t; | |||||
| typedef int32_t rtError_t; | |||||
| static const int32_t RT_ERROR_NONE = 0; // success | |||||
| /** | /** | ||||
| * @ingroup dvrt_base | * @ingroup dvrt_base | ||||
| @@ -387,10 +87,20 @@ typedef struct rtExceptionInfo { | |||||
| uint32_t deviceid; | uint32_t deviceid; | ||||
| } rtExceptionInfo; | } rtExceptionInfo; | ||||
| typedef struct rtTaskFailInfo { | |||||
| uint32_t taskid; | |||||
| uint32_t streamid; | |||||
| uint32_t tid; | |||||
| uint32_t deviceid; | |||||
| uint32_t retcode; | |||||
| } rtTaskFailInfo; | |||||
| typedef void (*rtErrorCallback)(rtExceptionType); | typedef void (*rtErrorCallback)(rtExceptionType); | ||||
| typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | ||||
| typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); | |||||
| typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | ||||
| /** | /** | ||||
| @@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* | |||||
| */ | */ | ||||
| RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); | RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); | ||||
| /** | |||||
| * @ingroup profiling_base | |||||
| * @brief ts set profiling reporter callback. | |||||
| */ | |||||
| RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); | |||||
| /** | /** | ||||
| * @ingroup dvrt_base | * @ingroup dvrt_base | ||||
| * @brief Returns the last error from a runtime call. | * @brief Returns the last error from a runtime call. | ||||
| @@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); | RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); | ||||
| /** | |||||
| * @ingroup dvrt_base | |||||
| * @brief register callback for fail task | |||||
| * @param [in] moduleName unique register name, can't be null | |||||
| * @param [in] callback fail task callback function | |||||
| * @param [out] NA | |||||
| * @return RT_ERROR_NONE for ok | |||||
| */ | |||||
| RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); | |||||
| /** | /** | ||||
| * @ingroup dvrt_base | * @ingroup dvrt_base | ||||
| * @brief notify handle. | * @brief notify handle. | ||||
| @@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig { | |||||
| typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; | typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; | ||||
| /** | |||||
| * @ingroup | |||||
| * @brief get platform | |||||
| * @param [in] platForm | |||||
| * @return platForm | |||||
| */ | |||||
| RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm); | |||||
| /** | /** | ||||
| * @ingroup | * @ingroup | ||||
| * @brief get AI core count | * @brief get AI core count | ||||
| @@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate | |||||
| */ | */ | ||||
| RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); | RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); | ||||
| /** | |||||
| * @ingroup | |||||
| * @brief set platform in gen ctx | |||||
| * @param [in] platForm | |||||
| * @return RT_ERROR_NONE for ok, errno for failed | |||||
| */ | |||||
| RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); | |||||
| /** | /** | ||||
| * @ingroup | * @ingroup | ||||
| @@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | ||||
| /** | |||||
| * @ingroup | |||||
| * @brief get runtime version. The version is returned as (1000 * major + 10 * minor). For example, RUNTIME 9.2 would be represented by 9020. | |||||
| * @param [out] runtimeVersion | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | |||||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 | |||||
| FEATURE_TYPE_MEMCPY = 0, | FEATURE_TYPE_MEMCPY = 0, | ||||
| FEATURE_TYPE_RSV, | FEATURE_TYPE_RSV, | ||||
| } rtFeatureType_t; | } rtFeatureType_t; | ||||
| * @param [in] infoType info type | |||||
| * @param [in] featureInfo info type | |||||
| typedef enum tagMemcpyInfo { | typedef enum tagMemcpyInfo { | ||||
| MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | ||||
| MEMCPY_INFO _RSV, | MEMCPY_INFO _RSV, | ||||
| } rtMemcpyInfo_t; | } rtMemcpyInfo_t; | ||||
| * @param [out] value the capability info | |||||
| * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT | |||||
| * @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
| */ | */ | ||||
| RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); | RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); | ||||
| @@ -28,4 +28,4 @@ | |||||
| #include "rt_model.h" | #include "rt_model.h" | ||||
| #include "stream.h" | #include "stream.h" | ||||
| #endif // __CCE_RUNTIME_RT_H__ | |||||
| #endif // __CCE_RUNTIME_RT_H__ | |||||
| @@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t; | |||||
| typedef uint32_t TDT_StatusT; | typedef uint32_t TDT_StatusT; | ||||
| #endif | #endif | ||||
| #define LINUX 0 | |||||
| #define WINDOWS 1 | |||||
| #ifndef TDT_LIB_EXPORT | #ifndef TDT_LIB_EXPORT | ||||
| #if(TARGET_SYSTEM_NAME == WINDOWS) | |||||
| #define TDT_LIB_EXPORT __declspec(dllexport) | |||||
| #else | |||||
| #define TDT_LIB_EXPORT __attribute__((visibility("default"))) | #define TDT_LIB_EXPORT __attribute__((visibility("default"))) | ||||
| #endif | #endif | ||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup tdt status. | * @ingroup tdt status. | ||||
| * | * | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include <mutex> | #include <mutex> | ||||
| #include "tdt/status.h" | #include "tdt/status.h" | ||||
| #include "tdt/data_common.h" | #include "tdt/data_common.h" | ||||
| #include "toolchain/prof_callback.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| @@ -37,7 +38,7 @@ extern "C" { | |||||
| * Used for the Framework process to communicate with the TSDDaemon process, | * Used for the Framework process to communicate with the TSDDaemon process, | ||||
| * and notify TSD to complete the initialization of other processes | * and notify TSD to complete the initialization of other processes | ||||
| * | * | ||||
| * @param phyDeviceId [IN] type #unsigned int. Physical device ID | |||||
| * @param logicDeviceId [IN] type #unsigned int. Logic device ID | |||||
| * @param rankSize [IN] type #unsigned int. The rankSize of the training. | * @param rankSize [IN] type #unsigned int. The rankSize of the training. | ||||
| * The default value is 1. When rankSize is greater than 1, | * The default value is 1. When rankSize is greater than 1, | ||||
| * HCCP will be pulled to perform set communication related operations. | * HCCP will be pulled to perform set communication related operations. | ||||
| @@ -49,7 +50,7 @@ extern "C" { | |||||
| * @li tsd_client.h: Header file where the interface declaration is located. | * @li tsd_client.h: Header file where the interface declaration is located. | ||||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | * @li data_common.h: Header file where 'TDT_StatusT' defined | ||||
| */ | */ | ||||
| TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); | |||||
| TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); | |||||
| /** | /** | ||||
| * @ingroup Close | * @ingroup Close | ||||
| @@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra | |||||
| * @li tsd_client.h: Header file where the interface declaration is located. | * @li tsd_client.h: Header file where the interface declaration is located. | ||||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | * @li data_common.h: Header file where 'TDT_StatusT' defined | ||||
| */ | */ | ||||
| TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); | |||||
| TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); | |||||
| /** | /** | ||||
| * @ingroup UpdateProfilingMode | * @ingroup UpdateProfilingMode | ||||
| @@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); | |||||
| * @li tsd_client.h: Header file where the interface declaration is located. | * @li tsd_client.h: Header file where the interface declaration is located. | ||||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | * @li data_common.h: Header file where 'TDT_StatusT' defined | ||||
| */ | */ | ||||
| TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag); | |||||
| TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); | |||||
| /** | |||||
| * @ingroup TsdSetMsprofReporterCallback | |||||
| * @brief 用于推理场景下设置aicpu的profiling的callback函数 | |||||
| * | |||||
| * @par Function | |||||
| * 设置offline模式下aicpu_sd进程的profiling的callback函数 | |||||
| * | |||||
| * @param callback [IN] type #MsprofReporterCallback. 回调函数 | |||||
| * @retval TDT_OK Success | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @par Dependency | |||||
| * @li libtsdclient.so: Library to which the interface belongs. | |||||
| * @li tsd_client.h: Header file where the interface declaration is located. | |||||
| * @li data_common.h: Header file where 'TDT_StatusT' defined | |||||
| * @li prof_callback.h: Header file where 'MsprofReporterCallback' defined | |||||
| */ | |||||
| TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); | |||||
| /** | /** | ||||
| * @ingroup CreateCmdParameterObj | * @ingroup CreateCmdParameterObj | ||||
| @@ -0,0 +1,135 @@ | |||||
| /** | |||||
| * Copyright 2020-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| * | |||||
| * @file prof_callback.h | |||||
| * @brief declaration of profiling callbacks | |||||
| */ | |||||
| #ifndef MSPROFILER_PROF_CALLBACK_H_ | |||||
| #define MSPROFILER_PROF_CALLBACK_H_ | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif // __cplusplus | |||||
| #include "stddef.h" | |||||
| #include "stdint.h" | |||||
| /** | |||||
| * @name MsprofErrorCode | |||||
| * @brief error code | |||||
| */ | |||||
| enum MsprofErrorCode { | |||||
| MSPROF_ERROR_NONE = 0, | |||||
| MSPROF_ERROR_MEM_NOT_ENOUGH, | |||||
| MSPROF_ERROR_GET_ENV, | |||||
| MSPROF_ERROR_CONFIG_INVALID, | |||||
| MSPROF_ERROR_ACL_JSON_OFF, | |||||
| MSPROF_ERROR, | |||||
| }; | |||||
| #define MSPROF_ENGINE_MAX_TAG_LEN (31) | |||||
| /** | |||||
| * @name ReporterData | |||||
| * @brief struct of data to report | |||||
| */ | |||||
| struct ReporterData { | |||||
| char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be written | |||||
| int deviceId; // the index of device | |||||
| size_t dataLen; // the length of send data | |||||
| unsigned char *data; // the data content | |||||
| }; | |||||
| /** | |||||
| * @name MsprofReporterModuleId | |||||
| * @brief module id of data to report | |||||
| */ | |||||
| enum MsprofReporterModuleId { | |||||
| MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS | |||||
| MSPROF_MODULE_HCCL, // HCCL | |||||
| MSPROF_MODULE_ACL, // AclModule | |||||
| MSPROF_MODULE_FRAMEWORK, // Framework | |||||
| MSPROF_MODULE_RUNTIME // runtime | |||||
| }; | |||||
| /** | |||||
| * @name MsprofReporterCallbackType | |||||
| * @brief reporter callback request type | |||||
| */ | |||||
| enum MsprofReporterCallbackType { | |||||
| MSPROF_REPORTER_REPORT = 0, // report data | |||||
| MSPROF_REPORTER_INIT, // init reporter | |||||
| MSPROF_REPORTER_UNINIT, // uninit reporter | |||||
| }; | |||||
| /** | |||||
| * @name MsprofReporterCallback | |||||
| * @brief callback to start reporter/stop reporter/report data | |||||
| * @param moduleId [IN] enum MsprofReporterModuleId | |||||
| * @param type [IN] enum MsprofReporterCallbackType | |||||
| * @param data [IN] callback data (nullptr on INIT/UNINIT) | |||||
| * @param len [IN] callback data size (0 on INIT/UNINIT) | |||||
| * @return enum MsprofErrorCode | |||||
| */ | |||||
| typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); | |||||
| #define MSPROF_OPTIONS_DEF_LEN_MAX (2048) | |||||
| /** | |||||
| * @name MsprofGeOptions | |||||
| * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS | |||||
| */ | |||||
| struct MsprofGeOptions { | |||||
| char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; | |||||
| char options[MSPROF_OPTIONS_DEF_LEN_MAX]; | |||||
| }; | |||||
| /** | |||||
| * @name MsprofCtrlCallbackType | |||||
| * @brief ctrl callback request type | |||||
| */ | |||||
| enum MsprofCtrlCallbackType { | |||||
| MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env | |||||
| MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json | |||||
| MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options | |||||
| MSPROF_CTRL_FINALIZE // stop profiling | |||||
| }; | |||||
| /** | |||||
| * @name MsprofCtrlCallback | |||||
| * @brief callback to start/stop profiling | |||||
| * @param type [IN] enum MsprofCtrlCallbackType | |||||
| * @param data [IN] callback data | |||||
| * @param len [IN] callback data size | |||||
| * @return enum MsprofErrorCode | |||||
| */ | |||||
| typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); | |||||
| /** | |||||
| * @name MsprofSetDeviceCallback | |||||
| * @brief callback to notify set/reset device | |||||
| * @param devId [IN] device id | |||||
| * @param isOpenDevice [IN] true: set device, false: reset device | |||||
| */ | |||||
| typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MSPROFILER_PROF_CALLBACK_H_ | |||||
| @@ -16,7 +16,17 @@ | |||||
| #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ | #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ | ||||
| #define MSPROF_ENGINE_PROF_REPORTER_H_ | #define MSPROF_ENGINE_PROF_REPORTER_H_ | ||||
| #ifndef OS_TYPE | |||||
| #define OS_TYPE 0 | |||||
| #endif // OS_TYPE | |||||
| #if (OS_TYPE != LINUX) | |||||
| #define MSVP_PROF_API __declspec(dllexport) | |||||
| #else | |||||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | #define MSVP_PROF_API __attribute__((visibility("default"))) | ||||
| #endif | |||||
| #include "prof_callback.h" | |||||
| /** | /** | ||||
| * @file prof_reporter.h | * @file prof_reporter.h | ||||
| @@ -25,20 +35,6 @@ | |||||
| */ | */ | ||||
| namespace Msprof { | namespace Msprof { | ||||
| namespace Engine { | namespace Engine { | ||||
| /// the max tag length | |||||
| #define MSPROF_ENGINE_MAX_TAG_LEN (31) | |||||
| /** | |||||
| * @ingroup reporter | |||||
| * @brief struct ReporterData | |||||
| * the sturct of the data send to libmsprof | |||||
| */ | |||||
| struct ReporterData { | |||||
| char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen | |||||
| int deviceId; ///< the physical id of device | |||||
| size_t dataLen; ///< the length of send data | |||||
| unsigned char *data; ///< the data content | |||||
| }; | |||||
| /** | /** | ||||
| * @ingroup reporter | * @ingroup reporter | ||||
| * @brief class Reporter | * @brief class Reporter | ||||
| @@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter { | |||||
| } // namespace Engine | } // namespace Engine | ||||
| } // namespace Msprof | } // namespace Msprof | ||||
| #endif // MSPROF_ENGINE_PROF_REPORTER_H_ | |||||
| #endif // MSPROF_ENGINE_PROF_REPORTER_H_ | |||||
| @@ -18,7 +18,9 @@ | |||||
| #define D_SYSLOG_H_ | #define D_SYSLOG_H_ | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| #ifndef LOG_CPP | |||||
| extern "C" { | extern "C" { | ||||
| #endif | |||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| #ifndef LINUX | #ifndef LINUX | ||||
| @@ -105,6 +107,7 @@ extern "C" { | |||||
| #define SECURITY_LOG_MASK (0x00100000) | #define SECURITY_LOG_MASK (0x00100000) | ||||
| #define RUN_LOG_MASK (0x01000000) | #define RUN_LOG_MASK (0x01000000) | ||||
| #define OPERATION_LOG_MASK (0x10000000) | #define OPERATION_LOG_MASK (0x10000000) | ||||
| #define RESERVERD_LENGTH 52 | |||||
| typedef struct tagDCODE { | typedef struct tagDCODE { | ||||
| const char *cName; | const char *cName; | ||||
| @@ -116,6 +119,18 @@ typedef struct tagKV { | |||||
| char *value; | char *value; | ||||
| } KeyValue; | } KeyValue; | ||||
| typedef enum { | |||||
| APPLICATION = 0, | |||||
| SYSTEM | |||||
| } ProcessType; | |||||
| typedef struct { | |||||
| ProcessType type; | |||||
| unsigned int pid; | |||||
| unsigned int deviceId; | |||||
| char reserved[RESERVERD_LENGTH]; | |||||
| } LogAttr; | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| * | * | ||||
| @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); | |||||
| */ | */ | ||||
| DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); | DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); | ||||
| /** | |||||
| * @ingroup slog | |||||
| * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION | |||||
| * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) | |||||
| * @return: 0: SUCCEED, others: FAILED | |||||
| */ | |||||
| DLL_EXPORT int DlogSetAttr(LogAttr logAttr); | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| * @brief dlog_error: print error log | * @brief dlog_error: print error log | ||||
| @@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); | |||||
| void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| #ifndef LOG_CPP | |||||
| } | } | ||||
| #endif // LOG_CPP | |||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| #endif // D_SYSLOG_H_ | #endif // D_SYSLOG_H_ | ||||