diff --git a/CMakeLists.txt b/CMakeLists.txt index 6dd5e1e1..021e7798 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,11 @@ endif() if(DEFINED ENV{D_PKG_SERVER}) set(GE_PB_PKG $ENV{D_PKG_SERVER}) - message("Download packages from PKG server") -endif() + message("Download packages from DPKG server") +elseif(DEFINED ENV{MSLIBS_SERVER}) + set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081") + message("Download packages from MSPKG server") +endif () set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64) set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common) @@ -37,7 +40,7 @@ set(ATLAS_MS_RUNTIME_PATH ${ATLAS_RUNTIME_DIR} ${ATLAS_ACL_DIR} ${ATLAS_ATC_DIR} option(ENABLE_OPEN_SRC "Enable graphengine compile in opensource." FALSE) if (ENABLE_OPEN_SRC) - set(HI_PYTHON python3.7) + set(HI_PYTHON python3) include(cmake/external_libs/protobuf_shared.cmake) include(cmake/external_libs/protobuf_static.cmake) @@ -71,7 +74,7 @@ if (ENABLE_OPEN_SRC) set(STATIC_ACL_LIB ${GE_LIB_PATH}) find_module(slog libslog.so ${GE_LIB_PATH}) find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) - find_module(msprof libmsprof.so ${GE_LIB_PATH}) + find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) find_module(hccl libhccl.so ${GE_LIB_PATH}) find_module(adump_server libadump_server.a ${GE_LIB_PATH}) find_module(runtime libruntime.so ${GE_LIB_PATH}) @@ -80,20 +83,19 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) - find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) else() find_module(slog libslog.so ${ASCEND_ATC_DIR} ${ASCEND_DRIVER_COMMON_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} 
${ASCEND_RUNTIME_DIR}) if(PLATFORM STREQUAL "train") - find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) - find_module(msprofiler libmsprofiler.a ${ASCEND_RUNTIME_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT STREQUAL "flr3") message(FATAL_ERROR "This platform is not supported in train mode, build terminated") @@ -106,20 +108,17 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) - #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) + #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") - find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR}) elseif(PRODUCT STREQUAL "flr1") find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) - find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) elseif(PRODUCT STREQUAL "flr2") # flr2 ascend_hal_stub limsprof ? 
else() find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) - find_module(msprof libmsprof.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) + find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) @@ -127,14 +126,14 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() - message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") + message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") endif() - if (ENABLE_GE_COV OR ENABLE_GE_UT) + if (ENABLE_GE_COV OR ENABLE_GE_UT) add_subdirectory(tests) endif() diff --git a/cmake/external_libs/gflags.cmake b/cmake/external_libs/gflags.cmake index f3f0f0ef..50cfb2bc 100755 --- a/cmake/external_libs/gflags.cmake +++ b/cmake/external_libs/gflags.cmake @@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build URL ${REQ_URL} #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2 + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags BUILD_COMMAND $(MAKE) INSTALL_COMMAND $(MAKE) install diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake index 96ea84b4..c5edcd72 100755 --- 
a/cmake/external_libs/gtest.cmake +++ b/cmake/external_libs/gtest.cmake @@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") endif() -if (ENABLE_GITEE) +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz") + set(MD5 "") +elseif (ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz") set(MD5 "") else() @@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack- set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") ExternalProject_Add(gtest_build URL ${REQ_URL} + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest - -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON + -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON BUILD_COMMAND $(MAKE) INSTALL_COMMAND $(MAKE) install EXCLUDE_FROM_ALL TRUE diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake index ce473d4b..3c1cd012 100755 --- a/cmake/external_libs/json.cmake +++ b/cmake/external_libs/json.cmake @@ -5,10 +5,14 @@ endif() include(ExternalProject) set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include) -if (ENABLE_GITEE) - set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") - set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") - set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip") + set(MD5 "0dc903888211db3a0f170304cd9f3a89") + set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) +#elseif (ENABLE_GITEE) +# set(REQ_URL 
"https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") +# set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") +#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") else() set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") set(MD5 "0dc903888211db3a0f170304cd9f3a89") @@ -18,6 +22,7 @@ ExternalProject_Add(json_build URL ${REQ_URL} #URL /home/txd/workspace/cloud_code/pkg/include.zip SOURCE_DIR ${JSON_SRC_DIR} + TLS_VERIFY OFF CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake index 9dadb544..1ee80d2d 100755 --- a/cmake/external_libs/onnx.cmake +++ b/cmake/external_libs/onnx.cmake @@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx) set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto) file(MAKE_DIRECTORY ${ONNX_PROTO_DIR}) -if (ENABLE_GITEE) +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz") + set(MD5 "512f2779d6215d4a36f366b6b9acdf1e") +elseif (ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz") set(MD5 "1bdbcecdd68ea8392630467646776e02") else() @@ -19,6 +22,7 @@ ExternalProject_Add(onnx #URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345 #SOURCE_DIR ${ONNX_SRC_DIR} + TLS_VERIFY OFF CONFIGURE_COMMAND "" BUILD_COMMAND "" #INSTALL_COMMAND "" diff --git a/cmake/external_libs/protobuf_shared.cmake b/cmake/external_libs/protobuf_shared.cmake index c9c6b7d9..6334c8a3 100755 --- a/cmake/external_libs/protobuf_shared.cmake +++ b/cmake/external_libs/protobuf_shared.cmake @@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") ExternalProject_Add(protobuf_build URL ${REQ_URL} + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} 
diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake index 6f3e1f53..e4bbb9a0 100755 --- a/cmake/external_libs/protobuf_static.cmake +++ b/cmake/external_libs/protobuf_static.cmake @@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build URL ${REQ_URL} #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0 + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake index 0d162c0d..58321f04 100755 --- a/cmake/external_libs/protoc.cmake +++ b/cmake/external_libs/protoc.cmake @@ -1,115 +1,116 @@ -if (HAVE_PROTOC) - return() -endif() - -include(ExternalProject) -include(GNUInstallDirs) -#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) - -if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR - (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) - set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) - message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") -endif() - -if(GE_PB_PKG) - set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") -else() - if (ENABLE_GITEE) - set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") - set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") - else() - set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") - set(MD5 "3d9e32700639618a4d2d342c99d4507a") - endif () -endif() - -set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") -set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") -ExternalProject_Add(protoc_build - URL ${REQ_URL} - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz - #SOURCE_DIR 
${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 - CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake - BUILD_COMMAND $(MAKE) - INSTALL_COMMAND $(MAKE) install - EXCLUDE_FROM_ALL TRUE -) - -set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) - -set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) - -function(protobuf_generate comp c_var h_var) - if(NOT ARGN) - message(SEND_ERROR "Error: protobuf_generate() called without any proto files") - return() - endif() - set(${c_var}) - set(${h_var}) - - foreach(file ${ARGN}) - get_filename_component(abs_file ${file} ABSOLUTE) - get_filename_component(file_name ${file} NAME_WE) - get_filename_component(file_dir ${abs_file} PATH) - get_filename_component(parent_subdir ${file_dir} NAME) - - if("${parent_subdir}" STREQUAL "proto") - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) - else() - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) - endif() - list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") - list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") - - add_custom_command( - OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" - COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} - DEPENDS protoc_build ${abs_file} - COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) - endforeach() - - set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) - set(${c_var} ${${c_var}} PARENT_SCOPE) - set(${h_var} ${${h_var}} PARENT_SCOPE) - -endfunction() - -function(protobuf_generate_py comp py_var) - if(NOT ARGN) - 
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") - return() - endif() - set(${py_var}) - - foreach(file ${ARGN}) - get_filename_component(abs_file ${file} ABSOLUTE) - get_filename_component(file_name ${file} NAME_WE) - get_filename_component(file_dir ${abs_file} PATH) - get_filename_component(parent_subdir ${file_dir} NAME) - - if("${parent_subdir}" STREQUAL "proto") - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) - else() - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) - endif() - list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") - - add_custom_command( - OUTPUT "${proto_output_path}/${file_name}_pb2.py" - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" - COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} - DEPENDS protoc_build ${abs_file} - COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) - endforeach() - - set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) - set(${py_var} ${${py_var}} PARENT_SCOPE) - -endfunction() - -#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") -set(HAVE_PROTOC TRUE) +if (HAVE_PROTOC) + return() +endif() + +include(ExternalProject) +include(GNUInstallDirs) +#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +if(GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") +else() + if (ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") + set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") + else() + set(REQ_URL 
"https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") + set(MD5 "3d9e32700639618a4d2d342c99d4507a") + endif () +endif() + +set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") +set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") +ExternalProject_Add(protoc_build + URL ${REQ_URL} + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 + TLS_VERIFY OFF + CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) + +set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) + +set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) + +function(protobuf_generate comp c_var h_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate() called without any proto files") + return() + endif() + set(${c_var}) + set(${h_var}) + + foreach(file ${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") + list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} -E 
make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) + set(${c_var} ${${c_var}} PARENT_SCOPE) + set(${h_var} ${${h_var}} PARENT_SCOPE) + +endfunction() + +function(protobuf_generate_py comp py_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") + return() + endif() + set(${py_var}) + + foreach(file ${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}_pb2.py" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) + set(${py_var} ${${py_var}} PARENT_SCOPE) + +endfunction() + +#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") +set(HAVE_PROTOC TRUE) diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake index 0bd62ab2..0f8b6d3a 100755 --- a/cmake/external_libs/securec.cmake +++ b/cmake/external_libs/securec.cmake @@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL 
/usr/local) OR message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") endif() +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz") + set(MD5 "") +else() + set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz") + set(MD5 "") +endif () + ExternalProject_Add(c_sec_build - URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + URL ${REQ_URL} + #URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 88a5c52f..59b804d8 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -60,6 +60,8 @@ set(TRAIN_SRC_LIST "common/dump/dump_manager.cc" "common/dump/dump_properties.cc" "common/dump/dump_op.cc" + "common/profiling/ge_profiling.cc" + "common/profiling/ge_runner_profiling.cc" "engine_manager/dnnengine_manager.cc" "ge_local_engine/engine/host_cpu_engine.cc" "generator/ge_generator.cc" @@ -201,6 +203,7 @@ set(TRAIN_SRC_LIST "host_kernels/sub_kernel.cc" "host_kernels/transdata_kernel.cc" "host_kernels/unpack_kernel.cc" + "host_kernels/reformat_kernel.cc" "graph/passes/folding_pass.cc" "graph/passes/get_original_format_pass.cc" "graph/passes/guarantee_const_pass.cc" @@ -331,7 +334,6 @@ set(TRAIN_SRC_LIST "hybrid/hybrid_davinci_model.cc" "executor/ge_executor.cc" "client/ge_api.cc" - "client/ge_prof.cc" "analyzer/analyzer.cc" "ir_build/ge_ir_build.cc" "ir_build/atc_ir_common.cc" @@ -487,6 +489,7 @@ set(INFER_SRC_LIST "host_kernels/slice_d_kernel.cc" "host_kernels/dynamic_stitch_kernel.cc" "host_kernels/identity_kernel.cc" + 
"host_kernels/reformat_kernel.cc" "graph/passes/stop_gradient_pass.cc" "graph/passes/prevent_gradient_pass.cc" "graph/passes/identity_pass.cc" @@ -602,7 +605,7 @@ set(INFER_SRC_LIST if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ -add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) +add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk_obj>) target_compile_definitions(ge_runner PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 @@ -647,7 +650,6 @@ target_link_libraries(ge_runner $<BUILD_INTERFACE:intf_pub> ge_memory adump_server - msprofiler static_mmpa -Wl,--no-as-needed graph @@ -656,7 +658,6 @@ target_link_libraries(ge_runner register c_sec slog - msprof runtime resource error_manager @@ -781,7 +782,6 @@ target_link_libraries(opensrc_ascendcl PRIVATE c_sec runtime slog - msprof ascend_hal_stub -Wl,--as-needed -lrt @@ -797,12 +797,10 @@ set_target_properties(opensrc_ascendcl PROPERTIES add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc - ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc COMMAND echo "Generating stub files." && ${HI_PYTHON} ${CMAKE_CURRENT_LIST_DIR}/stub/gen_stubapi.py ${GE_CODE_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR} && mv ge_ir_build.cc stub_ge_ir_build.cc && mv ge_api.cc stub_ge_api.cc - && mv ge_prof.cc stub_ge_prof.cc && echo "Generating stub files end." 
#WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} #DEPENDS stub/gen_stubapi.py ${TOP_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR} @@ -811,7 +809,6 @@ add_custom_command( add_custom_target(ge_stub DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc - ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc ) ################################################################## @@ -853,7 +850,6 @@ target_include_directories(atc_stub_ge_compiler PRIVATE ############ stub/libge_runner.so ############ add_library(fwk_stub_ge_runner SHARED stub_ge_api.cc - stub_ge_prof.cc stub_ge_ir_build.cc ) diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 9ecc3016..66958310 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -134,7 +134,7 @@ Status GEInitialize(const std::map &options) { Status GEInitialize(const std::map &options) { std::map str_options; - for (auto & option : options) { + for (auto &option : options) { if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { GELOGE(FAILED, "GEInitialize options is nullptr."); return FAILED; diff --git a/ge/client/ge_prof.cc b/ge/client/ge_prof.cc deleted file mode 100644 index ede38430..00000000 --- a/ge/client/ge_prof.cc +++ /dev/null @@ -1,369 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ge/ge_prof.h" -#include "ge/ge_api.h" -#include "init/gelib.h" -#include "common/debug/log.h" -#include "framework/common/debug/ge_log.h" -#include "common/profiling/profiling_manager.h" -#include "graph/load/graph_loader.h" -#include "toolchain/prof_acl_api.h" - -using std::map; -using std::string; -using std::vector; - -namespace { -const uint32_t kMaxDeviceNum = 64; -const uint32_t kDeviceListIndex = 3; -const std::string kProfilingInit = "prof_init"; -const std::string kProfilingFinalize = "prof_finalize"; -const std::string kProfilingStart = "prof_start"; -const std::string kProfilingStop = "prof_stop"; -const std::string kDeviceNums = "devNums"; -const std::string kDeviceIdList = "devIdList"; -const std::string kAicoreMetrics = "aicoreMetrics"; - -const std::map kProfAicoreMetricsToString = { - {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, - {ge::kAicorePipeline, "AICORE_PIPELINE"}, - {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, - {ge::kAicoreMemory, "AICORE_MEMORY"}, - {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, - {ge::kAicoreStall, "AICORE_STALL"}}; -} // namespace - -static bool g_graph_prof_init_ = false; -static std::mutex g_prof_mutex_; - -namespace ge { -struct aclgrphProfConfig { - ProfConfig config; -}; - -Status aclgrphProfInit(const char *profiler_path, uint32_t length) { - GELOGT(TRACE_INIT, "Graph prof init start"); - - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - - std::lock_guard lock(g_prof_mutex_); - if (g_graph_prof_init_) { - GELOGW("Multi graph profiling initializations."); - return GE_PROF_MULTI_INIT; - } - - Status ret = CheckPath(profiler_path, length); - if (ret != SUCCESS) { - GELOGE(ret, "Profiling config path is invalid."); - return ret; - } - // if command mode is set, just return - if 
(ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof init failed, cause profiling command pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - - ret = ProfInit(profiler_path); - if (ret != SUCCESS) { - GELOGE(ret, "ProfInit init fail"); - return ret; - } - - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingInit; - command.module_index = PROF_MODEL_LOAD; - ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); - return ret; - } - if (!g_graph_prof_init_) { - g_graph_prof_init_ = true; - GELOGI("Profiling init successfully."); - } - - GELOGI("Successfully execute GraphProfInit."); - return SUCCESS; -} - -Status aclgrphProfFinalize() { - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - std::lock_guard lock(g_prof_mutex_); - // if command mode is set, just return - if (ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - - if (!g_graph_prof_init_) { - GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); - return GE_PROF_NOT_INIT; - } - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingFinalize; - Status ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); - return ret; - } - - ret = ProfFinalize(); - if (ret != SUCCESS) { - GELOGE(ret, "Finalize profiling failed, result = %d", ret); - } - - if (ret == SUCCESS) { - g_graph_prof_init_ = false; - GELOGI("Successfully execute GraphProfFinalize."); - } - return ret; -} - -bool 
TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector &prof_config_params) { - prof_config_params.clear(); - prof_config_params.emplace_back(kDeviceNums); - prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); - prof_config_params.emplace_back(kDeviceIdList); - std::string devID = ""; - if (profiler_config->config.devNums == 0) { - GELOGW("The device num is invalid."); - return false; - } - for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { - devID.append(std::to_string(profiler_config->config.devIdList[i])); - if (i != profiler_config->config.devNums - 1) { - devID.append(","); - } - } - - prof_config_params.push_back(devID); - prof_config_params.push_back(kAicoreMetrics); - auto iter = - kProfAicoreMetricsToString.find(static_cast(profiler_config->config.aicoreMetrics)); - if (iter == kProfAicoreMetricsToString.end()) { - GELOGW("The prof aicore metrics is invalid."); - return false; - } - prof_config_params.push_back(iter->second); - return true; -} - -bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { - if (deviceid_list == nullptr) { - GELOGE(PARAM_INVALID, "deviceIdList is nullptr"); - return false; - } - if (device_nums == 0 || device_nums > kMaxDeviceNum) { - GELOGE(PARAM_INVALID, "The device nums is invalid."); - return false; - } - - // real device num - int32_t dev_count = 0; - rtError_t rt_err = rtGetDeviceCount(&dev_count); - if (rt_err != RT_ERROR_NONE) { - GELOGE(INTERNAL_ERROR, "Get the Device count fail."); - return false; - } - - if (device_nums > static_cast(dev_count)) { - GELOGE(PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); - return false; - } - - std::unordered_set record; - for (size_t i = 0; i < device_nums; ++i) { - uint32_t dev_id = deviceid_list[i]; - if (dev_id >= static_cast(dev_count)) { - GELOGE(PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); - return false; - } - 
if (record.count(dev_id) > 0) { - GELOGE(PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); - return false; - } - record.insert(dev_id); - } - return true; -} - -aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, - ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, - uint64_t data_type_config) { - if (!isProfConfigValid(deviceid_list, device_nums)) { - return nullptr; - } - aclgrphProfConfig *config = new (std::nothrow) aclgrphProfConfig(); - if (config == nullptr) { - GELOGE(INTERNAL_ERROR, "new aclgrphProfConfig fail"); - return nullptr; - } - config->config.devNums = device_nums; - if (memcpy_s(config->config.devIdList, sizeof(config->config.devIdList), deviceid_list, - device_nums * sizeof(uint32_t)) != EOK) { - GELOGE(INTERNAL_ERROR, "copy devID failed. size = %u", device_nums); - delete config; - return nullptr; - } - - config->config.aicoreMetrics = static_cast(aicore_metrics); - config->config.dataTypeConfig = data_type_config; - GELOGI("Successfully create prof config."); - return config; -} - -Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config) { - if (profiler_config == nullptr) { - GELOGE(PARAM_INVALID, "destroy profilerConfig failed, profilerConfig must not be nullptr"); - return PARAM_INVALID; - } - - delete profiler_config; - GELOGI("Successfully destroy prof config."); - return SUCCESS; -} - -Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { - if (profiler_config == nullptr) { - GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); - return FAILED; - } - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - - std::lock_guard lock(g_prof_mutex_); - // if command mode is set, just return - if (ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof finalize failed, cause profiling command 
pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - if (!g_graph_prof_init_) { - GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); - return GE_PROF_NOT_INIT; - } - - Status ret = ProfStartProfiling(&profiler_config->config); - if (ret != SUCCESS) { - GELOGE(ret, "Start profiling failed, prof result = %d", ret); - return FAILED; - } - - std::vector prof_params; - if (!TransProfConfigToParam(profiler_config, prof_params)) { - GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); - return PARAM_INVALID; - } - - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingStart; - command.cmd_params = prof_params; - command.module_index = profiler_config->config.dataTypeConfig; - GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), - prof_params[kDeviceListIndex].c_str(), command.module_index); - ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command failed"); - return FAILED; - } - - GELOGI("Successfully execute GraphProfStartProfiling."); - - return SUCCESS; -} - -Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { - if (profiler_config == nullptr) { - GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); - return FAILED; - } - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - - std::lock_guard lock(g_prof_mutex_); - // if command mode is set, just return - if (ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - if (!g_graph_prof_init_) { - GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); - return GE_PROF_NOT_INIT; - } - - for (uint32_t i = 0; i < 
profiler_config->config.devNums; i++) { - uint64_t data_type_config; - Status status = ProfGetDataTypeConfig(profiler_config->config.devIdList[i], data_type_config); - if (status != SUCCESS) { - GELOGE(status, "Prof get data type config failed, prof result = %d", status); - return status; - } - if (data_type_config != profiler_config->config.dataTypeConfig) { - GELOGE(FAILED, "data type config verify failed"); - return FAILED; - } - } - - std::vector prof_params; - if (!TransProfConfigToParam(profiler_config, prof_params)) { - GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); - return PARAM_INVALID; - } - - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingStop; - command.cmd_params = prof_params; - command.module_index = profiler_config->config.dataTypeConfig; - GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), - prof_params[kDeviceListIndex].c_str(), command.module_index); - Status ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command failed"); - return FAILED; - } - - ret = ProfStopProfiling(&profiler_config->config); - if (ret != SUCCESS) { - GELOGE(ret, "Stop profiling failed, prof result = %d", ret); - return ret; - } - - GELOGI("Successfully execute GraphProfStopProfiling."); - return SUCCESS; -} -} // namespace ge diff --git a/ge/client/module.mk b/ge/client/module.mk index 6ac69d31..e9d35418 100644 --- a/ge/client/module.mk +++ b/ge/client/module.mk @@ -4,7 +4,6 @@ LOCAL_PATH := $(call my-dir) COMMON_LOCAL_SRC_FILES := \ proto/ge_api.proto \ ge_api.cc \ - ge_prof.cc \ COMMON_LOCAL_C_INCLUDES := \ @@ -69,9 +68,9 @@ LOCAL_SHARED_LIBRARIES := \ libgraph \ libregister \ libge_compiler \ - libge_common \ - libmsprof + libge_common +LOCAL_STATIC_LIBRARIES += libmsprofiler_fwk \ LOCAL_LDFLAGS := -lrt -ldl @@ -104,8 +103,10 @@ LOCAL_SHARED_LIBRARIES := \ libregister \ 
libruntime \ libge_compiler \ - libge_common \ - libmsprof + libge_common + + +LOCAL_STATIC_LIBRARIES += libmsprofiler_fwk \ LOCAL_LDFLAGS := -lrt -ldl diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index aa546c0d..d196995c 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -24,6 +24,7 @@ set(SRC_LIST "helper/om_file_helper.cc" "helper/model_helper.cc" "../model/ge_model.cc" + "../model/ge_root_model.cc" "auth/file_saver.cc" "fp16_t.cc" "math/fp16_math.cc" diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc index 7b41397a..e708653a 100755 --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -54,8 +54,8 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size == 0 || data == nullptr, return PARAM_INVALID); mmSsize_t write_count; - uint32_t size_2g = ((uint32_t) 0x1 << 31); - uint32_t size_1g = ((uint32_t) 0x1 << 30); + uint32_t size_2g = 2147483648; // 0x1 << 31 + uint32_t size_1g = 1073741824; // 0x1 << 30 // Write data if (size > size_2g) { auto seek = reinterpret_cast(const_cast(data)); @@ -258,6 +258,65 @@ FileSaver::SaveToFile(const string &file_path, ModelFileHeader &file_header, Mod return SUCCESS; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status +FileSaver::SaveToFile(const string &file_path, ModelFileHeader &file_header, + vector &model_partition_tables, + const vector> &all_partition_datas) { + file_header.is_encrypt = ModelEncryptType::UNENCRYPTED; + + const Status ret = SaveWithFileHeader(file_path, file_header, model_partition_tables, all_partition_datas); + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, FAILED, "save file failed, file_path:%s, file header len:%u.", + file_path.c_str(), file_header.length); + return SUCCESS; +} + +Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header, + vector 
&model_partition_tables, + const vector> &all_partition_datas) { + + GE_CHK_BOOL_EXEC(model_partition_tables.size() == all_partition_datas.size(), + return PARAM_INVALID, + "model table size %zu does not match partition size %zu", + model_partition_tables.size(), all_partition_datas.size()) + for (size_t index = 0; index < model_partition_tables.size(); ++index) { + auto &cur_partiton_data = all_partition_datas[index]; + auto &cur_model_partition_table = *model_partition_tables[index]; + GE_CHK_BOOL_RET_STATUS(!cur_partiton_data.empty() && cur_model_partition_table.num != 0 + && cur_model_partition_table.num == cur_partiton_data.size(), FAILED, + "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", + cur_model_partition_table.num, cur_partiton_data.size()); + } + + // Open file + int32_t fd = 0; + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(OpenFile(fd, file_path) != SUCCESS, return FAILED); + Status ret = SUCCESS; + do { + // Write file header + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; + break); + for (size_t index = 0; index < model_partition_tables.size(); ++index) { + // Write model partition table + auto &cur_tabel = *model_partition_tables[index]; + uint32_t table_size = static_cast(SIZE_OF_MODEL_PARTITION_TABLE(cur_tabel)); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + WriteData(static_cast(&cur_tabel), table_size, fd) != SUCCESS, ret = FAILED; break); + // Write partition data + auto &cur_partition_datas = all_partition_datas[index]; + for (const auto &partition_data : cur_partition_datas) { + GELOGI("GC:size[%zu]", partition_data.size); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + WriteData(static_cast(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; + break); + } + } + } while (0); + // Close file + GE_CHK_BOOL_RET_STATUS(mmClose(fd) == EN_OK, FAILED, "Close file failed."); + return ret; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status 
FileSaver::SaveToFile(const string &file_path, const void *data, int len) { if (data == nullptr || len <= 0) { diff --git a/ge/common/auth/file_saver.h b/ge/common/auth/file_saver.h index 79e2126e..97fbaae5 100644 --- a/ge/common/auth/file_saver.h +++ b/ge/common/auth/file_saver.h @@ -74,6 +74,10 @@ class FileSaver { ModelPartitionTable &model_partition_table, const std::vector &partition_datas); + static Status SaveToFile(const string &file_path, ModelFileHeader &file_header, + vector &model_partition_tables, + const vector> &all_partition_datas); + static Status SaveToBuffWithFileHeader(const ModelFileHeader &file_header, ModelPartitionTable &model_partition_table, const std::vector &partitionDatas, @@ -108,6 +112,9 @@ class FileSaver { static Status SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header, ModelPartitionTable &model_partition_table, const std::vector &partition_datas); + static Status SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header, + vector &model_partition_tables, + const vector> &all_partition_datas); }; } // namespace ge #endif // GE_COMMON_AUTH_FILE_SAVER_H_ diff --git a/ge/common/base64.h b/ge/common/base64.h index fb6c1870..a537e585 100644 --- a/ge/common/base64.h +++ b/ge/common/base64.h @@ -25,32 +25,38 @@ namespace ge { namespace { -const char* kBase64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; +const char *kBase64Chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; const char kEqualSymbol = '='; const size_t kBase64CharsNum = 64; const size_t kThreeByteOneGroup = 3; const size_t kFourByteOneGroup = 4; -} +const size_t kThreeByteOneGroupIndex0 = 0; +const size_t kThreeByteOneGroupIndex1 = 1; +const size_t kThreeByteOneGroupIndex2 = 2; +const size_t kFourByteOneGroupIndex0 = 0; +const size_t kFourByteOneGroupIndex1 = 1; +const size_t kFourByteOneGroupIndex2 = 2; +const size_t kFourByteOneGroupIndex3 
= 3; +} // namespace namespace base64 { -static inline bool IsBase64Char(const char &c) { - return (isalnum(c) || (c == '+') || (c == '/')); -} +static inline bool IsBase64Char(const char &c) { return (isalnum(c) || (c == '+') || (c == '/')); } static std::string EncodeToBase64(const std::string &raw_data) { size_t encode_length = raw_data.size() / kThreeByteOneGroup * kFourByteOneGroup; encode_length += raw_data.size() % kThreeByteOneGroup == 0 ? 0 : kFourByteOneGroup; - size_t raw_data_index = 0 ; + size_t raw_data_index = 0; size_t encode_data_index = 0; std::string encode_data; encode_data.resize(encode_length); for (; raw_data_index + kThreeByteOneGroup <= raw_data.size(); raw_data_index += kThreeByteOneGroup) { auto char_1 = static_cast(raw_data[raw_data_index]); - auto char_2 = static_cast(raw_data[raw_data_index + 1]); - auto char_3 = static_cast(raw_data[raw_data_index + 2]); + auto char_2 = static_cast(raw_data[raw_data_index + kThreeByteOneGroupIndex1]); + auto char_3 = static_cast(raw_data[raw_data_index + kThreeByteOneGroupIndex2]); encode_data[encode_data_index++] = kBase64Chars[char_1 >> 2u]; encode_data[encode_data_index++] = kBase64Chars[((char_1 << 4u) & 0x30) | (char_2 >> 4u)]; encode_data[encode_data_index++] = kBase64Chars[((char_2 << 2u) & 0x3c) | (char_3 >> 6u)]; @@ -80,8 +86,7 @@ static std::string EncodeToBase64(const std::string &raw_data) { #pragma GCC diagnostic ignored "-Wunused-function" static Status DecodeFromBase64(const std::string &base64_data, std::string &decode_data) { if (base64_data.size() % kFourByteOneGroup != 0) { - GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", - base64_data.size()); + GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", base64_data.size()); return PARAM_INVALID; } decode_data.clear(); @@ -92,10 +97,10 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco return 
static_cast(std::distance(kBase64Chars, char_pos)) & 0xff; }; - for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += 4) { + for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += kFourByteOneGroup) { for (size_t i = 0; i < kFourByteOneGroup; ++i) { if (base64_data[input_data_index + i] == kEqualSymbol && - input_data_index >= base64_data_len - 4 && i > 1) { + input_data_index >= base64_data_len - kFourByteOneGroup && i > 1) { byte_4[i] = kBase64CharsNum; } else if (IsBase64Char(base64_data[input_data_index + i])) { byte_4[i] = FindCharInBase64Chars(base64_data[input_data_index + i]); @@ -104,19 +109,23 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco return PARAM_INVALID; } } - decode_data += static_cast((byte_4[0] << 2u) + ((byte_4[1] & 0x30) >> 4u)); - if (byte_4[2] >= kBase64CharsNum){ + decode_data += + static_cast((byte_4[kFourByteOneGroupIndex0] << 2u) + ((byte_4[kFourByteOneGroupIndex1] & 0x30) >> 4u)); + if (byte_4[kFourByteOneGroupIndex2] >= kBase64CharsNum) { break; - } else if (byte_4[3] >= kBase64CharsNum) { - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); + } else if (byte_4[kFourByteOneGroupIndex3] >= kBase64CharsNum) { + decode_data += static_cast(((byte_4[kFourByteOneGroupIndex1] & 0x0f) << 4u) + + ((byte_4[kFourByteOneGroupIndex2] & 0x3c) >> 2u)); break; } - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); - decode_data += static_cast(((byte_4[2] & 0x03) << 6u) + byte_4[3]); + decode_data += static_cast(((byte_4[kFourByteOneGroupIndex1] & 0x0f) << 4u) + + ((byte_4[kFourByteOneGroupIndex2] & 0x3c) >> 2u)); + decode_data += + static_cast(((byte_4[kFourByteOneGroupIndex2] & 0x03) << 6u) + byte_4[kFourByteOneGroupIndex3]); } return SUCCESS; } #pragma GCC diagnostic pop -} +} // namespace base64 } // namespace ge #endif // GE_COMMON_BASE64_H_ \ No newline at end of 
file diff --git a/ge/common/debug/memory_dumper.cc b/ge/common/debug/memory_dumper.cc index 872fe1da..527f0bb2 100644 --- a/ge/common/debug/memory_dumper.cc +++ b/ge/common/debug/memory_dumper.cc @@ -139,7 +139,8 @@ int MemoryDumper::OpenFile(const char *filename) { GE_IF_BOOL_EXEC( -1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos); string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, + return kInvalidFd, "Prefix path is too long!"); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmRealPath(prefix_path.c_str(), tmp_path, MMPA_MAX_PATH) != EN_OK, return kInvalidFd, "Dir %s does not exit.", prefix_path.c_str()); real_path = std::string(tmp_path) + last_path;) diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index ed1c6941..cb528453 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -23,12 +23,30 @@ #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" +#include "framework/common/types.h" #include "graph/utils/type_utils.h" namespace ge { namespace formats { namespace { const int kDimSize4D = 4; + +const size_t kSingleDim = 1; + +const size_t kNdDimIndexN = 0; +const size_t kNdDimIndexH = 1; +const size_t kNdDimIndexW = 2; + +const size_t kDimDValueBNdFNz = 2; // dim d-value between Nd and FractalZz + +const size_t kNdDimCountBackwardsW = 1; +const size_t kNdDimCountBackwardsWH = 2; + +const size_t kFNzDimCountBackwardsW0 = 1; +const size_t kFNzDimCountBackwardsW0H0 = 2; +const size_t kFNzDimCountBackwardsW0H0H1 = 3; +const size_t 
kFNzDimCountBackwardsW0H0H1W1 = 4; + bool IsDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_type) > 0; } using ShapeVector = std::vector; @@ -60,14 +78,14 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap auto w0 = GetCubeSizeByDataType(data_type); int64_t h0 = kCubeSize; switch (src_shape.size()) { - case 1: - dst_shape.push_back(Ceil(src_shape[0], w0)); - dst_shape.push_back(1); + case kSingleDim: + dst_shape.push_back(Ceil(src_shape[kNdDimIndexN], w0)); + dst_shape.push_back(DIM_DEFAULT_VALUE); dst_shape.push_back(h0); dst_shape.push_back(w0); - hw_shape.push_back(1); - hw_shape.push_back(1); - hw_shape.push_back(src_shape[0]); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(src_shape[kNdDimIndexN]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -76,17 +94,17 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap default: auto size = src_shape.size(); int64_t times = 1; - for (size_t i = 0; i != size - 2; i++) { + for (size_t i = 0; i != size - kDimDValueBNdFNz; i++) { dst_shape.push_back(src_shape[i]); times *= src_shape[i]; } - dst_shape.push_back(Ceil(src_shape[size - 1], w0)); - dst_shape.push_back(Ceil(src_shape[size - 2], h0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsW], w0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsWH], h0)); dst_shape.push_back(h0); dst_shape.push_back(w0); hw_shape.push_back(times); - hw_shape.push_back(src_shape[size - 2]); - hw_shape.push_back(src_shape[size - 1]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -128,16 +146,16 @@ 
Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con } // src&dst_shape can be written as times*H*W & times*W1*H1*H0*W0, respectively. dst_shape_size >= kDimNum4D - auto times = hw_shape.at(0); - auto h = hw_shape.at(1); - auto w = hw_shape.at(2); + auto times = hw_shape.at(kNdDimIndexN); + auto h = hw_shape.at(kNdDimIndexH); + auto w = hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.dst_shape.size(); - auto w1 = args.dst_shape[shape_size - 4]; - auto h1 = args.dst_shape[shape_size - 3]; - auto h0 = args.dst_shape[shape_size - 2]; - auto w0 = args.dst_shape[shape_size - 1]; + auto w1 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0H1W1]; + auto h1 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0H1]; + auto h0 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0]; + auto w0 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0]; auto h1h0 = h1 * h0; auto h1h0w0 = h1h0 * w0; auto w1h1h0w0 = w1 * h1h0w0; @@ -198,16 +216,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con return OUT_OF_MEMORY; } - auto times = dst_hw_shape.at(0); - auto h = dst_hw_shape.at(1); - auto w = dst_hw_shape.at(2); + auto times = dst_hw_shape.at(kNdDimIndexN); + auto h = dst_hw_shape.at(kNdDimIndexH); + auto w = dst_hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.src_shape.size(); - auto w1 = args.src_shape[shape_size - 4]; - auto h1 = args.src_shape[shape_size - 3]; - auto h0 = args.src_shape[shape_size - 2]; - auto w0 = args.src_shape[shape_size - 1]; + auto w1 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0H1W1]; + auto h1 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0H1]; + auto h0 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0]; + auto w0 = args.src_shape[shape_size - kFNzDimCountBackwardsW0]; auto h1h0 = h1 * h0; auto h1h0w0 = h1h0 * w0; auto w1h1h0w0 = w1 * h1h0w0; diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc 
b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc index d890e681..88603d5c 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc @@ -23,12 +23,29 @@ #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" +#include "framework/common/types.h" #include "graph/utils/type_utils.h" namespace ge { namespace formats { namespace { const int kDimSize4D = 4; + +const size_t kSingleDim = 1; + +const size_t kNdDimIndexN = 0; +const size_t kNdDimIndexH = 1; +const size_t kNdDimIndexW = 2; + +const size_t kDimDValueBNdFZz = 2; // dim d-value between Nd and FractalZz + +const size_t kNdDimCountBackwardsW = 1; +const size_t kNdDimCountBackwardsWH = 2; + +const size_t kFZzDimCountBackwardsW0 = 1; +const size_t kFZzDimCountBackwardsW0H0 = 2; +const size_t kFZzDimCountBackwardsW0H0W1 = 3; +const size_t kFZzDimCountBackwardsW0H0W1H1 = 4; bool IsDataTypeSupport(DataType d_type) { return GetSizeByDataType(d_type) > 0; } using ShapeVector = std::vector; @@ -40,8 +57,8 @@ bool CheckShape(Format format, const ShapeVector &shape) { case FORMAT_NHWC: return CheckShapeValid(shape, kDimSize4D); default: - std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + - " and FORMAT_FRACTAL_ZZ is not supported."; + std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + + " and FORMAT_FRACTAL_ZZ is not supported."; GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } @@ -60,14 +77,14 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap auto w0 = GetCubeSizeByDataType(data_type); auto h0 = GetCubeSizeByDataType(data_type); switch (src_shape.size()) { - case 1: - dst_shape.push_back(1); - dst_shape.push_back(Ceil(src_shape[0], w0)); + case kSingleDim: + 
dst_shape.push_back(DIM_DEFAULT_VALUE); + dst_shape.push_back(Ceil(src_shape[kNdDimIndexN], w0)); dst_shape.push_back(h0); dst_shape.push_back(w0); - hw_shape.push_back(1); - hw_shape.push_back(1); - hw_shape.push_back(src_shape[0]); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(src_shape[kNdDimIndexN]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -76,17 +93,17 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap default: auto size = src_shape.size(); int64_t times = 1; - for (size_t i = 0; i != size - 2; i++) { + for (size_t i = 0; i != size - kDimDValueBNdFZz; i++) { dst_shape.push_back(src_shape[i]); times *= src_shape[i]; } - dst_shape.push_back(Ceil(src_shape[size - 2], h0)); - dst_shape.push_back(Ceil(src_shape[size - 1], w0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsWH], h0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsW], w0)); dst_shape.push_back(h0); dst_shape.push_back(w0); hw_shape.push_back(times); - hw_shape.push_back(src_shape[size - 2]); - hw_shape.push_back(src_shape[size - 1]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -127,16 +144,16 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con return OUT_OF_MEMORY; } // The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. 
dst_shape_size >= kDimNum4D - auto times = hw_shape.at(0); - auto h = hw_shape.at(1); - auto w = hw_shape.at(2); + auto times = hw_shape.at(kNdDimIndexN); + auto h = hw_shape.at(kNdDimIndexH); + auto w = hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.dst_shape.size(); - auto h1 = args.dst_shape[shape_size - 4]; - auto w1 = args.dst_shape[shape_size - 3]; - auto h0 = args.dst_shape[shape_size - 2]; - auto w0 = args.dst_shape[shape_size - 1]; + auto h1 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0W1H1]; + auto w1 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0W1]; + auto h0 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0]; + auto w0 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0]; auto h0w0 = h0 * w0; auto w1h0w0 = w1 * h0w0; auto h1w1h0w0 = h1 * w1h0w0; @@ -155,8 +172,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + w1_idx * w0) * size; auto dst_offset = (h0_head + w1_idx * h0w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -171,8 +188,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + src_w_idx) * size; auto dst_offset = (w0_head + w0_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -205,16 +222,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con } // The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. dst_shape_size >= kDimNum4D - auto times = dst_hw_shape.at(0); - auto h = dst_hw_shape.at(1); - auto w = dst_hw_shape.at(2); + auto times = dst_hw_shape.at(kNdDimIndexN); + auto h = dst_hw_shape.at(kNdDimIndexH); + auto w = dst_hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.src_shape.size(); - auto h1 = args.src_shape[shape_size - 4]; - auto w1 = args.src_shape[shape_size - 3]; - auto h0 = args.src_shape[shape_size - 2]; - auto w0 = args.src_shape[shape_size - 1]; + auto h1 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0W1H1]; + auto w1 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0W1]; + auto h0 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0]; + auto w0 = args.src_shape[shape_size - kFZzDimCountBackwardsW0]; auto h0w0 = h0 * w0; auto w1h0w0 = w1 * h0w0; auto h1w1h0w0 = h1 * w1h0w0; @@ -233,8 +250,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto src_offset = (h0_head + w1_idx * h0w0) * size; auto dst_offset = (dst_h_head + w1_idx * w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -249,8 +266,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto dst_w_idx = w1_head + w0_idx; auto dst_offset = (dst_h_head + dst_w_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc index a66aeeb4..49b19f46 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc @@ -35,7 +35,6 @@ * Padding to (N, ceil(Z/16)*16) * Last Step: View the (N, ceil(Z/16)*16) as 4D (N/16, 16, C/16, 16) and transpose to (C/16, N/16, 16, 16) */ - namespace ge { namespace formats { namespace { diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc index e623d9e7..9be74b1f 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -19,6 +19,7 @@ #include #include +#include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" @@ -29,21 +30,21 @@ namespace formats { namespace { std::map>> perm_args{ {FORMAT_NCHW, - {{FORMAT_NHWC, std::vector({0, 2, 3, 1})}, - {FORMAT_HWCN, std::vector({2, 3, 1, 0})}, - {FORMAT_CHWN, 
std::vector({1, 2, 3, 0})}}}, + {{FORMAT_NHWC, std::vector({kNchwN, kNchwH, kNchwW, kNchwC})}, + {FORMAT_HWCN, std::vector({kNchwH, kNchwW, kNchwC, kNchwN})}, + {FORMAT_CHWN, std::vector({kNchwC, kNchwH, kNchwW, kNchwN})}}}, {FORMAT_NHWC, - {{FORMAT_NCHW, std::vector({0, 3, 1, 2})}, - {FORMAT_CHWN, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 3, 0})}}}, + {{FORMAT_NCHW, std::vector({kNhwcN, kNhwcC, kNhwcH, kNhwcW})}, + {FORMAT_CHWN, std::vector({kNhwcC, kNhwcH, kNhwcW, kNhwcN})}, + {FORMAT_HWCN, std::vector({kNhwcH, kNhwcW, kNhwcC, kNhwcN})}}}, {FORMAT_HWCN, - {{FORMAT_NCHW, std::vector({3, 2, 0, 1})}, - {FORMAT_NHWC, std::vector({3, 0, 1, 2})}, - {FORMAT_CHWN, std::vector({2, 0, 1, 3})}}}, + {{FORMAT_NCHW, std::vector({kHwcnN, kHwcnC, kHwcnH, kHwcnW})}, + {FORMAT_NHWC, std::vector({kHwcnN, kHwcnH, kHwcnW, kHwcnC})}, + {FORMAT_CHWN, std::vector({kHwcnC, kHwcnH, kHwcnW, kHwcnN})}}}, {FORMAT_CHWN, - {{FORMAT_NCHW, std::vector({3, 0, 1, 2})}, - {FORMAT_NHWC, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 0, 3})}}}, + {{FORMAT_NCHW, std::vector({kChwnN, kChwnC, kChwnH, kChwnW})}, + {FORMAT_NHWC, std::vector({kChwnN, kChwnH, kChwnW, kChwnC})}, + {FORMAT_HWCN, std::vector({kChwnH, kChwnW, kChwnC, kChwnN})}}}, }; bool IsShapeArgValid(const std::vector &src_shape, const std::vector &perm_arg) { diff --git a/ge/common/formats/utils/formats_definitions.h b/ge/common/formats/utils/formats_definitions.h index 7f873f1b..25f36d6a 100755 --- a/ge/common/formats/utils/formats_definitions.h +++ b/ge/common/formats/utils/formats_definitions.h @@ -23,6 +23,7 @@ static const int kCubeSize = 16; static const int kNiSize = 16; static const int64_t kShapeItemNumMAX = 1024UL * 1024UL * 1024UL * 1024UL; + enum NchwDimIndex { kNchwN, kNchwC, @@ -47,6 +48,14 @@ enum HwcnDimIndex { kHwcnDimsNum }; +enum ChwnDimIndex { + kChwnC, + kChwnH, + kChwnW, + kChwnN, + kChwnDimsNum +}; + enum Nc1hwc0DimIndex { kNc1hwc0N, kNc1hwc0C1, diff --git 
a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc index 7bb1310c..75a36d99 100644 --- a/ge/common/ge/plugin_manager.cc +++ b/ge/common/ge/plugin_manager.cc @@ -123,7 +123,10 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec if (handle == nullptr) { const char *error = mmDlerror(); GE_IF_BOOL_EXEC(error == nullptr, error = ""); - GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", error); + ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, + {"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)}); + GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. Errormessage[%s]!", + file_path_dlopen.c_str(), error); continue; } @@ -132,6 +135,9 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec for (const auto &func_name : func_check_list) { auto real_fn = (void (*)())mmDlsym(handle, const_cast(func_name.c_str())); if (real_fn == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, + {"mmDlsym", FmtToStr(func_name) + " is skipped since function" + + FmtToStr(func_name) + " is not existed!"}); GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), func_name.c_str()); is_valid = false; diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc index b91f1204..44199c32 100755 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -37,6 +37,8 @@ #include "graph/utils/type_utils.h" namespace ge { +const int kBaseInt = 10; + std::map TBEPluginManager::options_ = {}; // Get Singleton Instance @@ -155,7 +157,7 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { domi::FrameworkType type = domi::TENSORFLOW; auto it = options_.find(FRAMEWORK_TYPE); if (it != options_.end()) { - type = static_cast(std::strtol(it->second.c_str(), nullptr, 10)); + type = 
static_cast(std::strtol(it->second.c_str(), nullptr, kBaseInt)); } fmk_type = ge::TypeUtils::FmkTypeToSerialString(type); GELOGI("Framework type is %s.", fmk_type.c_str()); diff --git a/ge/common/ge_common.mk b/ge/common/ge_common.mk index 3fffd203..e28090ad 100755 --- a/ge/common/ge_common.mk +++ b/ge/common/ge_common.mk @@ -7,6 +7,7 @@ GE_COMMON_LOCAL_SRC_FILES := \ helper/om_file_helper.cc \ helper/model_helper.cc \ ../model/ge_model.cc \ + ../model/ge_root_model.cc \ auth/file_saver.cc \ fp16_t.cc \ math/fp16_math.cc \ diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 6f201461..aacef88c 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -32,6 +32,7 @@ using domi::ModelTaskDef; namespace { const int64_t kOriginalOmPartitionNum = 1; +const uint32_t kStatiOmFileModelNum = 1; } @@ -39,7 +40,7 @@ namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelHelper::~ModelHelper() { (void)ReleaseLocalModelData(); } Status ModelHelper::SaveModelPartition(std::shared_ptr &om_file_save_helper, ModelPartitionType type, - const uint8_t *data, size_t size) { + const uint8_t *data, size_t size, size_t model_index) { if (size < 1 || size > UINT32_MAX) { GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu invalid", size); if (size > UINT32_MAX) { @@ -68,25 +69,16 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil partition_model.data = const_cast(data); partition_model.size = static_cast(size); partition_model.type = type; - if (om_file_save_helper->AddPartition(partition_model) != SUCCESS) { + if (om_file_save_helper->AddPartition(partition_model, model_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu", size); return PARAM_INVALID; } return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model, - const SaveParam &save_param, - const std::string 
&output_file, - ModelBufferData& model) { - if (output_file.empty()) { - GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); - return FAILED; - } - GE_IF_BOOL_EXEC(ge_model == nullptr, GELOGE(FAILED, "Ge_model is nullptr"); return FAILED); - std::shared_ptr om_file_save_helper = ge::MakeShared(); - GE_CHECK_NOTNULL(om_file_save_helper); +Status ModelHelper::SaveModelDef(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, ge::Buffer &model_buffer, size_t model_index) { ModelPtr model_tmp = ge::MakeShared(ge_model->GetName(), ge_model->GetPlatformVersion()); if (model_tmp == nullptr) { GELOGE(FAILED, "Create Model %s Ptr failed", ge_model->GetName().c_str()); @@ -96,16 +88,21 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod model_tmp->SetVersion(ge_model->GetVersion()); model_tmp->SetAttr(ge_model->MutableAttrMap()); - ge::Buffer model_buffer; + (void)model_tmp->Save(model_buffer); GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize()); if (model_buffer.GetSize() > 0) { if (SaveModelPartition(om_file_save_helper, ModelPartitionType::MODEL_DEF, model_buffer.GetData(), - model_buffer.GetSize()) != SUCCESS) { + model_buffer.GetSize(), model_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Add model graph partition failed"); return PARAM_INVALID; } } + return SUCCESS; +} + +Status ModelHelper::SaveModelWeights(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_index) { auto ge_model_weight = ge_model->GetWeight(); GELOGD("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); // weight is not necessary @@ -113,31 +110,43 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, ge_model_weight.GetData(), - ge_model_weight.GetSize()), "Add weight partition failed"); + ge_model_weight.GetSize(), model_index), "Add weight 
partition failed"); } + return SUCCESS; +} +Status ModelHelper::SaveModelTbeKernel(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_index) { TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); GELOGD("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); if (tbe_kernel_store.DataSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, - ModelPartitionType::TBE_KERNELS, - tbe_kernel_store.Data(), - tbe_kernel_store.DataSize()), "Add tbe kernel partition failed"); + GE_CHK_STATUS_RET( + SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, + ge_model->GetTBEKernelStore().Data(), ge_model->GetTBEKernelStore().DataSize(), + model_index), "Add tbe kernel partition failed"); } - // no need to check value, DATA->NetOutput (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); + return SUCCESS; +} + +Status ModelHelper::SaveModelCustAICPU(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_index) { CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); GELOGD("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); if (cust_aicpu_kernel_store.DataSize() > 0) { GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::CUST_AICPU_KERNELS, - cust_aicpu_kernel_store.Data(), - cust_aicpu_kernel_store.DataSize()), + ge_model->GetCustAICPUKernelStore().Data(), + cust_aicpu_kernel_store.DataSize(), model_index), "Add cust aicpu kernel partition failed"); } + return SUCCESS; +} +Status ModelHelper::SaveModelTaskDef(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, ge::Buffer &task_buffer, size_t model_index) { std::shared_ptr model_task_def = ge_model->GetModelTaskDefPtr(); if (model_task_def == nullptr) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed"); @@ -146,9 +155,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status 
ModelHelper::SaveToOmMod size_t partition_task_size = model_task_def->ByteSizeLong(); GE_IF_BOOL_EXEC(partition_task_size == 0 || partition_task_size > INT_MAX, GELOGE(FAILED, "Model_def's byte size (%zu) is invalid!", partition_task_size); - return FAILED); + return FAILED); - ge::Buffer task_buffer(partition_task_size); + task_buffer = ge::Buffer(partition_task_size); if (task_buffer.GetSize() == 0) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc model task def buffer failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; @@ -159,21 +168,28 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod GELOGD("TASK_INFO size is %zu", partition_task_size); if (SaveModelPartition(om_file_save_helper, ModelPartitionType::TASK_INFO, task_buffer.GetData(), - partition_task_size) != SUCCESS) { + partition_task_size, model_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Add model task def partition failed"); return PARAM_INVALID; } + return SUCCESS; +} + +Status ModelHelper::SaveModelHeader(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_num) { // Save target/version to model_header ModelFileHeader &model_header = om_file_save_helper->GetModelFileHeader(); model_header.platform_type = ge_model->GetPlatformType(); model_header.om_ir_version = ge_model->GetVersion(); + model_header.model_num = model_num; std::string platform_version = ge_model->GetPlatformVersion(); errno_t err; err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), platform_version.size() + 1); if (err != EOK) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelHelper SaveModel failed while allocating memory for platform_version."); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, + "ModelHelper SaveModel failed while allocating memory for platform_version."); return ACL_ERROR_GE_MEMORY_ALLOCATION; } string version = reinterpret_cast(model_header.platform_version); @@ -188,8 +204,142 @@ FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod } string model_name = reinterpret_cast(model_header.name); GELOGD("Model name save:%s", model_name.c_str()); + return SUCCESS; +} + +Status ModelHelper::SaveAllModelPartiton(std::shared_ptr& om_file_save_helper, + const GeModelPtr &ge_model, ge::Buffer &model_buffer, + ge::Buffer &task_buffer, size_t model_index) { + if (SaveModelDef(om_file_save_helper, ge_model, model_buffer, model_index) != SUCCESS) { + GELOGE(FAILED, "save model def failed"); + return FAILED; + } + + if (SaveModelWeights(om_file_save_helper, ge_model, model_index) != SUCCESS) { + GELOGE(FAILED, "save model weights failed"); + return FAILED; + } + + if (SaveModelTbeKernel(om_file_save_helper, ge_model, model_index) != SUCCESS) { + GELOGE(FAILED, "save model tbe kernel failed"); + return FAILED; + } + + if (SaveModelCustAICPU(om_file_save_helper, ge_model, model_index) != SUCCESS) { + GELOGE(FAILED, "save model cust ai cpu failed"); + return FAILED; + } + + + if (SaveModelTaskDef(om_file_save_helper, ge_model, task_buffer, model_index) != SUCCESS) { + GELOGE(FAILED, "save task def failed"); + return FAILED; + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model, + const SaveParam &save_param, + const std::string &output_file, + ModelBufferData& model) { + if (output_file.empty()) { + GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); + return FAILED; + } - Status ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_); + GE_IF_BOOL_EXEC(ge_model == nullptr, GELOGE(FAILED, "Ge_model is nullptr"); return FAILED); + std::shared_ptr om_file_save_helper = ge::MakeShared(); + GE_CHECK_NOTNULL(om_file_save_helper); + ge::Buffer model_buffer; + ge::Buffer task_buffer; + + auto ret = SaveAllModelPartiton(om_file_save_helper, ge_model, model_buffer, task_buffer); + if (ret != SUCCESS) { + GELOGE(ret, "save all 
model partition failed"); + return ret; + } + + ret = SaveModelHeader(om_file_save_helper, ge_model); + if (ret != SUCCESS) { + GELOGE(ret, "save model header failed"); + return ret; + } + + ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_); + if (ret != SUCCESS) { + GELOGE(FAILED, "OmFileSaveHelper SaveModel return fail."); + return ret; + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmRootModel( + const GeRootModelPtr &ge_root_model, + const SaveParam &save_param, + const std::string &output_file, + ModelBufferData& model, + bool is_unknown_shape) { + + GE_CHECK_NOTNULL(ge_root_model); + GE_IF_BOOL_EXEC(ge_root_model == nullptr, GELOGE(FAILED, "Ge_root_model is nullptr"); return FAILED); + + auto &name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); + GE_IF_BOOL_EXEC(name_to_ge_model.empty(), GELOGE(FAILED, "Ge_root_model has no sub model"); return FAILED); + GE_IF_BOOL_EXEC(output_file.empty(), + GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); + return FAILED); + + if (!is_unknown_shape) { + auto &model_root = name_to_ge_model.begin()->second; + return SaveToOmModel(model_root, save_param, output_file, model); + } + + std::shared_ptr om_file_save_helper = ge::MakeShared(); + GE_CHECK_NOTNULL(om_file_save_helper); + + auto &first_ge_model = name_to_ge_model.at(ge_root_model->GetRootGraph()->GetName()); + + // ge root model must be the first to be loaded + vector model_names{ge_root_model->GetRootGraph()->GetName()}; + for (auto &item : name_to_ge_model) { + if (item.first != model_names.front()) { + model_names.emplace_back(item.first); + } + } + + vector model_buffers(model_names.size()); + vector task_buffers(model_names.size()); + + size_t cur_index = 0; + + if (model_names.size() > 1) { + GELOGD("only save first model MODEL_DEF"); + if (SaveModelDef(om_file_save_helper, first_ge_model, model_buffers[cur_index], cur_index) 
!= SUCCESS) { + GELOGE(FAILED, "save model def failed"); + return FAILED; + } + ++cur_index; + } + + for (; cur_index < model_names.size(); ++cur_index) { + auto model_name = model_names[cur_index]; + GELOGD("cur model %s index is %zu", model_name.c_str(), cur_index); + const GeModelPtr &ge_model = name_to_ge_model.at(model_name); + auto ret = SaveAllModelPartiton(om_file_save_helper, ge_model, model_buffers[cur_index], + task_buffers[cur_index], cur_index); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Save model %s failed", model_name.c_str()); + return INTERNAL_ERROR; + } + } + + auto ret = SaveModelHeader(om_file_save_helper, first_ge_model, model_names.size()); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Save model %s header failed", first_ge_model->GetName().c_str()); + return INTERNAL_ERROR; + } + + ret = om_file_save_helper->SaveRootModel(save_param, output_file.c_str(), model, is_offline_); if (ret != SUCCESS) { GELOGE(FAILED, "OmFileSaveHelper SaveModel return fail."); return FAILED; @@ -288,7 +438,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c } file_header_ = reinterpret_cast(model_data.model_data); - OmFileLoadHelper om_load_helper; status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_); if (status != SUCCESS) { @@ -310,7 +459,61 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c GELOGE(status, "GenerateGeModel failed"); return status; } + GELOGD("in ModelHelper::LoadModel, is_assign_model_ is setted to true!"); + is_assign_model_ = true; + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootModel(const ge::ModelData &model_data) { + if (model_data.model_data == nullptr || model_data.model_len == 0) { + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0"); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; + } + + if (is_assign_model_) { + GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper 
has already loaded!"); + return GE_EXEC_LOAD_MODEL_REPEATED; + } + if (ReleaseLocalModelData() != SUCCESS) { + GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed."); + return INTERNAL_ERROR; + } + + Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); + if (status != SUCCESS) { + GELOGE(status, "Parse model content failed!"); + return status; + } + + file_header_ = reinterpret_cast(model_data.model_data); + + //model verison 1.0 file header does not have model_num member + is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION && + file_header_->model_num > kStatiOmFileModelNum; + GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version); + + OmFileLoadHelper om_load_helper; + if (is_unknown_shape_model_) { + auto model_num = file_header_->model_num; + status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_, model_num); + } else { + status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_); + } + if (status != SUCCESS) { + GELOGE(status, "Om_load_helper init failed"); + model_addr_tmp_ = nullptr; + return status; + } + // Encrypt model need to del temp model/no encrypt model don't need to del model + model_addr_tmp_ = nullptr; + + status = GenerateGeRootModel(om_load_helper); + if (status != SUCCESS) { + GELOGE(status, "GenerateGeRootModel failed"); + return status; + } + GELOGD("in ModelHelper::LoadRootModel, is_assign_model_ is setted to true!"); is_assign_model_ = true; return SUCCESS; } @@ -341,6 +544,61 @@ Status ModelHelper::GenerateGeModel(OmFileLoadHelper &om_load_helper) { return SUCCESS; } +Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { + GELOGD("Begin to generate ge root model"); + root_model_ = ge::MakeShared(); + GE_CHECK_NOTNULL(root_model_); + if (!is_unknown_shape_model_) { + if (GenerateGeModel(om_load_helper) != SUCCESS) { + GELOGE(FAILED, "GenerateGeModel failed"); + return FAILED; + } 
+ GE_CHECK_NOTNULL(model_); + root_model_->SetRootGraph(GraphUtils::GetComputeGraph(model_->GetGraph())); + return SUCCESS; + } + + bool is_first_model = true; + for (size_t mode_index = 0; mode_index < file_header_->model_num; ++mode_index) { + GeModelPtr cur_model = ge::MakeShared(); + Status ret = LoadModelData(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; + } + + if (is_first_model) { + is_first_model = false; + root_model_->SetRootGraph(GraphUtils::GetComputeGraph(cur_model->GetGraph())); + root_model_->SetModelId(cur_model->GetModelId()); + model_ = cur_model; + continue; + } + + ret = LoadWeights(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; + } + + ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; + } + + ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; + } + + ret = LoadTask(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_TASK_PARTITION_FAILED; + } + root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); + } + + return SUCCESS; +} + Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper) { ModelPartition partition_model_def; // no need to check value, DATA->NetOutput @@ -366,6 +624,28 @@ void ModelHelper::SetModelToGeModel(ge::Model &model) { model_->SetAttr(model.MutableAttrMap()); } +Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) { + ModelPartition partition_model_def; + // no need to check value, DATA->NetOutput + om_load_helper.GetModelPartition(ModelPartitionType::MODEL_DEF, partition_model_def, mode_index); + GELOGD("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size); + + 
ge::Model model; + if (ge::Model::Load(partition_model_def.data, partition_model_def.size, model) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Load model failed."); + return INTERNAL_ERROR; + } + + cur_model->SetGraph(model.GetGraph()); + cur_model->SetName(model.GetName()); + cur_model->SetVersion(model.GetVersion()); + cur_model->SetPlatformVersion(model.GetPlatformVersion()); + cur_model->SetAttr(model.MutableAttrMap()); + + return SUCCESS; +} + + Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) { ModelPartition partition; if (om_load_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition) != SUCCESS) { @@ -379,6 +659,19 @@ Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) { return SUCCESS; } +Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) { + ModelPartition partition; + if (om_load_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition, mode_index) != SUCCESS) { + GELOGE(FAILED, "Get weight model partition failed."); + return FAILED; + } + ge::Buffer weight = ge::Buffer::CopyFrom(partition.data, partition.size); + cur_model->SetWeight(weight); + + GELOGD("GetWeight size:%u", partition.size); + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(OmFileLoadHelper &om_load_helper) { ModelPartition task_partition; if (om_load_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { @@ -398,6 +691,27 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om return SUCCESS; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(OmFileLoadHelper &om_load_helper, + GeModelPtr &cur_model, + size_t mode_index) { + ModelPartition task_partition; + if (om_load_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition, mode_index) != SUCCESS) { + GELOGE(FAILED, "Get task model partition failed."); + return FAILED; + } + 
std::shared_ptr task = ge::MakeShared(); + GE_CHECK_NOTNULL(task); + if (task_partition.size != 0) { + if (!ReadProtoFromArray(task_partition.data, task_partition.size, task.get())) { + GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed."); + return INTERNAL_ERROR; + } + GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num()); + } + cur_model->SetModelTaskDef(task); + return SUCCESS; +} + Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) { // Load tbe kernels ModelPartition partition_kernel_def; @@ -414,6 +728,23 @@ Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) { return SUCCESS; } +Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) { + // Load tbe kernels + ModelPartition partition_kernel_def; + TBEKernelStore kernel_store; + if (om_load_helper.GetModelPartition(ModelPartitionType::TBE_KERNELS, partition_kernel_def, mode_index) == + SUCCESS) { + GELOGD("Kernels partition size:%u", partition_kernel_def.size); + if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { + GELOGD("Load tbe kernels success"); + } else { + GELOGW("Load tbe kernels failed"); + } + } + cur_model->SetTBEKernelStore(kernel_store); + return SUCCESS; +} + Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) { // Load cust aicpu kernels ModelPartition partition_kernel_def; @@ -421,19 +752,39 @@ Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) { if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def) == SUCCESS) { GELOGD("Kernels partition size:%u", partition_kernel_def.size); if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { - GELOGI("Load cust aicpu kernels success"); + GELOGD("Load cust aicpu kernels success"); + } else { + GELOGW("Load cust aicpu kernels failed"); } } 
model_->SetCustAICPUKernelStore(kernel_store); return SUCCESS; } +Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, + GeModelPtr &cur_model, size_t mode_index) { + // Load cust aicpu kernels + ModelPartition partition_kernel_def; + CustAICPUKernelStore kernel_store; + if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def, mode_index) + == SUCCESS) { + GELOGD("Kernels partition size:%u", partition_kernel_def.size); + if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { + GELOGD("Load cust aicpu kernels success"); + } else { + GELOGW("Load cust aicpu kernels failed"); + } + } + cur_model->SetCustAICPUKernelStore(kernel_store); + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeModelPtr ModelHelper::GetGeModel() { if (model_ != nullptr) { return model_; } - GELOGI("Model has not been loaded!"); + GELOGD("Model has not been loaded!"); std::shared_ptr out_model = ge::MakeShared(); if (out_model == nullptr) { return nullptr; @@ -441,6 +792,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeModelPtr ModelHelper::GetGeMo return out_model; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeRootModelPtr ModelHelper::GetGeRootModel() { + if (root_model_ != nullptr) { + return root_model_; + } + + GELOGD("Model has not been loaded!"); + std::shared_ptr out_model = ge::MakeShared(); + if (out_model == nullptr) { + return nullptr; + } + return out_model; +} + + Status ModelHelper::ReleaseLocalModelData() noexcept { Status result = SUCCESS; if (model_addr_tmp_ != nullptr) { diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index ce88cd08..d1c52b13 100644 --- a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -52,6 +52,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(u return SUCCESS; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status 
OmFileLoadHelper::Init(uint8_t *model_data, + uint32_t model_data_size, + uint32_t model_num) { + Status status = LoadModelPartitionTable(model_data, model_data_size, model_num); + if (status != SUCCESS) { + return status; + } + is_inited_ = true; + return SUCCESS; +} + // Use both FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type, ModelPartition &partition) { @@ -79,6 +90,37 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod return SUCCESS; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type, + ModelPartition &partition, + size_t model_index) { + if (!is_inited_) { + GELOGE(PARAM_INVALID, "OmFileLoadHelper has not been initialized!"); + return PARAM_INVALID; + } + if (model_index >= model_contexts_.size()) { + GELOGE(PARAM_INVALID, "cur index : %zu, model_contexts size:%zu", model_index, model_contexts_.size()); + return PARAM_INVALID; + } + auto &cur_ctx = model_contexts_[model_index]; + bool found = false; + for (ModelPartition &part : cur_ctx.partition_datas_) { + if (part.type == type) { + partition = part; + found = true; + break; + } + } + + if (!found) { + if (type != ModelPartitionType::TBE_KERNELS && type != ModelPartitionType::WEIGHTS_DATA && + type != ModelPartitionType::CUST_AICPU_KERNELS) { + GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast(type)); + return FAILED; + } + } + return SUCCESS; +} + Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { // Parameter validity check if (model.model_data == nullptr) { @@ -138,7 +180,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint context_.partition_datas_.push_back(partition); if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater 
than the model data size %u.", + GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, + "The partition size %zu is greater than the model data size %u.", partition.size + mem_offset, model_data_size); return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; } @@ -148,6 +191,61 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint return SUCCESS; } +Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t model_data_size, uint32_t model_num) { + if (model_data == nullptr) { + GELOGE(PARAM_INVALID, "Param model_data must not be null!"); + return PARAM_INVALID; + } + + uint32_t cur_offset = 0; + for (uint32_t index = 0; index < model_num; ++index) { + // Init partition table + auto partition_table = reinterpret_cast(model_data + cur_offset); + size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); + cur_offset += partition_table_size; + GELOGD("Cur model index %zu: ModelPartitionTable num :%u, " + "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", + index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); + if (model_data_size <= cur_offset) { + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", + partition_table->num, model_data_size); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; + } + + for (uint32_t i = 0; i < partition_table->num; i++) { + ModelPartition partition; + partition.size = partition_table->partition[i].mem_size; + partition.data = model_data + cur_offset; + partition.type = partition_table->partition[i].type; + if (index >= model_contexts_.size()) { + if (index != model_contexts_.size()) { + GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index); + return FAILED; + } + + OmFileContext tmp_ctx; + tmp_ctx.partition_datas_.push_back(partition); + model_contexts_.push_back(tmp_ctx); + } else { + model_contexts_[index].partition_datas_.push_back(partition); + } + + if (partition.size > 
model_data_size || cur_offset > model_data_size - partition.size) { + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", + partition.size + cur_offset, model_data_size); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; + } + cur_offset += partition.size; + GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast(partition.type), partition.size, index); + } + } + if (cur_offset != model_data_size) { + GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size); + return FAILED; + } + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::vector &OmFileSaveHelper::GetModelPartitions() const { return context_.partition_datas_; @@ -172,6 +270,28 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSave return partition_table; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSaveHelper::GetPartitionTable( + size_t cur_ctx_index) { + auto &cur_ctx = model_contexts_[cur_ctx_index]; + auto partition_size = static_cast(cur_ctx.partition_datas_.size()); + // Build ModelPartitionTable, flex array + cur_ctx.partition_table_.clear(); + cur_ctx.partition_table_.resize(sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * partition_size, 0); + + auto partition_table = reinterpret_cast(cur_ctx.partition_table_.data()); + partition_table->num = partition_size; + + uint32_t mem_offset = 0; + for (uint32_t i = 0; i < partition_size; i++) { + ModelPartition partition = cur_ctx.partition_datas_[i]; + partition_table->partition[i] = {partition.type, mem_offset, partition.size}; + mem_offset += partition.size; + GELOGD("Partition, type:%d, size:%u", static_cast(partition.type), partition.size); + } + return partition_table; +} + + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileSaveHelper::AddPartition(ModelPartition &partition) { if 
(ge::CheckUint32AddOverflow(context_.model_data_len_, partition.size) != SUCCESS) { GELOGE(FAILED, "UINT32 %u and %u addition can result in overflow!", context_.model_data_len_, partition.size); @@ -182,6 +302,27 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileSaveHelper::AddPar return SUCCESS; } +Status OmFileSaveHelper::AddPartition(ModelPartition &partition, size_t cur_index) { + if (ge::CheckUint32AddOverflow(context_.model_data_len_, partition.size) != SUCCESS) { + GELOGE(FAILED, "UINT32 %u and %u addition can result in overflow!", context_.model_data_len_, partition.size); + return FAILED; + } + if (cur_index >= model_contexts_.size()) { + if (cur_index != model_contexts_.size()) { + GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", cur_index); + return FAILED; + } + OmFileContext tmp_ctx; + tmp_ctx.model_data_len_ += partition.size; + tmp_ctx.partition_datas_.push_back(partition); + model_contexts_.push_back(tmp_ctx); + } else { + model_contexts_[cur_index].model_data_len_ += partition.size; + model_contexts_[cur_index].partition_datas_.push_back(partition); + } + return SUCCESS; +} + Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline) { (void)save_param.cert_file; @@ -198,6 +339,10 @@ Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *outp Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferData &model, bool is_offline) { #if !defined(NONSUPPORT_SAVE_TO_FILE) + if (context_.partition_datas_.empty()) { + GE_CHK_BOOL_EXEC(!model_contexts_.empty(), return FAILED, "mode contexts empty"); + context_ = model_contexts_.front(); + } uint32_t model_data_len = context_.model_data_len_; if (model_data_len == 0) { GELOGE(domi::PARAM_INVALID, "Model data len error! 
should not be 0"); @@ -231,4 +376,53 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat return SUCCESS; #endif } + +Status OmFileSaveHelper::SaveRootModel(const SaveParam &save_param, const char *output_file, + ModelBufferData &model, bool is_offline) { + (void)save_param.cert_file; + (void)save_param.ek_file; + (void)save_param.encode_mode; + (void)save_param.hw_key_file; + (void)save_param.pri_key_file; + +#if !defined(NONSUPPORT_SAVE_TO_FILE) + vector model_partition_tabels; + vector> all_model_partitions; + for (size_t ctx_index = 0; ctx_index < model_contexts_.size(); ++ctx_index) { + auto &cur_ctx = model_contexts_[ctx_index]; + uint32_t cur_model_data_len = cur_ctx.model_data_len_; + if (cur_model_data_len == 0) { + GELOGE(domi::PARAM_INVALID, "Model data len error! should not be 0"); + return domi::PARAM_INVALID; + } + + auto tmp_table = GetPartitionTable(ctx_index); + if (tmp_table == nullptr) { + GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile execute failed: partition_table is NULL."); + return ge::GE_GRAPH_SAVE_FAILED; + } + uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*tmp_table); + FMK_UINT32_ADDCHECK(size_of_table, cur_model_data_len) + FMK_UINT32_ADDCHECK(size_of_table + cur_model_data_len, model_header_.length) + model_header_.length += size_of_table + cur_model_data_len; + model_partition_tabels.push_back(tmp_table); + all_model_partitions.push_back(cur_ctx.partition_datas_); + GELOGD("sizeof(ModelPartitionTable):%u, cur_model_data_len:%u, cur_context_index:%zu", + size_of_table, cur_model_data_len, ctx_index); + } + Status ret; + if (is_offline) { + ret = FileSaver::SaveToFile(output_file, model_header_, model_partition_tabels, all_model_partitions); + } else { + GELOGW("do not support save ge root model to buff now"); + return FAILED; + } + if (ret == SUCCESS) { + GELOGD("Save model success without encrypt."); + } + return ret; +#else + return SUCCESS; +#endif +} } // namespace ge diff --git 
a/ge/common/op/ge_op_utils.cc b/ge/common/op/ge_op_utils.cc index 579190d6..fc2990b6 100644 --- a/ge/common/op/ge_op_utils.cc +++ b/ge/common/op/ge_op_utils.cc @@ -357,7 +357,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::TransDataHWCK2KCH const char *w_data = (const char *)input; int64_t count = h * w * c * k; - GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return ); + GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return); float *buf = new (std::nothrow) float[count](); GE_RT_VOID_CHECK_NOTNULL(buf); float *src_buff = nullptr; diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc new file mode 100644 index 00000000..640f77a1 --- /dev/null +++ b/ge/common/profiling/ge_profiling.cc @@ -0,0 +1,199 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/profiling/ge_profiling.h" +#include "runtime/base.h" +#include "common/profiling/profiling_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" +#include "graph/load/graph_loader.h" +#include "init/gelib.h" +#include "framework/common/ge_inner_error_codes.h" + +namespace { +const uint32_t kDeviceListIndex = 3; +const std::string kDeviceNums = "devNums"; +const std::string kDeviceIdList = "devIdList"; +const std::string kProfilingInit = "prof_init"; +const std::string kProfilingFinalize = "prof_finalize"; +const std::string kProfilingStart = "prof_start"; +const std::string kProfilingStop = "prof_stop"; +const std::string kProfModelSubscribe = "prof_model_subscribe"; +const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; +const std::string kRtSetDeviceRegName = "profiling"; + +const std::map kProfCommandTypeMap = { + {kProfCommandhandleInit, kProfilingInit}, + {kProfCommandhandleStart, kProfilingStart}, + {kProfCommandhandleStop, kProfilingStop}, + {kProfCommandhandleFinalize, kProfilingFinalize}, + {kProfCommandhandleModelSubscribe, kProfModelSubscribe}, + {kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; +} // namespace + +bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector &prof_config_params) { + prof_config_params.clear(); + prof_config_params.emplace_back(kDeviceNums); + prof_config_params.emplace_back(std::to_string(profCommand.devNums)); + prof_config_params.emplace_back(kDeviceIdList); + std::string devID = ""; + if (profCommand.devNums == 0) { + GELOGW("The device num is invalid."); + return false; + } + for (uint32_t i = 0; i < profCommand.devNums; i++) { + devID.append(std::to_string(profCommand.devIdList[i])); + if (i != profCommand.devNums - 1) { + devID.append(","); + } + } + + prof_config_params.push_back(devID); + return true; +} + +bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { + if 
(deviceid_list == nullptr) { + GELOGE(ge::PARAM_INVALID, "deviceIdList is nullptr"); + return false; + } + if (device_nums == 0 || device_nums > MAX_DEV_NUM) { + GELOGE(ge::PARAM_INVALID, "The device nums: %u is invalid.", device_nums); + return false; + } + + // real device num + int32_t dev_count = 0; + rtError_t rt_err = rtGetDeviceCount(&dev_count); + if (rt_err != RT_ERROR_NONE) { + GELOGE(ge::INTERNAL_ERROR, "Get the Device count fail."); + return false; + } + + if (device_nums > static_cast(dev_count)) { + GELOGE(ge::PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); + return false; + } + + std::unordered_set record; + for (size_t i = 0; i < device_nums; ++i) { + uint32_t dev_id = deviceid_list[i]; + if (dev_id >= static_cast(dev_count)) { + GELOGE(ge::PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); + return false; + } + if (record.count(dev_id) > 0) { + GELOGE(ge::PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); + return false; + } + record.insert(dev_id); + } + return true; +} + +ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { + if (func == nullptr) { + GELOGE(ge::PARAM_INVALID, "Msprof ctrl callback is nullptr."); + return ge::PARAM_INVALID; + } + if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { + GELOGW("Msprof ctrl callback is exist, just ignore it."); + } else { + GELOGI("GE register Msprof ctrl callback."); + ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); + } + return ge::SUCCESS; +} + +ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { + if (func == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofSetDeviceCallback callback is nullptr."); + return ge::PARAM_INVALID; + } + // Pass MsprofSetDeviceCallback to runtime + GELOGI("GE pass setdevice callback to runtime."); + ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast(func)); + if (rt_ret != 
ge::SUCCESS) { + GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); + return rt_ret; + } + return ge::SUCCESS; +} + +ge::Status RegProfReporterCallback(MsprofReporterCallback func) { + if (func == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return ge::PARAM_INVALID; + } + if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) { + GELOGW("Msprof reporter callback is exist, just ignore it."); + } else { + GELOGI("GE register Msprof reporter callback."); + ge::ProfilingManager::Instance().SetMsprofReporterCallback(func); + // Pass MsprofReporterCallback to runtime + ge::Status rt_ret = rtSetMsprofReporterCallback(func); + if (rt_ret != ge::SUCCESS) { + GELOGE(rt_ret, "Pass MsprofReporterCallback to runtime failed!!"); + return rt_ret; + } + // Pass MsprofReporterCallback to hccl + } + return ge::SUCCESS; +} + +ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) { + if (type != kProfCommandhandleFinalize) { + GE_CHECK_NOTNULL(data); + } + ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data; + auto iter = kProfCommandTypeMap.find(type); + if (iter == kProfCommandTypeMap.end()) { + GELOGW("The prof comand type is invalid."); + return ge::PARAM_INVALID; + } + std::vector prof_params; + if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { + if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { + return ge::FAILED; + } + + if (!TransProfConfigToParam(*prof_config_param, prof_params)) { + GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed"); + return ge::PARAM_INVALID; + } + } + ge::GraphLoader graph_loader; + ge::Command command; + command.cmd_params.clear(); + command.cmd_type = iter->second; + command.cmd_params = prof_params; + if (type != kProfCommandhandleFinalize) { + command.module_index = prof_config_param->profSwitch; + } + GELOGI("GE 
commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index); + if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { + GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); + } + ge::Status ret = graph_loader.CommandHandle(command); + if (ret != ge::SUCCESS) { + GELOGE(ret, "Handle profiling command failed"); + return ge::FAILED; + } + + GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index); + return ge::SUCCESS; +} + diff --git a/ge/common/profiling/ge_runner_profiling.cc b/ge/common/profiling/ge_runner_profiling.cc new file mode 100644 index 00000000..067aafe3 --- /dev/null +++ b/ge/common/profiling/ge_runner_profiling.cc @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/profiling/ge_runner_profiling.h" +#include "init/gelib.h" + +bool IsInitialize() { + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || instance_ptr->InitFlag() == false) { + return false; + } + return true; +} diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 2f0f061f..456cb0a4 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -24,16 +24,9 @@ #include "graph/load/new_model_manager/davinci_model.h" namespace { -const char *const kJobID = "jobID"; -const char *const kDeviceID = "deviceID"; -const char *const kStartCfg = "startCfg"; -const char *const kFeatures = "features"; -const char *const kConf = "conf"; -const char *const kEvents = "events"; -const char *const kAiCoreEvents = "ai_core_events"; -const char *const kName = "name"; -const char *const kTraceID = "traceId"; -const char *const kProfDir = "resultPath"; +const char *const kTrainingTrace = "training_trace"; +const char *const kFpPoint = "fp_point"; +const char *const kBpPoint = "bp_point"; const size_t kReportMaxLen = 2048; const int32_t kMaxDeviceNum = 256; const std::string kConfigNumsdev = "devNums"; @@ -45,7 +38,13 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; } // namespace namespace ge { -ProfilingManager::ProfilingManager() : subscribe_count_(0) {} +ProfilingManager::ProfilingManager() : is_load_profiling_(false), + is_execute_profiling_(false), + is_training_trace_(false), + subscribe_count_(0) { + prof_cb_.msprofCtrlCallback = nullptr; + prof_cb_.msprofReporterCallback = nullptr; +} ProfilingManager::~ProfilingManager() {} @@ -58,44 +57,29 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); subscribe_count_ = 0; - job_id_ = options.job_id; - - GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); - + 
GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str()); - - Status ret; - if (!recv_profiling_config_.empty()) { - GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); - ret = InitFromAclCfg(recv_profiling_config_); - } else { - ret = InitFromOptions(options); - if (ret == SUCCESS && is_load_profiling_) { - device_id_.push_back(options.device_id); - } - } + struct MsprofGeOptions prof_conf = {{ 0 }}; + Status ret = InitFromOptions(options, prof_conf); if (ret != SUCCESS) { GELOGE(ret, "Failed to init profiling."); return ret; } - if (is_load_profiling_) { - // register Framework to profiling - int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); - if (result != 0) { - GELOGE(FAILED, "Register profiling engine failed."); - return FAILED; + if (is_execute_profiling_) { + if (prof_cb_.msprofCtrlCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); + return ge::PARAM_INVALID; } - // profiling startup first time - GELOGI("Begin to init profiling, device num %zu", device_id_.size()); - for (size_t i = 0; i < device_id_.size(); ++i) { - ret = StartProfiling(0, device_id_[i]); - if (ret != SUCCESS) { - GELOGW("Profiling start failed on device %d.", device_id_[i]); - continue; - } - GELOGI("Profiling init succ on device %d.", device_id_[i]); + int32_t cb_ret = prof_cb_.msprofCtrlCallback( + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), + static_cast(&prof_conf), sizeof(MsprofGeOptions)); + if (cb_ret != 0) { + GELOGE(FAILED, "Call msprofCtrlCallback failed, type:%u, return:%d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); + return FAILED; } + GELOGI("Profiling init success"); } else { GELOGI("The profiling is off, skip the initialization"); } @@ -103,288 +87,116 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status 
ProfilingManager::InitFromAclCfg( - const std::string &config) { +ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf) { #ifdef DAVINCI_SUPPORT_PROFILING - try { - is_load_profiling_ = false; - is_execute_profiling_ = false; - profiling_opts_.clear(); - op_trace_conf_.clear(); - Json start_prof_conf = Json::parse(config); - Json &prof_conf = start_prof_conf[kStartCfg][0]; - job_id_ = prof_conf[kJobID]; - auto iter = prof_conf.find(kProfDir); - if (iter != prof_conf.end()) { - prof_dir_ = prof_conf[kProfDir]; - } - Json &device_id = prof_conf[kDeviceID]; - if (device_id.size() != 0) { - vector().swap(device_id_); - bool is_all = false; - for (size_t i = 0; i < device_id.size(); i++) { - std::string device_id_str = device_id[i].get(); - if (device_id_str == "all") { - is_all = true; - break; - } - device_id_.push_back(std::stoi(device_id_str)); - } - if (is_all) { - int32_t count = 0; - rtError_t rt_err = rtGetDeviceCount(&count); - if (rt_err != RT_ERROR_NONE) { - GELOGE(FAILED, "Call rtGetDeviceCount to get device failed."); - } - - vector().swap(device_id_); - for (int32_t i = 0; i < count; ++i) { - device_id_.push_back(i); - } - } + // enable profiling by env + char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 }; + is_load_profiling_ = false; // Change in ProfInit + is_execute_profiling_ = false; + + if (options.profiling_mode == "1" && !options.profiling_options.empty()) { + // enable profiling by ge option + if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), + options.profiling_options.size()) != EOK) { + GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); + return INTERNAL_ERROR; } - - Json &features = prof_conf[kFeatures]; - if (ParseFeaturesFromAclCfg(features) != SUCCESS) { - GELOGE(FAILED, "Parse feature from acl cfg failed."); - return FAILED; + is_execute_profiling_ = true; + GELOGI("The profiling in options is %s, %s. 
origin option: %s", options.profiling_mode.c_str(), + prof_conf.options, options.profiling_options.c_str()); + } else { + (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); + (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); + // The env is invalid + if ((strcmp("true", env_profiling_mode) != 0) || (strcmp(prof_conf.options, "\0") == 0)) { + return SUCCESS; } - is_load_profiling_ = true; + // enable profiling by env is_execute_profiling_ = true; - } catch (...) { - GELOGE(FAILED, "Json conf is not invalid !"); + GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options); + } + + if (!is_execute_profiling_) { + return SUCCESS; + } + + // Parse json str for bp fp + Status ret = ParseOptions(prof_conf.options); + if (ret != ge::SUCCESS) { + GELOGE(ge::PARAM_INVALID, "Parse training trace param failed."); return ge::PARAM_INVALID; } + + if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(), + sizeof(options.job_id.c_str())) != EOK) { + GELOGE(INTERNAL_ERROR, "copy job_id failed."); + return INTERNAL_ERROR; + } #endif return ge::SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( - const Json &features) { -#ifdef DAVINCI_SUPPORT_PROFILING +ge::Status ProfilingManager::ParseOptions(const std::string &options) { + if (options.empty()) { + GELOGE(ge::PARAM_INVALID, "Profiling options is empty."); + return ge::PARAM_INVALID; + } try { - for (size_t i = 0; i < features.size(); ++i) { - const Json &feature = features[i]; - if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { - continue; - } - const std::string &name = feature[kName]; - if (name == "op_trace") { - const Json &conf = feature[kConf]; - const Json &events = conf[0][kEvents]; - const std::string &ai_core_events = events[0][kAiCoreEvents]; - GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); - is_op_trace_ = 
true; - ProfMgrConf prof_mgr_conf; - int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); - if (result != 0) { - GELOGE(FAILED, "ProfMgrGetConf failed."); - return FAILED; - } - op_trace_conf_ = prof_mgr_conf.conf; - op_trace_iter_num_ = static_cast(op_trace_conf_.size()); - GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); - } else if (name == "task_trace") { - is_op_trace_ = false; - if (feature.find(kConf) != feature.end()) { - const Json &conf = feature[kConf]; - std::stringstream task_trace_conf; - task_trace_conf << conf; - task_trace_conf_ = task_trace_conf.str(); - } - GELOGI("Task trace config from acl"); - } else if (name == "system_trace") { - is_op_trace_ = false; - const Json &conf = feature[kConf]; - std::stringstream system_trace_conf; - system_trace_conf << conf; - system_trace_conf_ = system_trace_conf.str(); - GELOGI("System trace config from acl"); - } - profiling_opts_.push_back(name); + Json prof_options = Json::parse(options); + const std::string training_trace = prof_options[kTrainingTrace]; + if (training_trace.empty()) { + GELOGI("Training trace will not take effect."); + return ge::SUCCESS; + } + GELOGI("GE profiling training trace:%s", training_trace.c_str()); + if (training_trace != "on") { + GELOGE(ge::PARAM_INVALID, "Training trace param:%s is invalid.", training_trace.c_str()); + return ge::PARAM_INVALID; + } + fp_point_ = prof_options[kFpPoint]; + bp_point_ = prof_options[kBpPoint]; + if (!fp_point_.empty() && !bp_point_.empty()) { + GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); } } catch (...) 
{ - GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !"); + GELOGE(FAILED, "Json prof_conf options is invalid."); return ge::PARAM_INVALID; } -#endif return ge::SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromOptions(const Options &options) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() { #ifdef DAVINCI_SUPPORT_PROFILING - // enable profiling support two ways: env and front end - char profiling_mode_temp[MMPA_MAX_PATH] = { 0x00 }; - char prof_options_temp[MMPA_MAX_PATH] = { 0x00 }; - (void)mmGetEnv("PROFILING_MODE", profiling_mode_temp, MMPA_MAX_PATH); - (void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH ); - const char *profiling_mode = profiling_mode_temp; - const char *prof_options = prof_options_temp; - if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { - is_load_profiling_ = false; - is_execute_profiling_ = false; - } else { - std::string prof_options_str = std::string(prof_options); - profiling_opts_ = StringUtils::Split(prof_options_str, ':'); - is_load_profiling_ = true; - is_execute_profiling_ = true; - GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options); - } - if (!is_load_profiling_) { - const std::string enable_profiling = "1"; - if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) { - is_load_profiling_ = false; - is_execute_profiling_ = false; - return SUCCESS; - } else { - profiling_opts_ = StringUtils::Split(options.profiling_options, ':'); - is_load_profiling_ = true; - is_execute_profiling_ = true; - GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str()); - } - } - // features:'training_trace', 'task_trace' or 'op_trace' etc - if (!profiling_opts_.empty()) { - if (profiling_opts_[0] == "op_trace") { - is_op_trace_ = true; - // op trace get conf - ProfMgrConf 
prof_mgr_conf; - int result = ProfMgrGetConf("", &prof_mgr_conf); - if (result != 0) { - GELOGE(FAILED, "ProfMgrGetConf failed."); - return FAILED; - } - op_trace_conf_ = prof_mgr_conf.conf; - op_trace_iter_num_ = static_cast(op_trace_conf_.size()); - GELOGI("op trace profiling iter num %d,", op_trace_iter_num_); - } else { - is_op_trace_ = false; - op_trace_iter_num_ = 1; + uint64_t module = GetProfilingModule(); + // The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal + int32_t device_num = static_cast(device_id_.size()); + if (device_num != 0) { + auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); + if (device_id_ptr == nullptr) { + GELOGE(FAILED, "Stop profiling: device id ptr is null."); + return; } - } -#endif - return ge::SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::StartProfiling(int32_t iter_num, - int32_t device_id) { -#ifdef DAVINCI_SUPPORT_PROFILING - if (!profiling_opts_.empty()) { - GELOGI("Start profiling index is %d", iter_num); - // current one docker only use one device - Json p_device; - - try { - // profiling need physical_device_id - p_device[kDeviceID] = std::to_string(device_id); - p_device[kJobID] = job_id_; - p_device[kTraceID] = std::to_string(GetContext().TraceId()); - if (!prof_dir_.empty()) { - p_device[kProfDir] = prof_dir_; - GELOGI("Prof dir: %s.", prof_dir_.c_str()); - } - - Json features; - if (is_op_trace_) { - Json f; - f[kName] = "op_trace"; - Json conf; - if (op_trace_conf_.size() <= static_cast(iter_num)) { - GELOGE(FAILED, "Op trace iter num is invalid!"); - return FAILED; - } - Json events; - events[0] = nlohmann::json::parse(op_trace_conf_[iter_num]); - conf[0][kEvents] = events; - f[kConf] = conf; - features[0] = f; - if (iter_num == 0) { - is_load_ = true; - } - } else { - for (std::vector::size_type i = 0; i < profiling_opts_.size(); i++) { - Json f; - if (profiling_opts_[i] == "system_trace") { - 
f[kConf] = nlohmann::json::parse(system_trace_conf_); - } else if (profiling_opts_[i] == "task_trace") { - if (!task_trace_conf_.empty()) { - f[kConf] = nlohmann::json::parse(task_trace_conf_); - } - } - f[kName] = profiling_opts_[i]; - features[i] = f; - } - is_load_ = true; - } - p_device[kFeatures] = features; - // only one device, but sProfMgrStartUp API require for device list - Json devices; - devices[0] = p_device; - - Json start_cfg; - start_cfg[kStartCfg] = devices; - - // convert json to string - std::stringstream ss; - ss << start_cfg; - send_profiling_config_ = ss.str(); - GELOGI("Profiling config %s\n", send_profiling_config_.c_str()); - } catch (...) { - GELOGE(FAILED, "Op trace json conf is not invalid !"); - return FAILED; + for (int32_t i = 0; i < device_num; i++) { + device_id_ptr[i] = static_cast(device_id_[i]); } - - // runtime startup for profiling - uint64_t module = GetProfilingModule(); - int32_t device_num = 1; - uint32_t device_id_rt = static_cast(device_id); - GE_CHK_RT_RET(rtProfilerStart(module, device_num, &device_id_rt)); - - // call profiling startup API - ProfMgrCfg prof_cfg = {send_profiling_config_}; - void *prof_handle = ProfMgrStartUp(&prof_cfg); - if (prof_handle == nullptr) { - GELOGW("ProfMgrStartUp failed on device %d ", device_id); - return FAILED; + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); + if (rt_ret != RT_ERROR_NONE) { + GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); } - GELOGD("StartProfiling, prof_handle: %p", prof_handle); - prof_handle_vec_.push_back(prof_handle); } -#endif - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() { -#ifdef DAVINCI_SUPPORT_PROFILING - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - if (reporter != nullptr) { - int ret = reporter->Flush(); - GELOGI("Report data end, ret is %d", ret); + + // stop profiling + if (prof_cb_.msprofCtrlCallback == nullptr) { + 
GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); + return; } - uint64_t module = GetProfilingModule(); - int32_t device_num = static_cast(device_id_.size()); - auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); - if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Stop profiling: device id ptr is null."); + int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), + nullptr, 0); + if (cb_ret != 0) { + GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); return; } - for (int32_t i = 0; i < device_num; i++) { - device_id_ptr[i] = static_cast(device_id_[i]); - } - rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); - if (rt_ret != RT_ERROR_NONE) { - GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); - } - - for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { - int result = ProfMgrStop(prof_handle_vec_[i]); - if (result != 0) { - GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]); - } - } - vector().swap(prof_handle_vec_); - is_load_ = false; - recv_profiling_config_ = ""; GELOGI("Stop Profiling success."); #endif } @@ -392,12 +204,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - if (reporter == nullptr) { - GELOGI("Profiling report is nullptr!"); - return; - } - std::string data; for (const auto &task : task_desc_info) { std::string model_name = task.model_name; @@ -412,7 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(std::to_string(stream_id)).append(" ") 
.append(std::to_string(model_id)).append("\n")); - Msprof::Engine::ReporterData reporter_data{}; + ReporterData reporter_data{}; reporter_data.deviceId = device_id; reporter_data.data = (unsigned char *)data.c_str(); reporter_data.dataLen = data.size(); @@ -422,9 +228,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin return; } - ret = reporter->Report(&reporter_data); - if (ret != SUCCESS) { - GELOGE(ret, "Reporter data of task_desc_info fail!"); + int32_t cb_ret = CallMsprofReport(reporter_data); + if (cb_ret != 0) { + GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret); return; } } @@ -436,9 +242,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;); - std::string data; for (const auto &graph : compute_graph_desc_info) { data.append("model_name:") @@ -493,64 +296,52 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } data.append(" model_id:").append(std::to_string(model_id)); - data.append("\n"); - Msprof::Engine::ReporterData reporter_data{}; - Report(device_id, data, *reporter, reporter_data); - + GraphDescReport(device_id, data); data.clear(); } #endif } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Report( - const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, - Msprof::Engine::ReporterData &reporter_data) { +void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { #ifdef DAVINCI_SUPPORT_PROFILING + ReporterData reporter_data{}; + int ret = -1; + int32_t cb_ret = -1; size_t index = 
data.size() / kReportMaxLen; if (index >= 1) { reporter_data.deviceId = device_id; - int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); + ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); for (size_t i = 0; i < index; ++i) { reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; reporter_data.dataLen = kReportMaxLen; - ret = reporter.Report(&reporter_data); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;); + cb_ret = CallMsprofReport(reporter_data); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); } reporter_data.dataLen = data.size() - kReportMaxLen * index; if (reporter_data.dataLen != 0) { reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; - ret = reporter.Report(&reporter_data); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;); + cb_ret = CallMsprofReport(reporter_data); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); } } else { reporter_data.deviceId = device_id; reporter_data.data = (unsigned char *)data.c_str(); reporter_data.dataLen = data.size(); - int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); + ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); - ret = reporter.Report(&reporter_data); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;); - } -#endif -} - -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit(const std::string &module) const { -#ifdef DAVINCI_SUPPORT_PROFILING - int ret = Msprof::Engine::UnInit(module); - if (ret != SUCCESS) { - GELOGE(ret, "profiling plugin uninit failed, ret:%d", ret); + cb_ret = CallMsprofReport(reporter_data); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); } #endif } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( uint32_t model_id, const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info, - bool check_device) { + const std::vector &compute_graph_desc_info) { #ifdef DAVINCI_SUPPORT_PROFILING int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); @@ -559,13 +350,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr return; } GELOGD("current logic_device_id:%d", logic_device_id); - if (check_device) { - auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); - if (ret == device_id_.end()) { - GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); - return; - } - } GELOGD("start ProfilingTaskDescInfo."); ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); GELOGD("start ProfilingGraphDescInfo."); @@ -574,11 +358,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr #endif } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfilingConfig( - const std::string &profiling_cfg) { - recv_profiling_config_ = profiling_cfg; -} - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() { uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | @@ -594,9 +373,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP return module; } -void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string 
prof_type, - uint32_t device_id, - uint64_t module) { +void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module) { #ifdef DAVINCI_SUPPORT_PROFILING if (prof_type == kProfModelSubscribe) { if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) { @@ -608,9 +385,13 @@ void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, subs_dev_module_[device_id] = dev_info; } } else if (prof_type == kProfModelUnsubscribe) { - if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) { - if (subs_dev_module_[device_id].subscribe_count > 0) { - subs_dev_module_[device_id].subscribe_count--; + auto iter = subs_dev_module_.find(device_id); + if (iter != subs_dev_module_.end()) { + if (iter->second.subscribe_count > 0) { + iter->second.subscribe_count--; + } + if (iter->second.subscribe_count == 0) { + subs_dev_module_.erase(iter); } } } else { @@ -626,10 +407,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) { // register framework to profiling - int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); - if (result != SUCCESS) { - GELOGE(FAILED, "Register profiling engine failed."); - return FAILED; + // register Framework to profiling + int32_t cb_ret = PluginInit(); + if (cb_ret != 0) { + GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret); + return cb_ret; } GELOGI("Prof subscribe: model load profiling on."); } @@ -647,7 +429,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module); // Report profiling data - Status p_ret = davinci_model->ReportProfilingData(false); + Status p_ret = davinci_model->ReportProfilingData(); if (p_ret != SUCCESS) { GELOGE(p_ret, "Report profiling data failed."); return 
p_ret; @@ -672,6 +454,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo auto iter = subs_dev_module_.find(device[0]); if (iter != subs_dev_module_.end()) { if (subs_dev_module_[device[0]].subscribe_count == 1) { + // The same device_id, only stop at last time rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Runtime profiler stop failed."); @@ -679,15 +462,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo } } UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module); + } else { + GELOGE(FAILED, "The device_id:%u has not been subscribed, do not need to cancel.", device[0]); + return FAILED; } subscribe_count_--; if (subscribe_count_ == 0) { - int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); - if (ret != SUCCESS) { - GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); - return ret; - } + // profiling plugin uninit at last subscription + PluginUnInit(); } #endif return SUCCESS; @@ -700,11 +483,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn if (model_load_mask == PROF_MODEL_LOAD_MASK) { // register Framework to profiling - int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); - if (result != SUCCESS) { - GELOGE(FAILED, "Register profiling engine failed."); - return FAILED; + int32_t cb_ret = PluginInit(); + if (cb_ret != 0) { + GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret); + return cb_ret; } + int32_t device_num = -1; rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr); if (rt_ret != RT_ERROR_NONE) { @@ -719,7 +503,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn if (training_trace_mask == PROF_TRAINING_TRACE_MASK) { is_training_trace_ = true; } - is_acl_api_mode_ = true; GELOGI("Prof init success."); #endif return SUCCESS; 
@@ -730,19 +513,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi std::lock_guard lock(mutex_); is_load_profiling_ = false; is_training_trace_ = false; - is_acl_api_mode_ = false; + is_execute_profiling_ = false; + + // profiling plugin uninit + PluginUnInit(); - int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); - if (ret != SUCCESS) { - GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); - } int32_t dev_num = -1; rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Runtime profiler stop failed."); return FAILED; } - for (auto device_id_module : device_id_module_map_) { if (device_id_module.second != 0) { uint32_t device_id = static_cast(device_id_module.first); @@ -792,6 +573,7 @@ Status ProfilingManager::ProfParseDeviceId(const std::map return FAILED; } catch (std::out_of_range &) { GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str()); + return FAILED; } catch (...) 
{ GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str()); return FAILED; @@ -859,7 +642,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_list[i]); } - GELOGD("Runtime config param: 0x%llx, device num: %d.", module, device_num); + GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { @@ -878,7 +661,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGW("Prof start: load model module is invalid."); } UpdateDeviceIdModuleMap(kProfStart, module, device_list); - GELOGD("Prof start profiling success."); + GELOGI("Prof start profiling success."); #endif return SUCCESS; } @@ -901,7 +684,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_list[i]); } - GELOGD("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); + GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); @@ -921,7 +704,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGW("Prof stop: load model module is invalid."); } UpdateDeviceIdModuleMap(kProfStop, module, device_list); - GELOGD("Prof stop profiling success."); + GELOGI("Prof stop profiling success."); #endif return SUCCESS; } @@ -963,47 +746,90 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); } - GELOGD("Current 
logic_device_id:%d", logic_device_id); + GELOGI("Current logic_device_id:%d", logic_device_id); bool execute_model_prof_on = false; auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (iter != device_id_.end()) { execute_model_prof_on = true; } - GELOGD("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); - return is_execute_profiling_ || execute_model_prof_on; + GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); + return execute_model_prof_on; } -/** - * @brief Profiling PluginImpl - */ -// PluginImpl static variable init -Msprof::Engine::Reporter *PluginImpl::reporter_ = nullptr; - -PluginImpl::PluginImpl(const std::string &module) : module_(module) { GELOGI("Create PluginImpl\n"); } - -int PluginImpl::Init(const Msprof::Engine::Reporter *reporter) { - GELOGI("PluginImpl init"); - reporter_ = const_cast(reporter); - return 0; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const { + if (prof_cb_.msprofReporterCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return ge::PARAM_INVALID; + } + return prof_cb_.msprofReporterCallback( + static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), + static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), + nullptr, 0); } -int PluginImpl::UnInit() { - GELOGI("PluginImpl Uninit"); - reporter_ = nullptr; - return 0; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const { +#ifdef DAVINCI_SUPPORT_PROFILING + if (prof_cb_.msprofReporterCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return; + } + int32_t cb_ret = prof_cb_.msprofReporterCallback( + static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), + static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), + nullptr, 0); + if 
(cb_ret != 0) { + GELOGW("profiling plugin uninit failed, ret:%d", cb_ret); + } +#endif } -Msprof::Engine::PluginIntf *ProfilingEngineImpl::CreatePlugin() { - GELOGI(" Create Plugin"); - return new (std::nothrow) PluginImpl(GE_PROFILING_MODULE); +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( + ReporterData &reporter_data) const { + if (prof_cb_.msprofReporterCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return ge::PARAM_INVALID; + } + return prof_cb_.msprofReporterCallback( + static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), + static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), + static_cast(&reporter_data), sizeof(ReporterData)); } -int ProfilingEngineImpl::ReleasePlugin(Msprof::Engine::PluginIntf *plugin) { - if (plugin != nullptr) { - delete plugin; - plugin = nullptr; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( + std::string &fp_point, std::string &bp_point) { + // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init + if (!fp_point_.empty() && !bp_point_.empty()) { + fp_point = fp_point_; + bp_point = bp_point_; + GELOGI("Bp Fp have been initialized in env or options. 
bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); + return; + } + // ProfApi mode and training trace is set + try { + char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; + INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX); + if (ret != EN_OK) { + GELOGI("PROFILING_OPTIONS env is not exist."); + return; + } + GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); + Json prof_options = Json::parse(env_profiling_options); + + fp_point_ = prof_options[kFpPoint]; + bp_point_ = prof_options[kBpPoint]; + + fp_point = fp_point_; + bp_point = bp_point_; + if (!fp_point_.empty() && !bp_point_.empty()) { + GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); + } + } catch (...) { + GELOGE(FAILED, "Json prof options is invalid."); + return; } - return 0; + return; } + + } // namespace ge diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 66cefc32..5fa4fac4 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -26,9 +26,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_types.h" #include "external/register/register_types.h" -#include "toolchain/prof_engine.h" -#include "toolchain/prof_mgr_core.h" -#include "toolchain/prof_acl_api.h" +#include "toolchain/prof_callback.h" using std::map; using std::string; @@ -37,35 +35,33 @@ using Json = nlohmann::json; namespace { const std::string GE_PROFILING_MODULE = "Framework"; + // DataTypeConfig MASK + #define PROF_ACL_API_MASK 0x0001 + #define PROF_TASK_TIME_MASK 0x0002 + #define PROF_AICORE_METRICS_MASK 0x0004 + #define PROF_AICPU_TRACE_MASK 0x0008 + #define PROF_MODEL_EXECUTE_MASK 0x0010 + #define PROF_RUNTIME_API_MASK 0x0020 + #define PROF_RUNTIME_TRACE_MASK 0x0040 + #define PROF_SCHEDULE_TIMELINE_MASK 0x0080 + #define PROF_SCHEDULE_TRACE_MASK 0x0100 + #define 
PROF_AIVECTORCORE_METRICS_MASK 0x0200 + #define PROF_SUBTASK_TIME_MASK 0x0400 + #define PROF_TRAINING_TRACE_MASK 0x0800 + #define PROF_HCCL_TRACE_MASK 0x1000 + #define PROF_DATA_PROCESS_MASK 0x2000 + #define PROF_MODEL_LOAD_MASK 0x8000000000000000 + } // namespace namespace ge { struct DeviceSubsInfo { uint64_t module; uint32_t subscribe_count; }; -// register Plugin -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { - public: - explicit PluginImpl(const std::string &module); - ~PluginImpl() {} - - int Init(const Msprof::Engine::Reporter *reporter); - int UnInit(); - static Msprof::Engine::Reporter *GetPluginReporter() { return reporter_; } - private: - static Msprof::Engine::Reporter *reporter_; - std::string module_; -}; - -// register Engine -class ProfilingEngineImpl : public Msprof::Engine::EngineIntf { - public: - ProfilingEngineImpl() {} - ~ProfilingEngineImpl() {} - - Msprof::Engine::PluginIntf *CreatePlugin(); - int ReleasePlugin(Msprof::Engine::PluginIntf *plugin); +struct MsprofCallback { + MsprofCtrlCallback msprofCtrlCallback; + MsprofReporterCallback msprofReporterCallback; }; class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { @@ -73,68 +69,54 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ProfilingManager(); virtual ~ProfilingManager(); static ProfilingManager &Instance(); - ge::Status Init(const Options &options); - ge::Status InitFromOptions(const Options &options); - ge::Status InitFromAclCfg(const std::string &config); - ge::Status StartProfiling(int32_t iter, int32_t device_id); - void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); - ge::Status ProfModelSubscribe(uint64_t module, void *model); - ge::Status ProfModelUnsubscribe(void *model); - ge::Status ProfInit(uint64_t module); - ge::Status ProfFinalize(); - ge::Status ProfStartProfiling(uint64_t module, const std::map &config_para); - 
ge::Status ProfStopProfiling(uint64_t module, const std::map &config_para); + Status Init(const Options &options); + Status ProfInit(uint64_t module); + Status ProfFinalize(); + Status ProfStartProfiling(uint64_t module, const std::map &config_para); + Status ProfStopProfiling(uint64_t module, const std::map &config_para); + Status ProfModelSubscribe(uint64_t module, void *model); + Status ProfModelUnsubscribe(void *model); void StopProfiling(); - bool ProfilingOpTraceOn() const { return is_op_trace_; } - bool ProfilingLoadFlag() const { return is_load_; } bool ProfilingTrainingTraceOn() const { return is_training_trace_; } bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; - bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern - bool IsAclApiMode() const { return is_acl_api_mode_; } - int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info, - bool check_device); - void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, - Msprof::Engine::ReporterData &reporter_data); + const std::vector &compute_graph_desc_info); void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id); void ProfilingGraphDescInfo(uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id); - void SetProfilingConfig(const string &profiling_cfg); - vector GetProfilingDeviceId() const { return device_id_; } - void PluginUnInit(const std::string &module) const; + Status PluginInit() const; + void PluginUnInit() const; + Status CallMsprofReport(ReporterData &reporter_data) const; + struct MsprofCallback 
&GetMsprofCallback() { return prof_cb_; } + void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } + void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } + void GetFpBpPoint(std::string &fp_point, std::string &bp_point); private: - ge::Status ParseFeaturesFromAclCfg(const Json &feature); - ge::Status ProfParseParam(const std::map &config_para, int32_t &device_num, - vector &device_list); - ge::Status ProfParseDeviceId(const std::map &config_para, + Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); + Status ParseOptions(const std::string &options); + Status ProfParseParam(const std::map &config_para, int32_t &device_num, + vector &device_list); + Status ProfParseDeviceId(const std::map &config_para, vector &device_list); uint64_t GetProfilingModule(); + void GraphDescReport(const int32_t &device_id, const string &data); void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector &device_list); - bool is_load_profiling_ = false; - bool is_execute_profiling_ = false; - bool is_op_trace_ = false; - bool is_load_ = false; - bool is_training_trace_ = false; - bool is_acl_api_mode_ = false; - int32_t op_trace_iter_num_ = 0; - string job_id_; - string prof_dir_; + void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); + + bool is_load_profiling_; + bool is_execute_profiling_; + bool is_training_trace_; vector device_id_; - vector op_trace_conf_; - vector profiling_opts_; - vector prof_handle_vec_; - string recv_profiling_config_; - string send_profiling_config_; - string system_trace_conf_; - string task_trace_conf_; - const ProfilingEngineImpl engine_; map device_id_module_map_; // key: device_id, value: profiling on module map subs_dev_module_; // key: device_id, value: profiling on module uint32_t subscribe_count_; std::mutex mutex_; + MsprofCallback prof_cb_; + std::string fp_point_; + std::string 
bp_point_; }; } // namespace ge #endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ diff --git a/ge/common/types.cc b/ge/common/types.cc index 54dc769f..1cc70347 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -801,7 +801,7 @@ const uint32_t XRGB_CHN_NUM = 4; /// const bool DEFAULT_GLOBAL_POOLING = false; -const uint32_t MODEL_VERSION = 0x10000000; ///< Model version 1.0/// +const uint32_t MODEL_VERSION = 0x20000000; ///< Model version 2.0/// // Eltwise's input size const int ELTWISE_MIN_INPUT_SIZE = 2; diff --git a/ge/common/util.cc b/ge/common/util.cc index 480be3c1..0a343a83 100644 --- a/ge/common/util.cc +++ b/ge/common/util.cc @@ -51,14 +51,15 @@ namespace { * If such an exception is encountered during operation, * the proto file can be divided into several small files or the limit value can be increased. */ -const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. -const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M +const int kFileSizeOutLimitedOrOpenFailed = -1; +const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. +const int kWarningThreshold = 1073741824; // 536870912 * 2 536870912 represent 512M /// The maximum length of the file. -const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now +const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now const int kMaxBuffSize = 256; const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' 
'_' and chinese character"; -constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; +constexpr uint32_t kMaxConfigFileByte = 10485760; // 10 * 1024 * 1024 } // namespace namespace ge { @@ -76,7 +77,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(co std::string real_path = RealPath(file); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == kFileSizeOutLimitedOrOpenFailed, return false, + "file size not valid."); std::ifstream fs(real_path, std::ifstream::in | std::ifstream::binary); if (!fs.is_open()) { @@ -118,20 +120,20 @@ long GetFileLength(const std::string &input_file) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str()); unsigned long long file_length = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, - ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); - return -1, "Open file[%s] failed. %s", input_file.c_str(), strerror(errno)); + mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, + ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); + return kFileSizeOutLimitedOrOpenFailed, "Open file[%s] failed. 
%s", input_file.c_str(), strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E19015", {"filepath"}, {input_file}); return -1, "File[%s] size is 0, not valid.", input_file.c_str()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > kMaxFileSizeLimit, - ErrorManager::GetInstance().ATCReportErrMessage( - "E19016", {"filepath", "filesize", "maxlen"}, - {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); - return -1, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, - kMaxFileSizeLimit); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage( + "E19016", {"filepath", "filesize", "maxlen"}, + {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); + return kFileSizeOutLimitedOrOpenFailed, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, + kMaxFileSizeLimit); return static_cast(file_length); } @@ -187,7 +189,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co std::streamsize size = file.tellg(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((size <= 0), file.close(); return false, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast(kMaxFileSizeLimit), file.close(); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast(kMaxFileSizeLimit), file.close(); return false, "file size %ld is out of limit: %d.", size, kMaxFileSizeLimit); file.seekg(0, std::ios::beg); // [no need to check value] @@ -210,8 +212,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std:: GE_CHK_BOOL_EXEC(!directory_path.empty(), return -1, "directory path is empty."); auto dir_path_len = directory_path.length(); if (dir_path_len >= MMPA_MAX_PATH) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19002", {"filepath", "size"}, {directory_path, std::to_string(MMPA_MAX_PATH)}); + 
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, + {directory_path, std::to_string(MMPA_MAX_PATH)}); GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), MMPA_MAX_PATH); return -1; } @@ -224,8 +226,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std:: if (ret != 0) { if (errno != EEXIST) { ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); - GELOGW("Can not create directory %s. Make sure the directory exists and writable.", - directory_path.c_str()); + GELOGW("Can not create directory %s. Make sure the directory exists and writable.", directory_path.c_str()); return ret; } } @@ -265,7 +266,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromText(const ch std::string real_path = RealPath(file); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), ErrorManager::GetInstance().ATCReportErrMessage( - "E19000", {"path", "errmsg"}, {file, strerror(errno)}); + "E19000", {"path", "errmsg"}, {file, strerror(errno)}); return false, "Path[%s]'s realpath is empty, errmsg[%s]", file, strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid."); @@ -301,13 +302,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha google::protobuf::io::IstreamInputStream input(&fs); bool ret = google::protobuf::TextFormat::Parse(&input, message); GE_IF_BOOL_EXEC( - !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); + !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); return ret; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() { - mmTimeval tv {}; + mmTimeval tv{}; int ret = mmGetTimeOfDay(&tv, nullptr); GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret); auto total_use_time = tv.tv_usec + 
tv.tv_sec * 1000000; // 1000000: seconds to microseconds @@ -315,7 +316,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint32_t GetCurrentSecondTimestap() { - mmTimeval tv {}; + mmTimeval tv{}; int ret = mmGetTimeOfDay(&tv, nullptr); GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret); auto total_use_time = tv.tv_sec; // seconds @@ -350,8 +351,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(path) >= MMPA_MAX_PATH, - ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(MMPA_MAX_PATH)}); - return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH); + ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, + {path, std::to_string(MMPA_MAX_PATH)}); + return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH); // Nullptr is returned when the path does not exist or there is no permission // Return absolute path when path is accessible @@ -385,16 +387,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) 
#ifdef __GNUC__ - std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; + std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; #else - std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; + std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; #endif GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(real_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, real_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); + !ValidateStr(real_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, real_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); // The absolute path points to a file that is not readable if (mmAccess2(real_path.c_str(), M_R_OK) != EN_OK) { @@ -416,24 +418,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path.c_str()) >= MMPA_MAX_PATH, - ErrorManager::GetInstance().ATCReportErrMessage( - "E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)}); - return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), MMPA_MAX_PATH); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)}); + return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), + MMPA_MAX_PATH); // A regular matching expression to verify the validity of the input file path // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) 
#ifdef __GNUC__ - std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; + std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; #else - std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; + std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; #endif GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(file_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); + !ValidateStr(file_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, file_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); std::string real_path = RealPath(file_path.c_str()); // Can get absolute path (file exists) diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index d7dfdc84..cc5c1710 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -17,6 +17,7 @@ set(SRC_LIST "../common/dump/dump_properties.cc" "../common/dump/dump_manager.cc" "../common/dump/dump_op.cc" + "../common/profiling/ge_profiling.cc" "../graph/load/graph_loader.cc" "../graph/execute/graph_execute.cc" "../omm/csa_interact.cc" @@ -244,7 +245,6 @@ target_link_libraries(ge_executor_shared PRIVATE mmpa graph register - msprof error_manager ascend_hal_stub ascend_protobuf diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index d03a8d7b..57ab7800 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -283,7 +283,8 @@ Status GeExecutor::Initialize() { // Start profiling Options profiling_options; profiling_options.device_id = 0; - profiling_options.job_id = ""; + // job id need to be set, the value is 
meaningless; + profiling_options.job_id = "1"; ProfilingManager::Instance().Init(profiling_options); isInit_ = true; @@ -303,7 +304,7 @@ Status GeExecutor::Finalize() { // Stop profiling if (ProfilingManager::Instance().ProfilingOn()) { ProfilingManager::Instance().StopProfiling(); - ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); + ProfilingManager::Instance().PluginUnInit(); } GELOGI("Uninit GeExecutor over."); @@ -638,7 +639,8 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { return ACL_ERROR_GE_INTERNAL_ERROR; } - std::shared_ptr hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id); + std::shared_ptr hybrid_davinci_model = + ModelManager::GetInstance()->GetHybridModel(model_id); if (hybrid_davinci_model != nullptr) { uint64_t session_id = hybrid_davinci_model->GetSessionId(); VarManagerPool::Instance().RemoveVarManager(session_id); diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 9566ca64..34c2a37e 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -8,6 +8,7 @@ local_ge_executor_src_files := \ ../common/dump/dump_op.cc \ ../common/ge/plugin_manager.cc \ ../common/ge/op_tiling_manager.cc \ + ../common/profiling/ge_profiling.cc \ ../graph/load/graph_loader.cc \ ../graph/execute/graph_execute.cc \ ../omm/csa_interact.cc \ @@ -177,7 +178,6 @@ local_ge_executor_shared_library := \ libmmpa \ libgraph \ libregister \ - libmsprof \ liberror_manager \ local_ge_executor_ldflags := -lrt -ldl \ @@ -234,7 +234,6 @@ LOCAL_SHARED_LIBRARIES := \ libmmpa \ libgraph \ libregister \ - libmsprof \ liberror_manager \ stub/libascend_hal \ @@ -272,7 +271,6 @@ LOCAL_SHARED_LIBRARIES := \ libruntime \ libslog \ libmmpa \ - libmsprof \ LOCAL_LDFLAGS += $(local_ge_executor_ldflags) @@ -304,7 +302,6 @@ LOCAL_SHARED_LIBRARIES := \ libruntime \ libslog \ libmmpa \ - libmsprof \ ifeq ($(device_os),android) LOCAL_LDFLAGS += -ldl diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 0987f148..80887e8b 100755 --- 
a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -164,6 +164,7 @@ OMG_HOST_SRC_FILES := \ host_kernels/slice_d_kernel.cc \ host_kernels/dynamic_stitch_kernel.cc \ host_kernels/identity_kernel.cc \ + host_kernels/reformat_kernel.cc \ graph/passes/stop_gradient_pass.cc \ graph/passes/prevent_gradient_pass.cc \ graph/passes/identity_pass.cc \ diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index f1e152f4..c836d4d6 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -14,7 +14,6 @@ * limitations under the License. */ #include "host_cpu_engine.h" -#include #include "graph/common/omg_util.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_adapter.h" @@ -96,8 +95,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { void HostCpuEngine::CloseSo() { for (auto handle : lib_handles_) { - if (dlclose(handle) != 0) { - GELOGW("failed to close handle, message: %s", dlerror()); + if (mmDlclose(handle) != 0) { + GELOGW("failed to close handle, message: %s", mmDlerror()); } } lib_handles_.clear(); @@ -323,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector &lib_paths) { Status HostCpuEngine::LoadLib(const std::string &lib_path) { GELOGI("To invoke dlopen on lib: %s", lib_path.c_str()); - auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); if (handle == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror()); + GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. 
path = %s, error = %s", lib_path.c_str(), mmDlerror()); return INTERNAL_ERROR; } - auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize"); + auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize"); if (initialize != nullptr) { GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str()); if (initialize(HostCpuContext()) != SUCCESS) { diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index a2679ed1..c0f59320 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -29,6 +29,8 @@ LIBGE_LOCAL_SRC_FILES := \ common/dump/dump_manager.cc \ common/dump/dump_properties.cc \ common/dump/dump_op.cc \ + common/profiling/ge_profiling.cc \ + common/profiling/ge_runner_profiling.cc \ engine_manager/dnnengine_manager.cc \ ge_local_engine/engine/host_cpu_engine.cc \ generator/ge_generator.cc \ @@ -170,6 +172,7 @@ LIBGE_LOCAL_SRC_FILES := \ host_kernels/sub_kernel.cc \ host_kernels/transdata_kernel.cc \ host_kernels/unpack_kernel.cc \ + host_kernels/reformat_kernel.cc \ graph/passes/folding_pass.cc \ graph/passes/get_original_format_pass.cc \ graph/passes/guarantee_const_pass.cc \ @@ -306,7 +309,6 @@ LIBGE_LOCAL_SRC_FILES := \ LIBCLIENT_LOCAL_SRC_FILES := \ proto/ge_api.proto \ client/ge_api.cc \ - client/ge_prof.cc \ RUNNER_LOCAL_C_INCLUDES := \ $(LOCAL_PATH) ./ \ @@ -371,7 +373,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ libadump_server \ - libmsprofiler \ + libmsprofiler_fwk \ libmmpa \ LOCAL_SHARED_LIBRARIES := \ @@ -381,7 +383,6 @@ LOCAL_SHARED_LIBRARIES := \ libgraph \ libregister \ libge_common \ - libmsprof \ liberror_manager \ LOCAL_LDFLAGS := -lrt -ldl @@ -408,7 +409,6 @@ endif LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ - ../../out/ge/lib64/stub/ge_prof.cc \ ../../out/ge/lib64/stub/ge_ir_build.cc \ LOCAL_SHARED_LIBRARIES := @@ -464,7 +464,6 @@ LOCAL_SHARED_LIBRARIES := \ libc_sec \ libslog \ libmmpa \ - 
libmsprof \ LOCAL_LDFLAGS := -lrt -ldl @@ -497,7 +496,6 @@ LOCAL_SHARED_LIBRARIES := \ libc_sec \ libslog \ libmmpa \ - libmsprof \ LOCAL_LDFLAGS := -lrt -ldl diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc index fb0f3e85..8baa5b05 100644 --- a/ge/ge_runtime/runtime_model.cc +++ b/ge/ge_runtime/runtime_model.cc @@ -28,6 +28,7 @@ namespace ge { namespace model_runner { +const int kOffsetUnit = 8; RuntimeModel::~RuntimeModel() { GELOGI("RuntimeModel destructor start"); @@ -495,7 +496,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model return false; } uint64_t *buff = reinterpret_cast(const_cast(constant->weight_data.data())); - int64_t offset = elem_num * 8; + int64_t offset = elem_num * kOffsetUnit; uintptr_t hbm_raw_data_base_addr = reinterpret_cast(constant->output_addrs[0]) + offset; for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 16d63f6b..e50feae2 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -156,7 +156,12 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen } string op_type; - if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { + bool is_const = false; + (void)AttrUtils::GetBool(tensor, CONST_ATTR_NAME_INPUT, is_const); + if (is_const) { + GELOGD("Get input[%d] is const", index); + op_type = CONSTANTOP; + } else if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { op_type = DATA; } @@ -165,6 +170,18 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen if (data_op == nullptr) { return FAILED; } + if (is_const) { + ConstGeTensorPtr tensor_value; + if (!AttrUtils::GetTensor(tensor, ge::ATTR_NAME_WEIGHTS, tensor_value)) { + GELOGE(FAILED, "Get value failed, node name:%s.", tensor.GetName().c_str()); + return FAILED; + } + if 
(!AttrUtils::SetTensor(data_op, ge::ATTR_NAME_WEIGHTS, tensor_value)) { + GELOGE(FAILED, "Set attr ATTR_NAME_WEIGHTS fail."); + return FAILED; + } + } + (void)AttrUtils::SetBool(data_op, "_is_single_op", true); GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); @@ -240,6 +257,8 @@ class GeGenerator::Impl { Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); + Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff); + Status SaveParams(GeModelPtr &ge_model, const string &type, const map &attrs, const vector &inputs, const vector &outputs); @@ -505,19 +524,7 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr GE_CHECK_NOTNULL(ge_root_model); GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); - ModelHelper model_helper; - string model_name = ""; - Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name); - if (name_ret != SUCCESS) { - ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); - GELOGE(FAILED, "Get model_name failed. 
Param --output is invalid"); - return PARAM_INVALID; - } - map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); - GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; - GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null"); - ge_model->SetName(model_name); - ret = impl_->SaveModel(file_name_prefix, ge_model, model); + ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); if (ret != SUCCESS) { GELOGE(ret, "Save model failed"); if (impl_->graph_manager_.Finalize() != SUCCESS) { @@ -567,6 +574,9 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { + if (!is_offline) { + (void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true); + } if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { GELOGE(PARAM_INVALID, "input param is invalid when build single op!"); @@ -594,40 +604,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in // 2. Create ComputeGraph. string name = ge::CurrentTimeInStr() + "_" + model_file_name; - ge::ComputeGraphPtr compute_graph = MakeShared(name); - GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); - - // 3. Add Node to ComputeGraph. - NodePtr op_node = compute_graph->AddNode(op_desc); - GE_CHECK_NOTNULL_EXEC(op_node, return INTERNAL_ERROR); - - // 4. Create InputData node. 
- int32_t arg_index = 0; - if (inputs.empty()) { - for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { - GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); - if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { - continue; - } - GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); - arg_index++; - } - } else { - for (const auto &in_desc : inputs) { - GeTensorDesc input_desc = in_desc.GetTensorDesc(); - GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); - arg_index++; - } + Graph graph; + if (BuildSingleOpGraph(op_desc, inputs, outputs, name, graph) != ge::SUCCESS) { + GELOGE(GRAPH_FAILED, "make graph fail."); + return GRAPH_FAILED; } - - // 5. Create Output node. - if (!outputs.empty()) { - GE_CHK_STATUS_RET_NOLOG(AddOutputs(compute_graph, op_node, outputs)); - } - - // dump ComputeGraph. - compute_graph->Dump(); - Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); GELOGI("ATC parser success in single op build."); GeRootModelPtr ge_root_model = nullptr; @@ -683,6 +664,46 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, + const vector &outputs, std::string graph_name, Graph &graph) { + ge::ComputeGraphPtr compute_graph = MakeShared(graph_name); + GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); + + // 1. Add Node to ComputeGraph. + NodePtr op_node = compute_graph->AddNode(op_desc); + GE_CHECK_NOTNULL_EXEC(op_node, return INTERNAL_ERROR); + + // 2. Create InputData node. 
+ int32_t arg_index = 0; + if (inputs.empty()) { + for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { + GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); + if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { + continue; + } + GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); + arg_index++; + } + } else { + for (const auto &in_desc : inputs) { + GeTensorDesc input_desc = in_desc.GetTensorDesc(); + GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); + arg_index++; + } + } + + // 3. Create Output node. + if (!outputs.empty()) { + GE_CHK_STATUS_RET_NOLOG(AddOutputs(compute_graph, op_node, outputs)); + } + + // dump ComputeGraph node. + compute_graph->Dump(); + graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); + + return SUCCESS; +} + Status GeGenerator::Impl::SaveParams(GeModelPtr &ge_model, const string &type, const map &attrs, const vector &inputs, const vector &outputs) { GE_CHECK_NOTNULL_EXEC(ge_model, return PARAM_INVALID); @@ -712,6 +733,44 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & return SUCCESS; } +Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootModelPtr &ge_root_model, + ModelBufferData &model_buff) { + bool is_unknown_shape = false; + auto ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape); + if (ret != SUCCESS) { + GELOGE(FAILED, "Check root model is unkonwn shape failed"); + return FAILED; + } + GELOGD("begin save root model, cur model is unkonwn shape model ? 
: %d", is_unknown_shape); + GE_CHK_BOOL_EXEC(!ge_root_model->GetSubgraphInstanceNameToModel().empty(), return FAILED, + "ge root model has no sub model") + GeModelPtr model_root = nullptr; + if (is_unknown_shape) { + model_root = make_shared(); + model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph())); + ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root); + model_root->SetName(ge_root_model->GetRootGraph()->GetName()); + } else { + model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second; + } + // set atc version + if (!SetAtcVersionInfo(*(model_root.get()))) { + GELOGW("SetPackageVersionInfo of atc failed!"); + } + // set opp version + if (!SetOppVersionInfo(*(model_root.get()))) { + GELOGW("SetPackageVersionInfo of ops failed!"); + } + ModelHelper model_helper; + model_helper.SetSaveMode(is_offline_); + ret = model_helper.SaveToOmRootModel(ge_root_model, save_param_, file_name_prefix, model_buff, is_unknown_shape); + if (ret != SUCCESS) { + GELOGE(ret, "Save to om model failed"); + return ret; + } + return SUCCESS; +} + Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector &inputs, GeRootModelPtr &ge_root_model) { static std::atomic atomic_graph_id(0); diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index bdb02b3a..87d2a206 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -349,7 +349,8 @@ static Status GenerateTaskForConstant(const std::shared_ptr &graph GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { - GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str()); + GELOGE(FAILED, 
"Insert memcpy between %s and %s failed.", + in_node->GetName().c_str(), node->GetName().c_str()); return FAILED; } } @@ -475,7 +476,7 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr } Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { - // set input_desc.size = src_node.output_desc.size + // Set the size of input_desc to 'src_node.output_desc.size' if (node_ptr->GetType() == DATA) { bool is_unknown_shape = false; GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node_ptr, is_unknown_shape), @@ -498,7 +499,7 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { GE_IF_BOOL_EXEC(src_op == nullptr, continue); auto node_op_desc = node_ptr->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); - // set dst_node.input_desc = src_node.output_desc + // Set the input_desc of dst_node to 'src_node.output_desc' auto output_desc = src_op->GetOutputDescPtr(peer_out_anchor->GetIdx()); int64_t size = 0; GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS, GELOGI("Get size failed!")); @@ -512,7 +513,6 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { auto input_desc = node_op_desc->MutableInputDesc(in_data_anchor->GetIdx()); GE_CHECK_NOTNULL(input_desc); (void) ge::TensorUtils::SetSize(*input_desc, size); - GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc)); GELOGD("%s input desc, dim_size: %zu, mem_size: %ld, format: %s, type: %s.", node_ptr->GetName().c_str(), input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index ecd2488c..fff589f3 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -21,8 +21,8 @@ namespace 
{ const uint32_t kRangeCeilInterval = 2; const uint32_t kLogBase = 2; -const int64_t kLargeBlockSize = 8 * 1024 * 1024; -const int64_t kLargeBlockRangeSize = 10; +const int64_t kLargeBlockSize = 8388608; // 8 * 1024 * 1024 +const int64_t kLargeBlockRangeSize = 2; } // namespace namespace ge { @@ -73,15 +73,17 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGE(FAILED, "dividend is 0!"); return FAILED; } + // Memory size is 512 aligned, so it is not necessary to take less than 512 + int64_t min_memory_size = (all_memory_size.back() > MEM_ALIGN_SIZE) ? MEM_ALIGN_SIZE : all_memory_size.front(); auto range_number = static_cast( - ceil(log(all_memory_size.back() / static_cast(all_memory_size.front())) / log(kLogBase))); + ceil(log(all_memory_size.back() / static_cast(min_memory_size)) / log(kLogBase))); range_number = (range_number == 0) ? 1 : range_number; GELOGD("Range number: %zu", range_number); vector> ranges(range_number); GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); size_t range_number_limit = all_memory_size.size() / range_number; - int64_t range_ceil = all_memory_size[0]; + int64_t range_ceil = min_memory_size; for (size_t i = 1; i <= range_number; i++) { GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast(range_ceil), kRangeCeilInterval), GELOGE(FAILED, "Multiply result is out of range."); @@ -114,7 +116,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { range_ceils.push_back(range.back()); } } - GELOGD("Range ceils: %s", ToString(range_ceils).c_str()); + GELOGI("Range ceils: %s", ToString(range_ceils).c_str()); return SUCCESS; } diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 00f47573..9dc0cf73 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -65,6 +65,98 @@ void AlignMemOffset(size_t &mem_align_size) { mem_align_size = (mem_align_size 
+ MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; } +static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { + auto left_node_op_desc = left.node->GetOpDesc(); + auto right_node_op_desc = right.node->GetOpDesc(); + if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) + && (left_node_op_desc->GetId() < right_node_op_desc->GetId())) { + return true; + } + return false; +} + +void GetLifeList(const MemoryBlock &block, std::vector &life_list, bool child) { + for (auto &node : block.NodeTypeIndexList()) { + life_list.emplace_back(node); + } + + if (child) { + for (auto child_block : block.ChildBlockList()) { + if (child_block == nullptr) { + continue; + } + if (block.stream_id_ != child_block->stream_id_ || !block.same_stream_ || !child_block->same_stream_) { + life_list.clear(); + return; + } + GetLifeList(*child_block, life_list, child); + } + } +} + +bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { + if ((left.node == nullptr) || (right.node == nullptr)) { + return true; + } + auto left_node_op_desc = left.node->GetOpDesc(); + auto right_node_op_desc = right.node->GetOpDesc(); + if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { + if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { + if (left.life_time_end >= static_cast(right_node_op_desc->GetId())) { + return true; + } + } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { + return true; + } else { + if (right.life_time_end >= static_cast(left_node_op_desc->GetId())) { + return true; + } + } + } + return false; +} + +/// +/// When child block's life time are not cross with parent block, they can be reused(only same stream). 
+/// |-----------------------------parent block---------------------| +/// |------child block1--------------||------child block2------| +/// |--child block1-1-| +/// +bool CanIntervalLifeReuse(MemoryBlock &parent_block, MemoryBlock &child_block) { + // judge by interval life time, only same stream can be judged by interval life time + if (parent_block.stream_id_ != child_block.stream_id_ || !parent_block.same_stream_ || !child_block.same_stream_ + || parent_block.NodeTypeIndexList().empty() || child_block.NodeTypeIndexList().empty()) { + return false; + } + + // quick judge by front and back node + if (CrossLifeTime(parent_block.NodeTypeIndexList().front(), child_block.NodeTypeIndexList().front())) { + return false; + } + if (CrossLifeTime(parent_block.NodeTypeIndexList().back(), child_block.NodeTypeIndexList().back())) { + return false; + } + + std::vector life_list; + GetLifeList(parent_block, life_list, false); + GetLifeList(child_block, life_list, true); + if (life_list.empty()) { + return false; + } + std::sort(life_list.begin(), life_list.end(), CompareLifeTime); + size_t pre_life_end = 0; + for (auto &node : life_list) { + auto node_op_desc = node.node->GetOpDesc(); + if (node_op_desc != nullptr && pre_life_end >= static_cast(node_op_desc->GetId())) { + // life time cross + return false; + } + pre_life_end = node.life_time_end; + } + GELOGI("Block size[%zu, %zu] life time are not cross.", parent_block.Size(), child_block.Size()); + return true; +} + void MemoryBlock::SetHeadOffset(size_t offset) { head_offset_ = offset; size_t child_offset = head_offset_; @@ -125,20 +217,12 @@ size_t MemoryBlock::AlignSize() const { return align_block_size; } -bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { - if (node_type_index_list_.empty()) { +bool MemoryBlock::IsSameBatchLabel() { + // only same batch label can reuse + if (batch_label_.empty() || node_type_index_list_.empty()) { return false; } - auto node_op_desc = 
node_type_index_list_[0].node->GetOpDesc(); - if (node_op_desc == nullptr) { - return false; - } - // not all op has ATTR_NAME_BATCH_LABEL, no need check return value, only check out parameter - (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, first_batch_label); - if (first_batch_label.empty()) { - return false; - } bool all_same_label = true; for (size_t index = 1; index < node_type_index_list_.size(); ++index) { if (node_type_index_list_[index].node == nullptr) { @@ -147,8 +231,9 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { std::string batch_label; auto index_op_desc = node_type_index_list_[index].node->GetOpDesc(); GE_IF_BOOL_EXEC(index_op_desc == nullptr, continue); + // not all op has ATTR_NAME_BATCH_LABEL, no need check return value, only check out parameter (void)ge::AttrUtils::GetStr(index_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (first_batch_label != batch_label) { + if (batch_label_ != batch_label) { all_same_label = false; break; } @@ -197,7 +282,7 @@ void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLi } void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { - if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { + if (CanNotLifeReuse(this) || CanNotLifeReuse(block) || (batch_label_ != block->batch_label_)) { return; } if (block->continuous_block_) { @@ -207,16 +292,27 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ MemoryBlock *parent = nullptr; MemoryBlock *child = nullptr; // merge small block to large block - if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { - if ((child_offset_ + block->AlignSize()) <= AlignSize()) { - parent = this; - child = block; - } else if ((block->child_offset_ + AlignSize()) <= block->AlignSize()) { - parent = block; - child = this; + // noalign size 802816 + 802816 = 1605632 can reuse + // after 32 align size 802848 + 
802848 > 1605664 can't reuse + // after 512 align size 803328 + 803328 > 1606144 can't reuse + // so 803328 + 803328 = 1606144 + 512 can reuse + if ((child_offset_ + block->AlignSize()) <= (AlignSize() + MEM_ALIGN_SIZE)) { + parent = this; + child = block; + } else if ((block->child_offset_ + AlignSize()) <= (block->AlignSize() + MEM_ALIGN_SIZE)) { + parent = block; + child = this; + } + + if ((parent != nullptr) && (child != nullptr)) { + // Different streams must use stream dependency to judge the life cycle + // In case same stream if it has child block, can judge all the child block's life time in CanIntervalLifeReuse + bool can_block_life_reuse = (child->child_blocks_.empty() + && (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd())); + if (!can_block_life_reuse && !CanIntervalLifeReuse(*parent, *child)) { + return; } - } - if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { + parent->child_blocks_.emplace_back(child); parent->child_offset_ += child->AlignSize(); child->deleted_block_ = true; @@ -261,6 +357,7 @@ size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DependStreamLife &tota void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t stream_id, std::map &depend_stream_life, DependStreamLife &total_node_depend_stream_life) { GE_CHECK_NOTNULL_EXEC(node, return); + GE_CHECK_NOTNULL_EXEC(org_node, return); auto node_desc = node->GetOpDesc(); GE_CHECK_NOTNULL_EXEC(node_desc, return); auto node_id = node_desc->GetId(); @@ -415,12 +512,60 @@ BlockMemAssigner::~BlockMemAssigner() { } } +void GetMaxBatchAllMemorySize(std::map> &batch_all_memory_size, + std::map batch_total_size, vector &all_memory_size, + std::string &max_batch_label) { + // use max batch all memory size for reuse range + int64_t max_batch_size = 0; + for (const auto &it : batch_total_size) { + GELOGI("Batch[%s] total memory size[%ld]", it.first.c_str(), it.second); + // no batch label + if 
(it.first.empty()) { + continue; + } + if (it.second > max_batch_size) { + max_batch_size = it.second; + max_batch_label = it.first; + } + } + GELOGI("Max batch[%s] total memory size[%ld]", max_batch_label.c_str(), max_batch_size); + + for (const auto &it : batch_all_memory_size) { + if (it.first.empty() || (it.first == max_batch_label)) { + all_memory_size.insert(all_memory_size.end(), it.second.begin(), it.second.end()); + } + } + // all_memory_size can't be empty + if (all_memory_size.empty()) { + all_memory_size.emplace_back(MEM_ALIGN_SIZE); + } + sort(all_memory_size.begin(), all_memory_size.end()); + GELOGD("All memory size: %s", ToString(all_memory_size).c_str()); + + for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { + if (*iter == 0) { + iter = all_memory_size.erase(iter); + } else { + ++iter; + } + } +} + void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { vector temp; + std::map> batch_all_memory_size; + std::map batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); + if (CheckIsZeroMemNodeType(node_op_desc->GetType())) { + continue; + } + + std::string batch_label; + (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); + if (node_op_desc->GetType() == ATOMICADDRCLEAN) { atomic_addr_clean_id_ = node_op_desc->GetId(); } @@ -434,9 +579,14 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { if (!reuse_input) { int64_t size = 0; GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); - if (anchor_to_symbol_.empty()) { - all_memory_size.emplace_back(size); + batch_all_memory_size[batch_label].emplace_back(size); + if (batch_total_size.find(batch_label) == batch_total_size.end()) { + batch_total_size[batch_label] = size; } else { + batch_total_size[batch_label] += size; + } + + if (!anchor_to_symbol_.empty()) { auto 
iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); if (iter1 == anchor_to_symbol_.end()) { continue; @@ -452,23 +602,11 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { } } temp.clear(); - GetNodeWorkSpaceSize(n, temp); - all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); - } - for (const auto &pair : symbol_size_) { - all_memory_size.emplace_back(pair.second); - } - sort(all_memory_size.begin(), all_memory_size.end()); - GELOGD("All memory size: %s", ToString(all_memory_size).c_str()); - - for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { - if (*iter == 0) { - iter = all_memory_size.erase(iter); - } else { - ++iter; - } + GetNodeWorkSpaceSize(n, temp, batch_total_size[batch_label]); + batch_all_memory_size[batch_label].insert(batch_all_memory_size[batch_label].end(), temp.begin(), temp.end()); } - + GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); + GetMaxBatchAllMemorySize(batch_all_memory_size, batch_total_size, all_memory_size, max_batch_label_); InitReuseFlag(); PrintSymbolMap(); } @@ -529,16 +667,6 @@ bool CanReuseBySize(const map &reusable_block_counts, const Me bool can_reuse = false; if (reusable_block.Size() == block_size) { can_reuse = true; - } else { - string key = std::to_string(reusable_block.Size()); - key += "_" + std::to_string(reusable_block.stream_id_); - key += "_" + std::to_string(reusable_block.memory_type_); - auto it = reusable_block_counts.find(key); - GE_IF_BOOL_EXEC((it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) && - (reusable_block.Size() > block_size), - can_reuse = true; - GELOGD("Less size mem reuse, reuse block size:%zu, current block size:%zu", - reusable_block.Size(), block_size);); } return can_reuse; } @@ -860,34 +988,35 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input 
parameter n is null."); auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); + std::string batch_label; + (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); + if (batch_label.empty() || (batch_label == max_batch_label_)) { + size_t align_size = real_size; + AlignMemOffset(align_size); + theory_memory_size_ += align_size; + if (theory_memory_size_ > theory_min_memory_size_) { + theory_min_memory_size_ = theory_memory_size_; + } + } bool is_reuse_memory = false; - string ge_disable_reuse_mem_env = "0"; - (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env); - if (ge_disable_reuse_mem_env != "1") { + if (ge_disable_reuse_mem_env_ != "1") { bool reuse_mem_flag = (mem_type == kOutput) ? IsPreReuse(n, out_index) : !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && reuse_mem_flag && is_op_reuse_mem; - auto stream_id = node_op_desc->GetStreamId(); - if (is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty()) { - for (auto it = reusable_blocks_[memory_type][stream_id].begin(); - it != reusable_blocks_[memory_type][stream_id].end(); ++it) { + bool do_reuse = is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty(); + if (do_reuse) { + auto stream_id = node_op_desc->GetStreamId(); + for (auto it = reusable_blocks_[memory_type][stream_id].rbegin(); + it != reusable_blocks_[memory_type][stream_id].rend(); ++it) { MemoryBlock *reusable_block = *it; if (!IsPostReuse(reusable_block)) { reusable_block->reuse_mem_ = false; GELOGI("Unreusable block."); continue; } - std::string batch_label; - if (reusable_block->IsSameLabel(batch_label)) { - std::string op_label; - (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, op_label); - if (batch_label != op_label) { - GELOGI("label diff, op name 
%s", node_op_desc->GetName().c_str()); - continue; - } - } + GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { @@ -901,7 +1030,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); - reusable_blocks_[memory_type][stream_id].erase(it); + reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } } @@ -914,10 +1043,11 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - block->Init(real_size, mem_type, n, out_index, no_align_size); + block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); block->stream_id_ = node_op_desc->GetStreamId(); block->ref_count_++; block->continuous_block_ = continuous; + block->batch_label_ = batch_label; if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -945,6 +1075,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec return nullptr; } + if (CheckIsZeroMemNodeType(n->GetType())) { + zero_memory_list_.emplace_back(n, kOutput, index); + continue; + } + int64_t size = 0; if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { GELOGI("Get size failed"); @@ -957,9 +1092,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec // only apply total size in first block if (index != 0) { zero_memory_list_.emplace_back(n, kOutput, index); - } - - if (index == 0) { + } else { NodeIndexIO node_index_io(n, index, kOut); auto iter = 
anchor_to_symbol_.find(node_index_io.ToString()); if (iter != anchor_to_symbol_.end()) { @@ -972,6 +1105,10 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec } } + if (total_size == 0) { + return nullptr; + } + auto block_size = GetBlockSize(total_size, ranges); GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), total_size, block_size); @@ -1119,15 +1256,28 @@ bool IsKnownSubgraphData(const NodePtr &node) { return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); } -void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory) { +void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory, + bool same_stream) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory"); --to_release->ref_count_; + if (!same_stream) { + to_release->same_stream_ = false; + } if (to_release->ref_count_ == 0) { - to_release->SetLifeTimeEnd(life_time_); - reusable_memory.emplace_back(to_release); - AddReusableBlockCount(*to_release, reusable_block_counts_); + if (to_release->reuse_mem_ && !to_release->RealSizeList().empty()) { + if (to_release->batch_label_.empty() || (to_release->batch_label_ == max_batch_label_)) { + size_t align_size = to_release->RealSizeList().back(); + AlignMemOffset(align_size); + theory_memory_size_ -= align_size; + } + } + if (to_release->same_stream_) { + to_release->SetLifeTimeEnd(life_time_); + reusable_memory.emplace_back(to_release); + AddReusableBlockCount(*to_release, reusable_block_counts_); + } } } @@ -1167,10 +1317,9 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_mapGetName().c_str()); if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && - 
(node_type_indexs.back().index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx())) && - (node->GetOpDesc()->GetStreamId() == block->stream_id_)) { - ReleaseMemory(block, reusable_memory); - if (block->ref_count_ == 0) { + (node_type_indexs.back().index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx()))) { + ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); + if (block->ref_count_ == 0 && block->same_stream_) { SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); } } @@ -1267,7 +1416,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); if (!no_need_assign_memory) { out_node_set_continuous_input = - IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory, reset_zero_copy_flag); + IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, + no_need_assign_memory, reset_zero_copy_flag); GE_IF_BOOL_EXEC(!no_need_assign_memory, no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); } @@ -1328,7 +1478,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { iter->second[stream_id].clear(); } vector temp; - GetNodeWorkSpaceSize(n, temp); + int64_t tatal_size = 0; + GetNodeWorkSpaceSize(n, temp, tatal_size); vector workspace_bytes; vector tvm_workspace_memory_type; bool has_tvm_workspace_mem_type_attr = @@ -1349,7 +1500,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { bool workspace_skip_flag = false; if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) { GELOGI( - "fusion: node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", + "fusion:node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", node_op_desc->GetName().c_str(), i, tvm_workspace_memory_type[i]); 
workspace_skip_flag = true; } @@ -1380,9 +1531,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { (void)mem_block; // Fix warning } - bool merge_dynamic_batch = false; - GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks()); - GE_IF_BOOL_EXEC((!(ge_disable_reuse_mem_env_ == "1") && !merge_dynamic_batch), ReuseBlocksByLifeTime(ranges.size())); + GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), ReuseBlocksByLifeTime(ranges.size())); AssignContinuousBlocks(); ResizeMemoryBlocks(); @@ -1402,92 +1551,19 @@ void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_f } } -void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector &workspace_memory) { +void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector &workspace_memory, + int64_t &total_size) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, return, "Op desc is null."); vector workspace_byte_nums = node->GetOpDesc()->GetWorkspaceBytes(); GELOGD("node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size()); for (int64_t byte_size : workspace_byte_nums) { workspace_memory.emplace_back(byte_size); + total_size += byte_size; GELOGD("push back size:%ld", byte_size); } } -// descending order -static bool CompareBlockMaxSize(MemoryBlock *left, MemoryBlock *right) { - if (left == nullptr || right == nullptr) { - return false; - } - auto left_max_size = std::max_element(left->RealSizeList().begin(), left->RealSizeList().end()); - if (left_max_size != left->RealSizeList().end()) { - auto right_max_size = std::max_element(right->RealSizeList().begin(), right->RealSizeList().end()); - if (right_max_size == right->RealSizeList().end() || (*left_max_size > *right_max_size)) { - return true; - } - } - return false; -} - -void MergeBlocks(std::vector &dest, std::vector &src) { - for (size_t i = 0; i < dest.size(); ++i) { - if (i >= src.size()) { - return; - } - if (dest[i] != nullptr 
&& src[i] != nullptr) { - if (!dest[i]->reuse_mem_ || !src[i]->reuse_mem_) { - GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", - i, dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); - continue; - } - for (auto &symbol : src[i]->SymbolList()) { - dest[i]->AddSymbol(symbol); - } - for (size_t j = 0; j < src[i]->NodeTypeIndexList().size(); ++j) { - dest[i]->AddNodeTypeIndex(src[i]->NodeTypeIndexList()[j], - src[i]->RealSizeList()[j], - src[i]->NoAlignSizeList()[j]); - src[i]->deleted_block_ = true; - } - } - } -} - -bool BlockMemAssigner::MergeDynamicBatchBlocks() { - bool merged = false; - std::map> dynamic_batch_blocks; - for (auto block : memory_blocks_) { - if (block == nullptr) { - continue; - } - std::string batch_label; - if (block->IsSameLabel(batch_label)) { - dynamic_batch_blocks[batch_label].emplace_back(block); - } - } - - auto it = dynamic_batch_blocks.begin(); - auto it_max = it; - - // find max block counts - for (; it != dynamic_batch_blocks.end(); ++it) { - if (it->second.size() > it_max->second.size()) { - it_max = it; - } - std::sort(it->second.begin(), it->second.end(), CompareBlockMaxSize); - } - if (it_max != dynamic_batch_blocks.end()) { - GELOGD("MergeDynamicBatch %s block counts %zu", it_max->first.c_str(), it_max->second.size()); - } - for (it = dynamic_batch_blocks.begin(); it != dynamic_batch_blocks.end(); ++it) { - if (it != it_max) { - GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str()); - MergeBlocks(it_max->second, it->second); - merged = true; - } - } - return merged; -} - // asending order static bool CompareBlockIndex(MemoryBlock *left, MemoryBlock *right) { if (left == nullptr || right == nullptr) { @@ -1597,38 +1673,93 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { } } +void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) { + if 
(block.memory_type_ == RT_MEMORY_HBM) { + if (block.first_continuous_block_) { + mem_offset += MEM_ALIGN_SIZE; + } + block.Resize(); + block.SetHeadOffset(mem_offset); + mem_offset += block.Size(); + block.SetTailOffset(mem_offset - 1); + } else if (block.memory_type_ == RT_MEMORY_P2P_DDR) { + if (block.first_continuous_block_) { + p2p_mem_offset += MEM_ALIGN_SIZE; + } + block.Resize(); + block.SetHeadOffset(p2p_mem_offset); + p2p_mem_offset += block.Size(); + block.SetTailOffset(p2p_mem_offset - 1); + } +} + +bool DynamicBatchBlockReuse(MemoryBlock &block) { + return (block.IsSameBatchLabel() && block.reuse_mem_); +} + /// /// @ingroup domi_omg -/// @brief traverse memory size, resize, calculate offset +/// @brief get max batch memory size, others reuse this block memory /// @param [in&out] memory_blocks_ memory block, after calculating offset +/// |-dynamic batch block batch1| +/// |-dynamic batch block batch2----| +/// |-dynamic batch block batch3--| /// -void BlockMemAssigner::ResizeMemoryBlocks() { - for (auto &memory_block : memory_blocks_) { - if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { +void BlockMemAssigner::ResizeDynamicBatchBlocks() { + std::map> dynamic_batch_blocks; + for (auto block : memory_blocks_) { + if (block == nullptr) { continue; } - if (memory_block->memory_type_ == RT_MEMORY_HBM) { - if (memory_block->first_continuous_block_) { - mem_offset_ += MEM_ALIGN_SIZE; - } + // when memory is not reuseable, it can't be reused by different branch + if (DynamicBatchBlockReuse(*block)) { + dynamic_batch_blocks[block->batch_label_].emplace_back(block); + } + } - memory_block->Resize(); - memory_block->SetHeadOffset(mem_offset_); - mem_offset_ += memory_block->Size(); - memory_block->SetTailOffset(mem_offset_ - 1); - } else if (memory_block->memory_type_ == RT_MEMORY_P2P_DDR) { - if (memory_block->first_continuous_block_) { - p2p_mem_offset_ += MEM_ALIGN_SIZE; + size_t max_mem_offset = mem_offset_; + 
size_t max_p2p_mem_offset = p2p_mem_offset_; + for (auto &batch_blocks : dynamic_batch_blocks) { + size_t mem_offset = mem_offset_; + size_t p2p_mem_offset = p2p_mem_offset_; + for (auto block : batch_blocks.second) { + if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { + continue; } + AddBlockMemOffset(mem_offset, p2p_mem_offset, *block); + } + if (mem_offset > max_mem_offset) { + max_mem_offset = mem_offset; + } + if (p2p_mem_offset > max_p2p_mem_offset) { + max_p2p_mem_offset = p2p_mem_offset; + } + GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset); + } + mem_offset_ = max_mem_offset; + p2p_mem_offset_ = max_p2p_mem_offset; +} - memory_block->Resize(); - memory_block->SetHeadOffset(p2p_mem_offset_); - p2p_mem_offset_ += memory_block->Size(); - memory_block->SetTailOffset(p2p_mem_offset_ - 1); +/// +/// @ingroup domi_omg +/// @brief traverse memory size, resize, calculate offset +/// @param [in&out] memory_blocks_ memory block, after calculating offset +/// |-not dynamic batch block-||-dynamic batch block batch1| |-zero copy block-| +/// |-not dynamic batch block-||-dynamic batch block batch2----||-zero copy block-| +/// |-not dynamic batch block-||-dynamic batch block batch3--| |-zero copy block-| +/// +void BlockMemAssigner::ResizeMemoryBlocks() { + for (auto &memory_block : memory_blocks_) { + if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_ + || DynamicBatchBlockReuse(*memory_block)) { + continue; } + + AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block); } - GELOGD("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", - mem_offset_, p2p_mem_offset_); + ResizeDynamicBatchBlocks(); + GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu," + "theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_); } /// @@ -1641,7 
+1772,7 @@ void BlockMemAssigner::ResizeMemoryBlocks() { /// @return Status result /// void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, - size_t real_size, size_t no_align_size, bool child_block) { + size_t real_size, size_t no_align_size, int32_t child_block_level) { ge::OpDescPtr op_desc = node_type.node->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null."); string graph_name = node_type.node->GetOwnerComputeGraph()->GetName(); @@ -1689,14 +1820,15 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, } op_desc->SetWorkspace(workspace_list); } - GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" - " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(), + GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " + "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), - block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_, - block->continuous_block_, block->deleted_block_, node_type.ref_input); + block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, + block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, + block->batch_label_.c_str()); } -void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { +void SetBlockOpMemOffset(MemoryBlock *block, int32_t child_block_level) { if (block == nullptr) { return; } @@ -1709,9 +1841,14 @@ void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { real_size = block->RealSizeList()[index]; no_align_size = block->NoAlignSizeList()[index]; } - SetOffsetSize(node_type_index, block, real_size, 
no_align_size, child_block); + SetOffsetSize(node_type_index, block, real_size, no_align_size, child_block_level); index++; } + + child_block_level++; + for (MemoryBlock *child_block : block->ChildBlockList()) { + SetBlockOpMemOffset(child_block, child_block_level); + } } void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) { @@ -1724,16 +1861,13 @@ void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) { continue; } - SetBlockOpMemOffset(memory_block, false); - for (MemoryBlock *child_block : memory_block->ChildBlockList()) { - SetBlockOpMemOffset(child_block, true); - } + SetBlockOpMemOffset(memory_block, 0); } if (!is_zero_copy) { for (const NodeTypeIndex &node_type_index : zero_memory_list_) { MemoryBlock block(0, 0); - SetOffsetSize(node_type_index, &block, 0, 0, false); + SetOffsetSize(node_type_index, &block, 0, 0, 0); } } } diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index f3d26c1d..d514ca34 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -65,6 +65,7 @@ class MemoryBlock { stream_id_(stream_id), deleted_block_(false), reuse_mem_(reuse_mem), + same_stream_(true), input_index_(0), continuous_block_(false), first_continuous_block_(false), @@ -85,10 +86,14 @@ class MemoryBlock { symbol_list_.clear(); } - void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { + void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, + int64_t stream_id) { real_size_list_.emplace_back(real_size); no_align_size_list_.emplace_back(no_align_size); node_type_index_list_.emplace_back(node, type, out_index, false); + if (stream_id != stream_id_) { + same_stream_ = false; + } } size_t Size() const { return block_size_; } @@ -106,6 +111,12 @@ class MemoryBlock { node_type_index_list_.emplace_back(node_type_index); 
real_size_list_.emplace_back(real_size); no_align_size_list_.emplace_back(no_align_size); + if ((node_type_index.node != nullptr) && (node_type_index.node->GetOpDesc() != nullptr)) { + auto stream_id = node_type_index.node->GetOpDesc()->GetStreamId(); + if (stream_id != stream_id_) { + same_stream_ = false; + } + } } void AddSymbol(const std::string &symbol) { @@ -122,7 +133,7 @@ class MemoryBlock { std::string String(); - bool IsSameLabel(std::string &first_batch_label); + bool IsSameBatchLabel(); void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life); @@ -142,6 +153,7 @@ class MemoryBlock { int64_t stream_id_; bool deleted_block_; bool reuse_mem_; + bool same_stream_; uint32_t input_index_; bool continuous_block_; bool first_continuous_block_; @@ -149,6 +161,7 @@ class MemoryBlock { bool is_zero_copy_; std::map depend_stream_life_; int64_t memory_type_; + std::string batch_label_; private: size_t block_size_; std::vector real_size_list_; @@ -209,7 +222,7 @@ class BlockMemAssigner : public MemAssigner { void GetOutAndWorkSpaceMem(std::vector &all_memory_size); - void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector &workspace_memory); + void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector &workspace_memory, int64_t &total_size); /// /// @ingroup GE @@ -353,7 +366,7 @@ class BlockMemAssigner : public MemAssigner { /// @return void /// @author /// - void ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory); + void ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory, bool same_stream = true); /// /// @ingroup GE @@ -379,11 +392,11 @@ class BlockMemAssigner : public MemAssigner { /// /// @ingroup GE - /// @brief Merge memory blocks between different batchs + /// @brief Resize memory blocks for each batchs /// @return merge or not /// @author /// - bool MergeDynamicBatchBlocks(); + void ResizeDynamicBatchBlocks(); void AssignContinuousBlocks(); @@ -436,6 +449,17 @@ class 
BlockMemAssigner : public MemAssigner { int64_t atomic_addr_clean_id_ = 0; + size_t theory_min_memory_size_ = 0; + + size_t theory_memory_size_ = 0; + + std::string max_batch_label_; + + /// + /// @ [stream1][nodeid] + /// @[nodeid] [stream2][nodeid] + /// @ [stream2][nodeid] + /// DependStreamLife total_node_depend_stream_life_; }; } // namespace ge diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index ad0235d5..16d5d38f 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -419,7 +419,8 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two. This node is not supported now."; + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return PARAM_INVALID;); @@ -429,7 +430,8 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GE_IF_BOOL_EXEC(is_peer_reference, std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two. This node is not supported now."; + " requires continuous output. There may be conflict between the two." 
+ + "This node is not supported now."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return PARAM_INVALID;); @@ -1646,9 +1648,9 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve } string atomic_mem_size_str = ss.str(); - GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", + GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]", node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); + atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str()); } return SUCCESS; } diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index d7039cfb..37eb499a 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -282,7 +282,7 @@ Status ModelBuilder::SetInputOutputDesc() { void ModelBuilder::AddNodeInputProperty() { for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return); vector src_name_list; vector src_index_list; for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { @@ -309,10 +309,10 @@ void ModelBuilder::AddNodeInputProperty() { for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return); GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue); auto out_control_anchor = node->GetOutControlAnchor(); - 
GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return ); + GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return); vector dst_name_list; vector dst_index_list; string dst_name_temp; @@ -330,7 +330,7 @@ void ModelBuilder::AddNodeInputProperty() { dst_name_temp = ""; int64_t dst_index = kWrongIndex; // assign an impossible value to dst_index. for (const auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return ); + GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return); ge::NodePtr dst_node = in_data_anchor->GetOwnerNode(); dst_name_temp = dst_name_temp.empty() ? dst_node->GetName() : dst_name_temp + ":" + dst_node->GetName(); dst_index = in_data_anchor->GetIdx(); diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 4378f71b..a1cda506 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -49,7 +49,8 @@ inline bool HasContinuousStreamLabel(const ge::OpDescPtr &op_desc, std::string & } bool IsHcclOp(const string &op_type) { - const set hccl_op_types({ge::HCOMBROADCAST, ge::HCOMALLGATHER, ge::HCOMALLREDUCE, ge::HCOMREDUCESCATTER, ge::HCOMREDUCE}); + const set hccl_op_types({ge::HCOMBROADCAST, ge::HCOMALLGATHER, + ge::HCOMALLREDUCE, ge::HCOMREDUCESCATTER, ge::HCOMREDUCE}); return hccl_op_types.find(op_type) != hccl_op_types.end(); } } // namespace diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index 582c080b..2933d413 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -38,7 +38,7 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap continue; } for (ge::NodePtr &node : subgraph->GetDirectNode()) { - GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return ); + 
GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return); if ((node->GetType() == END) || (node->GetType() == PLACEHOLDER)) { node->GetOpDesc()->SetId(static_cast(node_size)); node_size++; diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 41607f1f..b506f945 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -49,8 +49,6 @@ const char *const kIsLastNode = "is_last_node"; const char *const kIsInputVar = "INPUT_IS_VAR"; const char *const kIsOutputVar = "OUTPUT_IS_VAR"; const char *const kProfilingMode = "PROFILING_MODE"; -const char *const kProfilingFpPoint = "FP_POINT"; -const char *const kProfilingBpPoint = "BP_POINT"; const uint32_t kProfilingArStep = 2; const uint64_t kProfilingFpStartLogid = 1; const uint64_t kProfilingBpEndLogid = 2; @@ -810,35 +808,23 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint vector &all_reduce_nodes, std::string &fp_point_str, std::string &bp_point_str) const { - if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_FPPONIT_OPTIONS, fp_point_str) == SUCCESS && - ge::GetContext().GetOption(OPTION_EXEC_PROFILING_BPPONIT_OPTIONS, bp_point_str) == SUCCESS && - !fp_point_str.empty() && !bp_point_str.empty()) { - return SUCCESS; - } + ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str); Status ret = SUCCESS; - const char *fp_point = std::getenv(kProfilingFpPoint); - if (fp_point == nullptr) { + if (fp_point_str.empty()) { ret = AutoFindFpOpIndex(graph, profiling_point); if (ret != SUCCESS) { GELOGW("First forward profiling op_index not set and FindFpOpIndex failed."); return FAILED; } - } else { - fp_point_str = string(fp_point); - GELOGI("Get fp_point_str from env %s", fp_point_str.c_str()); } - const char *bp_point = std::getenv(kProfilingBpPoint); - if (bp_point == nullptr) { + if (bp_point_str.empty()) { ret = AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes); if (ret != SUCCESS) { GELOGW("Last backward profiling 
op_index not set and FindBpOpIndex failed."); return FAILED; } - } else { - bp_point_str = string(bp_point); - GELOGI("Get bp_point_str from env %s", bp_point_str.c_str()); } return SUCCESS; diff --git a/ge/graph/label/case_label_maker.h b/ge/graph/label/case_label_maker.h index 1078a906..3dbfb2bc 100644 --- a/ge/graph/label/case_label_maker.h +++ b/ge/graph/label/case_label_maker.h @@ -86,7 +86,6 @@ | Node | +------------+ *******************************************************************************/ - namespace ge { class CaseOpLabelMaker : public LabelMaker { public: diff --git a/ge/graph/label/if_label_maker.h b/ge/graph/label/if_label_maker.h index 0807f549..8b07eb96 100644 --- a/ge/graph/label/if_label_maker.h +++ b/ge/graph/label/if_label_maker.h @@ -70,7 +70,6 @@ | Node | +------------+ *******************************************************************************/ - namespace ge { class IfOpLabelMaker : public LabelMaker { public: diff --git a/ge/graph/label/partitioned_call_label_maker.h b/ge/graph/label/partitioned_call_label_maker.h index b89cb94c..3944aabd 100644 --- a/ge/graph/label/partitioned_call_label_maker.h +++ b/ge/graph/label/partitioned_call_label_maker.h @@ -54,7 +54,6 @@ | c | +---------------+ *******************************************************************************/ - namespace ge { class PartitionedCallLabelMaker : public LabelMaker { public: diff --git a/ge/graph/label/while_label_maker.h b/ge/graph/label/while_label_maker.h index 0eb0deee..6c30475b 100644 --- a/ge/graph/label/while_label_maker.h +++ b/ge/graph/label/while_label_maker.h @@ -70,7 +70,6 @@ | Node | +------------+ *******************************************************************************/ - namespace ge { class WhileOpLabelMaker : public LabelMaker { public: diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index aa825a5d..44556422 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -283,7 +283,8 @@ 
Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn std::vector &output_desc) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); + Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, + input_data, input_desc, output_data, output_desc); if (ret != SUCCESS) { GELOGE(ret, "Execute model failed, model_id:%u.", model_id); return ret; diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index 4534fe73..b331d780 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -919,11 +919,11 @@ Status DataDumper::DumpExceptionInfo(const std::vector exceptio ReplaceStringElem(op_name); ReplaceStringElem(op_type); string dump_file_path = - "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time); + "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." 
+ std::to_string(now_time); GELOGI("The exception dump file path is %s", dump_file_path.c_str()); uint64_t proto_size = dump_data.ByteSizeLong(); - unique_ptr proto_msg(new (std::nothrow) char[proto_size]); + std::unique_ptr proto_msg(new (std::nothrow) char[proto_size]); bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); if (!ret || proto_size == 0) { GELOGE(PARAM_INVALID, "Dump data proto serialize failed"); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 81d47b3b..bc755e07 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -16,7 +16,6 @@ #include "graph/load/new_model_manager/davinci_model.h" -#include #include #include #include @@ -84,7 +83,7 @@ const uint32_t kAddrLen = sizeof(void *); const int kDecimal = 10; const int kBytes = 8; const uint32_t kDataMemAlignSizeCompare = 64; -const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; +const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024 const uint32_t kDumpFlagOfL1Fusion = 0; const char *const kDefaultBatchLable = "Batch_default"; const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; @@ -331,8 +330,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. 
size: %zu", data_size); return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; } - GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - mem_base_, data_size); + GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", + runtime_param_.graph_id, mem_base_, data_size); if (!is_inner_weight_base_) { weights_mem_base_ = mem_base_; @@ -713,7 +712,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size // collect profiling for ge auto &profiling_manager = ProfilingManager::Instance(); if (profiling_manager.ProfilingModelLoadOn()) { - Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode()); + Status p_ret = ReportProfilingData(); if (p_ret != SUCCESS) { GELOGE(p_ret, "Report profiling data failed."); return p_ret; @@ -724,14 +723,14 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size return ret; } -Status DavinciModel::ReportProfilingData(bool check_device) { +Status DavinciModel::ReportProfilingData() { std::vector compute_graph_desc_info; Status ret = GetComputeGraphInfo(compute_graph_desc_info); if (ret != SUCCESS) { GELOGE(ret, "GetComputeGraphInfo failed."); return ret; } - ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info, check_device); + ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); op_list_.clear(); @@ -1544,7 +1543,8 @@ Status DavinciModel::LoadWithQueue() { } if (output_queue_ids_.size() != new_output_data_info_.size()) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Output queue ids not match model: output_queue=%zu output_data=%zu", + GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, + "Output queue ids not match model: output_queue=%zu output_data=%zu", output_queue_ids_.size(), 
new_output_data_info_.size()); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } @@ -2186,8 +2186,9 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data const std::vector &blobs = input_data.blobs; for (const auto &data : new_input_data_info_) { if (data.first >= blobs.size()) { - GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first); + GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), + new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, + data.second.GetOpName().c_str()); return FAILED; } @@ -2198,13 +2199,14 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data } uint64_t data_size = data.second.GetDataSize(); GE_CHK_BOOL_RET_STATUS(data_size >= data_buf.length, PARAM_INVALID, - "input data size(%lu) does not match model required size(%lu), ret failed.", data_buf.length, - data_size); + "input data size(%lu) does not match model required size(%lu), op_name(%s) ret failed.", + data_buf.length, data_size, data.second.GetOpName().c_str()); void *mem_addr = data.second.GetBasicAddr(); void *data_buf_addr = reinterpret_cast(reinterpret_cast(data_buf.data)); uint64_t data_buf_length = data_buf.length; - GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", - runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length); + GELOGI("CopyPlainData memcpy graph_%u type[F] input[%s] rank[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", + runtime_param_.graph_id, data.second.GetOpName().c_str(), data.first, mem_addr, data_buf_addr, data_size, + data_buf_length); GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind)); } @@ -2248,10 +2250,8 @@ inline int64_t SumSize(const vector &size_list) { Status 
DavinciModel::SinkModelProfile() { // profiling plugin must be registered - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS); - - Msprof::Engine::ReporterData reporter_data{}; + auto &prof_mgr = ProfilingManager::Instance(); + ReporterData reporter_data{}; // report model data tag name std::string tag_name; tag_name.append("model_load_info_").append(std::to_string(this->Id())); @@ -2269,32 +2269,32 @@ Status DavinciModel::SinkModelProfile() { reporter_data.deviceId = device_id_; reporter_data.data = (unsigned char *)&name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)name.c_str(); reporter_data.dataLen = name.size(); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); uint32_t model_id = this->Id(); reporter_data.data = (unsigned char *)&model_id; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // Load Start/End Time int64_t start_time = this->GetLoadBeginTime(); reporter_data.data = (unsigned char *)&start_time; reporter_data.dataLen = sizeof(int64_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + 
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); int64_t end_time = this->GetLoadEndTime(); reporter_data.data = (unsigned char *)&end_time; reporter_data.dataLen = sizeof(int64_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); int32_t task_num = task_list_.size(); std::multimap op_id_map; @@ -2308,6 +2308,7 @@ Status DavinciModel::SinkModelProfile() { uint32_t op_num = fusion_op_info->original_op_names.size(); uint32_t task_id = task->GetTaskID(); if (op_num > 0) { + GELOGI("task.id = %u, opNum = %u", task_id, op_num); op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); } } @@ -2350,39 +2351,39 @@ Status DavinciModel::SinkModelProfile() { int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 
1 : fusion_op_name.size(); reporter_data.data = (unsigned char *)&fusion_op_name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)fusion_op_name.c_str(); reporter_data.dataLen = fusion_op_name_len; - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // original op name before fusion reporter_data.data = (unsigned char *)&op_num; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); for (uint32_t k = 0; k < op_num; k++) { std::string op_name = fusion_op_info->original_op_names[k]; int32_t op_name_len = op_name.size() == 0 ? 
1 : op_name.size(); reporter_data.data = (unsigned char *)&op_name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)op_name.c_str(); reporter_data.dataLen = op_name_len; - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); } // stream id info uint32_t streamId = task->GetStreamId(); reporter_data.data = (unsigned char *)&streamId; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // memory info struct memoryInfo memory_info; @@ -2398,22 +2399,22 @@ Status DavinciModel::SinkModelProfile() { memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; reporter_data.data = (unsigned char *)&memory_info; reporter_data.dataLen = sizeof(struct memoryInfo); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // task info reporter_data.data = (unsigned char *)&task_count; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + 
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); Range task_range = op_id_map.equal_range(op_id); for (CIT idx = task_range.first; idx != task_range.second; ++idx) { uint32_t task_id = idx->second; reporter_data.data = (unsigned char *)&task_id; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); } } } @@ -2422,10 +2423,8 @@ Status DavinciModel::SinkModelProfile() { Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { // profiling plugin must be registered - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS); - - Msprof::Engine::ReporterData reporter_data{}; + auto &prof_mgr = ProfilingManager::Instance(); + ReporterData reporter_data{}; // report model data tag name std::string tag_name; tag_name.append("model_time_info_") @@ -2448,33 +2447,33 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { size_t name_len = name.size(); reporter_data.data = (unsigned char *)&name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)name.c_str(); reporter_data.dataLen = name.size(); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model 
id:%u.", this->Id()); // request id uint64_t request_id = current_data.request_id; reporter_data.data = (unsigned char *)&request_id; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); // thread id int32_t thread_id = GetDataInputTid(); reporter_data.data = (unsigned char *)&thread_id; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); // time info time_info_.modelId = this->Id(); reporter_data.data = (unsigned char *)&time_info_; reporter_data.dataLen = sizeof(struct timeInfo); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); return SUCCESS; @@ -2696,8 +2695,9 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b is_getnext_sink_dynamic_ = true; cur_dynamic_dims_.clear(); cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); - GE_CHK_RT_RET(rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), - netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST)); + auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), + netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); + GE_CHK_RT_RET(ret); } GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { @@ -2801,76 
+2801,42 @@ void *DavinciModel::Run(DavinciModel *model) { reinterpret_cast(shape_data_buffer_data) + shape_data_buffer_length / sizeof(int64_t)); GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); - delete[] (int64_t *)current_data.blobs.back().data; + delete[] reinterpret_cast(current_data.blobs.back().data); current_data.blobs.pop_back(); } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); - if (ProfilingManager::Instance().ProfilingOpTraceOn()) { - GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); - for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) { - if (!ProfilingManager::Instance().ProfilingLoadFlag()) { - vector prof_device_id_vec = ProfilingManager::Instance().GetProfilingDeviceId(); - for (size_t j = 0; j < prof_device_id_vec.size(); ++j) { - // just profiling, no need to check value - (void)ProfilingManager::Instance().StartProfiling(i, prof_device_id_vec[j]); - } - } - - GELOGI("rtModelExecute start."); - rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - continue); // [No need to check value] - GELOGI("rtModelExecute end"); - - GELOGI("rtStreamSynchronize start."); - rt_ret = rtStreamSynchronize(model->rt_model_stream_); - if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { - GELOGI("The model with multiple datasets aborts normally."); - } else { - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); - continue); // [No need to check value] - } - - 
GELOGI("rtStreamSynchronize end."); - (void)ProfilingManager::Instance().StopProfiling(); // just profiling, no need to check value - } + GE_TIMESTAMP_START(rtModelExecute); + GELOGI("rtModelExecute start."); + rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; + (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); + continue); + GELOGI("rtModelExecute end"); + GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtModelExecute, "GraphExcute::rtModelExecute")); + + GE_TIMESTAMP_START(rtStreamSynchronize); + GELOGI("rtStreamSynchronize start."); + rt_ret = rtStreamSynchronize(model->rt_model_stream_); + if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { + seq_end_flag = true; + } + if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { + GELOGI("The model with multiple datasets aborts normally."); } else { - GE_TIMESTAMP_START(rtModelExecute); - GELOGI("rtModelExecute start."); - rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); - continue); - GELOGI("rtModelExecute end"); - GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtModelExecute, "GraphExcute::rtModelExecute")); - - GE_TIMESTAMP_START(rtStreamSynchronize); - GELOGI("rtStreamSynchronize start."); - rt_ret = rtStreamSynchronize(model->rt_model_stream_); - if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { - seq_end_flag = true; - } - if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { - GELOGI("The model with multiple datasets aborts 
normally."); - } else { - GE_IF_BOOL_EXEC( - rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); - (void)model->ReturnResult(current_data.index, false, seq_end_flag, - data_wrapper->GetOutput()); // [No need to check value] - CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); - continue); - } - - GELOGI("rtStreamSynchronize end."); - GE_IF_BOOL_EXEC(model->is_first_execute_, - GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END)); + GE_IF_BOOL_EXEC( + rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); + (void)model->ReturnResult(current_data.index, false, seq_end_flag, + data_wrapper->GetOutput()); // [No need to check value] + CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); + continue); } + GELOGI("rtStreamSynchronize end."); + GE_IF_BOOL_EXEC(model->is_first_execute_, + GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_AFTER_PROC_START)); GE_TIMESTAMP_START(ReturnResult3); @@ -3170,21 +3136,29 @@ Status DavinciModel::DistributeTask() { const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { + auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); // for data dump - auto op_index = std::max(model_task_def->task(task_index).kernel().context().op_index(), - 
model_task_def->task(task_index).kernel_ex().op_index()); + auto op_index = std::max(task_def.kernel().context().op_index(), + task_def.kernel_ex().op_index()); OpDescPtr op = GetOpByIndex(op_index); GE_CHECK_NOTNULL(op); - SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); if (reinterpret_cast(task->GetDumpArgs()) != nullptr) { bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); if (call_dump || is_op_debug_reg_) { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } + + auto task_type = static_cast(task_def.type()); + bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) + && (task_type != RT_MODEL_TASK_KERNEL_EX) + && (task_type != RT_MODEL_TASK_HCCL); + GE_IF_BOOL_EXEC(no_need_profiling, continue); + + SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); // Load task info for profiling TaskDescInfo task_desc_info; if (!om_name_.empty()) { @@ -3193,7 +3167,7 @@ Status DavinciModel::DistributeTask() { task_desc_info.model_name = name_; } task_desc_info.op_name = op->GetName(); - task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); + task_desc_info.block_dim = task_def.kernel().block_dim(); task_desc_info.task_id = task->GetTaskID(); task_desc_info.stream_id = task->GetStreamId(); task_desc_info_.emplace_back(task_desc_info); @@ -3391,14 +3365,14 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 /// Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { - GELOGE(PARAM_INVALID, "[ZCPY] Update input data to model failed."); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); + return ACL_ERROR_GE_PARAM_INVALID; } if 
(UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { - GELOGE(PARAM_INVALID, "[ZCPY] Update output data to model failed."); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); + return ACL_ERROR_GE_PARAM_INVALID; } for (ZeroCopyTask &task : zero_copy_tasks_) { @@ -3444,7 +3418,7 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & } if (!CheckInputAndModelSize(buffer.length, data.second.GetDataSize(), is_dynamic)) { - GELOGE(FAILED, "Check input size and model size failed"); + GELOGE(FAILED, "Check input size and model size failed, op[%s]", data.second.GetOpName().c_str()); return FAILED; } @@ -3861,7 +3835,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa if (!is_async_mode_) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, + "Copy Output data to user failed."); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END)); } @@ -4061,7 +4036,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { data_dumper_.SetDeviceId(device_id); // set loop count addr - auto get_var_addr = [](const OpDescPtr &op, const RuntimeParam &runtime_param) -> void * { + auto get_var_addr = [](const OpDescPtr &op, const RuntimeParam &runtime_param) -> void *{ if (op != nullptr) { auto v_output_size = ModelUtils::GetOutputSize(op); auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param, op); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 
650f19eb..19888e1f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -440,7 +440,7 @@ class DavinciModel { Status SinkTimeProfile(const InputData ¤t_data); - Status ReportProfilingData(bool check_device = true); + Status ReportProfilingData(); void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index d6cdf42d..b595ac39 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -40,9 +40,7 @@ const int kCmdParSize = 2; const int kDumpCmdPairSize = 2; const std::size_t kProfCmdParaMaxSize = 1000; const std::size_t kProfStartCmdParaSize = 2; -const std::string kCmdTypeProfile = "profile"; const std::string kCmdTypeDump = "dump"; -const std::string kCmdTypeProfiling = "profiling"; const std::string kCmdTypeProfInit = "prof_init"; const std::string kCmdTypeProfFinalize = "prof_finalize"; const std::string kCmdTypeProfStart = "prof_start"; @@ -51,6 +49,9 @@ const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe"; const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe"; const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; const char *const kDeleteCustOp = "deleteCustOp"; +const int kTimeSpecNano = 1000000000; +const int kTimeSpecMiro = 1000000; +const int kSessionMaxBias = 100; struct CustAicpuSoBuf { uint64_t kernelSoBuf; uint32_t kernelSoBufLen; @@ -224,7 +225,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard 
lock(map_mutex_); std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); @@ -237,7 +238,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ } ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard lock(map_mutex_); std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { @@ -345,7 +346,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrSetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); } while (0); @@ -629,8 +630,7 @@ Status ModelManager::Stop(uint32_t model_id) { /// Status ModelManager::HandleCommand(const Command &command) { static const std::map> cmds = { - {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, - {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, + {kCmdTypeDump, HandleDumpCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, {kCmdTypeProfStop, HandleProfStopCommand}, {kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand}, @@ -645,21 +645,6 @@ Status ModelManager::HandleCommand(const Command &command) { } } -Status ModelManager::HandleAclProfilingCommand(const Command &command) { - if (command.cmd_params.size() < kCmdParSize) { - GELOGE(PARAM_INVALID, "When the cmd_type 
is 'profiling', the size of cmd_params must larger than 2."); - return PARAM_INVALID; - } - - std::string map_key = command.cmd_params[0]; - std::string value = command.cmd_params[1]; - if (map_key == PROFILE_CONFIG) { - ProfilingManager::Instance().SetProfilingConfig(value); - } - - return SUCCESS; -} - Status ModelManager::GetModelByCmd(const Command &command, std::shared_ptr &davinci_model) { if (command.cmd_params.size() < kCmdParSize) { @@ -806,29 +791,6 @@ Status ModelManager::HandleProfStopCommand(const Command &command) { return SUCCESS; } -Status ModelManager::HandleProfileCommand(const Command &command) { - if (command.cmd_params.size() < kCmdParSize) { - GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2."); - return PARAM_INVALID; - } - - std::string map_key = command.cmd_params[0]; - std::string value = command.cmd_params[1]; - - GELOGI("Profiling mode, Command key:%s , value:%s ", map_key.c_str(), value.c_str()); - - auto iter = PROFILE_COMPONENT_MAP.find(map_key); - if (iter != PROFILE_COMPONENT_MAP.end()) { - std::string property_value = (value == "on") ? 
"1" : "0"; - PropertiesManager::Instance().SetPropertyValue(iter->second, property_value); - } - - if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { - PropertiesManager::Instance().SetPropertyValue(map_key, value); - } - return SUCCESS; -} - static Status ParserPara(const Command &command, const string &dump_key, string &dump_value) { auto iter = std::find(command.cmd_params.begin(), command.cmd_params.end(), dump_key); if (iter != command.cmd_params.end()) { @@ -1072,12 +1034,12 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { GELOGE(INTERNAL_ERROR, "Failed to get current time."); return INTERNAL_ERROR; } - session_id = static_cast(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us + session_id = static_cast(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us session_id_bias_++; // max bais 100. - session_id_bias_ = session_id_bias_ % 100; - session_id = session_id * 100 + session_id_bias_; + session_id_bias_ = session_id_bias_ % kSessionMaxBias; + session_id = session_id * kSessionMaxBias + session_id_bias_; GELOGD("Generate new session id: %lu.", session_id); return SUCCESS; @@ -1086,8 +1048,7 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, - "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); + ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); GenModelId(&model_id); shared_ptr davinci_model = nullptr; @@ -1148,7 +1109,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGI("Parse model %u success.", model_id); - davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 
1000 * 1000 * 1000 + + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); @@ -1252,7 +1213,8 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, + "Invalid model id %u, check weather model has been loaded or not.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); @@ -1289,8 +1251,8 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } -Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) { - GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); +Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) { + GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); if (aicpu_kernel == nullptr) { @@ -1313,18 +1275,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ std::map new_so_name; new_so_name.insert({so_name, aicpu_kernel}); cust_aicpu_so_[resource_id] = new_so_name; - GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); return SUCCESS; } auto it_so_name = it->second.find(so_name); if (it_so_name == it->second.end()) { it->second.insert({so_name, aicpu_kernel}); - GELOGI("LoadCustAicpuSo add aicpu 
so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); + return SUCCESS; } + loaded = true; + GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str()); return SUCCESS; } Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { + GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); if (cust_aicpu_so_.size() == 0) return SUCCESS; // get current context diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index e3780d5b..fc98d9c2 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -169,8 +169,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @brief comment handle function /// ge::Status HandleCommand(const Command &command); - static ge::Status HandleAclProfilingCommand(const Command &command); - static ge::Status HandleProfileCommand(const Command &command); static ge::Status HandleDumpCommand(const Command &command); static ge::Status HandleProfModelSubscribeCommand(const Command &command); static ge::Status HandleProfModelUnsubscribeCommand(const Command &command); @@ -289,7 +287,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); - ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name); + ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded); ge::Status LaunchCustAicpuSo(); diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index 34fb7ff3..22a657ad 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -61,7 +61,7 @@ vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { 
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); continue); - GELOGI("[IMAS]GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); + GELOGI("GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); v_input_size.push_back(tensor_size); } @@ -96,7 +96,7 @@ vector ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); continue); - GELOGI("[IMAS]GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); + GELOGI("GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); v_output_size.push_back(tensor_size); } diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index b09a4fce..4fb64aab 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -279,9 +279,10 @@ Status HcclTaskInfo::SetAddrs(const std::shared_ptr &op_desc, output_data_addr = output_data_addrs_.empty() ? 
nullptr : output_data_addrs_[i]; } kernel_hccl_infos[i].inputDataAddr = input_data_addr; - if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER || hccl_type == HCOMREDUCE) { + if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER) { kernel_hccl_infos[i].outputDataAddr = output_data_addr; - } else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE) { + } else if (hccl_type == HCOMALLREDUCE || + hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE || hccl_type == HCOMREDUCE) { GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclOperationType(op_desc, op_type), "davinci_model: GetHcomOperationType fail!"); kernel_hccl_infos[i].outputDataAddr = output_data_addr; diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 04607c02..74faeb24 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -43,6 +43,13 @@ const char *kIsLastNode = "is_last_node"; const char *kIsFirstNode = "is_first_node"; const int64_t kCloseSkt = 100; const uint32_t kAddrLen = sizeof(void *); +const int kBaseInt = 10; +const int kStrtolFail = 0; +const int kArgsInputDesc = 0; +const int kArgsInputAddr = 1; +const int kArgsOutputDesc = 2; +const int kArgsOutputAddr = 3; +const int kArgsAttrHandle = 4; } // namespace namespace ge { @@ -66,7 +73,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci // get opcontext stored in model const domi::KernelContext &context = kernel_def.context(); // get kernel_type - kernel_type_ = static_cast(context.kernel_type()); + kernel_type_ = static_cast(context.kernel_type()); // get opdesc op_desc_ = davinci_model_->GetOpByIndex(context.op_index()); GE_CHECK_NOTNULL(op_desc_); @@ -88,13 +95,13 @@ Status 
KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci // get bin_file_key const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); // new aicpu kernel(rtCpuKernelLaunch) no need to check function - if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { + if (kernel_type_ == ccKernelType::CCE_AI_CORE) { rtError_t rt_ret; rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", kernel_def.stub_func().c_str()); return RT_ERROR_TO_GE_STATUS(rt_ret);); - } else if (kernel_type_ == cce::ccKernelType::TE) { + } else if (kernel_type_ == ccKernelType::TE) { rtError_t rt_ret; rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, @@ -111,7 +118,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci ctx_.opIndex2[i] = context.origin_op_index(i); } ctx_.opCount = context.origin_op_index_size(); - if (kernel_type_ == cce::ccKernelType::TE) { + if (kernel_type_ == ccKernelType::TE) { ctx_.opIndex = context.op_index(); uint16_t *args_offset_tmp = reinterpret_cast(const_cast(context.args_offset().data())); if (context.args_offset().size() / sizeof(uint16_t) < 1) { @@ -120,9 +127,9 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci } ret = InitTVMTask(args_offset_tmp[0], kernel_def); - } else if (kernel_type_ == cce::ccKernelType::CUSTOMIZED) { + } else if (kernel_type_ == ccKernelType::CUSTOMIZED) { ret = InitAICPUCustomTask(context.op_index(), kernel_def); - } else if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { ret = InitAicpuTask(context.op_index(), kernel_def); } else { if (kernel_def.args().empty() || args_size_ == 0) { 
@@ -371,9 +378,9 @@ Status KernelTaskInfo::Distribute() { rtError_t rt_ret = RT_ERROR_NONE; char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); - int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0; + int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, kBaseInt) : kStrtolFail; bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); - if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { + if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); // blockDim is reserved parameter, set to 1 rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), @@ -749,15 +756,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel return FAILED; } } - *(reinterpret_cast(args + ctx_.argsOffset[0])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsInputDesc])) = static_cast(reinterpret_cast(custom_info_.input_descs)); // arg 0 - *(reinterpret_cast(args + ctx_.argsOffset[1])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsInputAddr])) = static_cast(reinterpret_cast(custom_info_.input_addrs)); // arg 1 - *(reinterpret_cast(args + ctx_.argsOffset[2])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsOutputDesc])) = static_cast(reinterpret_cast(custom_info_.output_descs)); // arg 2 - *(reinterpret_cast(args + ctx_.argsOffset[3])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsOutputAddr])) = static_cast(reinterpret_cast(custom_info_.output_addrs)); // arg 3 - *(reinterpret_cast(args + ctx_.argsOffset[4])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsAttrHandle])) = static_cast(reinterpret_cast(custom_info_.attr_handle)); // arg 4 rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); @@ -874,8 +881,10 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const 
domi::KernelDef &k return INTERNAL_ERROR; } - if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); + if (kernel_type_ == ccKernelType::CUST_AI_CPU) { + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), + "launch cust aicpu so failed"); } // copy args to new host memory @@ -946,7 +955,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k GELOGI("Op debug is open in aicpu task info"); dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); } - if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { + if (kernel_type_ == ccKernelType::CUST_AI_CPU) { dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; } @@ -1076,7 +1085,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d Status KernelTaskInfo::SetContext(const domi::KernelDef &kernel_def) { const domi::KernelContext &context = kernel_def.context(); - ctx_.kernelType = static_cast(context.kernel_type()); + ctx_.kernelType = static_cast(context.kernel_type()); ctx_.opId = context.op_id(); ctx_.kernelFuncId = context.kernel_func_id(); ctx_.isFlowtable = context.is_flowtable(); @@ -1161,10 +1170,10 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! 
", error); return FAILED; } - cce::ccStatus_t cc_ret; + ccStatus_t cc_ret; std::string update_kernel_args = "ccUpdateKernelArgs"; - auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, - void *))mmDlsym(handle, const_cast(update_kernel_args.c_str())); + auto cceUpdateKernelArgs = (ccStatus_t(*)(ccOpContext &, uint64_t, uint64_t, + uint64_t, void *, uint64_t, void *))mmDlsym(handle, const_cast(update_kernel_args.c_str())); if (cceUpdateKernelArgs == nullptr) { GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); if (mmDlclose(handle) != 0) { @@ -1189,7 +1198,7 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u GELOGW("Failed to close handle %s", error); return FAILED; } - if (cc_ret != cce::CC_STATUS_SUCCESS) { + if (cc_ret != CC_STATUS_SUCCESS) { GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); return CCE_FAILED; } diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index f2945b0b..1f90ede1 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -43,7 +43,7 @@ class KernelTaskInfo : public TaskInfo { stream_id_(0), so_name_(""), kernel_name_(""), - kernel_type_(cce::ccKernelType::CCE_AI_CORE), + kernel_type_(ccKernelType::CCE_AI_CORE), dump_flag_(RT_KERNEL_DEFAULT), dump_args_(nullptr), op_desc_(nullptr), @@ -75,7 +75,7 @@ class KernelTaskInfo : public TaskInfo { Status Release() override; - cce::ccOpContext *GetCtx() override { return &ctx_; } + ccOpContext *GetCtx() override { return &ctx_; } FusionOpInfo *GetFusionOpInfo() override { return &fusion_op_info_; } @@ -92,7 +92,7 @@ class KernelTaskInfo : public TaskInfo { bool CallSaveDumpInfo() override { return call_save_dump_; }; - cce::ccOpContext ctx_; + ccOpContext ctx_; FusionOpInfo fusion_op_info_; private: @@ -153,7 
+153,7 @@ class KernelTaskInfo : public TaskInfo { uint32_t stream_id_; std::string so_name_; std::string kernel_name_; - cce::ccKernelType kernel_type_; + ccKernelType kernel_type_; uint32_t dump_flag_; void *dump_args_; OpDescPtr op_desc_; diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h index 89642cf8..a72d7de2 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h @@ -41,7 +41,7 @@ class StreamSwitchTaskInfo : public TaskInfo { Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; private: - void SetInputAndValuePtr(DavinciModel *davinci_model, const vector &input_data_addrs); + void SetInputAndValuePtr(DavinciModel *davinci_model, const std::vector &input_data_addrs); void *input_ptr_; rtCondition_t cond_; void *value_ptr_; @@ -49,7 +49,7 @@ class StreamSwitchTaskInfo : public TaskInfo { uint32_t true_stream_id_; rtSwitchDataType_t data_type_; static const uint32_t kInputNum = 2; - vector fixed_addr_offset_; + std::vector fixed_addr_offset_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index 63f29f84..65dca3b3 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -25,10 +25,11 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { const void *args[] = {this->GetNavTablePtr(), reinterpret_cast(static_cast(this->GetNavTableSize()))}; - rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc 
failied. error: 0x%X", rt_ret); return - RT_ERROR_TO_GE_STATUS(rt_ret);) - rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); + rtError_t rt_ret = rtMalloc(reinterpret_cast(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) + rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), reinterpret_cast(args), + sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index 69f7b159..4e22cd7c 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -19,6 +19,8 @@ namespace ge { namespace skt { +const size_t kFusedKernelMinimumSize = 2; +const size_t kFusedKernelSizeUnit = 2; SuperKernelFactory &SuperKernelFactory::GetInstance() { static SuperKernelFactory factory; return factory; @@ -79,17 +81,17 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list return FAILED; } - if (super_kernel_size < 2) { + if (super_kernel_size < kFusedKernelMinimumSize) { GELOGW( "SKT: the number of kernels being fused must be greater than or " "equal to 2"); return FAILED; } GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); - const size_t nav_table_len = 2 * stub_func_list.size(); + const size_t nav_table_len = kFusedKernelSizeUnit * stub_func_list.size(); std::unique_ptr 
nav_table(new(std::nothrow) uint64_t[nav_table_len]); GE_CHECK_NOTNULL(nav_table); - uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); + uint64_t nav_table_size = kFusedKernelSizeUnit * stub_func_list.size() * sizeof(int64_t); rtError_t rt_ret; void *hbm_nav_table_addr = nullptr; @@ -101,21 +103,21 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); // store two uint64_t address // address divided by 4 because of 32bits encoding, call offset will *4 when calculating - nav_table[i * 2] = static_cast(reinterpret_cast(sub_device_func)) / 4; - GELOGD("SKT: CALL offet %lu", nav_table[i * 2]); - nav_table[i * 2 + 1] = static_cast(reinterpret_cast(args_addr_list[i])); - GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); + nav_table[i * kFusedKernelSizeUnit] = static_cast(reinterpret_cast(sub_device_func)) / 4; + GELOGD("SKT: CALL offet %lu", nav_table[i * kFusedKernelSizeUnit]); + nav_table[i * kFusedKernelSizeUnit + 1] = static_cast(reinterpret_cast(args_addr_list[i])); + GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * kFusedKernelSizeUnit + 1]); } - rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); + rt_ret = rtMalloc(reinterpret_cast(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) - rt_ret = - rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(reinterpret_cast(hbm_nav_table_addr), nav_table_size, + reinterpret_cast(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. 
error: 0x%X", rt_ret); GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) // Create the necessary metadata for the super kernel - h = std::unique_ptr( - new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); + h = + std::unique_ptr(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); return SUCCESS; } } // namespace skt diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h index d296d29e..26f22564 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/new_model_manager/task_info/task_info.h @@ -20,7 +20,7 @@ #include #include "cce/customize.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "framework/common/ge_inner_error_codes.h" #include "graph/load/new_model_manager/ts_mem_mall.h" #include "graph/load/new_model_manager/task_info/task_info_factory.h" @@ -63,8 +63,8 @@ struct RuntimeParam { }; typedef struct FusionOpInfo { - vector original_op_names; - string op_name; + std::vector original_op_names; + std::string op_name; uint32_t op_index; uint32_t stream_id; } FusionOpInfo; @@ -87,7 +87,7 @@ class TaskInfo { virtual Status Release() { return SUCCESS; } - virtual cce::ccOpContext *GetCtx() { return nullptr; } + virtual ccOpContext *GetCtx() { return nullptr; } virtual uint32_t GetTaskID() { return 0xFFFFFFFF; } diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/new_model_manager/ts_mem_mall.h index 42ad3957..64a64930 100644 --- a/ge/graph/load/new_model_manager/ts_mem_mall.h +++ b/ge/graph/load/new_model_manager/ts_mem_mall.h @@ -25,7 +25,7 @@ #include "framework/common/debug/ge_log.h" namespace { -constexpr uint32_t kMaxTsMemBlock = 2 * 1024 * 1024; // Max block 2M +constexpr uint32_t kMaxTsMemBlock = 2097152; // Max block 2M 2 * 1024 * 1024 constexpr uint32_t kTsMemAligment = 64; // Malloc for 64 bits align 
constexpr uint32_t kTsMemAlignMask = kTsMemAligment - 1; } diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc index 970b292c..9cd3f30b 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -35,6 +35,7 @@ Status ZeroCopyOffset::InitInputDataInfo(int64_t output_size, void *virtual_addr GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is %p", op_desc->GetName().c_str(), output_size, virtual_addr); basic_addr_ = virtual_addr; + op_name_ = op_desc->GetName(); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, @@ -82,6 +83,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector &input_size_list GELOGD("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size); basic_addr_ = virtual_addr_list[idx]; + op_name_ = op_desc->GetName(); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h index 025d1b14..fa80f28b 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -66,9 +66,12 @@ class ZeroCopyOffset { int64_t GetDataSize() const { return data_size_; } // value of *outside_addrs_ from davinci_model std::vector>> &GetOutsideAddrs() { return outside_addrs_; } + // name of op + std::string 
GetOpName() const { return op_name_; } private: void *basic_addr_ = nullptr; + std::string op_name_; uint32_t data_count_ = 0; std::vector> data_info_; vector relative_offset_; @@ -80,4 +83,4 @@ class ZeroCopyOffset { std::vector zero_copy_relative_offset_; }; } // namespace ge -#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ \ No newline at end of file +#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 9b42d563..2609cb4b 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -131,7 +131,7 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma auto dst_addr = static_cast(buffer_addr); GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); - *(uintptr_t *)(args_info + offset) = reinterpret_cast(dst_addr); + *reinterpret_cast(args_info + offset)= reinterpret_cast(dst_addr); is_updated_ = true; } } diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index 4ba39ca8..d6027a08 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -25,13 +25,13 @@ namespace ge { const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, - 8 * kMByteSize, - 32 * kMByteSize, - 128 * kMByteSize, + kBinSizeUnit8 * kMByteSize, + kBinSizeUnit32 * kMByteSize, + kBinSizeUnit128 * kMByteSize, kGByteSize, - 4 * kGByteSize, - 16 * kGByteSize, - 26 * kGByteSize}; + kBinSizeUnit4 * kGByteSize, + kBinSizeUnit16 * kGByteSize, + kBinSizeUnit26 * kGByteSize}; static bool BlockComparator(const Block *left, const Block *right) { if (left->size != right->size) { diff --git a/ge/graph/manager/graph_caching_allocator.h 
b/ge/graph/manager/graph_caching_allocator.h index dc4af753..e024d5cd 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -34,10 +34,17 @@ namespace ge { constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes +constexpr size_t kBinSizeUnit4 = 4; +constexpr size_t kBinSizeUnit8 = 8; +constexpr size_t kBinSizeUnit16 = 16; +constexpr size_t kBinSizeUnit26 = 26; +constexpr size_t kBinSizeUnit32 = 32; +constexpr size_t kBinSizeUnit128 = 128; + constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold constexpr size_t kKByteSize = 1024; -constexpr size_t kMByteSize = 1024 * 1024; -constexpr size_t kGByteSize = 1024 * 1024 * 1024; +constexpr size_t kMByteSize = 1048576; // 1024 * 1024 +constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 static const uint32_t kNumBins = 8; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 87070e79..2c2495b4 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -533,9 +533,8 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr return SUCCESS; } -Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, - Graph2SubGraphInfoList &sub_graph_map, - uint64_t session_id) { +Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, + Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) { GE_CHECK_NOTNULL(compute_graph); // use default 16 multi thread const uint32_t thread_num = 16; @@ -550,14 +549,14 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, compute_graph, 
session_id, GetThreadLocalContext()); + compute_graph->GetGraphID(), subgraph, compute_graph, session_id, + GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); return FAILED; } vector_future.emplace_back(std::move(f)); } - for (auto &function_graph : compute_graph->GetAllSubgraphs()) { auto subgraph_list = sub_graph_map[function_graph]; for (const auto &subgraph : subgraph_list) { @@ -651,62 +650,13 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { GE_CHECK_NOTNULL(compute_graph); auto sub_graph_map = partitioner.GetSubGraphMap(); - std::string buffer_optimize; - graphStatus graph_status = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); - bool need_lx_fusion = (graph_status == GRAPH_SUCCESS) && (buffer_optimize != kOffOptimize); - if (options_.build_mode.empty() && need_lx_fusion) { - GELOGI("Enter normal mode with buffer_optimize:%s.", buffer_optimize.c_str()); - /// 1. Copy subgraph for buffer optimize while lx fusion failed. - /// 2. Set graph with attr "lx_fusion" for fusion optimize. 
- std::unordered_map copy_graphs; - GE_TIMESTAMP_START(CopySubGraphAndMarkFusion); - Status ret = CopySubGraphAndMarkFusion(compute_graph, sub_graph_map, copy_graphs); - GE_TIMESTAMP_EVENT_END(CopySubGraphAndMarkFusion, "SetSubgraph:CopySubGraphAndMarkFusion"); - if (ret != SUCCESS) { - GELOGE(ret, "CopySubGraphAndMarkFusion failed."); - return ret; - } - - // Multiply optimize subgraph with lx fusion - ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); - if (ret != SUCCESS) { - GELOGE(ret, "Multiply optimize subgraph with lx fusion failed."); - return ret; - } - - // Check whether all subgraph lx fusion success - GE_TIMESTAMP_START(CheckAllFusionOptimizeSuccess); - if (CheckAllFusionOptimizeSuccess(compute_graph, sub_graph_map)) { - GE_TIMESTAMP_EVENT_END(CheckAllFusionOptimizeSuccess, "SetSubgraph:CheckAllFusionOptimizeSuccess"); - return SUCCESS; - } - - // Replace subgraph with original graph for lx buffer - ret = ReplaceSubgraphWithOriGraph(compute_graph, sub_graph_map, copy_graphs); - if (ret != SUCCESS) { - GELOGE(ret, "Replace subgraph with original graph failed."); - return ret; - } - - // Multiply optimize subgraph with lx buffer - ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); - if (ret != SUCCESS) { - GELOGE(ret, "Multiply optimize subgraph with lx buffer failed."); - return ret; - } - } else { - /// Multiply optimize subgraph: - /// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize"; - /// 2. run lx fusion or buffer according build_mode and build_step in fe. 
- GELOGD("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", - options_.build_mode.c_str(), - options_.build_step.c_str(), - buffer_optimize.c_str()); - Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); - if (ret != SUCCESS) { - GELOGE(ret, "Multiply optimize subgraph with lx buffer"); - return ret; - } + GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.", + options_.build_mode.c_str(), + options_.build_step.c_str()); + Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); + if (ret != SUCCESS) { + GELOGE(ret, "Multiply optimize subgraph failed"); + return ret; } return SUCCESS; } @@ -2515,7 +2465,6 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager GetContext().SetSessionId(session_id); GetThreadLocalContext() = ge_context; graph_manager->UpdateLocalOmgContext(root_graph_id); - ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); GELOGD("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", @@ -2523,6 +2472,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager pthread_self()); GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); GE_CHECK_NOTNULL(compute_graph_tmp); + if (!AttrUtils::SetInt(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_ID, root_graph_id)) { + GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id); + return FAILED; + } compute_graph_tmp->SetSessionID(session_id); Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, compute_graph, @@ -2688,9 +2641,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { } // it will not execute graph preprocess, optimize, parition, build if the graph has built successful. 
- GELOGI("Start for run graph async."); - GeRootModelPtr ge_root_model = nullptr; if (graph_manager->IsGraphNeedBuild(graph_node)) { if (graph_node->GetBuildFlag()) { diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index be7d4eb2..84a07069 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -280,9 +280,9 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin return PARAM_INVALID; } uint64_t free_size = total_size_ - var_mem_size_; - if (free_size < (size + kSessionMemAlignSize * 2)) { + if (free_size < (size + kSessionMemAlignSize * kSessionMemAlignUnit)) { GELOGE(PARAM_INVALID, "Out of memory : current var size[%lu] exceeds total var size[%lu]", - size + kSessionMemAlignSize * 2 + var_mem_size_, total_size_); + size + kSessionMemAlignSize * kSessionMemAlignUnit + var_mem_size_, total_size_); return PARAM_INVALID; } diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index b4f6aca3..fcbc92c5 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -42,6 +42,7 @@ const size_t kGraphMemoryBuffer = 4UL * 1024UL * 1024UL * 1024UL; const size_t kMaxMemorySize = 256UL * 1024UL * 1024UL * 1024UL; const char kEnvGeuseStaticMemory[] = "GE_USE_STATIC_MEMORY"; const uint64_t kSessionMemAlignSize = 512; +const size_t kSessionMemAlignUnit = 2; enum MemStatus { NORMAL = 0, diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index d4aceddd..c99c9e87 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -106,7 +106,7 @@ Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_add GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str()); return INTERNAL_ERROR; } - base_addr = reinterpret_cast(reinterpret_cast(var_memory_base_map_[op_name].device_address)); + 
base_addr = static_cast(reinterpret_cast(var_memory_base_map_[op_name].device_address)); data_size = var_memory_base_map_[op_name].mem_size; return SUCCESS; } diff --git a/ge/graph/manager/util/debug.cc b/ge/graph/manager/util/debug.cc index 45c070c6..2c930d1f 100644 --- a/ge/graph/manager/util/debug.cc +++ b/ge/graph/manager/util/debug.cc @@ -32,7 +32,8 @@ Debug::~Debug() = default; void Debug::DumpProto(const Message &proto, const char *file) { std::string file_path = RealPath(file); - int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | M_UMASK_OTHREAD); + int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | + M_UMASK_OTHREAD); if (fd == -1) { GELOGW("Write %s failed", file_path.c_str()); return; diff --git a/ge/graph/manager/util/hcom_util.cc b/ge/graph/manager/util/hcom_util.cc index 487b24af..50fa9936 100644 --- a/ge/graph/manager/util/hcom_util.cc +++ b/ge/graph/manager/util/hcom_util.cc @@ -263,7 +263,8 @@ Status HcomOmeUtil::GetHcclRootId(const ge::ConstOpDescPtr &op_desc, int64_t &ro Status HcomOmeUtil::GetAllRootId(const ge::ConstOpDescPtr &op_desc, std::vector &kernel_hccl_infos) { GE_CHECK_NOTNULL(op_desc); - if (op_desc->GetType() == HCOMBROADCAST || op_desc->GetType() == HVDCALLBACKBROADCAST || op_desc->GetType() == HCOMREDUCE) { + if (op_desc->GetType() == HCOMBROADCAST || + op_desc->GetType() == HVDCALLBACKBROADCAST || op_desc->GetType() == HCOMREDUCE) { GELOGI("GetAllRootId Node[%s] opType[%s] get hccl rootId.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); int64_t root_id = 0; Status dmrt = GetHcclRootId(op_desc, root_id); diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 2fabc035..dfc6c9df 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -26,6 +26,13 @@ namespace { using namespace ge; const int 
kIdentityAnchorIndex = 0; +const size_t kSerialStringVecSize = 4; + +const int kCaseReadOnly = 0; +const int kCaseScopeWriteable = 2; +const int kCaseWriteable = 3; +const int kCaseInvalidRWType = 5; + // rw type of input. enum class InputRWType { kReadOnly, // Normal op input only read @@ -55,7 +62,7 @@ thread_local map node_rwtype_map_; /// @return rw_type_name /// static std::string InputRWTypeToSerialString(InputRWType rw_type) { - const static char *names[4] = {"ReadOnly", "Writeable", "ScopeWriteable", "InvalidRWType"}; + const static char *names[kSerialStringVecSize] = {"ReadOnly", "Writeable", "ScopeWriteable", "InvalidRWType"}; return names[static_cast(rw_type)]; } @@ -65,7 +72,7 @@ static std::string InputRWTypeToSerialString(InputRWType rw_type) { /// @return rw_type_name /// static std::string OutputRWTypeToSerialString(OutputRWType rw_type) { - const static char *names[4] = {"ReadOnly", "SoftRead", "Writeable", "InvalidRWType"}; + const static char *names[kSerialStringVecSize] = {"ReadOnly", "SoftRead", "Writeable", "InvalidRWType"}; return names[static_cast(rw_type)]; } @@ -118,13 +125,13 @@ InputRWType GetInputRwTypeInConflict(const std::set &rw_type_set) { } switch (total_rw_type) { - case 0: + case kCaseReadOnly: return InputRWType::kReadOnly; // all input rw type is readonly - case 2: + case kCaseScopeWriteable: return InputRWType::kScopeWriteable; // readonly 2 scope_writeable - case 3: + case kCaseWriteable: return InputRWType::kWriteable; // all input rw type is writeable or readonly 2 writeable - case 5: + case kCaseInvalidRWType: return InputRWType::kInvalidRWType; // writeable 2 scope_writeable default: return InputRWType::kInvalidRWType; @@ -643,7 +650,7 @@ Status HandleAllreduceDuplicateInput(ComputeGraphPtr &compute_graph) { auto ret = GraphUtils::InsertNodeBetweenDataAnchors(pre_out_anchor, in_data_anchor, identity_node); GE_CHK_STATUS_RET(ret, "Fail to insert identity."); GELOGI("InsertNode %s between %s and %s successfully.", 
identity_node->GetName().c_str(), - pre_node->GetName().c_str(), node->GetName().c_str()); + pre_node->GetName().c_str(), node->GetName().c_str()); } } } diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index 6a1fbb34..fbc13920 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -614,32 +614,32 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorSetParentNode(compute_graph->GetParentNode()); - (void) AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); - auto sgi = MakeShared(); - if (sgi == nullptr) { - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); - return FAILED; - } - // set engine name - sgi->SetEngineName(engine_name); - // set stream label - string sub_graph_stream; - if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { - sgi->SetStreamLabel(sub_graph_stream); - } - /// for now inputFlag is the same before and after partition. It should - /// be changed according to the real partition - std::vector sub_graph_input(graph_info_.input_size_, true); - std::vector sub_graph_output(graph_info_.output_size_, true); - sgi->SetSubGraph(sub_graph); - sgi->SetOutputFlag(sub_graph_output); - sgi->SetInputFlag(sub_graph_input); - sgi->SetOutputContext(graph_info_.output_name_); - AddEndPldInformationToSubGraphInfo(sgi); - GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", - engine_name.c_str(), - sub_graph->GetName().c_str(), - sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str()); + (void)AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); + GELOGD("set attr success. 
subgraph(%s) with parent graph(%s)", sub_graph->GetName().c_str(), + compute_graph->GetName().c_str()); + auto sgi = MakeShared(); + if (sgi == nullptr) { + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); + return FAILED; + } + // set engine name + sgi->SetEngineName(engine_name); + // set stream label + string sub_graph_stream; + if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { + sgi->SetStreamLabel(sub_graph_stream); + } + /// for now inputFlag is the same before and after partition. It should + /// be changed according to the real partition + std::vector sub_graph_input(graph_info_.input_size_, true); + std::vector sub_graph_output(graph_info_.output_size_, true); + sgi->SetSubGraph(sub_graph); + sgi->SetOutputFlag(sub_graph_output); + sgi->SetInputFlag(sub_graph_input); + sgi->SetOutputContext(graph_info_.output_name_); + AddEndPldInformationToSubGraphInfo(sgi); + GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", engine_name.c_str(), + sub_graph->GetName().c_str(), sgi->GetStreamLabel().empty() ? 
"null" : sgi->GetStreamLabel().c_str()); if (engine_name != input_subgraph_name) { // do not add Data subGraph into SubGraphInfo output_subgraphs.push_back(sgi); } else { diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 60742eb1..7c6ed8ce 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -74,10 +74,88 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { return SUCCESS; } +// just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input +bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { + // 1.Check if isAtomic attrs exist for HCOM + std::shared_ptr instance_ptr = GELib::GetInstance(); + if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { + GELOGW("GELib not initialized, atomic from ops kernel judge false, node_name: %s", node->GetName().c_str()); + return false; + } + + OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); + vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(node->GetType()); + for (const auto &op_info : op_info_vec) { + if (op_info.isAtomic) { + // check peer input is DATA + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + if (in_data_anchor->GetPeerOutAnchor() != nullptr && + in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { + auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); + if (peer_in_node->GetType() == DATA) { + GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), + op_info.engine.c_str()); + return false; + } + } + } + GELOGI("Recognized atomic op %s from %s engine.", node->GetName().c_str(), op_info.engine.c_str()); + hcom_node_vec_.push_back(node); + return true; + } + } + return false; +} + +bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index) { + auto out_data_anchor = 
node->GetAllOutDataAnchors().at(output_index); + if (out_data_anchor == nullptr) { + return false; + } + + for (auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) { + auto output_node = input_anchor->GetOwnerNode(); + // just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input + // hccl's attr ATOMIC_ATTR_INPUT_INDEX mark on CalcOpRunningParam, can't be get here + if (CheckAtomicFromOpsKernel(output_node)) { + return true; + } + } + return false; +} + +bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) { + OpDescPtr op_desc = node->GetOpDesc(); + std::map> node_workspace_offset; + bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); + bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); + node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); + if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { + std::vector atomic_output_index; + (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); + bool is_all_output_peer_also_atomic = true; + for (const auto &output_index : atomic_output_index) { + if (!IsOutputIndexPeerInputAtomic(node, output_index)) { + is_all_output_peer_also_atomic = false; + break; + } + } + if (is_all_output_peer_also_atomic) { + GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str()); + return true; + } + } + return false; +} + Status AtomicAddrCleanPass::HandleLoopGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec) { // Loop graph , insert clean node follow atomic node int index = 0; for (const auto &node : atomic_node_vec) { + if (CheckSkipInsertInLoopGraph(node)) { + continue; + } + // Insert atomic clean op NodePtr clean_addr_node = InsertAtomicAddrCleanNode(graph); if (clean_addr_node == nullptr) { @@ -249,32 +327,10 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { return 
false; } // 1.Check if isAtomic attrs exist for HCOM - std::shared_ptr instance_ptr = GELib::GetInstance(); - if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { - GELOGW("GELib not initialized"); - return false; + if (CheckAtomicFromOpsKernel(node)) { + return true; } - OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); - vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType()); - for (const auto &op_info : op_info_vec) { - if (op_info.isAtomic) { - GELOGI("Recognized atomic op %s from DNN_HCCL engine.", op_desc->GetName().c_str()); - // check peer input is DATA - for (auto &in_data_anchor : node->GetAllInDataAnchors()) { - if (in_data_anchor->GetPeerOutAnchor() != nullptr && - in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { - auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); - if (peer_in_node->GetType() == DATA) { - GELOGI("Recognized atomic op %s from DNN_HCCL engine and input is DATA.", op_desc->GetName().c_str()); - return false; - } - } - } - hcom_node_vec_.push_back(node); - return true; - } - } // 2.Check atomic attr in node std::map> node_workspace_offset; bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index ad60b7b5..8138d511 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -84,6 +84,11 @@ class AtomicAddrCleanPass : public GraphPass { Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, std::vector &common_atomic_nodes); + bool CheckAtomicFromOpsKernel(const NodePtr &node); + + bool IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index); + + bool CheckSkipInsertInLoopGraph(const NodePtr &node); vector hcom_node_vec_; bool is_loop_graph_ = false; diff --git a/ge/graph/passes/attach_stream_label_pass.cc 
b/ge/graph/passes/attach_stream_label_pass.cc index b04643a4..c0e0f669 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -24,11 +24,7 @@ Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) { FindNodes(graph); for (const auto &node : need_label_nodes_) { - OpDescPtr op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - if (!op_desc->HasAttr(ATTR_NAME_STREAM_LABEL)) { - GE_CHK_STATUS_RET(UpdateCondBranch(node), "Update cond branch failed, start node:%s.", node->GetName().c_str()); - } + GE_CHK_STATUS_RET(UpdateCondBranch(node), "Update cond branch failed, start node:%s.", node->GetName().c_str()); } GE_CHK_STATUS_RET(UpdateEnterNode(), "UpdateEnterNode failed."); @@ -55,13 +51,15 @@ Status AttachStreamLabelPass::ClearStatus() { /// void AttachStreamLabelPass::FindNodes(const ComputeGraphPtr &graph) { for (const NodePtr &node : graph->GetDirectNode()) { - const std::string &type = node->GetType(); - if (type == STREAMSWITCH) { + const auto &op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + const std::string &type = op_desc->GetType(); + if ((type == STREAMSWITCH) && op_desc->HasAttr(ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG)) { stream_switch_nodes_.emplace_back(node); - } else if (type == STREAMMERGE) { - if ((node->GetOpDesc() != nullptr) && !node->GetOpDesc()->HasAttr(ATTR_NAME_NEXT_ITERATION)) { - need_label_nodes_.emplace_back(node); - } + } else if ((type == STREAMMERGE) && !op_desc->HasAttr(ATTR_NAME_NEXT_ITERATION)) { + need_label_nodes_.emplace_back(node); } else if ((type == ENTER) || (type == REFENTER)) { enter_nodes_.emplace_back(node); } @@ -83,11 +81,15 @@ void AttachStreamLabelPass::FindNodes(const ComputeGraphPtr &graph) { /// Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { std::string stream_label; + if (AttachFlag(node, stream_label) != SUCCESS) { + GELOGE(FAILED, "Attach flag for node %s failed.", node->GetName().c_str()); + return FAILED; + 
} + std::unordered_set branch_nodes; std::unordered_set visited; std::stack nodes; nodes.push(node); - static const std::set end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; while (!nodes.empty()) { NodePtr cur_node = nodes.top(); @@ -95,10 +97,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { if (visited.count(cur_node) > 0) { continue; } - if (AttachFlag(cur_node, stream_label) != SUCCESS) { - GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); - return FAILED; - } const std::string &type = cur_node->GetType(); for (const auto &out_node : cur_node->GetOutAllNodes()) { @@ -115,10 +113,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { visited.insert(cur_node); } - if (node->GetType() == STREAMSWITCH) { - GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); - } - for (const NodePtr &tmp_node : branch_nodes) { GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); @@ -148,11 +142,10 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea GE_CHK_BOOL_EXEC(AttrUtils::GetBool(op_desc, ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG, value), return FAILED, "StreamSwitch get attr TRUE_BRANCH_STREAM failed."); stream_label += (value ? 
"_t" : "_f"); + GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); } else if (type == STREAMMERGE) { stream_label = node->GetName(); GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); - } else if ((type == EXIT) || (type == REFEXIT)) { - GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); } return SUCCESS; @@ -166,12 +159,13 @@ Status AttachStreamLabelPass::UpdateEnterNode() { std::unordered_map> enter_active_map; for (const auto &enter_node : enter_nodes_) { for (const auto &out_ctrl_node : enter_node->GetOutControlNodes()) { - if (out_ctrl_node->GetType() == STREAMACTIVE) { - if (enter_active_map.find(out_ctrl_node) == enter_active_map.end()) { - enter_active_map[out_ctrl_node] = {enter_node}; - } else { - enter_active_map[out_ctrl_node].emplace_back(enter_node); - } + if (out_ctrl_node->GetType() != STREAMACTIVE) { + continue; + } + if (enter_active_map.find(out_ctrl_node) == enter_active_map.end()) { + enter_active_map[out_ctrl_node] = {enter_node}; + } else { + enter_active_map[out_ctrl_node].emplace_back(enter_node); } } } @@ -226,9 +220,8 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no std::string stream_label; GE_CHECK_NOTNULL(active_node); (void)AttrUtils::GetStr(active_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label); - if (stream_label.empty()) { - GELOGW("stream_label of enter_active & enter_nodes is empty."); + GELOGD("stream_label of enter_active %s is empty.", active_node->GetName().c_str()); return SUCCESS; } @@ -238,7 +231,6 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); } } - GE_CHK_STATUS_RET(SetStreamLabel(active_node, stream_label), "Set stream label failed."); return SUCCESS; } diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc index e8d1493f..bf2e1170 
100644 --- a/ge/graph/passes/cond_remove_pass.cc +++ b/ge/graph/passes/cond_remove_pass.cc @@ -37,6 +37,12 @@ Status CondRemovePass::Run(NodePtr &node) { OutDataAnchorPtr cond_out_anchor = nullptr; InDataAnchorPtr cond_in_anchor = nullptr; Status ret = GetCondInfo(node, graph, cond_out_anchor, cond_in_anchor); + if (ret == NOT_CHANGED) { + return SUCCESS; + } else if (ret != SUCCESS) { + GELOGE(FAILED, "Get cond_info for node %s failed.", node->GetName().c_str()); + return FAILED; + } int32_t cond_index = 0; GELOGD("Handle cond remove for node %s.", node->GetOpDesc()->GetName().c_str()); bool if_cond_const = CheckIfCondConstInput(cond_out_anchor, cond_in_anchor, cond_index); @@ -322,11 +328,11 @@ Status CondRemovePass::GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, std::string type = node->GetType(); if ((kIfOpTypes.count(type) != 0) || (kCaseOpTypes.count(type) != 0)) { if (GetCondInfoForIfCase(node, graph, cond_out_anchor, cond_in_anchor) != SUCCESS) { - GELOGE(FAILED, "Get cond_info for if node failed."); + GELOGE(FAILED, "Get cond_info for if/case node failed."); return FAILED; } } else { - GELOGD("no need cond_pass for node %s.", node->GetName().c_str()); + GELOGD("no need cond_remove_pass for node %s.", node->GetName().c_str()); return NOT_CHANGED; } diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.cc b/ge/graph/passes/ctrl_edge_transfer_pass.cc index f53dc7be..a538a10c 100755 --- a/ge/graph/passes/ctrl_edge_transfer_pass.cc +++ b/ge/graph/passes/ctrl_edge_transfer_pass.cc @@ -38,7 +38,6 @@ namespace ge { * \ / * B */ - Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { GELOGD("CtrlEdgeTransferPass start running"); GE_CHECK_NOTNULL(graph); diff --git a/ge/graph/passes/data_pass.cc b/ge/graph/passes/data_pass.cc index 4ec8743e..5bbd2fb1 100644 --- a/ge/graph/passes/data_pass.cc +++ b/ge/graph/passes/data_pass.cc @@ -21,6 +21,7 @@ namespace ge { namespace { +const int kDataIndexOffset = 2; Status MappingSubgraphInput(const 
ComputeGraphPtr &graph, const std::function &input) { for (const auto &node : graph->GetDirectNode()) { if (node->GetType() != DATA) { @@ -111,7 +112,7 @@ Status ParseSubgraphPostFnWhile(const string &subgraph_name, const ComputeGraphP Status ParseSubgraphPostFnFor(const string &subgraph_name, const ComputeGraphPtr &graph) { return MappingSubgraphIndex(graph, - [](int data_index) { return (data_index == 0) ? 0 : data_index + 2; }, + [](int data_index) { return (data_index == 0) ? 0 : data_index + kDataIndexOffset; }, [](int retval_index) { return retval_index; }); } diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index 206d271c..afeca78f 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -16,6 +16,7 @@ #include "graph/passes/enter_pass.h" +#include "graph/debug/ge_attr_define.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" #include "graph/utils/graph_utils.h" @@ -72,33 +73,25 @@ Status EnterPass::Run(NodePtr &node) { } Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { - auto out_nodes_of_in_node = in_node->GetOutAllNodes(); - if (out_nodes_of_in_node.size() != kOutNodesNum) { + if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || !node->GetOutControlNodes().empty()) { return SUCCESS; } - - if (!node->GetOutControlNodes().empty()) { + bool is_constant_flag = true; + (void)AttrUtils::GetBool(node->GetOpDesc(), ENTER_ATTR_CONSTANT_FLAG, is_constant_flag); + if (!is_constant_flag) { return SUCCESS; } - for (const auto &out_node : node->GetOutDataNodes()) { - GE_CHECK_NOTNULL(out_node); - if (out_node->GetType() == MERGE) { - return SUCCESS; - } - } - GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); - auto out_data_anchor = node->GetOutDataAnchor(0); + const auto &out_data_anchor = node->GetOutDataAnchor(0); GE_CHECK_NOTNULL(out_data_anchor); - for (auto peer_in_data_anchor 
: out_data_anchor->GetPeerInDataAnchors()) { + for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); } - - auto graph = node->GetOwnerComputeGraph(); - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node)) + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)); + AddNodeDeleted(node); AddRePassNodesWithInOut(in_node); return SUCCESS; diff --git a/ge/graph/passes/for_pass.cc b/ge/graph/passes/for_pass.cc index f3caea35..31dee390 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -37,6 +37,7 @@ namespace { const uint32_t kSubgraphLoopVarInputIndex = 0; const uint32_t kSubgraphInputIndex = 1; const uint32_t kWhileOutputIndex = 5; + const size_t kIDiffValue = 2; const std::string kAbs = "Abs"; } @@ -137,7 +138,7 @@ Status ForPass::BuildForInfo(const ComputeGraphPtr &root_graph, const NodePtr &n for_info.ctrl_inputs = std::move(ctrl_inputs); for_info.ctrl_outputs = std::move(ctrl_outputs); - GELOGI("Build for_info for node %s succ.", node->GetName().c_str()); + GELOGI("Build for_info for node %s success.", node->GetName().c_str()); return SUCCESS; } @@ -159,13 +160,7 @@ OutDataAnchorPtr ForPass::FindInputWithIndex(const NodePtr &node, uint32_t index return nullptr; } - OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - if (peer_out_anchor == nullptr) { - GELOGE(FAILED, "FindInputWithIndex %s:%u failed: peer_out_anchor is NULL.", node->GetName().c_str(), index); - return nullptr; - } - - return peer_out_anchor; + return in_data_anchor->GetPeerOutAnchor(); } /// @@ -186,20 +181,13 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vectorGetAllInDataAnchorsSize(); for (uint32_t index = FOR_DATA_INPUT; index < input_data_num; index++) { InDataAnchorPtr in_data_anchor = 
node->GetInDataAnchor(index); - if (in_data_anchor == nullptr) { - GELOGE(FAILED, "FindInputWithIndex %s:%u failed: in_data_anchor is NULL.", node->GetName().c_str(), index); - return FAILED; - } - GE_IF_BOOL_EXEC(in_data_anchor->GetPeerOutAnchor() == nullptr, - GELOGW("Get null input by index %d from node %s ", - in_data_anchor->GetIdx(), node->GetName().c_str()); - continue); + GE_CHECK_NOTNULL(in_data_anchor); data_inputs.emplace_back(in_data_anchor->GetPeerOutAnchor()); } - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { std::vector peer_in_data_anchors; - for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { peer_in_data_anchors.emplace_back(peer_in_data_anchor); } data_outputs.emplace_back(peer_in_data_anchors); @@ -207,13 +195,13 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vectorGetInControlAnchor(); GE_CHECK_NOTNULL(in_ctrl_anchor); - for (auto &peer_out_ctrl_anchor : in_ctrl_anchor->GetPeerOutControlAnchors()) { + for (const auto &peer_out_ctrl_anchor : in_ctrl_anchor->GetPeerOutControlAnchors()) { ctrl_inputs.emplace_back(peer_out_ctrl_anchor); } OutControlAnchorPtr out_ctrl_anchor = node->GetOutControlAnchor(); GE_CHECK_NOTNULL(out_ctrl_anchor); - for (auto &peer_in_ctrl_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + for (const auto &peer_in_ctrl_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { ctrl_outputs.emplace_back(peer_in_ctrl_anchor); } @@ -707,7 +695,7 @@ Status ForPass::UpdateForBodyInputMapping(const WhileInfo &while_info) { } else if ((i == FOR_LIMIT_INPUT) || (i == FOR_DELTA_INPUT)) { continue; } else { - input_mapping[i] = i - 2; + input_mapping[i] = i - kIDiffValue; } } for_body->UpdateInputMapping(input_mapping); diff --git a/ge/graph/passes/mark_agnostic_pass.cc b/ge/graph/passes/mark_agnostic_pass.cc index 
8c9a0451..30fa1742 100644 --- a/ge/graph/passes/mark_agnostic_pass.cc +++ b/ge/graph/passes/mark_agnostic_pass.cc @@ -19,6 +19,8 @@ #include "graph/utils/tensor_utils.h" namespace ge { +const size_t kTwoInputNodesSize = 2; + Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { for (const auto &node : graph->GetDirectNode()) { auto node_type = NodeUtils::GetNodeType(*node); @@ -52,7 +54,7 @@ Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { /// Enter-----------+ /// +-> Merge /// NextIteration---+ - if (input_nodes.size() == 2) { + if (input_nodes.size() == kTwoInputNodesSize) { if (input_nodes.at(0)->GetType() == ENTER && input_nodes.at(1)->GetType() == NEXTITERATION) { continue; } diff --git a/ge/graph/passes/merge_pass.cc b/ge/graph/passes/merge_pass.cc index d2340037..26d82820 100644 --- a/ge/graph/passes/merge_pass.cc +++ b/ge/graph/passes/merge_pass.cc @@ -21,18 +21,16 @@ #include #include "framework/common/debug/ge_log.h" -#include "common/ge_inner_error_codes.h" #include "common/ge/ge_util.h" #include "graph/common/omg_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" #include "graph/passes/pass_utils.h" -using domi::PARAM_INVALID; -using domi::SUCCESS; - namespace ge { const int kValueIndexOutputIndex = 1; +const size_t kCaseNoInput = 0; +const size_t kCaseOneInput = 1; Status MergePass::Run(NodePtr &node) { GELOGD("MergePass running"); @@ -47,15 +45,14 @@ Status MergePass::Run(NodePtr &node) { return SUCCESS; } - auto out_data_anchors = node->GetAllOutDataAnchors(); - if (out_data_anchors.empty()) { + if (node->GetAllOutDataAnchors().empty()) { GELOGE(PARAM_INVALID, "[%s] Merge node output anchor is empty", node->GetName().c_str()); return PARAM_INVALID; } - auto in_data_nodes = node->GetInDataNodes(); + const auto &in_data_nodes = node->GetInDataNodes(); switch (in_data_nodes.size()) { - case 0: { + case kCaseNoInput: { /// Case A: input_count = 0, the output of merge node is inactive as well /// In which case the 
output branch can be removed /// until another merge node is met @@ -70,7 +67,7 @@ Status MergePass::Run(NodePtr &node) { } return ret; } - case 1: { // Case B: input_count = 1, the merge node can be optimized out + case kCaseOneInput: { // Case B: input_count = 1, the merge node can be optimized out std::vector merge_io_map = {PassUtils::GetUniqueInDataAnchorIndex(node), -1}; if (merge_io_map[0] != -1 && IsNeedChangeIndexToConstant(node)) { int index = merge_io_map[0]; diff --git a/ge/graph/passes/multi_batch_pass.cc b/ge/graph/passes/multi_batch_pass.cc index c7034612..74f7e30e 100644 --- a/ge/graph/passes/multi_batch_pass.cc +++ b/ge/graph/passes/multi_batch_pass.cc @@ -22,9 +22,6 @@ #include "graph/common/omg_util.h" #include "graph/utils/type_utils.h" -using std::string; -using std::vector; - namespace ge { Status MultiBatchPass::Run(ComputeGraphPtr graph) { GELOGD("MultiBatchPass Enter"); @@ -53,7 +50,7 @@ Status MultiBatchPass::Run(ComputeGraphPtr graph) { return FAILED; } std::vector> batch_shape; - vector> combined_batch; + std::vector> combined_batch; if (!CheckSwitchN(batch_shape, combined_batch)) { GELOGE(FAILED, "CheckSwitchN failed."); return FAILED; @@ -104,6 +101,7 @@ Status MultiBatchPass::ClearStatus() { /// Status MultiBatchPass::SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr &case_node) { const auto &func_desc = case_node->GetOpDesc(); + GE_CHECK_NOTNULL(func_desc); if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { GELOGD("Graph: %s Not multi-batch, Node: %s", graph->GetName().c_str(), case_node->GetName().c_str()); return SUCCESS; @@ -114,7 +112,7 @@ Status MultiBatchPass::SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr const auto &subgraph = graph->GetSubgraph(dynamic_branch_names[i]); GE_CHECK_NOTNULL(subgraph); - const string batch_label = "Batch_" + std::to_string(i); + const std::string batch_label = "Batch_" + std::to_string(i); for (const auto &node : subgraph->GetDirectNode()) { 
(void)AttrUtils::SetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label); } @@ -139,12 +137,12 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor continue; } - InDataAnchorPtr in_data_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); + const auto &in_data_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); if (in_data_anchor == nullptr) { GELOGE(FAILED, "FindPredInput failed, in_data_anchor is null, node:%s.", node->GetName().c_str()); return FAILED; } - OutDataAnchorPtr pred_input = in_data_anchor->GetPeerOutAnchor(); + const auto &pred_input = in_data_anchor->GetPeerOutAnchor(); if (pred_input == nullptr) { GELOGE(FAILED, "FindPredInput failed, pred_input is null, node:%s.", node->GetName().c_str()); return FAILED; @@ -178,12 +176,10 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor /// @return Status /// Status MultiBatchPass::GetDynamicType() { - for (const auto &switchn : switch_n_nodes_) { - auto switchn_desc = switchn->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_desc); + for (const auto &switch_n : switch_n_nodes_) { int32_t dynamic_type = static_cast(FIXED); - if (!AttrUtils::GetInt(switchn_desc, ATTR_DYNAMIC_TYPE, dynamic_type)) { - GELOGE(FAILED, "Get attr ATTR_DYNAMIC_TYPE of node: %s failed.", switchn->GetName().c_str()); + if (!AttrUtils::GetInt(switch_n->GetOpDesc(), ATTR_DYNAMIC_TYPE, dynamic_type)) { + GELOGE(FAILED, "Get attr ATTR_DYNAMIC_TYPE of node: %s failed.", switch_n->GetName().c_str()); return FAILED; } if (dynamic_type == static_cast(FIXED)) { @@ -191,7 +187,7 @@ Status MultiBatchPass::GetDynamicType() { return FAILED; } if (dynamic_type_ != static_cast(FIXED) && dynamic_type_ != dynamic_type) { - GELOGE(FAILED, "Attr ATTR_DYNAMIC_TYPE of all switchn node should be same, while one is %d and another is %d.", + GELOGE(FAILED, "Attr ATTR_DYNAMIC_TYPE of all switch_n node should be same, while one is %d and another is %d.", dynamic_type, dynamic_type_); return FAILED; } @@ 
-212,21 +208,19 @@ Status MultiBatchPass::GetDynamicType() { Status MultiBatchPass::GetUserDesignateShape() { data_name_order_.clear(); bool first_check = true; - for (const auto &switchn : switch_n_nodes_) { - auto switchn_desc = switchn->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_desc); - vector cur_switchn_data_name_order; - if (!AttrUtils::GetListStr(switchn_desc, ATTR_USER_DESIGNEATE_SHAPE_ORDER, cur_switchn_data_name_order)) { - GELOGE(FAILED, "Get attr ATTR_USER_DESIGNEATE_SHAPE_ORDER of node: %s failed.", switchn->GetName().c_str()); + for (const auto &switch_n : switch_n_nodes_) { + std::vector cur_data_name_order; + if (!AttrUtils::GetListStr(switch_n->GetOpDesc(), ATTR_USER_DESIGNEATE_SHAPE_ORDER, cur_data_name_order)) { + GELOGE(FAILED, "Get attr ATTR_USER_DESIGNEATE_SHAPE_ORDER of node: %s failed.", switch_n->GetName().c_str()); return FAILED; } if (first_check) { - data_name_order_ = cur_switchn_data_name_order; + data_name_order_ = cur_data_name_order; first_check = false; } else { - if (data_name_order_ != cur_switchn_data_name_order) { + if (data_name_order_ != cur_data_name_order) { GELOGE(FAILED, "The ATTR_USER_DESIGNEATE_SHAPE_ORDER of switchN must be same: %s failed.", - switchn->GetName().c_str()); + switch_n->GetName().c_str()); return FAILED; } } @@ -245,7 +239,8 @@ Status MultiBatchPass::GetUserDesignateShape() { /// @param [out] combined_batch /// @return bool /// -bool MultiBatchPass::CheckSwitchN(vector> &batch_shape, vector> &combined_batch) { +bool MultiBatchPass::CheckSwitchN(std::vector> &batch_shape, + std::vector> &combined_batch) { // Check if output_num of different SwitchN is same uint32_t batch_num = 0; for (const NodePtr &node : switch_n_nodes_) { @@ -281,7 +276,8 @@ bool MultiBatchPass::CheckSwitchN(vector> &batch_shape, vector> &batch_shape, vector> &batch_shape, - vector> &combined_batch) { +bool MultiBatchPass::GetBatchInfo(uint32_t batch_num, std::vector> &batch_shape, + std::vector> &combined_batch) { // Check if 
output_shape of different SwitchN is same - vector> idx_batch_shape; - vector> idx_combined_batch; + std::vector> idx_batch_shape; + std::vector> idx_combined_batch; for (uint32_t i = 0; i < batch_num; i++) { idx_batch_shape.clear(); idx_combined_batch.clear(); @@ -310,7 +306,7 @@ bool MultiBatchPass::GetBatchInfo(uint32_t batch_num, vector> &b GELOGE(FAILED, "CheckDims failed, get op_desc failed, node: %s.", node->GetName().c_str()); return false; } - vector output_dims; + std::vector output_dims; if (!AttrUtils::GetListInt(op_desc->GetOutputDesc(i), ATTR_NAME_SWITCHN_PRED_VALUE, output_dims)) { GELOGE(FAILED, "CheckDims failed, get attr ATTR_NAME_SWITCHN_PRED_VALUE failed, batch_index=%u.", i); return false; @@ -385,8 +381,8 @@ Status MultiBatchPass::FindSwitchOutNodes(uint32_t batch_num) { /// @return Status /// Status MultiBatchPass::ReplaceSwitchN(const ComputeGraphPtr &graph, const OutDataAnchorPtr &pred_value, - const vector> &batch_shape, - const vector> &combined_batch) { + const std::vector> &batch_shape, + const std::vector> &combined_batch) { NodePtr pred_value_node = pred_value->GetOwnerNode(); // Create SwitchCase node const std::string &switch_case_name = pred_value_node->GetName() + "_" + STREAMSWITCHN; @@ -429,31 +425,11 @@ bool MultiBatchPass::CheckDims(const std::vector> &output_s return false; } - size_t num = output_shape.size(); - size_t dim_num = output_shape[0].size(); - for (size_t i = 1; i < num; i++) { - size_t tmp_dim_num = output_shape[i].size(); - if (dim_num != tmp_dim_num) { - GELOGE(FAILED, "CheckDims failed: dim_num not equal, output_0:%zu, output_%zu:%zu.", dim_num, i, tmp_dim_num); + for (auto iter = output_shape.begin() + 1; iter != output_shape.end(); ++iter) { + if (output_shape[0] != *iter) { return false; } } - - if (dim_num == 0) { - return true; - } - - for (size_t i = 0; i < dim_num; i++) { - int64_t dim_value = output_shape[0][i]; - for (size_t j = 1; j < num; j++) { - int64_t tmp_dim_value = output_shape[j][i]; - if 
(dim_value != tmp_dim_value) { - GELOGE(FAILED, "CheckDims failed: dim_value not equal, dim_index=%zu, dim_value_0:%ld, dim_value_%zu:%ld.", i, - dim_value, j, tmp_dim_value); - return false; - } - } - } return true; } @@ -468,8 +444,8 @@ bool MultiBatchPass::CheckDims(const std::vector> &output_s /// NodePtr MultiBatchPass::CreateSwitchCaseNode(const ComputeGraphPtr &graph, const std::string &name, const OutDataAnchorPtr &pred_value, - const vector> &batch_shape, - const vector> &combined_batch) { + const std::vector> &batch_shape, + const std::vector> &combined_batch) { OpDescPtr op_desc = MakeShared(name, STREAMSWITCHN); if (op_desc == nullptr) { GELOGE(FAILED, "Create op_desc failed, StreamSwitchN:%s.", name.c_str()); @@ -512,7 +488,7 @@ NodePtr MultiBatchPass::CreateSwitchCaseNode(const ComputeGraphPtr &graph, const GELOGE(FAILED, "set attr ATTR_NAME_PRED_VALUE failed, StreamSwitchN:%s.", name.c_str()); return nullptr; } - const string &attr_combined_batch = ATTR_NAME_COMBINED_BATCH + "_" + std::to_string(i); + const std::string &attr_combined_batch = ATTR_NAME_COMBINED_BATCH + "_" + std::to_string(i); if (!AttrUtils::SetListInt(op_desc, attr_combined_batch, combined_batch[i])) { GELOGE(FAILED, "set attr ATTR_NAME_COMBINED_BATCH failed, StreamSwitchN:%s.", name.c_str()); return nullptr; diff --git a/ge/graph/passes/pass_utils.cc b/ge/graph/passes/pass_utils.cc index 5359ff63..3adfbde3 100644 --- a/ge/graph/passes/pass_utils.cc +++ b/ge/graph/passes/pass_utils.cc @@ -37,10 +37,6 @@ #include "graph/utils/type_utils.h" namespace ge { -namespace { -const uint32_t kShapeDimSize = 1; -const uint32_t DIM_SIZE_TWO = 2; -} // namespace Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector &data, std::vector &v_output, const bool scalar_output) { diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index 88e661a7..d1111d52 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ 
-149,10 +149,10 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node // 5. While->NetOutput in known subgraph std::string op_type; bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || - IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || - ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || - (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && - (kWhileOpTypes.count(in_node->GetType()) != 0)); + IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || + ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || + (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index 529480a6..f75a104f 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -72,25 +72,26 @@ Status SwitchToStreamSwitchPass::CheckCycleDependence(const ComputeGraphPtr &gra std::unordered_map> cond_switch_map; for (const NodePtr &node : graph->GetDirectNode()) { GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed."); - if ((type == SWITCH) || (type == REFSWITCH)) { - InDataAnchorPtr in_cond_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); - GE_CHECK_NOTNULL(in_cond_anchor); - OutDataAnchorPtr peer_out_anchor = in_cond_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_anchor); - if (FindSwitchCondInput(true, peer_out_anchor) != SUCCESS) { - GELOGE(FAILED, "Find pred_input for switch_node %s 
failed.", node->GetName().c_str()); - return FAILED; - } + if ((type != SWITCH) && (type != REFSWITCH)) { + continue; + } + InDataAnchorPtr in_cond_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); + GE_CHECK_NOTNULL(in_cond_anchor); + OutDataAnchorPtr peer_out_anchor = in_cond_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_anchor); + if (FindSwitchCondInput(peer_out_anchor) != SUCCESS) { + GELOGE(FAILED, "Find pred_input for switch_node %s failed.", node->GetName().c_str()); + return FAILED; + } - NodePtr cond_node = peer_out_anchor->GetOwnerNode(); - auto iter = cond_switch_map.find(cond_node); - if (iter == cond_switch_map.end()) { - cond_switch_map[cond_node] = { node }; - } else { - iter->second.emplace_back(node); - } - switch_nodes_.emplace_back(node); + NodePtr cond_node = peer_out_anchor->GetOwnerNode(); + auto iter = cond_switch_map.find(cond_node); + if (iter == cond_switch_map.end()) { + cond_switch_map[cond_node] = { node }; + } else { + iter->second.emplace_back(node); } + switch_nodes_.emplace_back(node); } MarkCycleDependence(cond_switch_map); @@ -241,10 +242,6 @@ Status SwitchToStreamSwitchPass::BypassSwitchNode(const NodePtr &switch_node, Ou if (idx == SWITCH_DATA_INPUT) { peer_data_anchor = peer_out_anchor; } else { - if (FindSwitchCondInput(false, peer_out_anchor) != SUCCESS) { - GELOGE(FAILED, "Find pred_input for switch_node %s failed.", switch_node->GetName().c_str()); - return FAILED; - } peer_cond_anchor = peer_out_anchor; } } @@ -254,15 +251,14 @@ Status SwitchToStreamSwitchPass::BypassSwitchNode(const NodePtr &switch_node, Ou /// /// @brief Find Switch cond input -/// @param [in] pass_switch_flag /// @param [out] peer_cond_anchor /// @return Status /// -Status SwitchToStreamSwitchPass::FindSwitchCondInput(bool pass_switch_flag, OutDataAnchorPtr &peer_cond_anchor) { +Status SwitchToStreamSwitchPass::FindSwitchCondInput(OutDataAnchorPtr &peer_cond_anchor) { NodePtr tmp_node = nullptr; - string type; - bool need_pass_type = true; - 
while (need_pass_type) { + std::string type; + bool pass_flag = true; + while (pass_flag) { if (tmp_node == nullptr) { tmp_node = peer_cond_anchor->GetOwnerNode(); } else { @@ -274,7 +270,7 @@ Status SwitchToStreamSwitchPass::FindSwitchCondInput(bool pass_switch_flag, OutD } GE_CHK_STATUS_RET(GetOriginalType(tmp_node, type), "Get node type failed."); - need_pass_type = (pass_switch_flag && ((type == SWITCH) || (type == REFSWITCH))); + pass_flag = ((type == SWITCH) || (type == REFSWITCH)); } return SUCCESS; @@ -369,7 +365,7 @@ Status SwitchToStreamSwitchPass::MarkBranches(const OutDataAnchorPtr &peer_cond_ } } else { int64_t switch_group_id = GetGroupId(stream_switch); - map>> switch_group_map; + std::map>> switch_group_map; std::list false_node_list; std::list true_node_list; std::list &node_list = true_branch_flag ? true_node_list : false_node_list; @@ -389,7 +385,7 @@ Status SwitchToStreamSwitchPass::MarkBranches(const OutDataAnchorPtr &peer_cond_ /// @return group_id /// int64_t SwitchToStreamSwitchPass::GetGroupId(const NodePtr &node) { - string tailing_optimization_option; + std::string tailing_optimization_option; bool is_tailing_optimization = false; if (GetContext().GetOption(OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, tailing_optimization_option) == GRAPH_SUCCESS) { // "1" means it's True from frontend option @@ -400,7 +396,7 @@ int64_t SwitchToStreamSwitchPass::GetGroupId(const NodePtr &node) { return 0; } - string hccl_group_id; + std::string hccl_group_id; if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, hccl_group_id)) { GELOGI("Node %s can not find hccl group id.", node->GetName().c_str()); return 0; @@ -432,6 +428,7 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) same_cond_switch.insert(true_switch_list.begin(), true_switch_list.end()); OutDataAnchorPtr peer_cond_anchor = iter->first; + GE_CHECK_NOTNULL(peer_cond_anchor); NodePtr cond_node = peer_cond_anchor->GetOwnerNode(); 
GELOGI("CombineSwitchNode: cond_node=%s.", cond_node->GetName().c_str()); @@ -549,6 +546,7 @@ NodePtr SwitchToStreamSwitchPass::CreateCastOp(const ComputeGraphPtr &graph, con NodePtr cast_node = graph->AddNode(cast_desc); GE_CHK_BOOL_EXEC(cast_node != nullptr, return nullptr, "Create cast_node failed."); + // Cast node has and only has one input GE_CHK_STATUS(GraphUtils::AddEdge(peer_cond_anchor, cast_node->GetInDataAnchor(0)), "Cast add data edge failed."); return cast_node; @@ -614,24 +612,24 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no return INTERNAL_ERROR; } - for (const NodePtr &in_ctl_node : switch_node->GetInControlNodes()) { - GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctl_node->GetOutControlAnchor(), switch_node->GetInControlAnchor()), + for (const NodePtr &in_ctrl_node : switch_node->GetInControlNodes()) { + GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctrl_node->GetOutControlAnchor(), switch_node->GetInControlAnchor()), "Remove ctl edge failed."); - GE_IF_BOOL_EXEC(!in_ctl_node->GetOutControlAnchor()->IsLinkedWith(cast_node->GetInControlAnchor()), { - GE_CHK_STATUS(GraphUtils::AddEdge(in_ctl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), + GE_IF_BOOL_EXEC(!in_ctrl_node->GetOutControlAnchor()->IsLinkedWith(cast_node->GetInControlAnchor()), { + GE_CHK_STATUS(GraphUtils::AddEdge(in_ctrl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), "Add ctl edge failed."); }); - GE_IF_BOOL_EXEC(in_ctl_node->GetType() != STREAMSWITCH, continue); - if (same_cond_switch.count(in_ctl_node) > 0) { - GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), + GE_IF_BOOL_EXEC(in_ctrl_node->GetType() != STREAMSWITCH, continue); + if (same_cond_switch.count(in_ctrl_node) > 0) { + GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctrl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), "Remove ctl edge failed."); continue; } - auto find_res1 = 
switch_node_map_.find(in_ctl_node); + auto find_res1 = switch_node_map_.find(in_ctrl_node); GE_IF_BOOL_EXEC(find_res1 == switch_node_map_.end(), { - GELOGE(INTERNAL_ERROR, "StreamSwitch node %s not found in switch_node_map_.", in_ctl_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "StreamSwitch node %s not found in switch_node_map_.", in_ctrl_node->GetName().c_str()); return INTERNAL_ERROR; }); auto find_res2 = find_res1->second.find(orig_switch_name); diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h index 48725230..05628871 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.h +++ b/ge/graph/passes/switch_to_stream_switch_pass.h @@ -42,9 +42,9 @@ namespace ge { +-----------+ +-----------+ | Const | | VariableV2| +-----------+ +-----------+ -*/ -/* Switch branch op optimize, Switches in same case merge to one StreamSwitch, update following nodes' input + + Switch branch op optimize, Switches in same case merge to one StreamSwitch, update following nodes' input +-----------+ / | task2 | \ @@ -131,11 +131,10 @@ class SwitchToStreamSwitchPass : public GraphPass { /// /// @brief Find Switch cond input - /// @param [in] pass_switch_flag /// @param [out] peer_cond_anchor /// @return Status /// - Status FindSwitchCondInput(bool pass_switch_flag, OutDataAnchorPtr &peer_cond_anchor); + Status FindSwitchCondInput(OutDataAnchorPtr &peer_cond_anchor); /// /// @brief Create StreamSwitch Node diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index 689510f0..654c3822 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -70,8 +70,10 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No trans_data_type = true; trans_format = true; trans_shape = true; - } else if (node->GetType() == RESHAPE) { + } else if (node->GetType() == RESHAPE || node->GetType() == EXPANDDIMS 
|| node->GetType() == SQUEEZE) { trans_shape = true; + } else if (node->GetType() == REFORMAT) { + trans_format = true; } id << node->GetType() << '-' << anchor_index; diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/ge/graph/passes/transop_without_reshape_fusion_pass.cc index d2b3f1b1..6bea9edc 100644 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.cc +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.cc @@ -63,7 +63,7 @@ void TransOpWithoutReshapeFusionPass::SetRemainNode( continue; } GELOGI("SetRemainNode node is %s", op_desc->GetName().c_str()); - GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return ); + GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return); } } @@ -594,7 +594,7 @@ void TransOpWithoutReshapeFusionPass::GetBeginOutDescAndEndInDesc(const int inde auto out_owner_node = out_peer_anchor->GetOwnerNode(); GE_CHECK_NOTNULL_JUST_RETURN(out_owner_node); auto out_peer_op_desc = out_owner_node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "out_peer_op_desc is nullptr"); return ); + GE_IF_BOOL_EXEC(out_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "out_peer_op_desc is nullptr"); return); out_desc = out_peer_op_desc->GetInputDesc(out_peer_anchor->GetIdx()); auto in_peer_anchor = nodes_anchor.back().first; @@ -602,7 +602,7 @@ void TransOpWithoutReshapeFusionPass::GetBeginOutDescAndEndInDesc(const int inde auto in_owner_node = in_peer_anchor->GetOwnerNode(); GE_CHECK_NOTNULL_JUST_RETURN(in_owner_node); auto in_peer_op_desc = in_owner_node->GetOpDesc(); - GE_IF_BOOL_EXEC(in_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "in_peer_op_desc is nullptr"); return ); + GE_IF_BOOL_EXEC(in_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "in_peer_op_desc is nullptr"); return); in_desc = in_peer_op_desc->GetOutputDesc(in_peer_anchor->GetIdx()); } @@ -734,10 +734,14 @@ void 
TransOpWithoutReshapeFusionPass::RemoveNousedNodes(const ComputeGraphPtr &g continue; } - GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return ); + GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return); GELOGI("remove node:%s", node->GetName().c_str()); - if (graph->RemoveNode(node) != GRAPH_SUCCESS) { - GELOGW("remove node failed!node:%s", node->GetName().c_str()); + if (GraphUtils::IsolateNode(node, {0}) != GRAPH_SUCCESS) { + GELOGW("Isolate node: %s failed.", node->GetName().c_str()); + continue; + } + if (GraphUtils::RemoveNodeWithoutRelink(graph, node) != GRAPH_SUCCESS) { + GELOGW("Remove node: %s failed.", node->GetName().c_str()); continue; } } diff --git a/ge/graph/passes/transpose_transdata_pass.cc b/ge/graph/passes/transpose_transdata_pass.cc index 7348f143..2178eac7 100644 --- a/ge/graph/passes/transpose_transdata_pass.cc +++ b/ge/graph/passes/transpose_transdata_pass.cc @@ -217,11 +217,11 @@ void TransposeTransDataPass::CopyInputEdges(NodePtr &origin_node, NodePtr &new_n } OutDataAnchorPtr out_anchor = origin_node->GetInDataAnchor(0)->GetPeerOutAnchor(); new_in_data_anchor->UnlinkAll(); - GE_IF_BOOL_EXEC(new_in_data_anchor->LinkFrom(out_anchor) != GRAPH_SUCCESS, GELOGW("Link failed"); return ); + GE_IF_BOOL_EXEC(new_in_data_anchor->LinkFrom(out_anchor) != GRAPH_SUCCESS, GELOGW("Link failed"); return); // control anchor only link to control anchor GE_IF_BOOL_EXEC( - GraphUtils::CopyInCtrlEdges(origin_node, new_node) != GRAPH_SUCCESS, GELOGW("Copy in ctrl edges failed"); return ); + GraphUtils::CopyInCtrlEdges(origin_node, new_node) != GRAPH_SUCCESS, GELOGW("Copy in ctrl edges failed"); return); } bool TransposeTransDataPass::TransDataCheckAccuracySupported(const OpDescPtr &op_desc) { diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc index 3e40e686..c9218296 100644 --- 
a/ge/graph/passes/variable_op_pass_bak.cc +++ b/ge/graph/passes/variable_op_pass_bak.cc @@ -252,7 +252,6 @@ Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusi // case 2: suppose input format of transdata not equal with out format // and input format not equal with var // so we make input format equal with var - for (auto &cur_trans : fusion_road) { if (cur_trans.input.GetFormat() == cur_trans.output.GetFormat()) { cur_trans.output.SetFormat(prev_node_info.output.GetFormat()); @@ -319,8 +318,8 @@ Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_roa } Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops){ + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops){ vector delete_trans_type; for (auto &trans_type : first_path_trans_order) { if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) { diff --git a/ge/graph/passes/variable_op_pass_bak.h b/ge/graph/passes/variable_op_pass_bak.h index b9fbb90e..fccd063b 100644 --- a/ge/graph/passes/variable_op_pass_bak.h +++ b/ge/graph/passes/variable_op_pass_bak.h @@ -45,8 +45,8 @@ class VariableOpPass : public GraphPass { private: Status UpdateTransRoad(VarTransRoad &fusion_road, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops); Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, map> trans_type_to_changed_desc, diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index b899ee83..2ee5e330 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1621,7 +1621,8 @@ Status GraphPrepare::CheckUserInput(const std::vector &user_input) { for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) { if 
(desc.GetShape().GetDim(i) < 0) { - std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(desc.GetShape().GetDim(i)) + "]" ; + std::string situation = "data dim[" + std::to_string(i) + "][" + + std::to_string(desc.GetShape().GetDim(i)) + "]" ; std::string reason = "it need >= 0"; ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i, @@ -1701,7 +1702,7 @@ Status GraphPrepare::PrepareOptimize() { try { (void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass); (void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass); - (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass" , new MarkAgnosticPass); + (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass", new MarkAgnosticPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; @@ -1796,6 +1797,16 @@ Status GraphPrepare::PrepareOptimize() { } void GraphPrepare::TypeConversionOfConstant() { + bool is_acl_compile = false; + for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { + // This can ensure that n is not a null pointer + // No Conversion when called by aclOpCompile + (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_compile); + if (is_acl_compile) { + return; + } + } + if (options_.train_graph_flag) { GELOGD("trans CONSTANT to CONSTANTOP in train."); for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 98712a82..7c8d9073 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -408,7 +408,7 @@ Status AippOp::ConvertRelatedInputNameToRank() { 
GE_CHECK_NOTNULL(aipp_params_); string related_input_name = aipp_params_->related_input_name(); - if(related_input_name.empty()) { + if (related_input_name.empty()) { return SUCCESS; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 1b926e4b..3b37003f 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -470,7 +470,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { - string error_msg = "No max size found from switchn node[" + switchn->GetName()+ "]"; + string error_msg = "No max size found from switchn node[" + switchn->GetName() + "]"; GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } diff --git a/ge/host_kernels/concat_v2_kernel.cc b/ge/host_kernels/concat_v2_kernel.cc index a9f0da81..234d8c8a 100644 --- a/ge/host_kernels/concat_v2_kernel.cc +++ b/ge/host_kernels/concat_v2_kernel.cc @@ -120,7 +120,7 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i int &tidx, ConstGeTensorPtr &tensor) { size_t input_size = input.size(); - // N >= 2 and N + 1 >= 3 + // N + 1 is greater than or equal to 3 if (input_size < kConcatV2InputNum) { GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum); return NOT_CHANGED; diff --git a/ge/host_kernels/floordiv_kernel.cc b/ge/host_kernels/floordiv_kernel.cc index e254af09..df381212 100644 --- a/ge/host_kernels/floordiv_kernel.cc +++ b/ge/host_kernels/floordiv_kernel.cc @@ -112,8 +112,8 @@ void FloorDivKernel::ShapeCal(const std::vector &input, Ge template T FloorDivKernel::DivCal(const T &x_i, const T &y_i) { if ((x_i < static_cast(0)) != (y_i < static_cast(0))) { - T abs_x_i = std::abs(x_i); - T abs_y_i = std::abs(y_i); + T abs_x_i = x_i < 0 ? -x_i : x_i; + T abs_y_i = y_i < 0 ? 
-y_i : y_i; return static_cast(static_cast(-(abs_x_i + abs_y_i - 1) / abs_y_i)); } else { return static_cast(static_cast(x_i / y_i)); diff --git a/ge/host_kernels/floordiv_kernel.h b/ge/host_kernels/floordiv_kernel.h index d3dc3ff7..b8f6dd12 100755 --- a/ge/host_kernels/floordiv_kernel.h +++ b/ge/host_kernels/floordiv_kernel.h @@ -40,10 +40,6 @@ class FloorDivKernel : public Kernel { template Status DataCal(const std::vector &input, ge::GeTensorPtr output_ptr); Status ComputeByDataType(DataType data_type, const std::vector &input, GeTensorPtr output_ptr); - - int64_t axis_dim_; - int64_t head_dim_; - int64_t end_dim_; }; } // namespace ge diff --git a/ge/host_kernels/gather_v2_kernel.cc b/ge/host_kernels/gather_v2_kernel.cc index e52b4534..ee73626b 100644 --- a/ge/host_kernels/gather_v2_kernel.cc +++ b/ge/host_kernels/gather_v2_kernel.cc @@ -40,6 +40,10 @@ const size_t kGatherV2InpotNum = 3; const size_t kMaxIndicatesDims = 1; // only support scalar and 1 dims indicates_ const std::set supported_type = {DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}; +const int64_t DIM_AXIS_0 = 0; +const int64_t DIM_AXIS_1 = 1; +const int64_t DIM_AXIS_2 = 2; +const int64_t DIM_AXIS_3 = 3; } // namespace template Status GatherV2Kernel::ProcessAxis0(ConstGeTensorPtr tensor_x, GeTensorPtr output) { @@ -191,16 +195,16 @@ Status GatherV2Kernel::GenData(const int64_t data_num, ConstGeTensorPtr tensor_x Status ret = SUCCESS; switch (axis) { - case 0: + case DIM_AXIS_0: ret = ProcessAxis0(tensor_x, output); break; - case 1: + case DIM_AXIS_1: ret = ProcessAxis1(tensor_x, output); break; - case 2: + case DIM_AXIS_2: ret = ProcessAxis2(tensor_x, output); break; - case 3: + case DIM_AXIS_3: ret = ProcessAxis3(tensor_x, output); break; default: diff --git a/ge/host_kernels/range_kernel.cc b/ge/host_kernels/range_kernel.cc index 32a72b47..97254fff 100644 --- a/ge/host_kernels/range_kernel.cc +++ 
b/ge/host_kernels/range_kernel.cc @@ -32,6 +32,9 @@ namespace ge { namespace { constexpr size_t kRangeInputNum = 3; constexpr uint32_t kRangeDimNum = 0; +constexpr size_t kStartIndex = 0; +constexpr size_t kLimitIndex = 1; +constexpr size_t kDeltaIndex = 2; const std::set kRangeSupportedType = {DT_INT32, DT_FLOAT}; } // namespace @@ -53,9 +56,9 @@ Status RangeKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetTensorDesc().GetDataType(); if (data_type == DT_FLOAT) { if (GetRange(*reinterpret_cast(start->GetData().data()), diff --git a/ge/host_kernels/ssd_prior_box_kernel.cc b/ge/host_kernels/ssd_prior_box_kernel.cc index b3a0fc3e..3661fa9d 100644 --- a/ge/host_kernels/ssd_prior_box_kernel.cc +++ b/ge/host_kernels/ssd_prior_box_kernel.cc @@ -180,14 +180,18 @@ Status SsdPriorboxKernel::SetVariance(const vector &variance, const int d return SUCCESS; } -Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint aspect_ratios_size, uint min_sizes_size, uint max_sizes_size, - int layer_width, int layer_height, int &num_priors, +Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, + uint32_t min_sizes_size, + uint32_t max_sizes_size, + int layer_width, + int layer_height, + int &num_priors, int &dim_size) const { if (ge::CheckUint32MulOverflow(min_sizes_size, aspect_ratios_size) != SUCCESS) { return PARAM_INVALID; } - uint tmp_value = aspect_ratios_size * min_sizes_size; + uint32_t tmp_value = aspect_ratios_size * min_sizes_size; if (ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; @@ -199,7 +203,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint aspect_ratios_size, uint mi return PARAM_INVALID; } num_priors = static_cast(tmp_value); - + if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; @@ -288,7 +292,7 @@ std::unique_ptr SsdPriorboxKernel::BoundaryCalulate(int dim_size, int l } } 
- return std::move(output_data); + return output_data; } Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector &v_output) { diff --git a/ge/host_kernels/ssd_prior_box_kernel.h b/ge/host_kernels/ssd_prior_box_kernel.h index 0ebf221d..c08217e2 100755 --- a/ge/host_kernels/ssd_prior_box_kernel.h +++ b/ge/host_kernels/ssd_prior_box_kernel.h @@ -100,8 +100,8 @@ class SsdPriorboxKernel : public Kernel { * @return OTHERS: Execution failed * @author */ - Status GetNumPriorAndDimSize(uint aspect_ratios_size, uint min_sizes_size, uint max_sizes_size, int layer_width, - int layer_height, int &num_priors, int &dim_size) const; + Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, + int layer_width, int layer_height, int &num_priors, int &dim_size) const; void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector &result); std::unique_ptr BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width, float step_height, int img_width, int img_height, float offset, diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index 2fe74415..b1bfb10a 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -272,6 +272,10 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &x_dims) { auto begin_data_type_size = GetSizeByDataType(begin_tensor->GetTensorDesc().GetDataType()); + if (begin_data_type_size == 0) { + GELOGW("Param begin_data_type_size should not be zero."); + return; + } size_t begin_vec_size = begin_tensor->GetData().size() / begin_data_type_size; auto final_dim_num = x_dims_num < begin_vec_size ? 
begin_vec_size : x_dims_num; for (size_t i = 0; i < final_dim_num; i++) { @@ -284,8 +288,10 @@ void StridedSliceKernel::ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_ten } void StridedSliceKernel::ExpandStrideWithEllipsisMask(const size_t x_dims_num, - const vector &x_dims, vector &orig_begin_vec, - vector &orig_end_vec, vector &orig_stride_vec) { + const vector &x_dims, + vector &orig_begin_vec, + vector &orig_end_vec, + vector &orig_stride_vec) { if (attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK) != 0) { auto end_mask = attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK); @@ -308,7 +314,7 @@ void StridedSliceKernel::ExpandStrideWithEllipsisMask(const size_t x_dims_num, if (orig_begin_vec.size() < x_dims_num) { for (size_t j = 1; j < (x_dims_num - orig_begin_vec.size() + 1); ++j) { orig_begin_vec.insert((orig_begin_vec.begin() + ellipsis_dim + j), 0); - orig_end_vec.insert((orig_end_vec.begin() + ellipsis_dim + j), x_dims.at(ellipsis_dim +j)); + orig_end_vec.insert((orig_end_vec.begin() + ellipsis_dim + j), x_dims.at(ellipsis_dim + j)); orig_stride_vec.insert((orig_stride_vec.begin() + ellipsis_dim + j), 1); } } diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index f506caec..2c38367a 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -23,6 +23,8 @@ namespace ge { namespace hybrid { +const size_t kPaddingUnit = 2; + size_t kMaxHbmMemorySize = 1024UL * 1024UL * 1024UL * 1024UL; // 1024G std::map> NpuMemoryAllocator::allocators_; @@ -77,7 +79,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { } } // padding up to multiple of padding, and add extra padding - allocate_size = (size + 2 * padding - 1) / padding * padding; + allocate_size = (size + kPaddingUnit * padding - 1) / padding * padding; GELOGD("Padding size %ld by %d. 
final size = %zu.", size, padding, allocate_size); buffer = MemManager::Instance() .CachingInstance(RT_MEMORY_HBM) diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 0910d2c7..1fe40c77 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -57,7 +57,8 @@ struct GraphExecutionContext { do { \ if ((context != nullptr) && (context)->profiler != nullptr) { \ if (node_name != nullptr) { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, ##__VA_ARGS__);\ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, \ + ##__VA_ARGS__); \ } else { \ context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ }\ @@ -77,7 +78,7 @@ do { \ RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) 
\ - RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACKS, fmt, "Callback", name, ##__VA_ARGS__) } // namespace hybrid } // namespace ge #endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 91996ab3..ba717a2d 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -379,11 +379,13 @@ Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, } if (output_real_size > 0) { if (outputs[i].length < static_cast(output_real_size)) { - GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]", + GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by " + "user should be greater than or equal to the real size of output[%ld]", i, outputs[i].length, output_real_size); return FAILED; } - GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, + RT_MEMCPY_DEVICE_TO_DEVICE)); } outputs[i].length = output_real_size; } diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 4af34451..8ba687c2 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -82,7 +82,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, Status HybridModelExecutor::Cleanup() { GELOGD("Start to cleanup."); context_.callback_manager->Destroy(); - RuntimeInferenceContext::DestroyContext(to_string(context_.session_id)); + RuntimeInferenceContext::DestroyContext(std::to_string(context_.session_id)); 
GELOGD("Cleanup successfully."); return SUCCESS; } diff --git a/ge/hybrid/executor/hybrid_profiler.cc b/ge/hybrid/executor/hybrid_profiler.cc index 7228197f..336a633f 100644 --- a/ge/hybrid/executor/hybrid_profiler.cc +++ b/ge/hybrid/executor/hybrid_profiler.cc @@ -25,7 +25,7 @@ namespace ge { namespace hybrid { namespace { const int kMaxEvents = 10000; -const int kEventDescMax = 256; +const int kEventDescMax = 512; const int kMaxEventTypes = 8; const int kIndent = 8; } diff --git a/ge/hybrid/executor/hybrid_profiler.h b/ge/hybrid/executor/hybrid_profiler.h index 62ef9c73..94a042e4 100644 --- a/ge/hybrid/executor/hybrid_profiler.h +++ b/ge/hybrid/executor/hybrid_profiler.h @@ -33,7 +33,7 @@ class HybridProfiler { SHAPE_INFERENCE, COMPILE, EXECUTION, - CALLBACK, + CALLBACKS }; struct Event { diff --git a/ge/hybrid/executor/node_done_manager.cc b/ge/hybrid/executor/node_done_manager.cc index c0b0b17b..f0d4324a 100644 --- a/ge/hybrid/executor/node_done_manager.cc +++ b/ge/hybrid/executor/node_done_manager.cc @@ -21,7 +21,7 @@ namespace ge { namespace hybrid { namespace { -constexpr int kDefaultWaitTimeoutInSec = 60 * 10; +constexpr int kDefaultWaitTimeoutInSec = 600; } bool NodeDoneManager::Cond::Await() { std::unique_lock lk(cond_mu_); diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 48b2ed72..04f1ee4b 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -27,7 +27,7 @@ namespace ge { namespace hybrid { class NodeTask; -class GraphExecutionContext; +struct GraphExecutionContext; class SubgraphContext; class ShapeFuture { diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 76a6cc37..5a464f8e 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -93,6 +93,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetName().c_str(), i); GE_CHECK_LE(i + 1, input_desc.size()); const auto 
&tensor_desc = input_desc[i]; + GE_CHECK_NOTNULL(tensor_desc); auto node_state = subgraph_context_->GetOrCreateNodeState(input_node); GE_CHECK_NOTNULL(node_state); node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index e6729352..b984eec3 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -260,8 +260,7 @@ Status NodeDoneCallback::ProfilingReport() { } auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info, - !profiling_manager.IsAclApiMode()); + profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); return SUCCESS; } diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index bd429b21..1d813526 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -62,7 +62,8 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { { std::lock_guard lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); - GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "Invoke InferShapeAndType failed."); + GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), + "Invoke InferShapeAndType failed."); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); } // Check again to make sure shape is valid after shape inference @@ -164,7 +165,7 @@ Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, co for (auto &it : fused_subgraph.input_mapping) { auto parent_tensor_desc = 
node_item.MutableInputDesc(it.first); GE_CHECK_NOTNULL(parent_tensor_desc); - GELOGD("Start to update shape by input[%u]", it.first); + GELOGD("Start to update shape by input[%d]", it.first); GELOGD("Update shape to [%s]", parent_tensor_desc->GetShape().ToString().c_str()); GELOGD("Update original shape to [%s]", parent_tensor_desc->GetOriginShape().ToString().c_str()); for (auto &tensor_desc : it.second) { @@ -183,12 +184,12 @@ Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, co } for (auto &it : fused_subgraph.output_mapping) { - uint32_t parent_output_idx = it.first; + int parent_output_idx = it.first; const auto &op_desc = it.second; GELOGD("Update parent output[%d] by [%s]", parent_output_idx, op_desc->GetName().c_str()); auto input_desc = op_desc->MutableInputDesc(0); GE_CHECK_NOTNULL(input_desc); - auto parent_output_tensor_desc = node_item.op_desc->MutableOutputDesc(parent_output_idx); + auto parent_output_tensor_desc = node_item.MutableOutputDesc(parent_output_idx); GE_CHECK_NOTNULL(parent_output_tensor_desc); GELOGD("Update shape to [%s]", input_desc->GetShape().ToString().c_str()); GELOGD("Update original shape to [%s]", input_desc->GetOriginShape().ToString().c_str()); diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index a491c9a5..7009331c 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -113,8 +113,8 @@ HybridDavinciModel::~HybridDavinciModel() { delete impl_; } -unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { - auto instance = unique_ptr(new (std::nothrow)HybridDavinciModel()); +std::unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { + auto instance = std::unique_ptr(new (std::nothrow)HybridDavinciModel()); if (instance != nullptr) { instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model); if (instance->impl_ != nullptr) { diff --git a/ge/hybrid/model/hybrid_model.cc 
b/ge/hybrid/model/hybrid_model.cc index feb6757b..132b0f8c 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -176,20 +176,9 @@ Status HybridModel::GetInputOutputDescInfo(vector &input_de return SUCCESS; } -void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, - Format &format, InputOutputDescInfo &input) { - uint32_t n, c, h, w; - n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; - c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; - h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; - w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; - - if (model_input_dims.size() == static_cast(NORMAL_TENSOR_SIZE)) { - input.shape_info.num = model_input_dims[n]; - input.shape_info.height = model_input_dims[h]; - input.shape_info.width = model_input_dims[w]; - input.shape_info.channel = model_input_dims[c]; - } +void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, + std::vector> &shape_ranges, + InputOutputDescInfo &input) { for (auto model_input_dim : model_input_dims) { input.shape_info.dims.push_back(model_input_dim); } @@ -197,25 +186,25 @@ void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_in return; } -void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { +void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input) { std::vector> shape_ranges; if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { // When static aipp is set, need to get the model input dims which processed by aipp vector model_input_dims; (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); - SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, format, input); + SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, input); return; } // judge if this data is linked dynamic aipp first, multiply batch has been considered if 
(op_desc->HasAttr("_dynamic_aipp_input_dims")) { vector dynamic_aipp_input_dims; (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); - SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, format, input); + SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, input); return; } else { vector input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims(); op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges); - SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, format, input); + SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, input); return; } } @@ -248,7 +237,7 @@ Status HybridModel::GetInputDescInfo(vector &input_desc, st // not support dynamic shape input for now, so input_size here will be not less than zero. input.size = input_size; - CreateInputDimsInfo(op_desc, format, input); + CreateInputDimsInfo(op_desc, input); formats.push_back(format); input_desc.push_back(input); @@ -257,29 +246,15 @@ Status HybridModel::GetInputDescInfo(vector &input_desc, st return SUCCESS; } -void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) { +void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, + InputOutputDescInfo &output_desc_info, uint32_t &format_result) { GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return ); Format format = output_desc->GetFormat(); GeShape shape = output_desc->GetShape(); std::vector> shape_ranges; output_desc->GetShapeRange(shape_ranges); DataType data_type = output_desc->GetDataType(); - int64_t dims[] = {1, 1, 1, 1}; format_result = format; - if (format == FORMAT_ND) { // for ND tensor - for (size_t i = 0; i < shape.GetDimNum() && i < (sizeof(dims) / sizeof(dims[0])); i++) { - dims[i] = shape.GetDim(i); - } - } else { // FOR FORMAT_NHWC or FORMAT_NCHW - dims[0] = shape.GetDim(format == FORMAT_NHWC ? 
NHWC_DIM_N : NCHW_DIM_N); // 0: first dim - dims[1] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C); // 1: second dim - dims[2] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H); // 2: third dim - dims[3] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W); // 3: forth dim - } - output_desc_info.shape_info.num = dims[0]; // 0: first dim - output_desc_info.shape_info.channel = dims[1]; // 1: second dim - output_desc_info.shape_info.height = dims[2]; // 2: third dim - output_desc_info.shape_info.width = dims[3]; // 3: forth dim if (format == FORMAT_FRACTAL_Z) { // FraczToHWCK int64_t k = shape.GetDim(0); // 0: first dim int64_t c = shape.GetDim(1); // 1: second dim @@ -310,7 +285,8 @@ void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDes Status HybridModel::GetOutputDescInfo(vector &output_desc, std::vector &formats) { std::vector output_desc_list; - GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list contains vaild input desc + // output_desc_list contains vaild input desc + GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); vector out_node_names; (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names); @@ -320,7 +296,8 @@ Status HybridModel::GetOutputDescInfo(vector &output_desc, GE_CHECK_NOTNULL(op_desc); auto out_size = static_cast(op_desc->GetInputsSize()); - GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); + GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), + FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); for (uint32_t index = 0; index < out_size; ++index) { string output_name; @@ -328,9 +305,11 @@ Status HybridModel::GetOutputDescInfo(vector 
&output_desc, std::vector src_index = op_desc->GetSrcIndex(); if (out_size == out_node_names.size()) { bool contains_colon = out_node_names[index].find(":") != std::string::npos; - output_name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]); + output_name = contains_colon ? out_node_names[index] : out_node_names[index] + + ":" + std::to_string(src_index[index]); } else { - output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); + output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + + "_" + std::to_string(src_index[index]); } InputOutputDescInfo output_desc_info; diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 1ec2f8a8..5fd5f8f5 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -100,12 +100,13 @@ class HybridModel { Status GetOutputDescInfo(vector &output_desc, std::vector &formats); - void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); + void CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input); void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } - void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, - Format &format, InputOutputDescInfo &input); + void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, + std::vector> &shape_ranges, + InputOutputDescInfo &input); private: friend class HybridModelBuilder; diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index f4da3dcf..d519c35b 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -35,7 +35,6 @@ namespace hybrid { namespace { const uint32_t kSubgraphIndex = 0U; const uint32_t kVarOutputIndex = 0U; -const uint32_t kAlignment = 32; const int 
kBytes = 8; const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; @@ -339,9 +338,9 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(*op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { GELOGE(INTERNAL_ERROR, - "[%s] Failed to get attr [%s]", - op_desc->GetName().c_str(), - ATTR_NAME_PARENT_NODE_INDEX.c_str()); + "[%s] Failed to get attr [%s]", + op_desc->GetName().c_str(), + ATTR_NAME_PARENT_NODE_INDEX.c_str()); return INTERNAL_ERROR; } @@ -793,7 +792,7 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr "Shape size is invalid"); auto offset = static_cast(elem_num * kBytes); auto hbm_raw_data_base_addr = - reinterpret_cast(reinterpret_cast(var_addr) + offset); + static_cast(reinterpret_cast(var_addr) + offset); for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); } @@ -987,7 +986,7 @@ Status HybridModelBuilder::IndexTaskDefs() { // index task defs GELOGD("To index tasks for subgraph: %s", name.c_str()); - unordered_map node_map; + std::unordered_map node_map; for (const auto &node : sub_graph->GetDirectNode()) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index 8fac4a73..8fbdc648 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -30,8 +30,8 @@ class NodeTask; class NodeExecutor; struct FusedSubgraph { - std::map> input_mapping; - std::map output_mapping; + std::map> input_mapping; + std::map output_mapping; std::vector nodes; ComputeGraphPtr graph; }; diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 3b87c8b8..407210cf 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -15,7 +15,7 @@ */ #include 
"aicore_node_executor.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "hybrid/executor/hybrid_execution_context.h" namespace ge { diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h index 989090e9..9e92a160 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -89,7 +89,7 @@ class TaskCompilerFactory { class CompilerFunctionRegistrar { public: - CompilerFunctionRegistrar(CreateFn fn); + explicit CompilerFunctionRegistrar(CreateFn fn); ~CompilerFunctionRegistrar() = default; }; } // namespace hybrid diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 7ed14309..80ea579b 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -15,7 +15,7 @@ */ #include "hybrid/node_executor/aicore/aicore_op_task.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" @@ -38,7 +38,7 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) } Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { - auto op_desc_ptr = make_shared(op_desc); + auto op_desc_ptr = std::make_shared(op_desc); GE_CHECK_NOTNULL(op_desc_ptr); auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { @@ -151,8 +151,8 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type != cce::ccKernelType::TE) { + auto kernel_type 
= static_cast(context.kernel_type()); + if (kernel_type != ccKernelType::TE) { GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast(kernel_type)); return INTERNAL_ERROR; } diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index bf948349..b6dfd82b 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -26,7 +26,7 @@ namespace hybrid { class AiCoreTaskCompiler : public TaskCompiler { public: AiCoreTaskCompiler() = default; - ~AiCoreTaskCompiler() = default; + ~AiCoreTaskCompiler() override = default; Status CompileOp(const NodePtr &node, std::vector &tasks) override; Status Initialize() override; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 1a47e525..7330f616 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -15,7 +15,7 @@ */ #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "common/formats/formats.h" #include "aicpu/common/aicpu_task_struct.h" #include "graph/load/new_model_manager/model_manager.h" @@ -642,10 +642,14 @@ Status AicpuNodeTask::Init(const HybridModel &model) { const std::string &so_name = kernel_def.so_name(); const OpDescPtr op_desc = node_item_->GetOpDesc(); const auto &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed."); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == 
ccKernelType::CUST_AI_CPU) { + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), + "load cust aicpu so failed."); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + } } GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, @@ -723,9 +727,9 @@ Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) { auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead); // if has input and output, need copy to ioaddr - error_t cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), - &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); - GE_CHK_BOOL_RET_STATUS(cpy_ret == EOK, INTERNAL_ERROR, + int cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), + &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); + GE_CHK_BOOL_RET_STATUS(cpy_ret == 0, INTERNAL_ERROR, "Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.", node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); return SUCCESS; @@ -736,9 +740,9 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { const auto &so_name = task_def_.kernel().so_name(); const auto &kernel_name = task_def_.kernel().kernel_name(); const auto &kcontext = task_def_.kernel().context(); - auto kernel_type = static_cast(kcontext.kernel_type()); + auto kernel_type = static_cast(kcontext.kernel_type()); uint32_t flag = RT_KERNEL_DEFAULT; - if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + if (kernel_type == ccKernelType::CUST_AI_CPU) { flag |= static_cast(RT_KERNEL_CUSTOM_AICPU); } auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index b984cc86..1205b190 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -37,6 +37,8 @@ class 
AicpuNodeTaskBase : public NodeTask { ~AicpuNodeTaskBase() override = default; + using NodeTask::Init; + virtual Status Init(const HybridModel &model) = 0; Status UpdateArgs(TaskContext &context) override; diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index fb1966b4..2dde993b 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -27,7 +27,7 @@ class HybridModel; class KnownNodeTask : public NodeTask { public: - KnownNodeTask(std::shared_ptr davinci_model) + explicit KnownNodeTask(std::shared_ptr davinci_model) : davinci_model_(davinci_model) {} diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc b/ge/hybrid/node_executor/controlop/control_op_executor.cc index 83fc09ee..74920b22 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -405,7 +405,7 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, auto node_item = model.GetNodeItem(node); GE_CHECK_NOTNULL(node_item); - unique_ptr node_task; + std::unique_ptr node_task; auto node_type = node->GetType(); if (node_type == IF || node_type == STATELESSIF) { node_task.reset(new(std::nothrow) IfOpNodeTask()); diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.h b/ge/hybrid/node_executor/controlop/control_op_executor.h index 7520afd1..3becfaaa 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.h +++ b/ge/hybrid/node_executor/controlop/control_op_executor.h @@ -25,6 +25,7 @@ namespace ge { namespace hybrid { class ControlOpNodeTask : public NodeTask { public: + using NodeTask::Init; virtual Status Init(const NodePtr &node, const HybridModel &model) = 0; Status UpdateArgs(TaskContext &context) override; diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc 
b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc index ee45964c..a52e5670 100755 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -61,18 +61,18 @@ Status RefInputTask::Execute(TaskContext &context) { Status RefInputTask::RefOneByOne(TaskContext &context) { GELOGI("node %s type %s ref input one by one begin.", node_name_.c_str(), node_type_.c_str()); - uint32_t input_num = context.NumInputs(); - uint32_t output_num = context.NumOutputs(); + int input_num = context.NumInputs(); + int output_num = context.NumOutputs(); if (output_num > input_num) { - GELOGE(INTERNAL_ERROR, "node %s type %s has %u outputs but only %u inputs, can't ref one by one.", + GELOGE(INTERNAL_ERROR, "node %s type %s has %d outputs but only %d inputs, can't ref one by one.", node_name_.c_str(), node_type_.c_str(), output_num, input_num); return INTERNAL_ERROR; } - for (uint32_t out_index = 0; out_index < output_num; ++out_index) { + for (uint32_t out_index = 0; out_index < static_cast(output_num); ++out_index) { auto input = context.GetInput(out_index); GE_CHECK_NOTNULL(input); GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); - GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", + GELOGD("node %s type %s output[%d] ref input[%d] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, out_index, input->GetData()); } GELOGI("node %s type %s ref input one by one end.", node_name_.c_str(), node_type_.c_str()); diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 3bf71013..01fd391d 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -20,7 +20,6 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" namespace { -const size_t kAssignInputNum = 2; const size_t kAssignRefInputIndex = 0; const size_t 
kAssignValueInputIndex = 1; const size_t kAssignRefOutputIndex = 0; diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index e577f09b..95e50c31 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -34,7 +34,6 @@ const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; const char *const kEngineNameHccl = "ops_kernel_info_hccl"; const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; -const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; } Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h index 9ea544a1..73873002 100644 --- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h +++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h @@ -41,7 +41,6 @@ class PartitionedCallNodeTask : public NodeTask { const GraphItem *graph_item_; std::unique_ptr subgraph_executor_; - GraphExecutionContext *context_ = nullptr; }; class PartitionedCallNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index b7152878..77004f99 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -233,9 +233,7 @@ Status TaskContext::AllocateOutput(int index, } else { GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr)); GELOGD("Allocating output successfully. node: %s. 
index = %d, size = %zu", - node_item_->NodeName().c_str(), - index, - outputs_start_[index].GetSize()); + node_item_->NodeName().c_str(), index, outputs_start_[index].GetSize()); } } } diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 2cff0536..0549a1dc 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -29,7 +29,7 @@ namespace ge { namespace hybrid { -class GraphExecutionContext; +struct GraphExecutionContext; class SubgraphContext; class TaskContext { diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 306a804a..92700179 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -485,11 +485,9 @@ Status GELib::Finalize() { void GELib::ShutDownProfiling() { std::lock_guard lock(status_mutex_); - if (!ProfilingManager::Instance().ProfilingOpTraceOn() && ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().StopProfiling(); - } if (ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); + ProfilingManager::Instance().StopProfiling(); + ProfilingManager::Instance().PluginUnInit(); } } diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 74aa6a60..f181170c 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -49,6 +49,8 @@ const std::string IR_OPTION_LOG_LEVEL_DEFAULT = "default"; const std::string IR_OPTION_BUFFER_OPTIMIZE_DEFAULT = "l2_optimize"; const std::string IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT = "0"; const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; +const std::string kInputShape = "input_shape"; +const std::string kInputFormat = "input_format"; } // namespace static graphStatus CheckGlobalOptions(std::map &global_options) { @@ -225,7 +227,9 @@ class Impl { ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const std::map &options); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector 
&inputs); - graphStatus Init(const std::map &options); + graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); + graphStatus UpdateDataOpAttr(const Graph &graph); + graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, @@ -240,6 +244,40 @@ class Impl { OmgContext omg_context_; }; +graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { + GELOGD("Enter Update Data Attr Process!"); + if (options_.find(kInputShape) == options_.end()) { + return GRAPH_SUCCESS; + } + unordered_map> shape_map; + vector>> user_shape_map; + GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), + return GRAPH_PARAM_INVALID, "parse input shape failed!"); + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + auto tensor_input = op->MutableInputDesc(0); + auto tensor_output = op->MutableOutputDesc(0); + GE_CHECK_NOTNULL(tensor_input); + GE_CHECK_NOTNULL(tensor_output); + string data_op_name = op->GetName(); + auto iter = shape_map.find(data_op_name); + if (iter != shape_map.end()) { + tensor_input->SetShape(ge::GeShape(iter->second)); + tensor_output->SetShape(ge::GeShape(iter->second)); + GELOGD("update input [%s] shape info", data_op_name.c_str()); + } else { + GELOGI("no need update input [%s] attr because not found from input_shape.", data_op_name.c_str()); + } + } + } + return GRAPH_SUCCESS; +} + graphStatus Impl::CheckOptions(const std::map &options) { for (auto &ele : options) { auto it = ge::ir_option::ir_builder_suppported_options.find(ele.first); @@ -275,17 +313,61 @@ 
graphStatus Impl::CheckOptions(const std::map &options return GRAPH_PARAM_INVALID; } } + // Check option EXEC_DISABLE_REUSED_MEMORY + it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY); + if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) { + return GRAPH_PARAM_INVALID; + } + return GRAPH_SUCCESS; +} + +graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + string data_op_name = op->GetName(); + GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); + ge::GeTensorDesc tensor = op->GetInputDesc(0); + ge::GeShape data_shape = tensor.GetShape(); + GELOGD("Data op get shape from InputDesc in ge ir graph."); + + string tmp_shape_str; + const std::vector &tmp_shape = data_shape.GetDims(); + if (tmp_shape.empty()) { + GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); + } else { + tmp_shape_str += data_op_name + ":"; + for (auto tmp_dim : tmp_shape) { + tmp_shape_str += to_string((long)tmp_dim) + ","; + } + tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); + tmp_shape_str += ";"; + default_shape += tmp_shape_str; + } + + GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str()); + } + } + default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1)); + GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str()); return GRAPH_SUCCESS; } -graphStatus Impl::Init(const std::map &options) { +graphStatus Impl::Init(const Graph &graph, const std::map &options) { // 1. 
check options graphStatus ret = CheckOptions(options); if (ret != GRAPH_SUCCESS) { GELOGE(ret, "User input options are illegal! Please check!"); return ret; } - + ret = UpdateDataOpAttr(graph); + if (ret != GRAPH_SUCCESS) { + return ret; + } std::string build_mode = (options_.find(BUILD_MODE) == options_.end() || options_[BUILD_MODE] == BUILD_MODE_NORMAL) ? "" : options_[BUILD_MODE]; options_[BUILD_MODE] = build_mode; @@ -296,7 +378,13 @@ graphStatus Impl::Init(const std::map &options) { GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); options_[ge::ir_option::LOG_LEVEL] = log; - string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"]; + string input_shape; + if (options_.find("input_shape") == options_.end()) { + GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!"); + } else { + input_shape = options_["input_shape"]; + } string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() @@ -416,7 +504,7 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector &options, ModelBufferData &model) { // 1. 
init GeGenerator with user optios - graphStatus ret = Init(options); + graphStatus ret = Init(graph, options); if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Build ir model Init failed!"); return ret; @@ -502,7 +590,7 @@ graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &m GELOGE(GRAPH_PARAM_INVALID, "input model is illegal"); return GRAPH_PARAM_INVALID; } - return FileSaver::SaveToFile((output_file + ".om"), reinterpret_cast(model.data.get()), + return FileSaver::SaveToFile((output_file + ".om"), reinterpret_cast(model.data.get()), static_cast(model.length)); } @@ -517,7 +605,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod return GRAPH_PARAM_INVALID; } std::string str_output_file = output_file; - return FileSaver::SaveToFile((str_output_file + ".om"), reinterpret_cast(model.data.get()), + return FileSaver::SaveToFile((str_output_file + ".om"), reinterpret_cast(model.data.get()), static_cast(model.length)); } @@ -543,7 +631,7 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { } auto ret = compute_graph->TopologicalSorting(); - if(ret != GRAPH_SUCCESS) { + if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Acl topo logical sort failed."); return ret; } @@ -622,4 +710,52 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz return GRAPH_SUCCESS; } +graphStatus aclgrphGenerateForOp(const AscendString &op_type, const vector &inputs, + const vector &outputs, Graph &graph) { + auto op_type_str = std::string(op_type.GetString()); + auto op_name = op_type_str + "_" + std::to_string(ge::GetCurrentTimestamp()); + auto op_desc = ge::MakeShared(op_name, op_type_str); + GE_CHECK_NOTNULL(op_desc); + + // convert input tensordesc to getensor + std::vector input_tensors; + for (const auto &input : inputs) { + ge::GeTensorDesc tensor_desc(ge::GeShape(input.GetShape().GetDims()), input.GetFormat(), input.GetDataType()); + + tensor_desc.SetOriginFormat(input.GetFormat()); + 
ge::TensorUtils::SetRealDimCnt(tensor_desc, static_cast(input.GetShape().GetDims().size())); + ge::TensorUtils::SetInputTensor(tensor_desc, true); + ge::TensorUtils::SetOutputTensor(tensor_desc, false); + + if (op_desc->AddInputDesc(tensor_desc) != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "AddInputDesc fail."); + return ge::FAILED; + } + input_tensors.emplace_back(tensor_desc); + } + + // convert output tensordesc to getensor + std::vector output_tensors; + for (const auto &output : outputs) { + ge::GeTensorDesc tensor_desc(ge::GeShape(output.GetShape().GetDims()), output.GetFormat(), output.GetDataType()); + + tensor_desc.SetOriginFormat(output.GetFormat()); + ge::TensorUtils::SetRealDimCnt(tensor_desc, static_cast(output.GetShape().GetDims().size())); + ge::TensorUtils::SetInputTensor(tensor_desc, false); + ge::TensorUtils::SetOutputTensor(tensor_desc, true); + + (void)op_desc->AddOutputDesc(tensor_desc); + output_tensors.emplace_back(tensor_desc); + } + + // call api to get graph + ge::GeGenerator generator; + std::string graph_name = ge::CurrentTimeInStr() + "_graph"; + if (generator.BuildSingleOpGraph(op_desc, input_tensors, output_tensors, graph_name, graph) != ge::SUCCESS) { + GELOGE(GRAPH_FAILED, "make graph fail."); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; +} + } // namespace ge diff --git a/ge/model/ge_root_model.h b/ge/model/ge_root_model.h index 53174064..aa5a4d47 100755 --- a/ge/model/ge_root_model.h +++ b/ge/model/ge_root_model.h @@ -23,6 +23,7 @@ namespace ge { class GeRootModel { public: + GeRootModel() = default; explicit GeRootModel(ComputeGraphPtr &root_graph) : root_graph_(root_graph), model_id_(INVALID_MODEL_ID) {}; ~GeRootModel() = default; @@ -35,11 +36,11 @@ class GeRootModel { void SetModelId(uint32_t model_id) { model_id_ = model_id; } uint32_t GetModelId() const { return model_id_; } Status CheckIsUnknownShape(bool &is_dynamic_shape); - + void SetRootGraph(ComputeGraphPtr graph) { root_graph_ = graph; } private: - 
ComputeGraphPtr root_graph_; + ComputeGraphPtr root_graph_ = nullptr; std::map subgraph_instance_name_to_model_; - uint32_t model_id_; + uint32_t model_id_ = 0; }; } // namespace ge using GeRootModelPtr = std::shared_ptr; diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 49af37c0..21221042 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -11,13 +11,13 @@ set(SRC_LIST "main.cc" "single_op_parser.cc" "../session/omg.cc" - "../ir_build/atc_ir_common.cc" + "../ir_build/atc_ir_common.cc" ) ############ atc ############ add_executable(atc ${SRC_LIST} ${PROTO_HDRS}) -target_compile_options(atc PRIVATE +target_compile_options(atc PRIVATE -Werror -O2 -Wno-deprecated-declarations @@ -74,6 +74,136 @@ target_link_libraries(atc PRIVATE -ldl ) +############ atc_atc.bin ############ +add_executable(atc_atc.bin ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(atc_atc.bin PRIVATE + -Werror + -O2 + -Wno-deprecated-declarations +) + +target_compile_definitions(atc_atc.bin PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + COMPILE_OMG_PACKAGE + google=ascend_private +) + +target_include_directories(atc_atc.bin PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/common/inc/external + ${GE_CODE_DIR}/common/inc/external/graph + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/graph + ${METADEF_DIR}/inc/register + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external/register + ${PARSER_DIR} + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/common + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain +) + +target_link_libraries(atc_atc.bin PRIVATE + $ + ascend_protobuf + ge_common + register + c_sec + graph + error_manager + ge_compiler + parser_common + 
gflags + json + runtime_compile + slog + static_mmpa + -lrt + -ldl +) + +set_target_properties(atc_atc.bin PROPERTIES + OUTPUT_NAME atc.bin + RUNTIME_OUTPUT_DIRECTORY atclib +) + +############ fwk_atc.bin ############ +add_executable(fwk_atc.bin ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(fwk_atc.bin PRIVATE + -Werror + -O2 + -Wno-deprecated-declarations +) + +target_compile_definitions(fwk_atc.bin PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + COMPILE_OMG_PACKAGE + google=ascend_private +) + +target_include_directories(fwk_atc.bin PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/common/inc/external + ${GE_CODE_DIR}/common/inc/external/graph + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/graph + ${METADEF_DIR}/inc/register + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external/register + ${PARSER_DIR} + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/common + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain +) + +target_link_libraries(fwk_atc.bin PRIVATE + $ + ascend_protobuf + ge_common + register + c_sec + graph + error_manager + ge_runner + parser_common + gflags + json + runtime + slog + static_mmpa + -lrt + -ldl +) + +set_target_properties(fwk_atc.bin PROPERTIES + OUTPUT_NAME atc.bin + RUNTIME_OUTPUT_DIRECTORY fwkacl +) + ############ install ############ set(INSTALL_BASE_DIR "") set(INSTALL_LIBRARY_DIR lib) @@ -81,3 +211,11 @@ set(INSTALL_LIBRARY_DIR lib) install(TARGETS atc OPTIONAL LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} ) + +install(TARGETS atc_atc.bin OPTIONAL + RUNTIME DESTINATION ${INSTALL_LIBRARY_DIR}/atclib +) + +install(TARGETS fwk_atc.bin OPTIONAL + RUNTIME DESTINATION ${INSTALL_LIBRARY_DIR}/fwkacl +) diff --git a/ge/offline/atc 
b/ge/offline/atc new file mode 100644 index 00000000..05c65c26 --- /dev/null +++ b/ge/offline/atc @@ -0,0 +1,21 @@ +#!/bin/bash +#------------------------------------------------------------------- +# Purpose: +# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. +#------------------------------------------------------------------- + +real_path=$(readlink "$0") +if [ $? -eq 0 ]; then + LOCAL_PATH=$(cd "$(dirname "$real_path")"; pwd) +else + LOCAL_PATH=$(cd "$(dirname "$0")"; pwd) +fi +PKG_PATH=$(cd ${LOCAL_PATH}/..; pwd) +LIB_P="/lib64" +PYTHON_P="/python/site-packages" +LIB64_PATH="${PKG_PATH}${LIB_P}" +PYTHON_PATH="${PKG_PATH}${PYTHON_P}" +export LD_LIBRARY_PATH="${LIB64_PATH}:${LD_LIBRARY_PATH}" +export PYTHONPATH="${PYTHON_PATH}:${PYTHONPATH}" + +${PKG_PATH}/bin/atc.bin "$@" diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 76494c68..b7188a85 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -68,7 +68,7 @@ const char *const kModeSupport = "only support 0(model to framework model), " const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; // limit available mem size 2G -const long kMinAvailableMem = 2 * 1024 * 1024; +const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 DEFINE_string(model, "", "The model file."); DEFINE_string(output, "", "The output file path&name."); diff --git a/ge/offline/module.mk b/ge/offline/module.mk index 8859df29..5c7a919c 100755 --- a/ge/offline/module.mk +++ b/ge/offline/module.mk @@ -54,3 +54,108 @@ LOCAL_LDFLAGS := -lrt -ldl include $(BUILD_HOST_EXECUTABLE) +include $(CLEAR_VARS) + +LOCAL_MODULE := atclib/atc.bin + +LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + 
$(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)metadef/inc/common/util \ + $(TOPDIR)parser \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libascend_protobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_compiler \ + libruntime_compile \ + libparser_common \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) + +include $(CLEAR_VARS) + +LOCAL_MODULE := fwkacl/atc.bin + +LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)metadef/inc/common/util \ + $(TOPDIR)parser \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libascend_protobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_runner \ + libruntime \ + libparser_common \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + 
+LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) diff --git a/ge/offline/single_op_parser.cc b/ge/offline/single_op_parser.cc index d4b9c1c9..b1e0da6d 100644 --- a/ge/offline/single_op_parser.cc +++ b/ge/offline/single_op_parser.cc @@ -27,6 +27,7 @@ #include "common/ge_inner_error_codes.h" #include "framework/common/util.h" #include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/operator_factory_impl.h" @@ -176,6 +177,7 @@ T GetValue(const map &dict, string &key, T default_val) { } void from_json(const Json &j, SingleOpTensorDesc &desc) { + bool is_tensor_valid = true; desc.dims = j.at(kKeyShape).get>(); auto it = j.find(kKeyShapeRange); if (it != j.end()) { @@ -189,9 +191,12 @@ void from_json(const Json &j, SingleOpTensorDesc &desc) { string type_str = j.at(kKeyType).get(); desc.format = GetValue(kFormatDict, format_str, FORMAT_RESERVED); desc.type = GetValue(kDataTypeDict, type_str, DT_UNDEFINED); + is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsFormatValid(format_str); + is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsDataTypeValid(type_str); it = j.find(kKeyOriginFormat); if (it != j.end()) { string origin_format_str = j.at(kKeyOriginFormat).get(); + is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsFormatValid(origin_format_str); desc.ori_format = GetValue(kFormatDict, origin_format_str, FORMAT_RESERVED); } auto tensor_name = j.find(kKeyName); @@ -202,6 +207,9 @@ void from_json(const Json &j, SingleOpTensorDesc &desc) { if (dynamic_input_name != j.end()) { desc.dynamic_input_name = dynamic_input_name->get(); } + if (!is_tensor_valid) { + desc.SetValidFlag(is_tensor_valid); + } } void from_json(const Json &j, SingleOpAttr &attr) { @@ -305,6 +313,12 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { int index = 0; for (auto &tensor_desc : op_desc.input_desc) { + if (!tensor_desc.GetValidFlag()) { + 
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, + {"intput", "datatype or format", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Input's dataType or format is invalid when the index is %d", index); + return false; + } if ((tensor_desc.type == DT_UNDEFINED && tensor_desc.format != FORMAT_RESERVED) || (tensor_desc.type != DT_UNDEFINED && tensor_desc.format == FORMAT_RESERVED)){ ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, @@ -317,6 +331,12 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { index = 0; for (auto &tensor_desc : op_desc.output_desc) { + if (!tensor_desc.GetValidFlag()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, + {"output", "datatype", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index); + return false; + } if (tensor_desc.type == DT_UNDEFINED) { ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, {"output", "datatype", std::to_string(index)}); diff --git a/ge/offline/single_op_parser.h b/ge/offline/single_op_parser.h index 19879a32..71aa58bb 100644 --- a/ge/offline/single_op_parser.h +++ b/ge/offline/single_op_parser.h @@ -28,6 +28,10 @@ namespace ge { struct SingleOpTensorDesc { +public: + bool GetValidFlag() const { return is_valid_; } + void SetValidFlag(bool is_valid) { is_valid_ = is_valid; } +public: std::string name; std::vector dims; std::vector ori_dims; @@ -36,6 +40,8 @@ struct SingleOpTensorDesc { ge::Format ori_format = ge::FORMAT_RESERVED; ge::DataType type = ge::DT_UNDEFINED; std::string dynamic_input_name; +private: + bool is_valid_ = true; }; struct SingleOpAttr { diff --git a/ge/omm/csa_interact.cc b/ge/omm/csa_interact.cc index 1599af94..1b33ddbd 100644 --- a/ge/omm/csa_interact.cc +++ b/ge/omm/csa_interact.cc @@ -202,7 +202,7 @@ Status CsaInteract::WriteFile(const std::string &file_name, const std::string 
&c } } - mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length()); + mmSsize_t ret = mmWrite(fd, reinterpret_cast(const_cast(content.c_str())), content.length()); if (ret == EN_ERROR) { GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); ret = mmClose(fd); diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index e0001fcd..37bdcf7a 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -167,4 +167,5 @@ Status OpsKernelBuilderManager::GenerateTask(const Node &node, GELOGD("Done invoking GenerateTask successfully"); return SUCCESS; } -} // namespace ge \ No newline at end of file + +} // namespace ge diff --git a/ge/opskernel_manager/ops_kernel_manager.cc b/ge/opskernel_manager/ops_kernel_manager.cc index 8134a463..30f39c0d 100644 --- a/ge/opskernel_manager/ops_kernel_manager.cc +++ b/ge/opskernel_manager/ops_kernel_manager.cc @@ -175,8 +175,8 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, } else if (flag == 1) { enable_flag = true; } else { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } catch (std::invalid_argument &) { @@ -188,8 +188,8 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (...) 
{ - GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } else { diff --git a/ge/session/omg.cc b/ge/session/omg.cc index df837f99..7ff52e82 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -68,6 +68,9 @@ const std::string kScopeIdAttr = "fusion_scope"; const char *const kOutputTypeSample = "correct sample is \"opname:index:dtype\""; const char *const kOutputTypeSupport = "only support FP32, FP16, UINT8"; const char *const kOutputTypeError = "The multiple out nodes set in output_type must be found in out_nodes."; +const size_t kNodeNameIndex = 0; +const size_t kIndexStrIndex = 1; +const size_t kDTValueIndex = 2; } // namespace // When the model is converted to a JSON file, the following operator attributes in the blacklist will be ignored @@ -381,14 +384,14 @@ Status ParseOutputType(const std::string &output_type, std::map(model.model_data); model.model_data = nullptr; } return status; @@ -902,7 +906,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con if (status != ge::GRAPH_SUCCESS) { GELOGE(ge::FAILED, "Get model part failed."); if (model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return status; @@ -928,7 +932,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con } if (model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return ret; diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 371d7110..a2652b67 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -17,6 +17,7 @@ #include "single_op/single_op.h" 
#include "common/fmk_types.h" +#include "common/ge_types.h" #include "common/math/math_util.h" #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" @@ -24,19 +25,60 @@ #include "graph/load/new_model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" +#include "single_op/task/build_task_utils.h" #include "graph/load/new_model_manager/model_manager.h" namespace ge { namespace { const size_t kDataMemAlignSize = 32; +const size_t kDataMemAlignUnit = 2; size_t GetAlignedSize(size_t size) { - size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; + size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; return aligned_size; } + +Status ProfilingTaskInfo(OpTask *op_task) { + if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { + return SUCCESS; + } + + string model_name; + string op_name; + uint32_t model_id; + uint32_t block_dim; + if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); + return ACL_ERROR_GE_PARAM_INVALID; + } + GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); + std::vector task_desc_info; + uint32_t task_id = 0; + uint32_t stream_id = 0; + if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); + return ACL_ERROR_GE_PARAM_INVALID; + } + + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); + task_desc_info.emplace_back(tmp_task_desc_info); + 
+ std::vector compute_graph_info; + + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); + return SUCCESS; +} } // namespace -SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { +SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream) + : stream_resource_(stream_resource), stream_mutex_(stream_mutex), stream_(stream) { } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() { @@ -68,7 +110,8 @@ Status SingleOp::ValidateArgs(const std::vector &inputs, const std:: auto num_outputs = outputs.size(); if (num_outputs != output_sizes_.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output num mismatch. model expect %zu, but given %zu", output_sizes_.size(), outputs.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output num mismatch. model expect %zu, but given %zu", + output_sizes_.size(), outputs.size()); return ACL_ERROR_GE_PARAM_INVALID; } @@ -117,37 +160,6 @@ Status SingleOp::UpdateArgs(const std::vector &inputs, const std::ve *arg_addr = args_[i]; } } - // update aicpu_TF or aicpu_CC args - for (auto &task : tasks_) { - size_t io_addr_num = args_.size(); - if (task->GetOpTaskType() == OP_TASK_AICPU) { - GELOGD("Update aicpu_TF task args"); - task->SetIoAddrsForDump(args_); - auto *dst_io_addr = const_cast(reinterpret_cast(task->GetIOAddr())); - GE_CHECK_NOTNULL(dst_io_addr); - auto rt_ret = rtMemcpyAsync(dst_io_addr, - sizeof(uint64_t) * args_.size(), - &args_[0], - sizeof(uint64_t) * args_.size(), - RT_MEMCPY_HOST_TO_DEVICE_EX, - stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtMemcpyAsync addresses failed, ret = %d", rt_ret); - return rt_ret; - } - } else if (task->GetOpTaskType() == OP_TASK_AICPUCC) { - GELOGD("Update aicpu_CC task args"); - const uintptr_t *task_io_addr = reinterpret_cast(task->GetIOAddr()); - GE_CHECK_NOTNULL(task_io_addr); 
- auto io_addr = reinterpret_cast(const_cast(task_io_addr)); - for (size_t i = 0; i < io_addr_num; ++i) { - io_addr[i] = static_cast(args_[i]); - } - } else { - GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType()); - continue; - } - } return SUCCESS; } @@ -158,7 +170,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c return ret; } + GE_CHECK_NOTNULL(stream_resource_); std::lock_guard lk(*stream_mutex_); + auto current_mem_base = stream_resource_->GetMemoryBase(); + if (running_param_->mem_base != current_mem_base) { + running_param_->mem_base = const_cast(current_mem_base); + GELOGD("Memory base changed, new memory base = %p", current_mem_base); + for (auto &task : tasks_) { + auto new_address = BuildTaskUtils::GetAddresses(task->GetOpdesc(), *running_param_); + GE_CHK_STATUS_RET(task->UpdateArgTable(*running_param_), + "[%s] Failed to update arg table", + task->GetOpdesc()->GetName().c_str()); + } + } ret = UpdateArgs(inputs, outputs); if (ret != SUCCESS) { return ret; @@ -169,6 +193,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c if (ret != SUCCESS) { return ret; } + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task)); } return ret; @@ -182,9 +207,6 @@ DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) { } -DynamicSingleOp::~DynamicSingleOp() { -} - Status DynamicSingleOp::ValidateParams(const vector &input_desc, const std::vector &inputs, std::vector &output_desc, @@ -206,63 +228,24 @@ Status DynamicSingleOp::ValidateParams(const vector &input_desc, } if (input_desc.size() != num_inputs_) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input number mismatches. expect %zu, but given %zu", num_inputs_, input_desc.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "Input number mismatches. 
expect %zu, but given %zu", + num_inputs_, + input_desc.size()); return ACL_ERROR_GE_PARAM_INVALID; } if (output_desc.size() != num_outputs_) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output number mismatches. expect %zu, but given %zu", num_outputs_, output_desc.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "Output number mismatches. expect %zu, but given %zu", + num_outputs_, + output_desc.size()); return ACL_ERROR_GE_PARAM_INVALID; } return SUCCESS; } -Status DynamicSingleOp::AllocateWorkspaces(const std::vector &workspace_sizes, - std::vector &workspaces) { - static const std::string kPurpose("malloc workspace memory for dynamic op."); - if (workspace_sizes.empty()) { - GELOGD("No need to allocate workspace."); - return SUCCESS; - } - int64_t total_size = 0; - std::vector ws_offsets; - for (auto ws_size : workspace_sizes) { - // alignment and padding should be done in OpParaCalculate - GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); - ws_offsets.emplace_back(total_size); - total_size += ws_size; - } - - GELOGD("Total workspace size is %ld", total_size); - StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); - GE_CHECK_NOTNULL(stream_resource); - auto ws_base = stream_resource->MallocMemory(kPurpose, static_cast(total_size)); - if (ws_base == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to allocate memory of size: %ld", total_size); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - GELOGD("Done allocating workspace memory successfully."); - - for (auto ws_offset : ws_offsets) { - workspaces.emplace_back(ws_base + ws_offset); - } - - return SUCCESS; -} - -Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, - const vector &inputs, - vector &output_desc, - vector &outputs) { - GE_CHK_STATUS_RET_NOLOG(op_task_->UpdateRunInfo(input_desc, output_desc)); - - std::vector workspace_buffers; - GE_CHK_STATUS_RET_NOLOG(AllocateWorkspaces(op_task_->GetWorkspaceSizes(), 
workspace_buffers)); - - return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); -} - Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, vector &output_desc, @@ -271,24 +254,8 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); std::lock_guard lk(*stream_mutex_); - std::vector inputs; - std::vector outputs; - for (auto &buffer : input_buffers) { - inputs.emplace_back(buffer.data); - } - for (auto &buffer : output_buffers) { - outputs.emplace_back(buffer.data); - } - - if (op_task_->GetOpTaskType() == OP_TASK_TBE) { - return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); - } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { - return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); - } else { - GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, - "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", - op_task_->GetOpTaskType()); - return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; - } + GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); + return SUCCESS; } } // namespace ge diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index 14ef8ce1..d677f94a 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -30,9 +30,11 @@ #include "cce/aicpu_engine_struct.h" namespace ge { +class StreamResource; +struct SingleOpModelParam; class SingleOp { public: - SingleOp(std::mutex *stream_mutex, rtStream_t stream); + SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream); ~SingleOp(); Status ExecuteAsync(const std::vector &inputs, const std::vector &outputs); @@ -44,6 +46,7 @@ class SingleOp { Status GetArgs(const std::vector 
&inputs, const std::vector &outputs); friend class SingleOpModel; + StreamResource *stream_resource_; std::mutex *stream_mutex_; rtStream_t stream_ = nullptr; std::vector input_addr_list_; @@ -54,12 +57,13 @@ class SingleOp { std::vector tasks_; std::vector> arg_table_; + std::unique_ptr running_param_; }; class DynamicSingleOp { public: DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex_, rtStream_t stream); - ~DynamicSingleOp(); + ~DynamicSingleOp() = default; Status ExecuteAsync(const vector &input_desc, const std::vector &inputs, std::vector &output_desc, @@ -72,14 +76,6 @@ class DynamicSingleOp { std::vector &output_desc, std::vector &outputs) const; - Status AllocateWorkspaces(const std::vector &workspace_sizes, - std::vector &workspaces); - - Status ExecuteTbeTask(const vector &input_desc, - const vector &inputs, - vector &output_desc, - vector &outputs); - std::unique_ptr op_task_; uintptr_t resource_id_ = 0; std::mutex *stream_mutex_; diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 49968f4f..a4a4b623 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -92,7 +92,8 @@ Status SingleOpModel::InitModelMem(StreamResource &res) { if (model_params_.memory_size > model_params_.zero_copy_mem_size) { const string purpose("malloc feature map memory on model execute."); GELOGI("total memory: %lu, zero_copy_mem: %lu", model_params_.memory_size, model_params_.zero_copy_mem_size); - model_params_.mem_base = res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size); + model_params_.mem_base = + res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size, false); if (model_params_.mem_base == nullptr) { return ACL_ERROR_GE_MEMORY_ALLOCATION; } @@ -157,6 +158,7 @@ Status SingleOpModel::LoadAllNodes() { auto ge_model = model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); Graph graph = ge_model->GetGraph(); + model_id_ = 
ge_model->GetModelId(); auto compute_graph = GraphUtils::GetComputeGraph(graph); if (compute_graph == nullptr) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str()); @@ -225,9 +227,10 @@ Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) { return SUCCESS; } -Status SingleOpModel::BuildTaskList(SingleOp &single_op) { +Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &single_op) { auto ge_model = model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); + single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); auto tasks = ge_model->GetModelTaskDefPtr()->task(); for (int i = 0; i < tasks.size(); ++i) { const TaskDef &task_def = tasks[i]; @@ -237,8 +240,8 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { if (task_type == RT_MODEL_TASK_KERNEL) { const domi::KernelDef &kernel_def = task_def.kernel(); const auto &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::TE) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); @@ -246,10 +249,13 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { return ret; } - single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); ParseArgTable(tbe_task, single_op); + tbe_task->SetModelArgs(model_name_, model_id_); + if (tbe_task->tiling_buffer_ != nullptr) { + tbe_task->stream_resource_ = stream_resource; + } single_op.tasks_.emplace_back(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; uint64_t singleop_kernel_id = 
aicpu_kernel_id++; @@ -258,9 +264,12 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { if (ret != SUCCESS) { return ret; } + task->SetModelArgs(model_name_, model_id_); + ParseArgTable(task, single_op); single_op.tasks_.emplace_back(task); } else { - GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); + GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, + "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; } } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { @@ -273,6 +282,8 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { if (ret != SUCCESS) { return ret; } + aicpu_task->SetModelArgs(model_name_, model_id_); + ParseArgTable(aicpu_task, single_op); single_op.tasks_.emplace_back(aicpu_task); } else { // skip @@ -282,21 +293,23 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { return SUCCESS; } -void SingleOpModel::ParseArgTable(TbeOpTask *task, SingleOp &op) { +void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { if (task == nullptr) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "tbe op task is nullptr"); return; } + // args: addr1, addr2, addr3 ... 
- auto *args = const_cast(reinterpret_cast(task->GetArgs())); - size_t arg_size = task->GetArgSize(); - for (size_t i = 0; i < arg_size / sizeof(void *); ++i) { - uintptr_t *ptr_to_addr = args + i; + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + task->GetIoAddr(arg_base, arg_num); + for (size_t i = 0; i < arg_num; ++i) { + uintptr_t *ptr_to_addr = arg_base + i; uintptr_t addr = *ptr_to_addr; auto iter = model_params_.addr_mapping_.find(addr); if (iter != model_params_.addr_mapping_.end()) { int arg_index = iter->second; - GELOGI("%s args[%zu] mapped to user designated args[%d]", task->GetStubName().c_str(), i, arg_index); + GELOGI("%s args[%zu] mapped to user designated args[%d]", task->GetOpdesc()->GetName().c_str(), i, arg_index); op.arg_table_[iter->second].emplace_back(ptr_to_addr); } } @@ -368,7 +381,7 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa } auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def); - auto ret = builder.BuildTask(*aicpucc_task, kernel_id); + auto ret = builder.BuildTask(*aicpucc_task, kernel_id, model_params_); if (ret != SUCCESS) { GELOGE(ret, "build aicpu_CC op task failed"); return ret; @@ -381,25 +394,29 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); + single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_)); + GE_CHECK_NOTNULL(single_op.running_param_); GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); - return BuildTaskList(single_op); + return BuildTaskList(&resource, single_op); } Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { const domi::KernelDef &kernel_def = task_def.kernel(); const auto &context = kernel_def.context(); - auto kernel_type = 
static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::TE) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); + tbe_task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); + task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(task); } else { GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, @@ -446,6 +463,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { const TaskDef ©_task_def = tasks[i]; GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); } + aicpu_task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(aicpu_task); } else { // skip diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 50aeb7ab..c3164543 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -65,7 +65,7 @@ class SingleOpModel { Status ParseInputNode(const OpDescPtr &op_desc); void ParseOutputNode(const OpDescPtr &op_desc); - Status BuildTaskList(SingleOp &single_op); + Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); Status BuildKernelExTask(const domi::KernelExDef 
&kernel_def, AiCpuTask **task, @@ -74,9 +74,10 @@ class SingleOpModel { Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); - void ParseArgTable(TbeOpTask *task, SingleOp &op); + void ParseArgTable(OpTask *task, SingleOp &op); std::string model_name_; + uint32_t model_id_ = 0; const void *ori_model_data_; uint32_t ori_model_size_; diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc index f545b6c8..722a1024 100755 --- a/ge/single_op/stream_resource.cc +++ b/ge/single_op/stream_resource.cc @@ -69,11 +69,25 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, std::vector &allocated) { + if (size == 0) { + GELOGD("Mem size == 0"); + return nullptr; + } + if (size <= max_allocated && !allocated.empty()) { GELOGD("reuse last memory"); return allocated.back(); } + if (!allocated.empty()) { + uint8_t *current_buffer = allocated.back(); + allocated.pop_back(); + if (rtStreamSynchronize(stream_) != RT_ERROR_NONE) { + GELOGW("Failed to invoke rtStreamSynchronize"); + } + (void) rtFree(current_buffer); + } + uint8_t *buffer = nullptr; auto ret = rtMalloc(reinterpret_cast(&buffer), size, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { @@ -96,10 +110,14 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, return buffer; } -uint8_t *StreamResource::MallocMemory(const std::string &purpose, size_t size) { +uint8_t *StreamResource::MallocMemory(const std::string &purpose, size_t size, bool holding_lock) { GELOGD("To Malloc memory, size = %zu", size); - uint8_t *buffer = DoMallocMemory(purpose, size, max_memory_size_, memory_list_); - return buffer; + if (holding_lock) { + return DoMallocMemory(purpose, size, max_memory_size_, memory_list_); + } else { + std::lock_guard lk(stream_mu_); + return DoMallocMemory(purpose, size, max_memory_size_, memory_list_); + } } uint8_t 
*StreamResource::MallocWeight(const std::string &purpose, size_t size) { @@ -158,7 +176,7 @@ Status StreamResource::BuildOperator(const string &model_name, const ModelData & return ret; } - auto new_op = std::unique_ptr(new(std::nothrow) SingleOp(&stream_mu_, stream_)); + auto new_op = std::unique_ptr(new(std::nothrow) SingleOp(this, &stream_mu_, stream_)); if (new_op == nullptr) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "new SingleOp failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; @@ -171,4 +189,12 @@ Status StreamResource::BuildOperator(const string &model_name, const ModelData & op_map_[model_data.model_data] = std::move(new_op); return SUCCESS; } + +const uint8_t *StreamResource::GetMemoryBase() const { + if (memory_list_.empty()) { + return nullptr; + } + + return memory_list_.back(); +} } // namespace ge diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h index 39f08ebe..d5bc941a 100755 --- a/ge/single_op/stream_resource.h +++ b/ge/single_op/stream_resource.h @@ -45,8 +45,9 @@ class StreamResource { Status BuildOperator(const std::string &model_name, const ModelData &model_data, SingleOp **single_op); Status BuildDynamicOperator(const std::string &model_name, const ModelData &model_data, DynamicSingleOp **single_op); - uint8_t *MallocMemory(const std::string &purpose, size_t size); + uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); uint8_t *MallocWeight(const std::string &purpose, size_t size); + const uint8_t *GetMemoryBase() const; private: uint8_t *DoMallocMemory(const std::string &purpose, diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 26f6a166..f8a2bd1b 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -15,19 +15,24 @@ */ #include "single_op/task/aicpu_kernel_task_builder.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" 
#include "graph/load/new_model_manager/model_manager.h" +#include "build_task_utils.h" namespace ge { AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) : op_desc_(op_desc), kernel_def_(kernel_def) {} -Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { +Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task, const SingleOpModelParam ¶m) { size_t aicpu_arg_size = kernel_def_.args_size(); - if (aicpu_arg_size <= 0) { + if (aicpu_arg_size <= sizeof(aicpu::AicpuParamHead)) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "aicpu_arg_size is invalid, value = %zu", aicpu_arg_size); return ACL_ERROR_GE_PARAM_INVALID; } + + task.io_addr_num_ = op_desc_->GetInputsSize() + op_desc_->GetOutputsSize(); + GE_CHECK_GE(aicpu_arg_size - sizeof(aicpu::AicpuParamHead), task.io_addr_num_ * sizeof(void *)); + std::unique_ptr aicpu_args; aicpu_args.reset(new(std::nothrow) uint8_t[aicpu_arg_size]()); if (aicpu_args == nullptr) { @@ -41,13 +46,19 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { return ACL_ERROR_GE_INTERNAL_ERROR; } - task.SetIoAddr(aicpu_args.get() + sizeof(aicpu::AicpuParamHead)); + task.SetIoAddr(reinterpret_cast(aicpu_args.get() + sizeof(aicpu::AicpuParamHead))); task.SetKernelArgs(std::move(aicpu_args), aicpu_arg_size); + + auto addresses = BuildTaskUtils::GetKernelArgs(op_desc_, param); + GE_CHECK_GE(addresses.size(), task.io_addr_num_); + for (size_t i = 0; i < task.io_addr_num_; ++i) { + task.io_addr_[i] = reinterpret_cast(addresses[i]); + } return SUCCESS; } -Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { - auto ret = SetKernelArgs(task); +Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, const SingleOpModelParam ¶m) { + auto ret = SetKernelArgs(task, param); if (ret != SUCCESS) { return ret; } @@ -58,12 +69,16 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { task.op_desc_ = op_desc_; const auto &context = 
kernel_def_.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::CUST_AI_CPU) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), + "launch cust aicpu so failed"); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + } } task.num_inputs_ = op_desc_->GetInputsSize(); @@ -82,6 +97,10 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { return ret; } + if (task.GetUnknownType() == DEPEND_COMPUTE) { + GELOGE(FAILED, "AiCpuCCTask unknown type is depend compute, it's not supported now."); + return FAILED; + } auto aicpu_param_head = reinterpret_cast(task.args_.get()); if (task.ext_info_addr_dev_ != nullptr) { aicpu_param_head->extInfoLength = kernel_ext_info.size(); diff --git a/ge/single_op/task/aicpu_kernel_task_builder.h b/ge/single_op/task/aicpu_kernel_task_builder.h index e77e3c10..85d5034d 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.h +++ b/ge/single_op/task/aicpu_kernel_task_builder.h @@ -30,10 +30,10 @@ class AiCpuCCTaskBuilder { explicit AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def); ~AiCpuCCTaskBuilder() = default; - Status BuildTask(AiCpuCCTask &task, uint64_t kernel_id); + Status BuildTask(AiCpuCCTask &task, uint64_t kernel_id, const SingleOpModelParam ¶m); private: - Status SetKernelArgs(AiCpuCCTask &task); + Status SetKernelArgs(AiCpuCCTask &task, const SingleOpModelParam ¶m); const OpDescPtr op_desc_; const 
domi::KernelDef &kernel_def_; }; diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 8f28ffda..0cc5c554 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -26,26 +26,6 @@ namespace ge { AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) : op_desc_(op_desc), kernel_def_(kernel_def) {} - Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector &addresses) { - size_t arg_size = kernel_def_.args_size(); - auto rt_ret = rtMalloc(io_addr, arg_size, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtMalloc failed, size = %zu, ret = %d", arg_size, rt_ret); - return rt_ret; - } - - const void *src_addr = reinterpret_cast(addresses.data()); - uint64_t src_len = sizeof(void *) * addresses.size(); - rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - (void)rtFree(*io_addr); - GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", rt_ret); - return rt_ret; - } - - return SUCCESS; - } - Status AiCpuTaskBuilder::SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &fwk_op_kernel) { auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_def_.args().data(), kernel_def_.args().size()); @@ -80,39 +60,27 @@ namespace ge { return SUCCESS; } - Status AiCpuTaskBuilder::InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, - const SingleOpModelParam ¶m, bool dynamic_flag) { + Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag) { if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); return ACL_ERROR_GE_PARAM_INVALID; } - auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); - auto ws_addr_vec = 
addresses.at(BuildTaskUtils::kAddressIndexWorkspace); - - if (dynamic_flag) { - GE_CHK_RT_RET(rtMalloc(kernel_workspace, kernel_def_.task_info_size(), RT_MEMORY_HBM)); - } else { - if (ws_addr_vec.empty()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "workspace Data Address is empty."); - return ACL_ERROR_GE_PARAM_INVALID; - } - *kernel_workspace = ws_addr_vec[0]; - } - GE_CHK_RT_RET(rtMemcpy(*kernel_workspace, kernel_def_.task_info_size(), + GE_CHK_RT_RET(rtMalloc(&task.workspace_addr_, kernel_def_.task_info_size(), RT_MEMORY_HBM)); + GE_CHK_RT_RET(rtMemcpy(task.workspace_addr_, kernel_def_.task_info_size(), kernel_def_.task_info().data(), kernel_def_.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); - auto ret = SetInputOutputAddr(io_addr, BuildTaskUtils::JoinAddresses(addresses)); - if (ret != SUCCESS) { - return ret; - } + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, false); + task.io_addr_host_ = BuildTaskUtils::JoinAddresses(addresses); + task.io_addr_size_ = task.io_addr_host_.size() * sizeof(void *); + GE_CHK_RT_RET(rtMalloc(&task.io_addr_, task.io_addr_size_, RT_MEMORY_HBM)); return SUCCESS; } Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t kernel_id) { - GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&task.io_addr_, &task.workspace_addr_, param, dynamic_flag)); + GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param, dynamic_flag)); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; auto ret = SetFmkOpKernel(task.io_addr_, task.workspace_addr_, fwk_op_kernel); diff --git a/ge/single_op/task/aicpu_task_builder.h b/ge/single_op/task/aicpu_task_builder.h index 4669e118..fe9c9bc2 100755 --- a/ge/single_op/task/aicpu_task_builder.h +++ b/ge/single_op/task/aicpu_task_builder.h @@ -33,10 +33,8 @@ namespace ge { private: static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); - Status SetInputOutputAddr(void **io_addr, const std::vector &addresses); Status SetFmkOpKernel(void *io_addr, void 
*ws_addr, STR_FWK_OP_KERNEL &kernel); - Status InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, - const SingleOpModelParam ¶m, bool dynamic_flag); + Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag); const OpDescPtr op_desc_; const domi::KernelExDef &kernel_def_; diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 29f1657b..071e514b 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -32,7 +32,8 @@ const uint64_t kVarSize = 0; } std::vector> BuildTaskUtils::GetAddresses(const OpDescPtr &op_desc, - const SingleOpModelParam ¶m) { + const SingleOpModelParam ¶m, + bool keep_workspace) { std::vector> ret; RuntimeParam runtime_para; runtime_para.mem_size = param.memory_size; @@ -49,7 +50,9 @@ std::vector> BuildTaskUtils::GetAddresses(const OpDescPtr &o ret.emplace_back(ModelUtils::GetInputDataAddrs(runtime_para, op_desc)); ret.emplace_back(ModelUtils::GetOutputDataAddrs(runtime_para, op_desc)); - ret.emplace_back(ModelUtils::GetWorkspaceDataAddrs(runtime_para, op_desc)); + if (keep_workspace) { + ret.emplace_back(ModelUtils::GetWorkspaceDataAddrs(runtime_para, op_desc)); + } return ret; } diff --git a/ge/single_op/task/build_task_utils.h b/ge/single_op/task/build_task_utils.h index cddc7a2b..7a2369e4 100644 --- a/ge/single_op/task/build_task_utils.h +++ b/ge/single_op/task/build_task_utils.h @@ -27,15 +27,17 @@ namespace ge { class BuildTaskUtils { public: + static constexpr int kAddressIndexOutput = 1; static constexpr int kAddressIndexWorkspace = 2; - static std::vector> GetAddresses(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); + static std::vector> GetAddresses(const OpDescPtr &op_desc, + const SingleOpModelParam ¶m, + bool keep_workspace = true); static std::vector JoinAddresses(const std::vector> &addresses); static std::vector GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); static std::string 
GetTaskInfo(const OpDescPtr &op_desc); template - static std::string VectorToString(const std::vector &values) - { + static std::string VectorToString(const std::vector &values) { std::stringstream ss; ss << '['; auto size = values.size(); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index c3c4e5bb..22433ec9 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -24,9 +24,11 @@ #include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" #include "common/formats/formats.h" +#include "common/math/math_util.h" #include "framework/common/debug/log.h" #include "register/op_tiling.h" #include "runtime/rt.h" +#include "build_task_utils.h" namespace ge { namespace { @@ -48,18 +50,22 @@ Status OpTask::OpenDump(rtStream_t stream) { std::vector output_adds; auto input_size = op_desc_->GetInputsSize(); auto output_size = op_desc_->GetOutputsSize(); - auto all_size = io_addrs_for_dump_.size(); - if (input_size + output_size != all_size) { - GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", all_size, + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); + if (arg_num < input_size + output_size) { + GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", + arg_num, input_size + output_size); return FAILED; } + for (size_t i = 0; i < input_size; i++) { - uint64_t input_addr = io_addrs_for_dump_[i]; + uint64_t input_addr = arg_base[i]; input_addrs.emplace_back(input_addr); } for (size_t j = 0; j < output_size; j++) { - uint64_t output_addr = io_addrs_for_dump_[input_size + j]; + uint64_t output_addr = arg_base[input_size + j]; output_adds.emplace_back(output_addr); } dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc_, input_addrs, output_adds, stream); @@ -89,9 +95,50 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr &&args, size_t arg_size void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = 
sm_desc; } -const vector &OpTask::GetWorkspaceSizes() const { return workspace_sizes_; } +void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { + model_name_ = model_name; + model_id_ = model_id; +} -void OpTask::SetWorkspaceSizes(const vector &workspace_sizes) { workspace_sizes_ = workspace_sizes; } +Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, + uint32_t &block_dim) { + model_name = model_name_; + model_id = model_id_; + block_dim = block_dim_; + GE_CHECK_NOTNULL(op_desc_); + op_name = op_desc_->GetName(); + return SUCCESS; +} +Status OpTask::UpdateRunInfo(const vector &input_desc, const vector &output_desc) { + return UNSUPPORTED; +} +Status OpTask::UpdateArgTable(const SingleOpModelParam ¶m) { + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); + auto all_addresses = BuildTaskUtils::JoinAddresses(addresses); + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); + if (arg_num != all_addresses.size()) { + GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect = %zu, but got = %zu", + op_desc_->GetName().c_str(), + arg_num, + all_addresses.size()); + return INTERNAL_ERROR; + } + + for (void *addr : all_addresses) { + *arg_base++ = reinterpret_cast(addr); + } + return SUCCESS; +} + +Status OpTask::LaunchKernel(const vector &input_desc, + const vector &input_buffers, + vector &output_desc, + vector &output_buffers, + rtStream_t stream) { + return UNSUPPORTED; +} TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { @@ -126,12 +173,6 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { return RT_FAILED; } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); - - size_t input_size = op_desc_->GetInputsSize(); - size_t output_size = op_desc_->GetOutputsSize(); - uint64_t *io_addr = reinterpret_cast(args_.get()); - std::vector io_addrs(io_addr, io_addr + input_size + output_size); - SetIoAddrsForDump(io_addrs); auto status = OpenDump(stream); if 
(status != SUCCESS) { GELOGE(status, "Open dump failed in the tbe single op %s", this->stub_name_.c_str()); @@ -152,11 +193,12 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve GELOGE(FAILED, "Failed to invoke OpParaCalculate. ret = %u", ret); return FAILED; } - SetWorkspaceSizes(run_info.workspaces); block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, tiling_data_.size()); + + GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); return SUCCESS; } @@ -212,13 +254,54 @@ void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, s max_tiling_size_ = max_tiling_size; } -Status TbeOpTask::LaunchKernel(const vector &inputs, const vector &outputs, - const vector &workspaces, rtStream_t stream) { +Status TbeOpTask::AllocateWorkspaces(const vector &workspace_sizes) { + static const std::string kPurpose("malloc workspace memory for dynamic op."); + if (workspace_sizes.empty()) { + GELOGD("No need to allocate workspace."); + return SUCCESS; + } + int64_t total_size = 0; + std::vector ws_offsets; + for (auto ws_size : workspace_sizes) { + // alignment and padding should be done in OpParaCalculate + GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); + ws_offsets.emplace_back(total_size); + total_size += ws_size; + } + + GELOGD("Total workspace size is %ld", total_size); + GE_CHECK_NOTNULL(stream_resource_); + auto ws_base = stream_resource_->MallocMemory(kPurpose, static_cast(total_size)); + if (ws_base == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to allocate memory of size: %ld", total_size); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + GELOGD("Done allocating workspace memory successfully."); + + for (auto ws_offset : ws_offsets) { + workspaces_.emplace_back(ws_base + ws_offset); + } + + return SUCCESS; +} + +Status 
TbeOpTask::LaunchKernel(const vector &input_desc, + const vector &input_buffers, + vector &output_desc, + vector &output_buffers, + rtStream_t stream) { + GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); std::vector args; - args.insert(args.end(), inputs.begin(), inputs.end()); - args.insert(args.end(), outputs.begin(), outputs.end()); - args.insert(args.end(), workspaces.begin(), workspaces.end()); + for (auto &buffer : input_buffers) { + args.emplace_back(buffer.data); + } + for (auto &buffer : output_buffers) { + args.emplace_back(buffer.data); + } + for (auto &buffer : workspaces_) { + args.emplace_back(buffer); + } if (tiling_buffer_ != nullptr) { GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); @@ -239,6 +322,14 @@ Status TbeOpTask::LaunchKernel(const vector &inputs, const vector(args_.get()); + arg_count = arg_size_ / sizeof(void *); + if (tiling_buffer_ != nullptr) { + --arg_count; + } +} + AiCpuBaseTask::~AiCpuBaseTask() { if (ext_info_addr_dev_ != nullptr) { (void)rtFree(ext_info_addr_dev_); @@ -363,6 +454,29 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor return SUCCESS; } +Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vector &outputs) { + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); + + // input number and output number was check in ValidateParams + for (size_t i = 0; i < inputs.size(); ++i) { + auto addr = inputs[i].data; + GE_CHECK_NOTNULL(addr); + GELOGD("AICpuTask input[%zu] addr = %p", i, addr); + *arg_base++ = reinterpret_cast(addr); + } + + for (size_t i = 0; i < outputs.size(); ++i) { + auto addr = outputs[i].data; + GE_CHECK_NOTNULL(addr); + GELOGD("AICpuTask output[%zu] addr = %p", i, addr); + *arg_base++ = reinterpret_cast(addr); + } + + return SUCCESS; +} + AiCpuTask::~AiCpuTask() { FreeHbm(args_); FreeHbm(io_addr_); @@ 
-384,12 +498,14 @@ AiCpuTask::~AiCpuTask() { } } -const void *AiCpuTask::GetIOAddr() const { return io_addr_; } - Status AiCpuTask::LaunchKernel(rtStream_t stream) { GELOGD("Start to launch kernel. task = %s", this->op_type_.c_str()); - auto ret = rtMemcpyAsync(workspace_addr_, task_info_.size(), task_info_.data(), task_info_.size(), - RT_MEMCPY_HOST_TO_DEVICE_EX, stream); + auto ret = rtMemcpyAsync(io_addr_, + io_addr_size_, + io_addr_host_.data(), + io_addr_host_.size() * sizeof(void *), + RT_MEMCPY_HOST_TO_DEVICE_EX, + stream); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); return RT_FAILED; @@ -538,40 +654,6 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output return SUCCESS; } -Status AiCpuTask::SetIO(const vector &inputs, vector &outputs) { - vector io_addrs; - io_addrs.reserve(num_inputs_ + num_outputs_); - for (size_t i = 0; i < num_inputs_; ++i) { - GE_CHECK_NOTNULL(inputs[i]); - GELOGD("AiCpuTask input[%zu] addr = %p", i, inputs[i]); - io_addrs.emplace_back(reinterpret_cast(inputs[i])); - } - - if (unknown_type_ != DEPEND_COMPUTE) { - for (size_t i = 0; i < num_outputs_; ++i) { - GE_CHECK_NOTNULL(outputs[i]); - GELOGD("AiCpuTask output[%zu] addr = %p", i, outputs[i]); - io_addrs.emplace_back(reinterpret_cast(outputs[i])); - } - } else { - for (size_t i = 0; i < num_outputs_; ++i) { - void *summary_addr = output_summary_[i]; - io_addrs.emplace_back(reinterpret_cast(summary_addr)); - } - } - - if (!io_addrs.empty()) { - auto *dst_io_addr = const_cast(reinterpret_cast(io_addr_)); - GE_CHK_RT_RET(rtMemcpy(dst_io_addr, - sizeof(uint64_t) * io_addrs.size(), - &io_addrs[0], - sizeof(uint64_t) * io_addrs.size(), - RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHECK_NOTNULL(dst_io_addr); - }; - return SUCCESS; -} - Status AiCpuTask::InitForSummaryAndCopy() { if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { GELOGI("Unknown_type is %d, output num is %d.", 
unknown_type_, num_outputs_); @@ -643,17 +725,17 @@ Status AiCpuTask::LaunchKernel(const std::vector &input_desc, std::vector &output_buffers, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); - std::vector inputs; - std::vector outputs; - for (auto &buffer : input_buffers) { - inputs.emplace_back(buffer.data); - } - for (auto &buffer : output_buffers) { - outputs.emplace_back(buffer.data); + if (unknown_type_ == DEPEND_COMPUTE) { + std::vector summary_buffers; + for (size_t i = 0; i < num_outputs_; ++i) { + summary_buffers.emplace_back(output_summary_[i], sizeof(aicpu::FWKAdapter::ResultSummary), false); + } + GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, summary_buffers)); + } else { + GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); } - GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); - GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); + GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); if (unknown_type_ == DEPEND_SHAPE_RANGE) { GE_CHK_RT_RET(rtStreamSynchronize(stream)); GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); @@ -665,6 +747,17 @@ Status AiCpuTask::LaunchKernel(const std::vector &input_desc, return SUCCESS; } +Status AiCpuTask::UpdateArgTable(const SingleOpModelParam ¶m) { + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, false); + io_addr_host_ = BuildTaskUtils::JoinAddresses(addresses); + return SUCCESS; +} + +void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { + arg_base = reinterpret_cast(io_addr_host_.data()); + arg_count = io_addr_host_.size(); +} + void AiCpuCCTask::SetKernelArgs(std::unique_ptr args, size_t arg_size) { args_ = std::move(args); arg_size_ = arg_size; @@ -676,9 +769,7 @@ void AiCpuCCTask::SetSoName(const std::string &so_name) { so_name_ = so_name; } void AiCpuCCTask::SetkernelName(const std::string &kernel_Name) { kernel_name_ = kernel_Name; } -void AiCpuCCTask::SetIoAddr(void *io_addr) { io_addr_ = io_addr; } - -const void 
*AiCpuCCTask::GetIOAddr() const { return io_addr_; } +void AiCpuCCTask::SetIoAddr(uintptr_t *io_addr) { io_addr_ = io_addr; } const void *AiCpuCCTask::GetArgs() const { return args_.get(); } @@ -701,12 +792,6 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { return ret; } GELOGD("Invoke rtCpuKernelLaunch succeeded"); - - size_t input_size = op_desc_->GetInputsSize(); - size_t output_size = op_desc_->GetOutputsSize(); - uint64_t *io_addr = reinterpret_cast(io_addr_); - std::vector io_addrs (io_addr, io_addr + input_size + output_size); - SetIoAddrsForDump(io_addrs); auto status = OpenDump(stream); if (status != SUCCESS) { GELOGE(status, "Open dump failed in the aicpucc single op %s", this->kernel_name_.c_str()); @@ -721,24 +806,9 @@ Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, std::vector &output_desc, std::vector &output_buffers, rtStream_t stream) { - GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, - "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", - unknown_type_); - GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); - - size_t arg_index = 0; - auto *task_io_addr = reinterpret_cast(io_addr_); - GE_CHECK_NOTNULL(task_io_addr); - for (auto &input : input_buffers) { - task_io_addr[arg_index++] = reinterpret_cast(input.data); - } - for (auto &output : output_buffers) { - task_io_addr[arg_index++] = reinterpret_cast(output.data); - } - + GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); - if (unknown_type_ == DEPEND_SHAPE_RANGE) { GE_CHK_RT_RET(rtStreamSynchronize(stream)); GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); @@ -746,4 +816,9 @@ Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, return SUCCESS; } + +void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { + arg_base = io_addr_; + arg_count = io_addr_num_; +} } // namespace ge diff --git a/ge/single_op/task/op_task.h 
b/ge/single_op/task/op_task.h index 65c77800..e2122b6f 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -32,64 +32,46 @@ #include "init/gelib.h" namespace ge { -enum OpTaskType { - OP_TASK_TBE = 0, - OP_TASK_AICPU, - OP_TASK_AICPUCC, - OP_TASK_INVALID, -}; - +class StreamResource; +struct SingleOpModelParam; class OpTask { public: OpTask() = default; virtual ~OpTask() = default; virtual Status LaunchKernel(rtStream_t stream) = 0; virtual Status UpdateRunInfo(const vector &input_desc, - const vector &output_desc) { - return UNSUPPORTED; - } - virtual Status LaunchKernel(const std::vector &inputs, - const std::vector &outputs, - const std::vector &workspaces, - rtStream_t stream) { - return UNSUPPORTED; - } - virtual OpTaskType GetOpTaskType() = 0; - virtual const void *GetIOAddr() const = 0; - const vector &GetWorkspaceSizes() const; - void SetWorkspaceSizes(const vector &workspace_sizes); + const vector &output_desc); + virtual Status UpdateArgTable(const SingleOpModelParam ¶m); + void SetModelArgs(std::string model_name, uint32_t model_id); + Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); const OpDescPtr &GetOpdesc() const {return op_desc_;} Status OpenDump(rtStream_t stream); - void SetIoAddrsForDump(const vector &io_addrs_for_dump) { - io_addrs_for_dump_ = io_addrs_for_dump; - } + virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, std::vector &output_desc, std::vector &output_buffers, - rtStream_t stream) { - return UNSUPPORTED; - } + rtStream_t stream); - private: - std::vector workspace_sizes_; protected: DumpProperties dump_properties_; DumpOp dump_op_; OpDescPtr op_desc_; - std::vector io_addrs_for_dump_; + std::string model_name_; + uint32_t model_id_ = 0; + uint32_t block_dim_ = 1; }; class TbeOpTask : public OpTask { public: ~TbeOpTask() override; 
Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { - return OP_TASK_TBE; - } - const void *GetIOAddr() const override { - return nullptr; - } + Status LaunchKernel(const std::vector &input_desc, + const std::vector &input_buffers, + std::vector &output_desc, + std::vector &output_buffers, + rtStream_t stream) override; + void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func); void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); @@ -97,31 +79,29 @@ class TbeOpTask : public OpTask { Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) override; - Status LaunchKernel(const vector &inputs, - const vector &outputs, - const vector &workspaces, - rtStream_t stream) override; - const void *GetArgs() const; size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); private: + friend class SingleOpModel; static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); Status UpdateNodeByShape(const vector &input_desc, const vector &output_desc); + Status AllocateWorkspaces(const std::vector &workspace_sizes); const void *stub_func_ = nullptr; std::unique_ptr args_; size_t arg_size_ = 0; - uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; std::string stub_name_; + StreamResource *stream_resource_ = nullptr; void *tiling_buffer_ = nullptr; uint32_t max_tiling_size_ = 0; std::string tiling_data_; + std::vector workspaces_; NodePtr node_; }; @@ -129,9 +109,10 @@ class AiCpuBaseTask : public OpTask { public: AiCpuBaseTask() = default; ~AiCpuBaseTask() override; - const UnknowShapeOpType GetUnknownType() const { return unknown_type_; } + UnknowShapeOpType GetUnknownType() const { return unknown_type_; } protected: + Status 
UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); Status SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id); Status UpdateExtInfo(const std::vector &input_desc, @@ -154,10 +135,8 @@ class AiCpuTask : public AiCpuBaseTask { ~AiCpuTask() override; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { - return OP_TASK_AICPU; - } - const void *GetIOAddr() const override; + Status UpdateArgTable(const SingleOpModelParam ¶m) override; + void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, @@ -167,8 +146,6 @@ class AiCpuTask : public AiCpuBaseTask { Status SetMemCopyTask(const domi::KernelExDef &kernel_def); private: - Status SetIO(const vector &inputs, vector &outputs); - // for copy task. Status InitForSummaryAndCopy(); Status UpdateShapeAndDataByResultSummary(vector &output_desc, @@ -184,27 +161,31 @@ class AiCpuTask : public AiCpuBaseTask { friend class AiCpuTaskBuilder; void *workspace_addr_ = nullptr; std::string task_info_; - // device addr + // device addr void *args_ = nullptr; size_t arg_size_ = 0; std::string op_type_; // device addr void *io_addr_ = nullptr; + size_t io_addr_size_ = 0; + + // host addr + std::vector io_addr_host_; bool dynamic_flag_ = false; // for copy task - void *copy_task_args_buf_; - void *copy_workspace_buf_; + void *copy_task_args_buf_ = nullptr; + void *copy_workspace_buf_ = nullptr; std::vector output_summary_; std::vector output_summary_host_; - void *copy_ioaddr_dev_; + void *copy_ioaddr_dev_ = nullptr; - void *copy_input_release_flag_dev_; - void *copy_input_data_size_dev_; - void *copy_input_src_dev_; - void *copy_input_dst_dev_; + void *copy_input_release_flag_dev_ = nullptr; + void *copy_input_data_size_dev_ = nullptr; + void *copy_input_src_dev_ = nullptr; + void *copy_input_dst_dev_ = nullptr; vector out_shape_hbm_; uint64_t kernel_id_ = 0; @@ -218,13 
+199,12 @@ class AiCpuCCTask : public AiCpuBaseTask { AiCpuCCTask &operator=(const AiCpuCCTask &) = delete; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { return OP_TASK_AICPUCC; } - const void *GetIOAddr() const override; + void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; const void *GetArgs() const; void SetKernelArgs(std::unique_ptr args, size_t arg_size); void SetSoName(const std::string &so_name); void SetkernelName(const std::string &kernel_Name); - void SetIoAddr(void *io_addr); + void SetIoAddr(uintptr_t *io_addr); size_t GetArgSize() const; Status LaunchKernel(const std::vector &input_desc, @@ -239,9 +219,9 @@ private: std::string kernel_name_; std::unique_ptr args_; size_t arg_size_ = 0; - uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; - void *io_addr_ = nullptr; + uintptr_t *io_addr_ = nullptr; + size_t io_addr_num_ = 0; bool is_custom_ = false; uint32_t dump_flag_ = RT_KERNEL_DEFAULT; }; diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index e06a08c6..594352aa 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -173,7 +173,8 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam auto tbe_kernel = GetTbeKernel(op_desc_); if (tbe_kernel == nullptr) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", op_desc_->GetName().c_str()); + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", + op_desc_->GetName().c_str()); return ACL_ERROR_GE_INTERNAL_ERROR; } diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py index f2a6a287..1476d505 100644 --- a/ge/stub/gen_stubapi.py +++ b/ge/stub/gen_stubapi.py @@ -1,3 +1,10 @@ +#!/usr/bin/python3.7 +# -*- coding: UTF-8 -*- +#------------------------------------------------------------------- +# Purpose: +# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. 
+#------------------------------------------------------------------- + import os import re import sys @@ -64,7 +71,7 @@ max_code_len_per_line = 100 when DEBUG on """ white_list_for_debug = ["attr_value.h", "operator.h", "tensor.h", "graph.h", "operator_factory.h", - "ge_ir_build.h", "ge_api.h", "ge_prof.h", "tensorflow_parser.h", "caffe_parser.h"] + "ge_ir_build.h", "ge_api.h", "tensorflow_parser.h", "caffe_parser.h"] include_dir_key_words = ["ge", "graph", "parser"] DEBUG = True diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index debadcfd..c1341d59 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -223,6 +223,29 @@ ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size); */ ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer); +/** + * @ingroup AscendCL + * @brief update new data of aclDataBuffer + * + * @param dataBuffer [OUT] pointer to aclDataBuffer + * @li The old data need to be released by the user, otherwise it may occur memory leak leakage + * call aclGetDataBufferAddr interface to get old data address + * call aclrtFree interface to release memory + * + * @param data [IN] pointer to new data + * @li Need to be managed by the user, + * call aclrtMalloc interface to apply for memory, + * call aclrtFree interface to release memory + * + * @param size [IN] size of data in bytes + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr + */ +ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size); + /** * @ingroup AscendCL * @brief get data address from aclDataBuffer @@ -547,6 +570,19 @@ ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); */ ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName); +/** + * @ingroup AscendCL + * @brief Set const data specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param dataBuffer [IN] pointer to the const databuffer + * @param length [IN] the length of const databuffer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length); + /** * @ingroup AscendCL * @brief an interface for users to output APP logs diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h index bfb8a68b..65c55290 100644 --- a/inc/external/acl/acl_prof.h +++ b/inc/external/acl/acl_prof.h @@ -33,11 +33,11 @@ extern "C" { typedef enum { ACL_AICORE_ARITHMATIC_THROUGHPUT = 0, - ACL_AICORE_PIPELINE = 1, - ACL_AICORE_SYNCHRONIZATION = 2, - ACL_AICORE_MEMORY = 3, - ACL_AICORE_INTERNAL_MEMORY = 4, - ACL_AICORE_STALL = 5, + ACL_AICORE_ARITHMETIC_UTILIZATION = 0, + ACL_AICORE_PIPE_UTILIZATION = 1, + ACL_AICORE_MEMORY_BANDWIDTH = 2, + ACL_AICORE_L0B_AND_WIDTH = 3, + ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, ACL_AICORE_NONE = 0xFF } aclprofAicoreMetrics; @@ -290,6 +290,32 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI */ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); +/** + * @ingroup AscendCL + * @brief get cube ops from subscription data + * + * @param opInfo [IN] pointer to subscription data 
+ * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval cube ops of subscription data + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpCubeOps(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get vector ops from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval vector ops of subscription data + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpVectorOps(const void *opInfo, size_t opInfoLen, uint32_t index); + #ifdef __cplusplus } #endif diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 2dd2c70c..73d9564b 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -46,6 +46,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime internel error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index 32a21e91..1a0f582d 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -130,6 +130,23 @@ enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHN // Supported Border Type enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; +// 
Venc parameter type +enum aclvencChannelDescParamType { + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 +}; + /** * @ingroup AscendCL * @brief alloc device memory for dvpp. @@ -1037,6 +1054,21 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescSrcRate(aclvencChannelDesc *ch */ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc *channelDesc, uint32_t maxBitRate); +/** + * @ingroup AscendCL + * @brief Set venc parameter for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, + const void *param); + /** * @ingroup AscendCL * @brief Get output buffer address for venc channel desc. @@ -1170,6 +1202,23 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescSrcRate(const aclvencChannelDe */ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChannelDesc *channelDesc); +/** + * @ingroup AscendCL + * + * @brief Get venc parameter for venc channel desc. 
+ * + * @param channelDesc [IN] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, + size_t *paramRetSize, void *param); + /** * @ingroup AscendCL * @brief get forced restart of I-frame interval from config diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 374a816a..cce17f93 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -369,6 +369,7 @@ static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); // for interface: aclgrphBuildModel +#ifdef __GNUC__ const std::set ir_builder_suppported_options = {INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, @@ -424,6 +425,7 @@ const std::set global_options = {CORE_TYPE, DEBUG_DIR, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE}; +#endif } // namespace ir_option } // namespace ge diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index 778ec21d..182c0444 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -24,9 +24,9 @@ #include "graph/ge_error_codes.h" namespace { -#define IR_MAJOR_VERSION (int(1)) -#define IR_MINOR_VERSION (int(0)) -#define IR_PATCH_VERSION (int(0)) +const int IR_MAJOR_VERSION = 1; +const int IR_MINOR_VERSION = 0; +const int IR_PATCH_VERSION = 0; } // namespace namespace ge { @@ -121,5 +121,20 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph); * @retval OtherValues Failure */ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); + +/** + * @ingroup AscendCL + * @brief create single op graph + * + * @param 
op_type[IN] the op_type + * @param inputs[IN] the inputdesc + * @param outputs[IN] the outputdesc + * @param graph[OUT] the graph + * @retval GRAPH_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector &inputs, + const std::vector &outputs, Graph &graph); + }; // namespace ge #endif // INC_EXTERNAL_GE_IR_BUILD_H_ diff --git a/inc/external/ge/ge_prof.h b/inc/external/ge/ge_prof.h deleted file mode 100644 index 658cea76..00000000 --- a/inc/external/ge/ge_prof.h +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_EXTERNAL_GE_GE_PROF_H_ -#define INC_EXTERNAL_GE_GE_PROF_H_ - -#include -#include -#include - -#include "ge/ge_api_error_codes.h" - -namespace ge { -enum ProfDataTypeConfig { - kProfTaskTime = 0x0002, - kProfAiCoreMetrics = 0x0004, - kProfAicpuTrace = 0x0008, - kProfTrainingTrace = 0x0800, - kProfHcclTrace = 0x1000 -}; - -enum ProfilingAicoreMetrics { - kAicoreArithmaticThroughput = 0, - kAicorePipeline = 1, - kAicoreSynchronization = 2, - kAicoreMemory = 3, - kAicoreInternalMemory = 4, - kAicoreStall = 5 -}; - -typedef struct ProfAicoreEvents ProfAicoreEvents; -typedef struct aclgrphProfConfig aclgrphProfConfig; - -/// -/// @ingroup AscendCL -/// @brief Initialize the profiling and set profiling configuration path -/// @param [in] profiler_path: configuration path of profiling -/// @param [in] length: length of configuration path -/// @return Status result of function -/// -Status aclgrphProfInit(const char *profiler_path, uint32_t length); - -/// -/// @ingroup AscendCL -/// @brief Finalize profiling -/// @return Status result of function -/// -Status aclgrphProfFinalize(); - -/// -/// @ingroup AscendCL -/// @brief Create data of type aclgrphProfConfig -/// @param [in] deviceid_list: device id list -/// @param [in] device_nums: device numbers -/// @param [in] aicore_metrics: type of aicore metrics -/// @param [in] aicore_events: pointer to aicore events be reserved, only support NULL now -/// @param [in] data_type_config: modules need profiling -/// @return Status result of function -/// -aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, - ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, - uint64_t data_type_config); - -/// -/// @ingroup AscendCL -/// @brief Destroy data of type aclgrphProfConfig -/// @param [in] profiler_config: config of profiling -/// @return Status result of function -/// -Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config); - -/// -/// @ingroup 
AscendCL -/// @brief Start profiling of modules which is configured by profiler config -/// @param [in] profiler_config: config of profiling -/// @return Status result of function -/// -Status aclgrphProfStart(aclgrphProfConfig *profiler_config); - -/// -/// @ingroup AscendCL -/// @brief Stop profiling of modules which is configured by profiler config -/// @param [in] profiler_config: config of profiling -/// @return Status result of function -/// -Status aclgrphProfStop(aclgrphProfConfig *profiler_config); -} // namespace ge - -#endif // INC_EXTERNAL_GE_GE_PROF_H_ diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index 2dd2c70c..73d9564b 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -46,6 +46,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime internel error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h index ec1f26d0..358fca04 100644 --- a/inc/framework/common/fmk_error_codes.h +++ b/inc/framework/common/fmk_error_codes.h @@ -23,10 +23,6 @@ #include "framework/common/fmk_types.h" #include "register/register_error_codes.h" -#define MODID_OMG 1 // OMG module ID -#define MODID_OME 2 // OME module ID -#define MODID_CALIBRATION 3 // Calibration module ID - // Each module uses the following four macros to define error codes: #define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, name, value) #define 
DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, name, value) @@ -37,6 +33,10 @@ // Interface for Obtaining Error Code Description #define GET_ERRORNO_STR(value) domi::StatusFactory::Instance()->GetErrDesc(value) +const int MODID_OMG = 1; // OMG module ID +const int MODID_OME = 2; // OME module ID +const int MODID_CALIBRATION = 3; // Calibration module ID + namespace domi { class StatusFactory { public: diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index 949d8b4c..7867e63d 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -25,6 +25,7 @@ #include "common/types.h" #include "graph/model.h" #include "model/ge_model.h" +#include "model/ge_root_model.h" namespace ge { class ModelHelper { @@ -32,17 +33,22 @@ class ModelHelper { ModelHelper() = default; ~ModelHelper(); - Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, - const std::string &output_file, ge::ModelBufferData &model); - Status SaveOriginalGraphToOmModel(const ge::Graph& graph, const std::string& output_file); + Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, + ge::ModelBufferData &model); + Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, const string &output_file, + ModelBufferData &model, bool is_unknown_shape); + Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); Status LoadModel(const ge::ModelData &model_data); - Status GetModelBufferData(ge::ModelBufferData& model); + Status LoadRootModel(const ge::ModelData &model_data); + Status GetModelBufferData(ge::ModelBufferData &model); - const ModelFileHeader* GetFileHeader() const { return file_header_; } + const ModelFileHeader *GetFileHeader() const { return file_header_; } GeModelPtr GetGeModel(); + GeRootModelPtr GetGeRootModel(); void SetSaveMode(bool 
val) { is_offline_ = val; } bool GetSaveMode(void) const { return is_offline_; } + bool GetModelType() const { return is_unknown_shape_model_; }; Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name); Status GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name); @@ -50,24 +56,46 @@ class ModelHelper { private: bool is_assign_model_ = false; bool is_offline_ = true; - ModelFileHeader* file_header_ = nullptr; + bool is_unknown_shape_model_ = false; + ModelFileHeader *file_header_ = nullptr; // Encrypted model need delete temp model and unencrypted model need not delete model uint8_t *model_addr_tmp_ = nullptr; uint32_t model_len_tmp_ = 0; GeModelPtr model_; + GeRootModelPtr root_model_; - ModelHelper(const ModelHelper&); - ModelHelper& operator=(const ModelHelper&); - Status GenerateGeModel(OmFileLoadHelper& om_load_helper); - Status LoadModelData(OmFileLoadHelper& om_load_helper); - void SetModelToGeModel(ge::Model& model); - Status LoadWeights(OmFileLoadHelper& om_load_helper); - Status LoadTask(OmFileLoadHelper& om_load_helper); - Status LoadTBEKernelStore(OmFileLoadHelper& om_load_helper); - Status LoadCustAICPUKernelStore(OmFileLoadHelper& om_load_helper); + ModelHelper(const ModelHelper &); + ModelHelper &operator=(const ModelHelper &); + Status GenerateGeModel(OmFileLoadHelper &om_load_helper); + Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); + Status LoadModelData(OmFileLoadHelper &om_load_helper); + void SetModelToGeModel(ge::Model &model); + Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadWeights(OmFileLoadHelper &om_load_helper); + Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadTask(OmFileLoadHelper &om_load_helper); + Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadTBEKernelStore(OmFileLoadHelper 
&om_load_helper); + Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper); + Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); Status ReleaseLocalModelData() noexcept; - Status SaveModelPartition(std::shared_ptr& om_file_save_helper, - ModelPartitionType type, const uint8_t* data, size_t size); + Status SaveModelPartition(std::shared_ptr &om_file_save_helper, ModelPartitionType type, + const uint8_t *data, size_t size, size_t model_index); + Status SaveModelDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + Buffer &model_buffer, size_t model_index = 0); + Status SaveModelWeights(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_index = 0); + Status SaveModelTbeKernel(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_index = 0); + Status SaveModelCustAICPU(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_index = 0); + Status SaveModelTaskDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + Buffer &task_buffer, size_t model_index = 0); + Status SaveModelHeader(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_num = 1); + Status SaveAllModelPartiton(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + Buffer &model_buffer, Buffer &task_buffer, size_t model_index = 0); }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 4ca54b50..98ad55d7 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -32,14 +32,14 @@ using std::vector; namespace ge { struct ModelPartition { ModelPartitionType type; - uint8_t* data = 0; + uint8_t *data = 0; uint32_t size = 0; }; struct 
OmFileContext { std::vector partition_datas_; std::vector partition_table_; - uint32_t model_data_len_; + uint32_t model_data_len_ = 0; }; struct SaveParam { @@ -57,15 +57,23 @@ class OmFileLoadHelper { Status Init(uint8_t *model_data, const uint32_t model_data_size); + Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); + Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index); + OmFileContext context_; + vector model_contexts_; + private: Status CheckModelValid(const ge::ModelData &model) const; Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); + Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + bool is_inited_{false}; }; @@ -79,15 +87,23 @@ class OmFileSaveHelper { Status AddPartition(ModelPartition &partition); + Status AddPartition(ModelPartition &partition, size_t cur_index); + const std::vector &GetModelPartitions() const; - Status SaveModel(const SaveParam &save_param, const char *target_file, - ge::ModelBufferData& model, bool is_offline = true); + Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, + bool is_offline = true); Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); + vector model_contexts_; + ModelFileHeader model_header_; OmFileContext context_; + + ModelPartitionTable *GetPartitionTable(size_t cur_ctx_index); + + Status SaveRootModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline); }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 4718b180..aa50c8a1 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ 
b/inc/framework/common/op/ge_op_utils.h @@ -17,7 +17,6 @@ #ifndef INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ #define INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ -#include #include #include @@ -32,7 +31,6 @@ #include "proto/insert_op.pb.h" namespace ge { -using namespace cce; using domi::Status; // Add Sub Mul @@ -76,18 +74,7 @@ class OpUtils { static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) { return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true); } - /// - /// @ingroup domi_omg - /// @brief Convert the dimension of array according to different format - /// @param [in] src_format src_shape format - /// @param [in] src Dimension array to be converted - /// @param [in] dst_format Target format after conversion - /// @param [out] dst Dimension array after conversion - /// @return SUCCESS success - /// @return FAILED fail - /// - static bool ConvertDim(ccTensorFormat_t src_format, const std::vector &src, ccTensorFormat_t dst_format, - std::vector &dst); + /// /// @ingroup domi_omg /// @brief Determine whether to manually calculate the tensor size based on the values of format and dim @@ -97,73 +84,6 @@ class OpUtils { /// @return false skip /// static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description, which is used for input and output. - /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccTensorDescriptor_t &cc_tensor); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description, which is used for input and output. 
- /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [in] dst_data_type data_type of the target cc_tensor - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, int32_t dst_data_type, - ccTensorDescriptor_t &cc_tensor); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description for bias. - /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - /// - static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, ccTensorDescriptor_t &cc_tensor); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description for bias. - /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [in] dst_data_type data_type of the target cc_tensor - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, int32_t dst_data_type, - ccTensorDescriptor_t &cc_tensor); - - static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector &dim, - ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt = 4); - /// - /// @ingroup domi_ome - /// @brief Destroys a tensor - /// @param [inout] cc_tensor Tensor definition used by CC - /// - static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor) noexcept; - - /// - /// @ingroup domi_ome - /// @brief Destroys a tensor - /// @param [inout] cc_filter cc_filter Definition of the filter used by CC - /// - static void DestroyFilterDescriptor(ccFilterDescriptor_t &cc_filter); - - /// - /// @ingroup domi_ome - /// @brief Initializing Filter Description - /// @param [in] model_filter Filter information defined 
in the offline model - /// @param [out] cc_filter Definition of the filter used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitFilterDescriptor(const ge::GeTensor &model_filter, ccFilterDescriptor_t &cc_filter); /// /// @brief Extract AIPP parameters from AttrDefMap and splice them @@ -209,16 +129,7 @@ class OpUtils { /// @param [out] output Data pointer after conversion. The format is HWCK /// static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output); - /// - /// @ingroup domi_omg - /// @brief Initialize the input and output description of the data node which is applied to filter weight in the - /// training network - /// @param [in] model_tensor input and output tensor information - /// @param [out] cc_tensor Tensor in CCE format after conversion - /// - static Status InitFilterTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccFilterDescriptor_t &cc_tensor); - - static void SetTensorDescriptorAllOffsetQuantizeInfo(const GeTensorDesc &tensor, ccTensorDescriptor_t cc_tensor); + static vector GetWeights(const ge::Node &node); static vector GetWeights(ge::ConstNodePtr node); static vector MutableWeights(const ge::Node &node); @@ -228,69 +139,7 @@ class OpUtils { static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector &dims); private: - friend class CceTensorDescriptor; static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); }; - -class CceTensorDescriptor; - -using CceTensorDescriptorPtr = std::shared_ptr; - -class CceTensorDescriptor { - public: - explicit CceTensorDescriptor(ccTensorDescriptor_t cc_tensor); - CceTensorDescriptor(const CceTensorDescriptor &) = delete; - CceTensorDescriptor &operator=(const CceTensorDescriptor &) = delete; - - ~CceTensorDescriptor(); - - ccTensorDescriptor_t GetPtr() { return cc_tensor_; } - - /// - /// @brief Initializes the tensor based on shape information. 
- /// @param[in] format data permutation format - /// @param[in] data_type Data Type - /// @param[in] dim dim information - /// @return return code - /// - Status InitTensor(int32_t format, int32_t data_type, const std::vector &dims); - - Status InitTensor(int32_t format, int32_t data_type, const ge::GeShape &shape); - - /// - /// @brief get format of tensor - /// @param[out] format format of the tensor - /// @return return code - /// - Status GetFormat(ccTensorFormat_t *format); - - /// - /// @brief Obtains the size of the tensor. - /// @param[out] size size of Tensor - /// @return return code - /// - Status GetTensorSizeInBytes(uint32_t *size); - - /// - /// @brief transform tensor between 4d(NCHW) and 5d(NC1HWC0) - /// @param [in] xDesc descriptor of input tensor - /// @param [in] x point to input data in host memory - /// @param [in] dataTypeTransmode mode of data type transform - /// @param [in] yDesc descriptor of output tensor - /// @param [in|out] y point to output data in host memory - /// @param [in] ySizeInBytes size of outputData - /// @return return code - /// - static Status TransTensor(const ccTensorDescriptor_t xDesc, const void *x, const CceTensorDescriptorPtr &yDesc, - void *y, uint32_t ySizeInBytes); - - /// - /// @brief CceTensorDescriptor Static Constructor - /// @return CceTensorDescriptor smart pointer - /// - static CceTensorDescriptorPtr Create(); - - ccTensorDescriptor_t cc_tensor_ = nullptr; -}; } // namespace ge #endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ diff --git a/inc/framework/common/op/op_parser_util.h b/inc/framework/common/op/op_parser_util.h index 49b4350a..43254ca9 100644 --- a/inc/framework/common/op/op_parser_util.h +++ b/inc/framework/common/op/op_parser_util.h @@ -17,7 +17,6 @@ #ifndef INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ #define INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ -#include #include #include #include @@ -31,10 +30,7 @@ const uint32_t NORMAL_OUTPUT_NUM = 1; const uint32_t NORMAL_WORKSPACE_NUM = 0; const 
int32_t NORMAL_1D_DIM_NUM = 1; const int32_t NORMAL_SCALE_DIM_NUM = 0; -const int NORMAL_TENSOR_FORMAT = static_cast(cce::CC_TENSOR_NC1HWC0); const int NORMAL_TENSOR_SIZE = 4; -const int NORMAL_DEVICE_DATA_TYPE = static_cast(cce::CC_DATA_HALF); -const int DEFAULT_POOLING_MODE = static_cast(cce::CC_POOLING_MAX); const uint32_t DEFAULT_REAL_DIM_CNT = 4; // const @@ -183,7 +179,6 @@ const int32_t SSD_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0; const float SSD_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; const int32_t SSD_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200; const float SSD_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0; -const int SSD_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast(cce::CC_BOX_CENTER_SIZE); const int32_t SSD_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200; const bool SSD_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false; const float SSD_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1; @@ -200,7 +195,6 @@ const float REFINEDET_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; const int32_t REFINEDET_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200; const float REFINEDET_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0; const bool REFINEDET_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false; -const int REFINEDET_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast(cce::CC_BOX_CENTER_SIZE); const int32_t REFINEDET_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200; const float REFINEDET_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1; const float REFINEDET_DETECTIONOUTPUT_OBJECTNESS_SCORE_DEFAULT_VALUE = 0; diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h new file mode 100644 index 00000000..e56411c9 --- /dev/null +++ b/inc/framework/common/profiling/ge_profiling.h @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_GE_PROFILING_H_ +#define INC_FRAMEWORK_COMMON_GE_PROFILING_H_ + +#include "ge/ge_api_error_codes.h" +#include "toolchain/prof_callback.h" + +#define MAX_DEV_NUM (64) +enum ProfCommandHandleType { + kProfCommandhandleInit = 0, + kProfCommandhandleStart, + kProfCommandhandleStop, + kProfCommandhandleFinalize, + kProfCommandhandleModelSubscribe, + kProfCommandhandleModelUnsubscribe +}; + +struct ProfCommandHandleData { + uint64_t profSwitch; + uint32_t devNums; // length of device id list + uint32_t devIdList[MAX_DEV_NUM]; + uint32_t modelId; +}; + +ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); +ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); +ge::Status RegProfReporterCallback(MsprofReporterCallback func); +ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); + +#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ diff --git a/inc/framework/common/profiling/ge_runner_profiling.h b/inc/framework/common/profiling/ge_runner_profiling.h new file mode 100644 index 00000000..d2eff767 --- /dev/null +++ b/inc/framework/common/profiling/ge_runner_profiling.h @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ +#define INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ + +#include "profiling/ge_profiling.h" + +bool IsInitialize(); + +#endif // INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h new file mode 100644 index 00000000..090e7e26 --- /dev/null +++ b/inc/framework/common/taskdown_common.h @@ -0,0 +1,71 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ +#define INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ + +#include "runtime/rt.h" + +namespace ge { + +const int CC_FUSION_OP_MAX = 32; + +typedef enum tagCcStatus { + CC_STATUS_SUCCESS = 0, /**< succ */ + CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ + CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ + CC_STATUS_BAD_PARAM = 3, /**< para check failed */ + CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */ + CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */ + CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ + CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ + CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ + CC_STATUS_RESERVED /**< just for check */ +} ccStatus_t; + +typedef enum tagccKernelType { + CCE_AI_CORE = 0, /* cce aicore */ + CCE_AI_CPU = 1, /* cce aicpu */ + TE = 2, /* te operator*/ + CUSTOMIZED = 3, /* customized operator */ + TE_AI_CORE = 4, /* te aicore operator*/ + TE_AI_CPU = 5, /* te aicpu operator */ + AI_CPU = 6, /* aicpu */ + CUST_AI_CPU = 7, /* custom aicpu*/ + INVALID = 8, /* unknown kernel type */ +} ccKernelType; + +typedef struct tagOpContext { + ccKernelType kernelType; + uint32_t opId; + uint32_t kernelFuncId; + uint32_t opIndex; + uint32_t opCount; + uint32_t opIndex2[CC_FUSION_OP_MAX]; + bool isFlowtable; + uint16_t *argsOffset; + uint32_t argsCount; + uint64_t genDataBaseAddr; + uint64_t genDataBaseSize; + uint64_t genWeightBaseAddr; + uint64_t genWeightBaseSize; + uint64_t genVariableBaseAddr; + uint64_t genVariableBaseSize; + uint64_t l2ctrlSize; +} ccOpContext; +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 441d0757..99c2ea03 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -529,7 +529,7 @@ REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait"); // aicpu op for online_infer dynamic_dims REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, 
"GetDynamicDims"); -enum InputMode { INPUT = 0, CONST_INPUT}; +enum InputMode { INPUT = 0, CONST_INPUT }; // Definition of the processing status enum of the process module enum ModelProcessState { @@ -605,7 +605,7 @@ static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; /// /// @brief length of the reserved field in the model file header /// -static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 79; +static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75; /// /// @ingroup domi_omg @@ -843,9 +843,10 @@ struct ModelFileHeader { uint32_t ops = 0; // Computing power (Kops) uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. The value contains 32 characters uint32_t om_ir_version = 0; + uint32_t model_num = 0; uint8_t platform_version[PLATFORM_VERSION_LEN] = {0}; uint8_t platform_type = {0}; - uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 79 + uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75 }; static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0; diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index c446b983..e0904965 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -74,11 +74,22 @@ class GeGenerator { /// @param [in] op_desc: the OP description. /// @param [in] inputs: input tensors. /// @param [in] outputs: output tensors. - /// @param [in] engine_type: specific engine. - /// @param [out] model_buff: model buff of single op. + /// @param [in] engine_type: engine type. + /// @param [out] model_buff: model buff of op. /// @return SUCCESS or FAILED Status BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, OpEngineType engine_type, ModelBufferData &model_buff); + /// + /// @ingroup ge + /// @brief: Build single Op into model buff. + /// @param [in] op_desc: the OP description. + /// @param [in] inputs: input tensors. + /// @param [in] outputs: output tensors. 
+ /// @param [in] graph_name: graph name. + /// @param [out] graph: graph of single op. + /// @return SUCCESS or FAILED + Status BuildSingleOpGraph(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, + std::string graph_name, Graph &graph); private: Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, diff --git a/metadef b/metadef index 4176fab0..dba83744 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 4176fab0cb2fd4f8794061916878983afb75c8da +Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299 diff --git a/parser b/parser index 9e392045..ce574894 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 9e392045c26a57913b512d0686e1285650b62abe +Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a diff --git a/tests/depends/error_manager/src/error_manager_stub.cc b/tests/depends/error_manager/src/error_manager_stub.cc index 4f6b6b3d..edf5a487 100644 --- a/tests/depends/error_manager/src/error_manager_stub.cc +++ b/tests/depends/error_manager/src/error_manager_stub.cc @@ -58,7 +58,7 @@ /// @param [in] value: vector parameter value /// void ErrorManager::ATCReportErrMessage(std::string error_code, const std::vector &key, - const std::vector &value) { + const std::vector &value) { } /// diff --git a/tests/depends/hccl/src/hccl_stub.cc b/tests/depends/hccl/src/hccl_stub.cc index 1cc8fdb3..b9b9d4f6 100644 --- a/tests/depends/hccl/src/hccl_stub.cc +++ b/tests/depends/hccl/src/hccl_stub.cc @@ -19,26 +19,26 @@ #include "hccl/hcom.h" HcclResult hcom_all_gather(const char *tag, void *input_count_ptr, void *output_ptr, u64 input_count, - HcclDataType data_type, const char *group, rtStream_t stream) { + HcclDataType data_type, const char *group, rtStream_t stream) { return HCCL_SUCCESS; } HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType data_type, u32 root, - const char *group, rtStream_t stream) { + const char *group, rtStream_t stream) { return 
HCCL_SUCCESS; } HcclResult hcom_all_reduce(const char *tag, void *input_ptr, void *output_ptr, u64 count, HcclDataType data_type, - HcclReduceOp op, const char *group, rtStream_t stream) { + HcclReduceOp op, const char *group, rtStream_t stream) { return HCCL_SUCCESS; } HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 max_segment_num, - u32 *segment_num, u32 *segment_idx) { + u32 *segment_num, u32 *segment_idx) { return HCCL_SUCCESS; } HcclResult hcom_reduce_scatter(const char *tag, void *input_ptr, void *output_ptr, u64 count, - HcclDataType data_type, HcclReduceOp op, const char *group, rtStream_t stream) { + HcclDataType data_type, HcclReduceOp op, const char *group, rtStream_t stream) { return HCCL_SUCCESS; } diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 2ab6684d..75eefdd1 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -325,7 +325,7 @@ rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback) } rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, - rtMallocHostSharedMemoryOut *out) + rtMallocHostSharedMemoryOut *out) { out->ptr = new uint8_t[in->size]; out->devPtr = new uint8_t[in->size]; diff --git a/tests/st/CMakeLists.txt b/tests/st/CMakeLists.txt deleted file mode 100644 index 56babec1..00000000 --- a/tests/st/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -cmake_minimum_required(VERSION 3.0) -set(CMAKE_CXX_STANDARD 11) -project(ge_st CXX C) - -set(CMAKE_CXX_FLAGS "-O1 -fPIC -Wl,-unresolved-symbols=ignore-in-shared-libs") - - -file(GLOB_RECURSE RES50_TRAIN_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "resnet50/resnet50_train.cc" - "resnet50/common.cc" -) - -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/graph) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/external/ge) -include_directories(${GE_SOURCE_DIR}/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/ops) -include_directories(/usr/local/HiAI/opp/op_proto/built-in/inc) - -add_executable(st_resnet50_train ${RES50_TRAIN_SRCS}) -target_link_libraries(st_resnet50_train - ${PROTOBUF_LIBRARY} - ge_client_train ge_memory -) \ No newline at end of file diff --git a/tests/st/resnet50/common.cc b/tests/st/resnet50/common.cc deleted file mode 100644 index 674ef926..00000000 --- a/tests/st/resnet50/common.cc +++ /dev/null @@ -1,768 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "model.h" - -#define MAX_HEAD_SIZE 50 - -using namespace std; -using namespace ge; - -void update_op_format(Operator ops, Format format) { - printf("set format begin.........\n"); - ge::TensorDesc tensor_desc_x = ops.GetInputDesc("x"); - ge::TensorDesc tensor_desc_y = ops.GetOutputDesc("y"); - Format f_x0 = tensor_desc_x.GetFormat(); - Format f_y0 = tensor_desc_x.GetFormat(); - printf("before set x format:%d \n", f_x0); - printf("before set y format:%d \n", f_y0); - printf("format to be set is :%d \n", format); - tensor_desc_x.SetFormat(format); - tensor_desc_y.SetFormat(format); - ops.UpdateInputDesc("x", tensor_desc_x); - ops.UpdateOutputDesc("y", tensor_desc_y); - Format f_x = tensor_desc_x.GetFormat(); - Format f_y = tensor_desc_y.GetFormat(); - printf("after set x format:%d \n", f_x); - printf("after set y format:%d \n", f_y); -} - -/// getDimInfo: get dim info from data file -/// param: -/// fp: the testing datafile object -/// -/// return : -/// dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3,162(3*3*6*3)],4 is dim size,3,3,6,3 is the -/// dim shape data_size: the size of the testing data including the data file -void getDimInfo(FILE *fp, std::vector &dim_info) { - // get dim info from hisi testing data file - uint32_t *dim_buffer = (uint32_t *)malloc(MAX_HEAD_SIZE * sizeof(uint32_t)); - fread(dim_buffer, sizeof(uint32_t), MAX_HEAD_SIZE, fp); - dim_info.push_back(*dim_buffer); // get dim size - - // get data shape to compute the datasize - uint64_t data_size = 1; - uint32_t i = 1; - for (; i <= dim_info[0]; i++) { - dim_info.push_back(*(dim_buffer + i)); - data_size *= *(dim_buffer + i); - } - dim_info.push_back(data_size); - - free(dim_buffer); -} - -/// readTestDataFile: read test date from hisi .t datafile -/// param: -/// infile: the path of hisi .t datafile -/// return: -/// dim_info: array to store the info of the dim in 
datafile, like [4,3,3,6,3],4 is dim size,3,3,6,3 is the dim shape -void *readTestDataFile(std::string infile, std::vector &dim_info) { - FILE *fp; - fp = fopen(infile.c_str(), "r"); - - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } else { - getDimInfo(fp, dim_info); - uint64_t data_size = dim_info[dim_info.size() - 1]; - - fclose(fp); - - fp = fopen(infile.c_str(), "r"); - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } - uint32_t *memory = (uint32_t *)malloc((dim_info[0] + 1 + data_size) * sizeof(uint32_t)); - fread(memory, sizeof(uint32_t), (dim_info[0] + 1 + data_size), fp); - fclose(fp); - return memory + (dim_info[0] + 1); - } -} - -void *readUint8TestDataFile(std::string infile, int size) { - FILE *fp; - fp = fopen(infile.c_str(), "r"); - - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } - uint8_t *memory = (uint8_t *)malloc((size) * sizeof(uint8_t)); - fread(memory, sizeof(uint8_t), (size), fp); - fclose(fp); - return memory; -} - -/// allclose -/// param: -/// a:compared file a -/// b:compared file b -/// count: the count size which will compare -/// rtol: -/// atol: -/// return: -/// true or false -bool allclose(float *a, float *b, uint64_t count, float rtol = 1e-05, float atol = 1e-08) { - uint32_t i = 0; - - for (; i < count; ++i) { - if (fabs(a[i] - b[i]) > (atol + rtol * fabs(b[i]))) { - printf("compara failed: i= %d, a[i]=%f, b[i]=%f,atol=%f,rtol=%f\n", i, a[i], b[i], atol, rtol); - return false; - } - } - - return true; -} - -/// compFp32WithTData: compare the data with the data in hisi .t file -/// param: -/// actual_output_data: the result of ge -/// expected_data_file: the path of hisi .t result file -/// rtol: -/// atol: -/// return: -/// true of false -bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol = 1e-05, float atol = 1e-08) { - std::vector dim_info; - float 
*expected_output_data = (float *)readTestDataFile(expected_data_file, dim_info); - - uint32_t i = 1; - uint64_t data_size = 1; - for (; i <= dim_info[0]; i++) { - data_size *= dim_info[i]; - } - return allclose(actual_output_data, expected_output_data, data_size, rtol, atol); -} - -int SwitchDatatype(DataType dt) { - int size = 1; - if (dt == ge::DT_FLOAT) size = 4; - if (dt == ge::DT_INT32) size = 4; - if (dt == ge::DT_FLOAT16) size = 2; - if (dt == ge::DT_INT64) size = 8; - return size; -} - -ge::Tensor genTensor(std::vector tensor_shape, Format format, DataType dt) { - int size = 1; - for (int i = 0; i < tensor_shape.size(); i++) { - size = size * tensor_shape[i]; - } - - int data_type_size = SwitchDatatype(dt); - - size = abs(size * data_type_size); - vector data_value; - - if (size == 0) { - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); - input_tensor_desc.SetRealDimCnt(tensor_shape.size()); - Tensor gen_tensor = Tensor(input_tensor_desc, data_value); - return gen_tensor; - } - for (int i = 0; i < size; i++) { - data_value.push_back(1); - } - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); - input_tensor_desc.SetRealDimCnt(tensor_shape.size()); - Tensor gen_tensor = Tensor(input_tensor_desc, data_value); - return gen_tensor; -} - -ge::Tensor genTensor_withVaule(std::vector tensor_shape, float value) { - int size = 1; - for (int i = 0; i < tensor_shape.size(); i++) { - size = size * tensor_shape[i]; - } - - float *data_value = new float[size]; - for (int i = 0; i < size; i++) { - *(data_value + i) = value; - } - Tensor gen_ge_tensor; - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), FORMAT_NCHW); - gen_ge_tensor.SetTensorDesc(input_tensor_desc); - gen_ge_tensor.SetData((uint8_t *)data_value, size * 4); - - return gen_ge_tensor; -} - -Tensor genTesnor_Shape_as_data(std::vector tensor_shape) { - Format format = FORMAT_NCHW; - DataType dt = DT_INT32; - int size = 
tensor_shape.size(); - int32_t *tensor_data = new int32_t[size]; - std::cout << "shape tensor size:" << size << endl; - for (int i = 0; i < size; i++) { - *(tensor_data + i) = tensor_shape[i]; - } - - Tensor gen_tensor; - TensorDesc input_tensor_desc = TensorDesc(ge::Shape({size}), FORMAT_NCHW, DT_INT32); - gen_tensor.SetData((uint8_t *)tensor_data, size * GetDatTypeSize(dt)); - gen_tensor.SetTensorDesc(input_tensor_desc); - - return gen_tensor; -} - -/// train_flag is 0 when infer; train_flag is 1 when train; train_flag is 0 default -/// run_mode_path is not 0,1,2 when TBE; run_mode_path is 1 when FE; run_mode_path is 0 default -/// run_mode_path is 2 now when AICPU, ge.enabledlocalFmkop is 1 -ge::Status GEInitialize_api(string train_flag, string run_mode_path) { - ge::Status ret; - if (run_mode_path == "0") { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {"ge.aicpuFlag", "1"}, - {"ge.feFlag", "1"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {"ge.soLoadPath", - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so"}}; - ret = ge::GEInitialize(config); - } else if (run_mode_path == "1") { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {"ge.feFlag", "1"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/bert"}, - {"ge.soLoadPath", "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so"}}; - ret = ge::GEInitialize(config); - } else if (run_mode_path == "2") { - const std::map config = {{"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {LOCAL_FMKOP_FLAG, "1"}}; - ret = ge::GEInitialize(config); - } else { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from 
csa/paas"}, - {"ge.graphRunMode", train_flag}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + run_mode_path}}; - ret = ge::GEInitialize(config); - } - std::cout << "GEInitialize_ret is " << ret << std::endl; - - return ret; -} - -/// train_flag is infer default -/// run_mode: is multi group of [fe,aicpu,bert,deeplabv3,mobilenetv2,single_path_nas,ssd] -/// but bert,deeplabv3,mobilenetv2,single_path_nas,ssd can only set one value from array -/// eg:"fe,aicpu,bert" or "fe", default is “fe” -/// "fe,aicpu,bert" remain open fe aicpu and bert -ge::Status GEInitialize_api_new(string train_flag, string run_mode) { - ge::Status ret; - vector modes; - - char *strs = new char[run_mode.length() + 1]; - strcpy(strs, run_mode.c_str()); - const char *delim = ","; - char *p = strtok(strs, delim); - while (p) { - string s = p; // transform substr to string - modes.push_back(s); // save to result array - p = strtok(NULL, delim); - } - - std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {"ge.opsProtoLibPath", "/usr/local/HiAI/runtime/ops/op_proto/built-in/libopsproto.so"}}; - if (train_flag == "infer") - config.insert(pair("ge.graphRunMode", "0")); - else if (train_flag == "train") - config.insert(pair("ge.graphRunMode", "1")); - else - std::cout << "GeInitialize give the error param" << std::endl; - - for (int i = 0; i < modes.size(); i++) { - if (modes[i] == "fe") { - config.insert(pair("ge.feFlag", "1")); - if (config.find("ge.soLoadPath") != config.end()) { - config["ge.soLoadPath"] = - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" - "runtime/lib64/plugin/opskernel/librts_engine.so"; - } else { - config.insert(pair( - "ge.soLoadPath", - 
"/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); - } - } else if (modes[i] == "aicpu") { - config.insert(pair("ge.aicpuFlag", "1")); - if (config.find("ge.soLoadPath") != config.end()) { - config["ge.soLoadPath"] = - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" - "runtime/lib64/plugin/opskernel/librts_engine.so"; - } else { - config.insert(pair( - "ge.soLoadPath", - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/" - "opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); - } - } else if (modes[i] == "bert" || modes[i] == "deeplabv3" || modes[i] == "mobilenetv2" || - modes[i] == "single_path_nas" || modes[i] == "ssd") { - config.insert(pair(TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + modes[i])); - } else if (modes[i] == "plugin") { - - } else - std::cout << "GeInitialize give the error param" << std::endl; - } - ret = ge::GEInitialize(config); - - std::cout << "GEInitialize_ret is " << ret << std::endl; - - return ret; -} - -ge::Status GEFinalize_api() { - ge::Status ret = ge::GEFinalize(); - std::cout << "GEFinalize ret is " << ret << std::endl; - - return ret; -} - -/// set train_flag -/// if run_mode_path is "fe" remain FE process; "fe,plugin" is FE and TBE plugin process -/// "aicpu" is open aicpu plugin -int RunGraph_initData(Graph &graph, string op_name, map> attr_test, string train_flag, - string run_mode_path) { - std::map options = {{RUN_FLAG, "1"}}; - uint32_t graph_id = 0; - - ge::Status ret = GEInitialize_api_new(train_flag, run_mode_path); - EXPECT_EQ(ret, ge::SUCCESS); - - ge::Session *session = new 
Session(options); - ASSERT_TRUE(session != NULL); - - std::vector input; - if (attr_test.find("input1") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input1"]); - input.push_back(input_tensor); - } - if (attr_test.find("input2") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input2"]); - input.push_back(input_tensor); - } - if (attr_test.find("input3") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input3"]); - input.push_back(input_tensor); - } - std::vector output; - - ret = session->AddGraph(graph_id, graph); - EXPECT_EQ(ret, ge::SUCCESS); - if (train_flag == "1") { - setenv("GE_TRAIN", "1", true); - ret = session->RunGraph(graph_id, input, output); - setenv("GE_TRAIN", "0", true); - } else { - ret = session->RunGraph(graph_id, input, output); - } - delete session; - GEFinalize_api(); - - if (ret != ge::SUCCESS) { - std::cout << " run graph failed" << std::endl; - return -1; - } else { - return 0; - } -} - -ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graph_id, Graph &graph, std::vector inputs, - std::vector &outputs) { - ge::Status ret = session->AddGraph(graph_id, graph); - EXPECT_EQ(ret, ge::SUCCESS); - ret = session->RunGraph(graph_id, inputs, outputs); - - return ret; -} - -ge::Session *create_session() { - // Init session - std::map options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; - ge::Session *session = new Session(options); - ASSERT_TRUE(session != NULL); - - return session; -} - -ge::Session *create_aipp_session() { - // Init session - std::map options = {{"a", "b"}, {TRAIN_FLAG, "1"}, {"ge.insertOpFile", "/root/host/ge/aipp.cfg"}}; - ge::Session *session = new Session(options); - ASSERT_TRUE(session != NULL); - - return session; -} - -int buildCheckPointGraph(Graph &graph, map variables) { - std::vector inputs{}; - std::vector outputs{}; - - for (map::iterator it = variables.begin(); it != variables.end(); ++it) { - auto var = op::Variable(string(it->first)); - 
var.update_output_desc_y(it->second); - inputs.push_back(var); - graph.AddOp(var); - } - - auto save = op::Save().create_dynamic_input_tensors(inputs.size()); - for (int i = 0; i < inputs.size(); i++) { - save.set_dynamic_input_tensors(i, inputs[i]); - } - - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -int buildInitGraph(Graph &graph, std::vector desc_var, std::vector name_var, - std::vector values_var) { - std::vector inputs{}; - std::vector outputs{}; - - for (int i = 0; i < desc_var.size(); i++) { - desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); - auto tensor_data = genTensor_withVaule(desc_var[i].GetShape().GetDims(), values_var[i]); - auto var_constant = op::Constant().set_attr_value(tensor_data); - var_constant.update_output_desc_y(desc_var[i]); - - auto var_init = op::Variable(string(name_var[i])); - var_init.update_output_desc_y(desc_var[i]); - auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); - inputs.push_back(var_init); - } - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -int buildInitGraph_other_dataType(Graph &graph, std::vector desc_var, std::vector name_var) { - std::vector inputs{}; - std::vector outputs{}; - - for (int i = 0; i < desc_var.size(); i++) { - desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); - auto tensor_data = genTensor(desc_var[i].GetShape().GetDims(), desc_var[i].GetFormat(), desc_var[i].GetDataType()); - auto var_constant = op::Constant().set_attr_value(tensor_data); - var_constant.update_output_desc_y(desc_var[i]); - - auto var_init = op::Variable(string(name_var[i])); - var_init.update_output_desc_y(desc_var[i]); - auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); - inputs.push_back(var_init); - - graph.AddOp(var_constant); - graph.AddOp(var_init); - graph.AddOp(var_assign); - } - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -bool build_multi_input_multi_output_graph(Graph 
&graph) { - auto data1 = op::Data("Data1").set_attr_index(0); - auto data2 = op::Data("Data2").set_attr_index(1); - - vector dim_info; - - auto relu1 = op::Relu("Relu1").set_input_x(data1); - auto relu2 = op::Relu("Relu2").set_input_x(data2); - - auto eltwise = op::Eltwise("Eltwise") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, relu1) - .set_dynamic_input_x(1, relu2) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - auto eltwise1 = op::Eltwise("Eltwise1") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, eltwise) - .set_dynamic_input_x(1, eltwise) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - auto eltwise2 = op::Eltwise("Eltwise2") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, eltwise) - .set_dynamic_input_x(1, eltwise) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - std::vector inputs{data1, data2}; - std::vector outputs{eltwise1, eltwise2}; - graph.SetInputs(inputs).SetOutputs(outputs); - return true; -} - -void build_big_graph(Graph &graph, map> attr) { - auto data = op::Data("Data").set_attr_index(0); - auto weight = op::Const("weight1").set_attr_value(genTensor(attr["weight"])); - vector weight_shape(attr["weight"].begin(), attr["weight"].end()); - TensorDesc weight_desc(ge::Shape(weight_shape), FORMAT_NCHW, DT_FLOAT); - weight.update_output_desc_y(weight_desc); - auto conv_1 = op::Conv2D("conv1").set_input_x(data).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - - auto conv_2 = op::Conv2D("conv2").set_input_x(conv_1).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_3 = op::Conv2D("conv3").set_input_x(conv_2).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_4 = op::Conv2D("conv4").set_input_x(conv_3).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_5 = 
op::Conv2D("conv5").set_input_x(conv_4).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_6 = op::Conv2D("conv6").set_input_x(conv_5).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_7 = op::Conv2D("conv7").set_input_x(conv_6).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_8 = op::Conv2D("conv8").set_input_x(conv_7).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_9 = op::Conv2D("conv9").set_input_x(conv_8).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_10 = op::Conv2D("conv10").set_input_x(conv_9).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_11 = op::Conv2D("conv11").set_input_x(conv_10).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_12 = op::Conv2D("conv12").set_input_x(conv_11).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_13 = op::Conv2D("conv13").set_input_x(conv_12).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_14 = op::Conv2D("conv14").set_input_x(conv_13).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_15 = op::Conv2D("conv15").set_input_x(conv_14).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_16 = op::Conv2D("conv16").set_input_x(conv_15).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_17 = op::Conv2D("conv17").set_input_x(conv_16).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_18 = op::Conv2D("conv18").set_input_x(conv_17).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_19 = 
op::Conv2D("conv19").set_input_x(conv_18).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_20 = op::Conv2D("conv20").set_input_x(conv_19).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_21 = op::Conv2D("conv21").set_input_x(conv_20).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_22 = op::Conv2D("conv22").set_input_x(conv_21).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_23 = op::Conv2D("conv23").set_input_x(conv_22).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_24 = op::Conv2D("conv24").set_input_x(conv_23).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_25 = op::Conv2D("conv25").set_input_x(conv_24).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_26 = op::Conv2D("conv26").set_input_x(conv_25).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_27 = op::Conv2D("conv27").set_input_x(conv_26).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_28 = op::Conv2D("conv28").set_input_x(conv_27).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_29 = op::Conv2D("conv29").set_input_x(conv_28).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_30 = op::Conv2D("conv30").set_input_x(conv_29).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_31 = op::Conv2D("conv31").set_input_x(conv_30).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_32 = op::Conv2D("conv32").set_input_x(conv_31).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_33 = 
op::Conv2D("conv33").set_input_x(conv_32).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_34 = op::Conv2D("conv34").set_input_x(conv_33).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_35 = op::Conv2D("conv35").set_input_x(conv_34).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_36 = op::Conv2D("conv36").set_input_x(conv_35).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_37 = op::Conv2D("conv37").set_input_x(conv_36).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_38 = op::Conv2D("conv38").set_input_x(conv_37).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_39 = op::Conv2D("conv39").set_input_x(conv_38).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_40 = op::Conv2D("conv40").set_input_x(conv_39).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_41 = op::Conv2D("conv41").set_input_x(conv_40).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_42 = op::Conv2D("conv42").set_input_x(conv_41).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_43 = op::Conv2D("conv43").set_input_x(conv_42).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_44 = op::Conv2D("conv44").set_input_x(conv_43).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_45 = op::Conv2D("conv45").set_input_x(conv_44).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_46 = op::Conv2D("conv46").set_input_x(conv_45).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_47 = 
op::Conv2D("conv47").set_input_x(conv_46).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_48 = op::Conv2D("conv48").set_input_x(conv_47).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_49 = op::Conv2D("conv49").set_input_x(conv_48).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_50 = op::Conv2D("conv50").set_input_x(conv_49).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_51 = op::Conv2D("conv51").set_input_x(conv_50).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_52 = op::Conv2D("conv52").set_input_x(conv_51).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_53 = op::Conv2D("conv53").set_input_x(conv_52).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_54 = op::Conv2D("conv54").set_input_x(conv_53).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_55 = op::Conv2D("conv55").set_input_x(conv_54).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_56 = op::Conv2D("conv56").set_input_x(conv_55).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_57 = op::Conv2D("conv57").set_input_x(conv_56).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_58 = op::Conv2D("conv58").set_input_x(conv_57).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_59 = op::Conv2D("conv59").set_input_x(conv_58).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_60 = op::Conv2D("conv60").set_input_x(conv_59).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_61 = 
op::Conv2D("conv61").set_input_x(conv_60).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_62 = op::Conv2D("conv62").set_input_x(conv_61).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_63 = op::Conv2D("conv63").set_input_x(conv_62).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_64 = op::Conv2D("conv64").set_input_x(conv_63).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_65 = op::Conv2D("conv65").set_input_x(conv_64).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_66 = op::Conv2D("conv66").set_input_x(conv_65).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_67 = op::Conv2D("conv67").set_input_x(conv_66).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_68 = op::Conv2D("conv68").set_input_x(conv_67).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_69 = op::Conv2D("conv69").set_input_x(conv_68).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_70 = op::Conv2D("conv70").set_input_x(conv_69).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_71 = op::Conv2D("conv71").set_input_x(conv_70).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_72 = op::Conv2D("conv72").set_input_x(conv_71).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_73 = op::Conv2D("conv73").set_input_x(conv_72).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_74 = op::Conv2D("conv74").set_input_x(conv_73).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_75 = 
op::Conv2D("conv75").set_input_x(conv_74).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_76 = op::Conv2D("conv76").set_input_x(conv_75).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_77 = op::Conv2D("conv77").set_input_x(conv_76).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_78 = op::Conv2D("conv78").set_input_x(conv_77).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_79 = op::Conv2D("conv79").set_input_x(conv_78).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_80 = op::Conv2D("conv80").set_input_x(conv_79).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_81 = op::Conv2D("conv81").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_82 = op::Conv2D("conv82").set_input_x(conv_81).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_83 = op::Conv2D("conv83").set_input_x(conv_82).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_84 = op::Conv2D("conv84").set_input_x(conv_83).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_85 = op::Conv2D("conv85").set_input_x(conv_84).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_86 = op::Conv2D("conv86").set_input_x(conv_85).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_87 = op::Conv2D("conv87").set_input_x(conv_86).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_88 = op::Conv2D("conv88").set_input_x(conv_87).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_89 = 
op::Conv2D("conv89").set_input_x(conv_88).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_90 = op::Conv2D("conv90").set_input_x(conv_89).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_91 = op::Conv2D("conv91").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_92 = op::Conv2D("conv92").set_input_x(conv_91).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_93 = op::Conv2D("conv93").set_input_x(conv_92).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_94 = op::Conv2D("conv94").set_input_x(conv_93).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_95 = op::Conv2D("conv95").set_input_x(conv_94).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_96 = op::Conv2D("conv96").set_input_x(conv_95).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_97 = op::Conv2D("conv97").set_input_x(conv_96).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_98 = op::Conv2D("conv98").set_input_x(conv_97).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_99 = op::Conv2D("conv99").set_input_x(conv_98).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_100 = op::Conv2D("conv100").set_input_x(conv_99).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_101 = op::Conv2D("conv101").set_input_x(conv_100).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_102 = op::Conv2D("conv102").set_input_x(conv_101).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_103 = 
op::Conv2D("conv103").set_input_x(conv_102).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_104 = op::Conv2D("conv104").set_input_x(conv_103).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_105 = op::Conv2D("conv105").set_input_x(conv_104).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_106 = op::Conv2D("conv106").set_input_x(conv_105).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_107 = op::Conv2D("conv107").set_input_x(conv_106).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_108 = op::Conv2D("conv108").set_input_x(conv_107).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_109 = op::Conv2D("conv109").set_input_x(conv_108).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_110 = op::Conv2D("conv110").set_input_x(conv_109).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_111 = op::Conv2D("conv111").set_input_x(conv_110).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_112 = op::Conv2D("conv112").set_input_x(conv_111).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_113 = op::Conv2D("conv113").set_input_x(conv_112).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_114 = op::Conv2D("conv114").set_input_x(conv_113).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_115 = op::Conv2D("conv115").set_input_x(conv_114).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_116 = op::Conv2D("conv116").set_input_x(conv_115).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_117 = 
op::Conv2D("conv117").set_input_x(conv_116).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_118 = op::Conv2D("conv118").set_input_x(conv_117).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_119 = op::Conv2D("conv119").set_input_x(conv_118).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_120 = op::Conv2D("conv120").set_input_x(conv_119).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_121 = op::Conv2D("conv121").set_input_x(conv_120).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_122 = op::Conv2D("conv122").set_input_x(conv_121).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_123 = op::Conv2D("conv123").set_input_x(conv_122).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_124 = op::Conv2D("conv124").set_input_x(conv_123).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_125 = op::Conv2D("conv125").set_input_x(conv_124).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_126 = op::Conv2D("conv126").set_input_x(conv_125).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_127 = op::Conv2D("conv127").set_input_x(conv_126).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_128 = op::Conv2D("conv128").set_input_x(conv_127).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_129 = op::Conv2D("conv129").set_input_x(conv_128).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_130 = op::Conv2D("conv130").set_input_x(conv_129).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - - std::vector inputs{data}; - std::vector 
outputs{conv_130}; - graph.SetInputs(inputs).SetOutputs(outputs); -} - -int GetDatTypeSize(DataType dt) { - int dailation = 1; - if (dt == ge::DT_FLOAT) - dailation = 4; - else if (dt == ge::DT_FLOAT16) - dailation = 2; - else if (dt == ge::DT_INT16) - dailation = 2; - else if (dt == ge::DT_UINT16) - dailation = 2; - else if (dt == ge::DT_INT32) - dailation = 4; - else if (dt == ge::DT_UINT32) - dailation = 4; - else if (dt == ge::DT_INT64) - dailation = 8; - else if (dt == ge::DT_UINT64) - dailation = 8; - else if (dt == ge::DT_INT8) - dailation = 1; - - return dailation; -} - -int buildConvGraph_new(Graph &graph, std::vector desc_var, std::vector name_var, int flag, - Format format) { - auto data_x_shape = op::Data("xShape").set_attr_index(0); - auto var = op::Variable(name_var[0]); - auto var1 = op::Variable(name_var[1]); //add one seat of ApplyMomentum() - auto label1 = op::Variable(name_var[2]); //add one seat of ApplyMomentum() - auto conv2dgrad = op::Conv2DBackpropFilterD("output_1"); - auto test2 = op::ApplyMomentum(); - - var.update_output_desc_y(desc_var[0]); - var1.update_output_desc_y(desc_var[1]); - label1.update_output_desc_y(desc_var[2]); - - graph.AddOp(var); - graph.AddOp(var1); - graph.AddOp(label1); - - auto conv2d = op::Conv2D().set_input_x(data_x_shape).set_input_filter(var).set_attr_strides({1, 1, 1, 1}).set_attr_pads({0,0,0,0}); - update_op_format(conv2d, format); - ge::TensorDesc tensor_desc_w = conv2d.GetInputDesc("filter"); - tensor_desc_w.SetFormat(format); - conv2d.UpdateInputDesc("filter", tensor_desc_w); - - if (flag >= 1) { - conv2dgrad.set_input_x(data_x_shape) - .set_attr_filter_size(desc_var[0].GetShape().GetDims()) - .set_input_out_backprop(conv2d) - .set_attr_strides({1, 1, 1, 1}) - .set_attr_pads({0, 0, 0, 0}); - update_op_format(conv2dgrad, format); - graph.AddOp(conv2dgrad); - } - if (flag >= 2) { - // set conv2dgrad var - test2.set_input_accum(var1) - .set_input_grad(conv2dgrad) - .set_input_lr(label1) - 
.set_input_momentum(label1) - .set_input_var(var); - graph.AddOp(test2); - } - - std::vector inputs{data_x_shape}; // set all val - std::vector outputs{conv2d}; - graph.SetInputs(inputs).SetOutputs(outputs); - graph.AddOp(conv2d); - - return 0; -} - -/// load bin data_fail -/// input_path: path of bin data_file -/// shapes: the shape of Tensor -/// ft: the format of Tensor -/// dt: the dataType of Tensor -Tensor load_variable_input_data(string input_path, std::vector shapes, Format ft, DataType dt) { - vector dim_info1; - - uint8_t *input_data = (uint8_t *)readTestDataFile(input_path, dim_info1); // common.h - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(shapes), ft, dt); - input_tensor_desc.SetRealDimCnt(shapes.size()); - Tensor input_tensor = Tensor(input_tensor_desc, input_data, GetDatTypeSize(dt) * dim_info1[dim_info1[0] + 1]); - return input_tensor; -} diff --git a/tests/st/resnet50/common.h b/tests/st/resnet50/common.h deleted file mode 100644 index 75805db7..00000000 --- a/tests/st/resnet50/common.h +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ST_RESNET50_GE_COMMON_H_ -#define ST_RESNET50_GE_COMMON_H_ -#include "common/ge_inner_error_codes.h" -#include "utils/tensor_utils.h" - -#define MY_USER_GE_LOGI(...) GE_LOG_INFO(1, __VA_ARGS__) -#define MY_USER_GE_LOGW(...) GE_LOG_WARN(1, __VA_ARGS__) -#define MY_USER_GE_LOGE(...) 
GE_LOG_ERROR(1, 3, __VA_ARGS__) - -#ifndef USER_GE_LOGI -#define USER_GE_LOGI MY_USER_GE_LOGI -#endif // USER_GE_LOGI - -#ifndef USER_GE_LOGW -#define USER_GE_LOGW MY_USER_GE_LOGW -#endif // USER_GE_LOGW - -#ifndef USER_GE_LOGE -#define USER_GE_LOGE MY_USER_GE_LOGE -#endif // USER_GE_LOGE - -/// train_flag is 0 when infer, train_flag is 1 when train.this param is set for RunGranph_readData() and -/// RunGraph_initData() -#define TRAIN_FLAG_INFER "infer" -#define TRAIN_FLAG_TRAIN "train" - -#include -#include -#include -#include -#include -#include -#include - -#include "ge_api.h" -#include "graph.h" -#include "ptest.h" -#include "ops/all_ops.h" -using namespace std; -using namespace ge; - -// read bin file and compile result -void update_op_format(Operator ops, Format format = ge::FORMAT_NCHW); -void getDimInfo(FILE *fp, std::vector &dim_info); -void *readTestDataFile(std::string infile, std::vector &dim_info); -void *readUint8TestDataFile(std::string infile, int size); -bool allclose(float *a, float *b, uint64_t count, float rtol, float atol); -bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol, float atol); -Tensor load_variable_input_data(string input_path, std::vector shapes, Format ft = ge::FORMAT_NCHW, - DataType dt = ge::DT_FLOAT); -// constructor Tensor -int GetDatTypeSize(DataType dt); -ge::Tensor genTensor(std::vector tensor_shape, Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); -ge::Tensor genTensor_withVaule(std::vector tensor_shape, float value = 1); -Tensor genTesnor_Shape_as_data(std::vector tensor_shape); -// Init GE -ge::Status GEInitialize_api(string train_flag = "0", string run_mode_path = "0"); -ge::Status GEInitialize_api_new(string train_flag = "infer", string run_mode = "fe"); -ge::Status GEFinalize_api(); -// constructor session and build graph -ge::Session *create_aipp_session(); -ge::Session *create_session(); -ge::Status session_add_and_run_graph(ge::Session *session, uint32_t 
graphId, Graph &graph, std::vector inputs, - std::vector &outputs); - -// common interface for infer -int RunGraph_initData(Graph &graph, string op_name, map> attr_test, - string train_flag = "infer", string run_mode_path = "fe"); -void Inputs_load_Data(string op_name, std::vector &input, map> attr_test, - Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); -bool comparaData(std::vector &output, string op_name, map> attr_test); -int RunGraph_readData(Graph &graph, string op_name, map> attr_test, - string train_flag = "infer", string run_mode_path = "fe", Format format = ge::FORMAT_NCHW, - DataType dt = ge::DT_FLOAT); - -// common interface for train -int buildCheckPointGraph(Graph &graph, map variables); -int buildInitGraph(Graph &graph, std::vector desc_var, std::vector name_var, - std::vector values_var); -int buildInitGraph_other_dataType(Graph &graph, std::vector desc_var, std::vector name_var); - -bool build_multi_input_multi_output_graph(Graph &graph); -void build_big_graph(Graph &graph, map> attr); -int buildConvGraph_new(Graph &graph, std::vector desc_var, std::vector name_var, int flag = 2); - -#endif // ST_RESNET50_GE_COMMON_H_ diff --git a/tests/st/resnet50/ptest.h b/tests/st/resnet50/ptest.h deleted file mode 100644 index 568969f8..00000000 --- a/tests/st/resnet50/ptest.h +++ /dev/null @@ -1,225 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ST_RESNET50_PTEST_H_ -#define ST_RESNET50_PTEST_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ptest { -class assertion_error : public std::exception { - public: - const char *what() const throw() { return "Assertion Exception"; } -}; - -class TestFixture { - public: - virtual void SetUp() {} - virtual void TearDown() {} - void Run() { _func(); } - void BindFunction(std::function function) { _func = function; } - void SetName(const std::string &name) { _name = name; } - std::string Name() const { return _name; } - virtual ~TestFixture() {} - - private: - std::function _func; - std::string _name; -}; - -enum TestResult { SUCCESS, FAILED, UNAVAILABLE, UNKNOWN, NOCASEFOUND }; - -class TestManager { - public: - static TestManager &GetSingleton() { - static TestManager instance; - return instance; - } - void RegisterTest(const std::string &name, TestFixture *fixture) { _testfixtures[name] = fixture; } - - const std::string GetRunningTestcaseName() const { return _running_testcase_name; } - - const std::list GetAllTestNames() const { - std::list result; - for (auto &t : _testfixtures) { - result.push_back(t.first); - } - return result; - } - - TestResult RunTest(const std::string &name) { - if (_testfixtures.find(name) == _testfixtures.end()) { - return NOCASEFOUND; - } - - _running_testcase_name = name; - - do { - SetTestResult(name, UNKNOWN); - _testfixtures[name]->SetUp(); - if (_testresults[name] == FAILED) { - _testresults[name] = UNAVAILABLE; - break; - } - SetTestResult(name, SUCCESS); - try { - _testfixtures[name]->Run(); - } catch (assertion_error &e) { - // Do nothing as the error has been handled by the TestManager. 
- } - _testfixtures[name]->TearDown(); - } while (0); - - return _testresults[name]; - } - void SetTestResult(const std::string &name, TestResult result) { _testresults[name] = result; } - TestResult GetTestResult(const std::string &name) { return _testresults[name]; } - - private: - std::map _testfixtures; - std::map _testresults; - std::string _running_testcase_name; -}; - -class TestFixtureRegister { - public: - TestFixtureRegister(const std::string &name, TestFixture *fixture, std::function function) { - fixture->BindFunction(function); - fixture->SetName(name); - TestManager::GetSingleton().RegisterTest(name, fixture); - } -}; -} // namespace ptest - -#define _STR(x) #x -#define _EMPTY_NAMESPACE - -#define _TEST(NAMESPACE, FIXTURECLASS, TESTNAME, CASENAME) \ - void g_func_##TESTNAME##_##CASENAME(void); \ - NAMESPACE::FIXTURECLASS g_fixture_##TESTNAME##_##CASENAME; \ - ptest::TestFixtureRegister g_register_##TESTNAME##_##CASENAME( \ - _STR(TESTNAME##_##CASENAME), &g_fixture_##TESTNAME##_##CASENAME, g_func_##TESTNAME##_##CASENAME); \ - void g_func_##TESTNAME##_##CASENAME(void) - -#define TEST(TESTNAME, CASENAME) _TEST(ptest, TestFixture, TESTNAME, CASENAME) - -#define TEST_F(TESTFIXTURE, CASENAME) _TEST(_EMPTY_NAMESPACE, TESTFIXTURE, TESTFIXTURE, CASENAME) - -#define EXPECT_TRUE(X) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << "Expectation Failed\n" \ - << "Testcase Name: " << test_name << "\n" \ - << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \ - } \ - } while (0); - -// With the macro definition ensures that the compiler can detect compiler warning. -#define Max_Log_Len 1024 -#define PRINT_ERR(lpszFormat, ...) 
\ - do { \ - char szTmpBuf[Max_Log_Len + 1] = {0}; \ - snprintf(szTmpBuf, Max_Log_Len, lpszFormat, ##__VA_ARGS__); \ - std::cerr << szTmpBuf << std::endl; \ - } while (0) - -// Increase the content of print error messages and error to facilitate rapid analysis -#define EXPECT_TRUE_C(X, ERR_TYPE, format, ...) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << " Expectation Failed." \ - << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \ - PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \ - } \ - } while (0) - -#define ASSERT_TRUE(X) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << "Assertion Failed\n" \ - << "Testcase Name: " << test_name << "\n" \ - << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \ - throw ptest::assertion_error(); \ - } \ - } while (0); - -// Add printing error information and error line content for quick analysis -#define ASSERT_TRUE_C(X, ERR_TYPE, format, ...) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << " Assertion Failed." 
\ - << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \ - PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \ - throw ptest::assertion_error(); \ - } \ - } while (0); - -#define CONFIG_ERR "CONFIG_ERR" -#define LOAD_MODEL_ERR "LOAD_MODEL_ERR" -#define FILE_READ_ERR "FILE_READ_ERR" -#define RUN_ERROR "RUN_ERROR" -#define MEM_ERROR "MEM_ERROR" -#define RESULT_ERR "RESULT_ERR" - -#define EXPECT_FALSE(X) EXPECT_TRUE(!(X)) -#define EXPECT_EQ(X, Y) EXPECT_TRUE(((X) == (Y))) -#define EXPECT_NE(X, Y) EXPECT_TRUE(((X) != (Y))) -#define EXPECT_GT(X, Y) EXPECT_TRUE(((X) > (Y))) -#define EXPECT_GE(X, Y) EXPECT_TRUE(((X) >= (Y))) -#define EXPECT_LT(X, Y) EXPECT_TRUE(((X) < (Y))) -#define EXPECT_LE(X, Y) EXPECT_TRUE(((X) <= (Y))) - -#define EXPECT_FALSE_C(X, ERR_TYPE, format, ...) EXPECT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_EQ_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_NE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_GT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_GE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_LT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_LE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__) - -#define ASSERT_FALSE(X) ASSERT_TRUE(!(X)) -#define ASSERT_EQ(X, Y) ASSERT_TRUE(((X) == (Y))) -#define ASSERT_NE(X, Y) ASSERT_TRUE(((X) != (Y))) -#define ASSERT_GT(X, Y) ASSERT_TRUE(((X) > (Y))) -#define ASSERT_GE(X, Y) ASSERT_TRUE(((X) >= (Y))) -#define ASSERT_LT(X, Y) ASSERT_TRUE(((X) < (Y))) -#define ASSERT_LE(X, Y) ASSERT_TRUE(((X) <= (Y))) - -#define ASSERT_FALSE_C(X, ERR_TYPE, format, ...) 
ASSERT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_EQ_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_NE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_GT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_GE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_LT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_LE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__) - -#endif // ST_RESNET50_PTEST_H_ diff --git a/tests/st/resnet50/resnet50_train.cc b/tests/st/resnet50/resnet50_train.cc deleted file mode 100644 index f1d1e58d..00000000 --- a/tests/st/resnet50/resnet50_train.cc +++ /dev/null @@ -1,852 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "ge_api.h" -#include "graph.h" -#include "ops/all_ops.h" -#include "types.h" -#include "utils/tensor_utils.h" - -using namespace std; -using namespace ge; -using namespace op; - -typedef bool (*Func)(Graph &graph); - -#define PADDING_MODE 6 -#define GRAD_PADDING_MODE 3 -vector pad_1{1, 1, 1, 1}; -vector pad_0{0, 0, 0, 0}; -vector stride_1{1, 1}; -vector stride_2{2, 2}; - -// (int out_channels, int h, int w, vector stride{1,1}, vector pad{1,1,1,1}, op::Data() input) -#define GENERATE_CONV_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \ - auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \ - LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_input_desc_x(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) << "'s weight shape is:" << in_channels << out_channels << h \ - << w << endl; \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) \ - << "'s input_x op's shape is:" << input.GetOutputDesc("y").GetShape().GetDim(2) << endl; \ - auto LAYER##_##BLK##_##OPNUM##_tmp_dims = input.GetOutputDesc("y").GetShape().GetDims(); \ - for (auto LAYER##_##BLK##_##OPNUM##_tmp_it = LAYER##_##BLK##_##OPNUM##_tmp_dims.begin(); \ - LAYER##_##BLK##_##OPNUM##_tmp_it != LAYER##_##BLK##_##OPNUM##_tmp_dims.end(); \ - LAYER##_##BLK##_##OPNUM##_tmp_it++) { \ - cout << 
*LAYER##_##BLK##_##OPNUM##_tmp_it; \ - } \ - cout << endl; \ - \ - auto LAYER##_##BLK##_##OPNUM = op::Conv2D(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_attr_strides({1, 1, stride[0], stride[1]}) \ - .set_attr_pads(pad) \ - .set_attr_data_format("NCHW"); \ - update_op_format(LAYER##_##BLK##_##OPNUM); - -#define GENERATE_CONSTANT(LAYER, BLK, OPNUM, CONSTNAME) \ - Tensor LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor; \ - float *LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data = new float[LAYER##_##BLK##_##OPNUM##_size]; \ - for (int i = 0; i < (int)LAYER##_##BLK##_##OPNUM##_size; i++) { \ - *(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data + i) = 0.01; \ - } \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetData((uint8_t *)LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data, \ - LAYER##_##BLK##_##OPNUM##_size * sizeof(float)); \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetTensorDesc(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant = \ - op::Constant().set_attr_value(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor); \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - delete[] LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data; - -#define GENERATE_CONV_VAR_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \ - uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \ - auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \ - LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + 
"_mom_weight"); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, weight); \ - auto LAYER##_##BLK##_##OPNUM##_weight_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_weight_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_weight); \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight_assign = \ - op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_weight) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_weight_constant); \ - \ - input.push_back(LAYER##_##BLK##_##OPNUM##_weight); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_weight); - -// (int out_channels, Operator& input) -#define GENERATE_BN_VAR(LAYER, BLK, OPNUM, out_channels, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \ - auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \ - LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \ - LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \ - LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \ - LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \ - 
LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - auto LAYER##_##BLK##_##OPNUM##_variance = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \ - LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM = op::FusedBatchNorm(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_b(LAYER##_##BLK##_##OPNUM##_b) \ - .set_input_mean(LAYER##_##BLK##_##OPNUM##_mean) \ - .set_input_variance(LAYER##_##BLK##_##OPNUM##_variance) \ - .set_attr_mode(1) \ - .set_attr_epsilon(1e-5) \ - .set_attr_is_training(true); - -#define GENERATE_BN_VAR_VAR(LAYER, BLK, OPNUM, out_channels, input) \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \ - uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \ - auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \ - LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \ - LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \ - LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \ - LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \ - 
LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - auto LAYER##_##BLK##_##OPNUM##_variance = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \ - LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_scale_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_scale_constant); \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale_assign = \ - op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_scale) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_scale_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, b); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b_assign = \ - op::Assign().set_input_ref(LAYER##_##BLK##_##OPNUM##_b).set_input_value(LAYER##_##BLK##_##OPNUM##_b_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_b); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_b) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_b_constant); \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mean); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mean) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mean_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, variance); \ - \ - auto LAYER##_##BLK##_##OPNUM##_variance_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_variance) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_variance_constant); \ - \ - input.push_back(LAYER##_##BLK##_##OPNUM##_scale); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_scale); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_b); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_b); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mean); \ - 
input.push_back(LAYER##_##BLK##_##OPNUM##_variance); - -// (int out_channels, Operator& input) -#define GENERATE_RELU_VAR(LAYER, BLK, OPNUM, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - auto LAYER##_##BLK##_##OPNUM = op::Relu(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x(input, "y"); - -// (int out_channels, Operator& input) -#define GENERATE_MAXPOOL_VAR(LAYER, BLK, OPNUM, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - auto LAYER##_##BLK##_##OPNUM = op::MaxPoolWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_attr_ksize({1, 3, 3, 1}) \ - .set_attr_padding("SAME") \ - .set_attr_strides({1, 2, 2, 1}); - -// (int out_channels, Operator& input) -#define GENERATE_ADD_VAR(LAYER, BLK, OPNUM, input_x1, input_x2) \ - auto LAYER##_##BLK##_##OPNUM = \ - op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x1(input_x1, "y").set_input_x2(input_x2, "y"); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_RESIDUAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \ - auto &LAYER##_##BLK##_input = input; \ - auto &LAYER##_##BLK##_stride = stride; \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \ - GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - LAYER##_##BLK##_relu1); \ - GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \ - GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \ - LAYER##_##BLK##_relu2); \ - GENERATE_BN_VAR(LAYER, BLK, bn3, 
out_channels, LAYER##_##BLK##_conv3); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn4, out_channels, LAYER##_##BLK##_conv4); \ - \ - GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, LAYER##_##BLK##_bn4); \ - GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \ - \ - auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \ - auto &LAYER##_##BLK##_output_label = "y"; - -#define MAKE_RESIDUAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn4, out_channels, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_NORMAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \ - auto &LAYER##_##BLK##_input = input; \ - auto &LAYER##_##BLK##_stride = stride; \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \ - GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, 
LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - LAYER##_##BLK##_relu1); \ - GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \ - GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \ - LAYER##_##BLK##_relu2); \ - GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \ - \ - GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, input); \ - GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \ - \ - auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \ - auto &LAYER##_##BLK##_output_label = "y"; - -#define MAKE_NORMAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_RESIDUAL_LAYER(LAYER, in_channels, out_channels, stride, input) \ - MAKE_RESIDUAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \ - \ - auto &LAYER##_output = LAYER##_blk1_output; \ - auto &LAYER##_output_label = LAYER##_blk1_output_label; - -#define MAKE_RESIDUAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \ - MAKE_RESIDUAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) 
-#define MAKE_NORMAL_LAYER(LAYER, in_channels, out_channels, stride, input) \ - MAKE_NORMAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \ - \ - auto &LAYER##_output = LAYER##_blk1_output; \ - auto &LAYER##_output_label = LAYER##_blk1_output_label; - -#define MAKE_NORMAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \ - MAKE_NORMAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input); - -#define MAKE_RESNET50(input) \ - MAKE_RESIDUAL_LAYER(layer1, 64, 256, stride_1, input) \ - MAKE_NORMAL_LAYER(layer2, 256, 256, stride_1, layer1_output) \ - MAKE_NORMAL_LAYER(layer3, 256, 256, stride_1, layer2_output) \ - MAKE_RESIDUAL_LAYER(layer4, 256, 512, stride_2, layer3_output) \ - MAKE_NORMAL_LAYER(layer5, 512, 512, stride_1, layer4_output) \ - MAKE_NORMAL_LAYER(layer6, 512, 512, stride_1, layer5_output) \ - MAKE_NORMAL_LAYER(layer7, 512, 512, stride_1, layer6_output) \ - MAKE_RESIDUAL_LAYER(layer8, 512, 1024, stride_2, layer7_output) \ - MAKE_NORMAL_LAYER(layer9, 1024, 1024, stride_1, layer8_output) \ - MAKE_NORMAL_LAYER(layer10, 1024, 1024, stride_1, layer9_output) \ - MAKE_NORMAL_LAYER(layer11, 1024, 1024, stride_1, layer10_output) \ - MAKE_NORMAL_LAYER(layer12, 1024, 1024, stride_1, layer11_output) \ - MAKE_NORMAL_LAYER(layer13, 1024, 1024, stride_1, layer12_output) \ - MAKE_RESIDUAL_LAYER(layer14, 1024, 2048, stride_2, layer13_output) \ - MAKE_NORMAL_LAYER(layer15, 2048, 2048, stride_1, layer14_output) \ - MAKE_NORMAL_LAYER(layer16, 2048, 2048, stride_1, layer15_output) \ - \ - auto &resnet50_output = layer16_output; \ - auto &resnet50_output_label = layer16_output_label; - -#define MAKE_RESNET50_VAR(inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer1, 64, 256, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer2, 256, 256, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer3, 256, 256, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer4, 256, 512, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer5, 512, 512, stride_1, inputs) \ - 
MAKE_NORMAL_LAYER_VAR(layer6, 512, 512, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer7, 512, 512, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer8, 512, 1024, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer9, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer10, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer11, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer12, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer13, 1024, 1024, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer14, 1024, 2048, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer15, 2048, 2048, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer16, 2048, 2048, stride_1, inputs) \ -//--------------------------------------------------------------------------------------------- - -// (Operator& input) -#define GENERATE_BIASADD_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::BiasAddGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x(input, input.name_out_dx()); - -// (Operator& input) -#define GENERATE_MATMUL_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::MatMul(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_x1(input); - -// (Operator& input) -#define GENERATE_RESHAPE_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::Reshape(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_tensor(input); - -// (Operator& input_grad, Operator& input_maxpool) -#define GENERATE_MAXPOOL_GRAD(LAYER, BLK, OPNUM, input_grad, input_maxpool) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::MaxPoolGradWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_input_grad(input_grad) \ - .set_input_argmax(input_maxpool, input_maxpool.name_out_argmax()) \ - .set_attr_ksize({1, 1, 3, 3}) \ - .set_attr_strides({1, 1, 
2, 2}) \ - .set_attr_padding("SAME"); - -// (Operator& input_dy) -#define GENERATE_RELU_GRAD(LAYER, BLK, OPNUM, input_dy, dy_label) \ - auto LAYER##_##BLK##_##OPNUM##_grad = op::ReluGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_gradients(input_dy, dy_label) \ - .set_input_features(LAYER##_##BLK##_##OPNUM, "y"); - -// (Operator& input_dy) -#define GENERATE_BN_GRAD(LAYER, BLK, OPNUM, input_dy) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::FusedBatchNormGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_dy(input_dy, "backprops") \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_save_mean(LAYER##_##BLK##_##OPNUM, "save_mean") \ - .set_input_save_inv_variance(LAYER##_##BLK##_##OPNUM, "save_inv_variance") \ - .set_attr_epsilon(0.0001); \ - \ - auto LAYER##_##BLK##_##OPNUM##_momentum_scale = \ - op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_scale) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_scale()) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_momentum_b = \ - op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_b) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_bias()) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_b); - -// (Operator& input) -#define GENERATE_CONV_PROP_FILTER(LAYER, BLK, OPNUM, input_bngrad, stride) \ - auto LAYER##_##BLK##_##OPNUM##_propfilter = \ - op::Conv2DBackpropFilterD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propfilter")) \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_attr_filter_size(LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetDims()) \ - 
.set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ - .set_attr_strides(stride) \ - .set_attr_pads({1, 1, 1, 1}); \ - \ - update_op_format(LAYER##_##BLK##_##OPNUM##_propfilter); \ - auto LAYER##_##BLK##_##OPNUM##_momentum_weight = op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_weight) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_propfilter) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_weight); - -///.set_attr_input_size({input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(0),LAYER##_##BLK##_##OPNUM##_weight.GetOutputDesc().GetShape().GetDim(1), -///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(2)*stride[2], -///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(3)*stride[3]}) -#define GENERATE_CONV_PROP_INPUT(LAYER, BLK, OPNUM, input_bngrad, stride) \ - auto LAYER##_##BLK##_##OPNUM##_propinput = \ - op::Conv2DBackpropInputD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propinput")) \ - .set_attr_input_size(LAYER##_##BLK##_##OPNUM##_input.GetOutputDesc("y").GetShape().GetDims()) \ - .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ - .set_attr_strides(stride) \ - .set_attr_pads({1, 1, 1, 1}); \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ - << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(3) * stride[3] << endl; \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ - << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(2) * stride[2] << endl; \ - \ - update_op_format(LAYER##_##BLK##_##OPNUM##_propinput); \ - auto &LAYER##_##BLK##_##OPNUM##_propinput_label = "y" - -// (int out_channels, Operator& input) -#define GENERATE_ADD_GRAD(LAYER, BLK, OPNUM, input_x1, input_x1_label, input_x2, input_x2_label) \ - auto 
LAYER##_##BLK##_##OPNUM##_grad = op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x1(input_x1, input_x1_label) \ - .set_input_x2(input_x2, input_x2_label); - -// (Operator& input) -#define MAKE_RESIDUAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ - GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn4, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ - LAYER##_##BLK##_conv4_propinput, LAYER##_##BLK##_conv4_propinput_label); \ - \ - auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ - auto &LAYER##_##BLK##_grad_output_label = "y" - -// (Operator& input) -#define MAKE_NORMAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ - GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); 
\ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ - input_dy, dy_label); \ - \ - auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ - auto &LAYER##_##BLK##_grad_output_label = "y" - -// (Operator& input_dy) -#define MAKE_RESIDUAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ - MAKE_RESIDUAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ - \ - auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ - auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; - -// (Operator& input_dy) -#define MAKE_NORMAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ - MAKE_NORMAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ - \ - auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ - auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; - -#define MAKE_RESNET50_GRAD(input_dy, dy_label) \ - MAKE_NORMAL_LAYER_GRAD(layer16, input_dy, dy_label) \ - MAKE_NORMAL_LAYER_GRAD(layer15, layer16_grad_output, layer16_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer14, layer15_grad_output, 
layer15_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer13, layer14_grad_output, layer14_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer12, layer13_grad_output, layer13_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer11, layer12_grad_output, layer12_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer10, layer11_grad_output, layer11_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer9, layer10_grad_output, layer10_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer8, layer9_grad_output, layer9_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer7, layer8_grad_output, layer8_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer6, layer7_grad_output, layer7_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer5, layer6_grad_output, layer6_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer4, layer5_grad_output, layer5_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer3, layer4_grad_output, layer4_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer2, layer3_grad_output, layer3_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer1, layer2_grad_output, layer2_grad_output_label) \ - \ - auto &resnet50_grad_output = layer1_grad_output; \ - auto &resnet50_grad_output_label = layer1_grad_output_label; - -bool resnet50(Graph &graph) { - auto data = op::Data().set_attr_index(0); - auto data1 = op::Data().set_attr_index(1); - TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); - data.update_output_desc_y(shape_desc); - - TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); - - auto var = op::Variable("conv2d_var"); - var.update_output_desc_y(desc); - var.update_input_desc_x(desc); - - auto varw1 = op::Variable("conv2d_varw1"); - varw1.update_output_desc_y(desc); - - auto conv2d = op::Conv2D("Translate") - .set_input_x(data) - .set_input_filter(var) - .set_attr_strides({1, 1, 2, 2}) - .set_attr_pads({2, 3, 2, 3}) - .set_attr_data_format("NCHW"); - TensorDesc desc_y; - desc_y.SetFormat(FORMAT_NCHW); // shape: 32 64 112 112 - 
conv2d.update_output_desc_y(desc_y); - - TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); - auto var1 = op::Variable("bn_var1"); - var1.update_output_desc_y(desc1); - - auto var2 = op::Variable("bn_var2"); - var2.update_output_desc_y(desc1); - - auto var3 = op::Variable("bn_var3"); - var3.update_output_desc_y(desc1); - - auto var4 = op::Variable("bn_var4"); - var4.update_output_desc_y(desc1); - - TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); - - auto var5 = op::Variable("var5"); - var5.update_output_desc_y(desc2); - - auto var6 = op::Variable("var6"); - var6.update_output_desc_y(desc2); - - TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - - auto label1 = op::Variable("label1"); - label1.update_output_desc_y(desclabel); - - TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - auto matvar = op::Variable("matvar"); - matvar.update_output_desc_y(descmatlabel); - - auto matvar1 = op::Variable("matvar1"); - matvar1.update_output_desc_y(descmatlabel); - - auto bn = op::FusedBatchNorm() - .set_input_x(conv2d, "y") - .set_input_scale(var1) - .set_input_b(var2) - .set_input_mean(var3) - .set_input_variance(var4) - .set_attr_mode(1) - .set_attr_epsilon(1e-5) - .set_attr_is_training(true) - .set_attr_is_training_fusion(true) - .set_attr_moving_average_fraction(994352128); - - auto relu = op::Relu().set_input_x(bn, "y"); - - auto maxpool = op::MaxPoolWithArgmax() - .set_input_x(relu, "y") - .set_attr_ksize({1, 3, 3, 1}) - .set_attr_padding("SAME") - .set_attr_strides({1, 2, 2, 1}); - - MAKE_RESNET50(maxpool); - std::vector inputs{data}; //,var,var1,layer1_blk1_bn1_b,var3,var4}; - std::vector outputs{}; - - graph.SetInputs(inputs).SetOutputs(outputs); - return true; -} - -#define GENERATE_CONSTANT_USE_DESC(OPNUM, desc, val) \ - uint32_t OPNUM##_size = desc.GetShape().GetShapeSize(); \ - Tensor OPNUM##_tensor; \ - OPNUM##_tensor.SetTensorDesc(desc); \ - if (desc.GetDataType() == DT_FLOAT) { \ 
- float *OPNUM##_data = new float[OPNUM##_size]; \ - for (int i = 0; i < (int)OPNUM##_size; i++) { \ - *(OPNUM##_data + i) = val; \ - } \ - OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(float)); \ - delete[] OPNUM##_data; \ - } \ - if (desc.GetDataType() == DT_INT64) { \ - int64_t *OPNUM##_data = new int64_t[OPNUM##_size]; \ - for (int i = 0; i < (int)OPNUM##_size; i++) { \ - *(OPNUM##_data + i) = val; \ - } \ - OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(int64_t)); \ - delete[] OPNUM##_data; \ - } \ - auto OPNUM##_constant = op::Constant().set_attr_value(OPNUM##_tensor); \ - OPNUM##_constant.update_output_desc_y(desc); - -#define GENERATE_VAR_LAYER(OPNUM, desc, input) \ - auto OPNUM##_weight = op::Variable(string(#OPNUM)); \ - OPNUM##_weight.update_output_desc_y(desc); \ - auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ - \ - input.push_back(OPNUM##_weight); - -#define GENERATE_VAR_LAYER_1(OPNUM, desc, var_format, input, name) \ - auto OPNUM##_weight = op::Variable(string(name)); \ - OPNUM##_weight.update_output_desc_y(desc); \ - auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ - \ - input.push_back(OPNUM##_weight); - -int BuildInitVarGraph(Graph &graph) { - std::vector inputs{}; - std::vector outputs{}; - - TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(conv2d_var, desc, 0.01); - GENERATE_VAR_LAYER(conv2d_var, desc, inputs); - - GENERATE_CONSTANT_USE_DESC(conv2d_varw1, desc, 0.01); - GENERATE_VAR_LAYER(conv2d_varw1, desc, inputs); - - TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(bn_var1, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var1, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var2, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var2, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var3, desc1, 0.01); - 
GENERATE_VAR_LAYER(bn_var3, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var4, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var4, desc1, inputs); - - TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(var5, desc2, 0.01); - GENERATE_VAR_LAYER(var5, desc2, inputs); - GENERATE_CONSTANT_USE_DESC(var6, desc2, 0.01); - GENERATE_VAR_LAYER(var6, desc2, inputs); - - TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(label1, desclabel, 0.1); - GENERATE_VAR_LAYER(label1, desclabel, inputs); - - TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(matvar, descmatlabel, 0.01); - GENERATE_VAR_LAYER(matvar, descmatlabel, inputs); - GENERATE_CONSTANT_USE_DESC(matvar1, descmatlabel, 0.01); - GENERATE_VAR_LAYER(matvar1, descmatlabel, inputs); - - MAKE_RESNET50_VAR(inputs); - - TensorDesc ctrl(ge::Shape({1, 1, 1, 1}), FORMAT_NCHW, DT_INT64); - - GENERATE_CONSTANT_USE_DESC(iterations_per_loop, ctrl, 100); - GENERATE_VAR_LAYER_1(iterations_per_loop, ctrl, "4D", inputs, "npu_runconfig/iterations_per_loop"); - GENERATE_CONSTANT_USE_DESC(loop_cond, ctrl, 0); - GENERATE_VAR_LAYER_1(loop_cond, ctrl, "4D", inputs, "npu_runconfig/loop_cond"); - GENERATE_CONSTANT_USE_DESC(one, ctrl, 1); - GENERATE_VAR_LAYER_1(one, ctrl, "4D", inputs, "npu_runconfig/one"); - GENERATE_CONSTANT_USE_DESC(zero, ctrl, 0); - GENERATE_VAR_LAYER_1(zero, ctrl, "4D", inputs, "npu_runconfig/zero"); - - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} -int TestBuildGraphTest(Func fun, Graph &graph, vector &inputs, vector &outputs) { - bool graph_ret = fun(graph); - ge::Tensor shapeTensor; - TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); - uint32_t sizeshape = shape_desc.GetShape().GetShapeSize(); - printf("[test] desc size filter shape:%u\n", sizeshape); - shapeTensor.SetTensorDesc(shape_desc); - vector dataValuec; - for (int i = 0; i < 
sizeshape; i++) { - dataValuec.push_back(1); - } - - shapeTensor.SetData((uint8_t *)dataValuec.data(), 4 * sizeshape); - inputs.push_back(shapeTensor); - - ge::Tensor shapeTensor1; - TensorDesc shape_desc1(ge::Shape({1, 32, 1, 1}), FORMAT_NCHW, DT_FLOAT); - uint32_t sizeshape1 = shape_desc1.GetShape().GetShapeSize(); - printf("[test] desc size filter shape:%u\n", sizeshape1); - shapeTensor1.SetTensorDesc(shape_desc1); - vector dataValuec1; - for (int i = 0; i < sizeshape1; i++) { - dataValuec1.push_back(1); - } - - shapeTensor1.SetData((uint8_t *)dataValuec1.data(), 4 * sizeshape1); - - return 0; -} -int runTrainGraph(Func fun, int loopCount) { - printf("GE BBIT begin...\n"); - std::chrono::system_clock::time_point start = std::chrono::system_clock::now(); - - std::map ge_options = { - {"device_id", "0"}, {"rank_table_file", ""}, {"graphType", "1"}, {"ge.graphRunMode", "2"}}; - - std::map session_options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; - - ge::Status ret; - - // init ge - ret = GEInitialize_api_new("train", "fe,plugin"); - printf("ge::GEInitialize ret:%d\n", ret); - - // init session - ge::Session session(session_options); - - int graphId_initvar = 1; - ge::Graph graph_initvar("initVarGraph"); - bool graph_ret = BuildInitVarGraph(graph_initvar); - - // session addgraph - int graphId = 0; - - // build graph - ge::Graph graph("bigGraph"); - std::vector inputs; - ge::Tensor outputTensor; - std::vector outputs; - graph_ret = TestBuildGraphTest(fun, graph, inputs, outputs); - printf("TestReluGrad ret:%d\n", graph_ret); - - ret = session.AddGraph(graphId_initvar, graph_initvar); - printf("session.AddVarGraph ret:%d\n", ret); - if (ret) return ret; - - ret = session.AddGraph(graphId, graph); - printf("session.AddGraph ret:%d\n", ret); - if (ret) return ret; - - std::vector inputs1; - std::vector outputs1; - ret = session.RunGraph(graphId_initvar, inputs1, outputs1); - - if (ret != SUCCESS) { - return ret; - } - // add loop for test of stabilty: - for (int i = 0; i < 
loopCount; i++) { - // session rungraph - printf("loopCount:%d\n", loopCount); - ret = session.RunGraph(graphId, inputs, outputs); - printf("session.RunGraph ret:%d\n", ret); - if (ret) return ret; - - // define 99999 as loop forever - if (loopCount == 99999) i = 0; - } - std::chrono::system_clock::time_point end = std::chrono::system_clock::now(); - auto millisecondsduration = std::chrono::duration_cast(end - start); - auto ms = millisecondsduration.count(); - std::stringstream ss; - ss << ms << "ms"; - std::string run_time = ss.str(); - printf("run time is : %s \n", run_time.c_str()); - - return 0; -} - -int main(int argc, char *argv[]) { - // add loop for test of stabilty: - int loopCount = 1; - if (argc >= 2) loopCount = atoi(argv[1]); - - Status ret = SUCCESS; - ret = runTrainGraph(resnet50, loopCount); - if (ret == SUCCESS) { - std::cout << "[train resnet50 success]" << std::endl; - } else { - std::cout << "!!! train resnet50 fail !!!" << std::endl; - } - return ret; -} diff --git a/tests/st/test_ge_st.py b/tests/st/test_ge_st.py deleted file mode 100644 index b5479cfc..00000000 --- a/tests/st/test_ge_st.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -ge st test. 
-""" -import pytest -import subprocess -import os - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_card -@pytest.mark.component_ge -def test_resnet50_train(): - ge_st_dir=os.environ.get('GE_ST_DIR', - '/home/jenkins/workspace/release_pkg/gate/graphengine_lib') - ge_lib_dir=os.environ.get('GRAPHENGINE_LIB', '/home/jenkins/workspace/release_pkg/gate/graphengine_lib') - - real_pythonpath=os.environ.get('REAL_PYTHONPATH') - pythonpath=os.environ.get('PYTHONPATH') - if real_pythonpath: - if pythonpath: - os.environ['PYTHONPATH']=real_pythonpath+':'+pythonpath - else: - os.environ['PYTHONPATH']=real_pythonpath - print('PYTHONPATH: '+os.environ.get('PYTHONPATH')) - - os.environ['ASCEND_OPP_PATH']='/usr/local/Ascend/opp' - os.environ['ASCEND_ENGINE_PATH']='/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:' \ - '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:' \ - '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so:'+ \ - ge_lib_dir + '/libge_local_engine.so' - print('ASCEND_OPP_PATH: '+os.environ.get('ASCEND_OPP_PATH')) - print('ASCEND_ENGINE_PATH: '+os.environ.get('ASCEND_ENGINE_PATH')) - print('LD_LIBRARY_PATH: '+os.environ.get('LD_LIBRARY_PATH')) - - cmd=ge_st_dir + '/st_resnet50_train' - print('cmd: '+cmd) - os.environ['SLOG_PRINT_TO_STDOUT']="1" - ret=subprocess.call([cmd], shell=True) - assert ret==0 - diff --git a/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc b/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc index 68416409..5b87939f 100644 --- a/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc +++ b/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc @@ -306,8 +306,8 @@ class UtestLogicalStreamAllocator : public testing::Test { max_parallel_num["aicpu"] = parallel_num; Status status = AssignLogicalStreams({const1, const2, get_next, genmask1, genmask2, domask, subgraph4, subgraph5, - 
subgraph6, allreduce1, allreduce2, apply1, apply2}, - confs, max_parallel_num); + subgraph6, allreduce1, allreduce2, apply1, apply2}, + confs, max_parallel_num); EXPECT_EQ(status, ge::SUCCESS); EXPECT_EQ(GetStream(get_next), 0); diff --git a/tests/ut/ge/graph/load/new_op_test_utils.h b/tests/ut/ge/graph/load/new_op_test_utils.h index 325a3f1f..4cbc78ac 100644 --- a/tests/ut/ge/graph/load/new_op_test_utils.h +++ b/tests/ut/ge/graph/load/new_op_test_utils.h @@ -154,7 +154,7 @@ class OmeTestOpUtils { if (model->HasAttr(MODEL_ATTR_TASKS)) { ge::Buffer task_buffer; GE_CHK_BOOL_RET_STATUS(ge::AttrUtils::GetZeroCopyBytes(model, MODEL_ATTR_TASKS, task_buffer), FAILED, - "Get bytes failed."); + "Get bytes failed."); std::shared_ptr task = ge::MakeShared(); GE_CHECK_NOTNULL(task); GE_IF_BOOL_EXEC(task_buffer.GetData() == nullptr, GELOGE(FAILED, "Get data fail"); return FAILED); diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h new file mode 100644 index 00000000..7e0f94a8 --- /dev/null +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index a5f43be9..8c0c1847 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -33,18 +33,22 @@ typedef enum { FMK_KERNEL_TYPE_RESERVED } FwkkernelType_t; +#pragma pack(push, 1) typedef struct { uint32_t fwkKernelType; // FwkkernelType_t union { ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; } fwkKernelBase; -} __attribute__((packed)) STR_FWK_OP_KERNEL; +} STR_FWK_OP_KERNEL; +#pragma pack(pop) +#pragma pack(push, 1) struct SessionInfo { uint64_t sessionId; uint64_t kernelId; bool sessFlag; -} __attribute__((packed)); +}; +#pragma pack(pop) #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 79d94023..50b39d91 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType { FWK_ADPT_UPDATE_INPUT_OUTPUT }; +#pragma pack(push, 1) // API Parameter Structure struct StrFWKKernel { FWKOperateType opType; @@ -89,31 +90,39 @@ struct StrFWKKernel { 
uint64_t extInfoLen; // extend info total length uint64_t extInfoAddr; // extend info addr, ExtInfo structure -} __attribute__((packed)); +}; +#pragma pack(pop) typedef StrFWKKernel FWKOperateParam; // Extent info ShapeAndType const uint32_t kMaxShapeDims = 8; +#pragma pack(push, 1) struct ShapeAndType { int32_t type; int64_t dims[kMaxShapeDims]; -} __attribute__((packed)); +}; +#pragma pack(pop) // Extend info structure for extInfoAddr const uint32_t kExtInfoHeadSize = 8; + +#pragma pack(push, 1) struct ExtInfo { int32_t infoType; // extend type uint32_t infoLen; // length for infoMsg char infoMsg[0]; // extend value -} __attribute__((packed)); +}; +#pragma pack(pop) +#pragma pack(push, 1) struct ResultSummary { uint64_t shape_data_ptr; // shape data addr, need convert to void* uint64_t shape_data_size; // num of dims uint64_t raw_data_ptr; // raw data addr, need convert to void* uint64_t raw_data_size; // size of raw data -} __attribute__((packed)); +}; +#pragma pack(pop) } // end namespace FWKAdapter } // namespace aicpu diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 8194097e..9facd20c 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -22,7 +22,8 @@ #ifndef HCCL_BASE_H_ #define HCCL_BASE_H_ - +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -95,6 +96,33 @@ typedef void *rtStream_t; */ typedef void *rtModel_t; +struct HcomOperation { + std::string hcclType; + void *inputPtr; + void *outputPtr; + u64 count; + HcclDataType dataType; + HcclReduceOp opType; + u32 root; + + HcomOperation() + { + inputPtr = nullptr; + outputPtr = nullptr; + count = 0; + dataType = HCCL_DATA_TYPE_RESERVED; + opType = HCCL_REDUCE_RESERVED; + root = 0; + } +}; + +struct HcomRemoteAccessAddrInfo { + u32 remotetRankID; + u64 remoteAddr; // host embedding table address + u64 localAddr; // device HBM address + u64 length; // Memory Length in Bytes +}; + #ifdef 
__cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h new file mode 100644 index 00000000..50a64795 --- /dev/null +++ b/third_party/fwkacllib/inc/hccl/hccl_types.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver 
api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction opperation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index de140b4b..e491d43f 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -24,6 +24,8 @@ #include #include +#include +#include #ifdef __cplusplus extern "C" { @@ -40,6 +42,15 @@ extern "C" { */ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); +/** + * @brief Get the rank number in the group. + * + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. + * @return HcclResult + */ +HcclResult HcomGetRankSize(const char *group, u32 *rankSize); + /** * @brief Get the rank number of this rank's server within the group. 
* @@ -49,6 +60,15 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); */ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); +/** + * @brief Get the rank number of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. + * @return HcclResult + */ +HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); + /** * @brief Get the rank id of this rank. * @@ -58,6 +78,15 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); */ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); +/** + * @brief Get the rank id of this rank. + * + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. + * @return HcclResult + */ +HcclResult HcomGetRankId(const char *group, u32 *rankId); + /** * @brief Get the local rank id of this rank's server within the group. * @@ -67,6 +96,15 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); */ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); +/** + * @brief Get the local rank id of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankId A pointer identifying the local rank id. + * @return HcclResult + */ +HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); + /** * @brief Get the world rank id according to the group rank id. * @@ -77,6 +115,16 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); */ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); +/** + * @brief Get the world rank id according to the group rank id. + * + * @param group A string identifying the group name. + * @param groupRank An integer(u32) identifying the group rank id. + * @param worldRank A pointer identifying the world rank id. 
+ * @return HcclResult + */ +HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); + /** * @brief Get the group rank id according to the world rank id. * @@ -87,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return HcclResult + */ +HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); + /** * @brief Create group. * @@ -97,6 +155,16 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, */ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); +/** + * @brief Create group. + * + * @param group A string identifying the group name. + * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. + * @return HcclResult + */ +HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); + /** * @brief Destroy group * @@ -105,6 +173,14 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); */ HcclResult hcom_destroy_group(const char *group); +/** + * @brief Destroy group + * + * @param group A string identifying the group name. + * @return HcclResult + */ +HcclResult HcomDestroyGroup(const char *group); + /** * @brief Set the gradient split strategy with in the group, according to gradient index. 
* @@ -115,6 +191,16 @@ HcclResult hcom_destroy_group(const char *group); */ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +/** + * @brief Set the gradient split strategy with in the group, according to gradient index. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); + /** * @brief Set the gradient split strategy with in the group, according to gradient data size. * @@ -125,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen */ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +/** + * @brief Set the gradient split strategy with in the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); + /** * @brief Register memories and init resources for remote access. * @@ -134,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment */ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +/** + * @brief Register memories and init resources for remote access. + * + * @param addrList memory addresses for remote access. + * @param count number of remote memory addresses. 
+ * @return HcclResult + */ +extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); + +HcclResult HcomExecInitialize(); + +HcclResult HcomExecFinalize(); + +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index c74f95ac..66638bbb 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -50,7 +50,7 @@ typedef int (*mmFilter)(const mmDirent *entry); typedef int (*mmFilter2)(const mmDirent2 *entry); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); -typedef size_t mmSize_t; +typedef size_t mmSize_t; //lint !e410 !e1051 typedef off_t mmOfft_t; typedef pid_t mmPid_t; typedef long MM_LONG; @@ -215,6 +215,10 @@ typedef struct { #define S_IWRITE S_IWUSR #endif +#define mm_no_argument no_argument +#define mm_required_argument required_argument +#define mm_optional_argument optional_argument + #define M_FILE_RDONLY O_RDONLY #define M_FILE_WRONLY O_WRONLY #define M_FILE_RDWR O_RDWR @@ -412,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); // Poll related interface MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); -MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, - pmmPollData polledData, mmPollBack pollBack); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, + INT32 fdCount, + INT32 timeout, + mmCompletionHandle handleIOCP, + pmmPollData polledData, + mmPollBack pollBack); 
MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index a5a22b4f..aa58e722 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -237,6 +237,11 @@ typedef struct { } mmThreadAttr; typedef VOID (*mmPf)(VOID); + +#define mm_no_argument 0 +#define mm_required_argument 1 +#define mm_optional_argument 2 + #define M_FILE_RDONLY GENERIC_READ #define M_FILE_WRONLY GENERIC_WRITE #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 85f16cc5..aa8263f9 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -18,6 +18,7 @@ #define __CCE_RUNTIME_BASE_H__ #include +#include "toolchain/prof_callback.h" #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { @@ -86,10 +87,20 @@ typedef struct rtExceptionInfo { uint32_t deviceid; } rtExceptionInfo; +typedef struct rtTaskFailInfo { + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; +} rtTaskFailInfo; + typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); +typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); + typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -146,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* */ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); +/** + * @ingroup profiling_base + * @brief ts set profiling reporter callback. 
+ */ +RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); + /** * @ingroup dvrt_base * @brief Returns the last error from a runtime call. @@ -184,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); */ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); +/** + * @ingroup dvrt_base + * @brief register callback for fail task + * @param [in] uniName unique register name, can't be null + * @param [in] callback fail task callback function + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); + /** * @ingroup dvrt_base * @brief notify handle. diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index c471f128..c35a1278 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig { typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; -/** - * @ingroup - * @brief get platform - * @param [in] platForm - * @return platForm - */ -RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm); - /** * @ingroup * @brief get AI core count @@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate */ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); -/** - * @ingroup - * @brief set platform in gen ctx - * @param [in] platForm - * @return RT_ERROR_NONE for ok, errno for failed - */ -RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); /** * @ingroup diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 6066a12e..665c8b82 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -23,6 +23,7 @@ #include #include 
"tdt/status.h" #include "tdt/data_common.h" +#include "toolchain/prof_callback.h" #ifdef __cplusplus extern "C" { @@ -37,7 +38,7 @@ extern "C" { * Used for the Framework process to communicate with the TSDDaemon process, * and notify TSD to complete the initialization of other processes * -* @param phyDeviceId [IN] type #unsigned int. Physical device ID +* @param logicDeviceId [IN] type #unsigned int. Logic device ID * @param rankSize [IN] type #unsigned int. The rankSize of the training. * The default value is 1. When rankSize is greater than 1, * HCCP will be pulled to perform set communication related operations. @@ -49,7 +50,7 @@ extern "C" { * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); +TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); /** * @ingroup Close @@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); +TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); /** * @ingroup UpdateProfilingMode @@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); * @li tsd_client.h: Header file where the interface declaration is located. 
* @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag); +TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); + +/** +* @ingroup TsdSetMsprofReporterCallback +* @brief 用于推理场景下设置aicpu的profilng的callback函数 +* +* @par Function +* 设置offline模式下aicpu_sd进程的profiling的callback函数 +* +* @param callback [IN] type #MsprofReporterCallback. 回调函数 +* @retval TDT_OK Success +* @retval OtherValues Failure +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +* @li prof_callback.h: Headerfile where 'MsprofReporterCallback' defined +*/ +TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); /** * @ingroup CreateCmdParameterObj diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index 430ed14d..efb37cfb 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -17,380 +17,76 @@ #ifndef MSPROFILER_API_PROF_ACL_API_H_ #define MSPROFILER_API_PROF_ACL_API_H_ -#define MSVP_MAX_DEV_NUM 64 -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE - - -#if (OS_TYPE != LINUX) -#define MSVP_PROF_API __declspec(dllexport) -#else -#define MSVP_PROF_API __attribute__((visibility("default"))) -#endif - // DataTypeConfig -#define PROF_ACL_API 0x0001 -#define PROF_TASK_TIME 0x0002 -#define PROF_AICORE_METRICS 0x0004 -#define PROF_AICPU_TRACE 0x0008 -#define PROF_MODEL_EXECUTE 0x0010 -#define PROF_RUNTIME_API 0x0020 -#define PROF_RUNTIME_TRACE 0x0040 -#define PROF_SCHEDULE_TIMELINE 0x0080 -#define PROF_SCHEDULE_TRACE 0x0100 -#define PROF_AIVECTORCORE_METRICS 0x0200 -#define PROF_SUBTASK_TIME 0x0400 - -#define 
PROF_TRAINING_TRACE 0x0800 -#define PROF_HCCL_TRACE 0x1000 -#define PROF_DATA_PROCESS 0x2000 -#define PROF_TASK_TRACE 0x3842 +#define PROF_ACL_API 0x00000001 +#define PROF_TASK_TIME 0x00000002 +#define PROF_AICORE_METRICS 0x00000004 +#define PROF_AICPU_TRACE 0x00000008 +#define PROF_MODEL_EXECUTE 0x00000010 +#define PROF_RUNTIME_API 0x00000020 +#define PROF_RUNTIME_TRACE 0x00000040 +#define PROF_SCHEDULE_TIMELINE 0x00000080 +#define PROF_SCHEDULE_TRACE 0x00000100 +#define PROF_AIVECTORCORE_METRICS 0x00000200 +#define PROF_SUBTASK_TIME 0x00000400 + +#define PROF_TRAINING_TRACE 0x00000800 +#define PROF_HCCL_TRACE 0x00001000 + +#define PROF_TASK_TRACE 0x00001852 + +// system profilinig switch +#define PROF_CPU 0x00010000 +#define PROF_HARDWARE_MEMORY 0x00020000 +#define PROF_IO 0x00040000 +#define PROF_INTER_CONNECTION 0x00080000 +#define PROF_DVPP 0x00100000 +#define PROF_SYS_AICORE_SAMPLE 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE 0x00400000 #define PROF_MODEL_LOAD 0x8000000000000000 // DataTypeConfig MASK -#define PROF_ACL_API_MASK 0x0001 -#define PROF_TASK_TIME_MASK 0x0002 -#define PROF_AICORE_METRICS_MASK 0x0004 -#define PROF_AICPU_TRACE_MASK 0x0008 -#define PROF_MODEL_EXECUTE_MASK 0x0010 -#define PROF_RUNTIME_API_MASK 0x0020 -#define PROF_RUNTIME_TRACE_MASK 0x0040 -#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 -#define PROF_SCHEDULE_TRACE_MASK 0x0100 -#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 -#define PROF_SUBTASK_TIME_MASK 0x0400 - -#define PROF_TRAINING_TRACE_MASK 0x0800 -#define PROF_HCCL_TRACE_MASK 0x1000 -#define PROF_DATA_PROCESS_MASK 0x2000 +#define PROF_ACL_API_MASK 0x00000001 +#define PROF_TASK_TIME_MASK 0x00000002 +#define PROF_AICORE_METRICS_MASK 0x00000004 +#define PROF_AICPU_TRACE_MASK 0x00000008 +#define PROF_MODEL_EXECUTE_MASK 0x00000010 +#define PROF_RUNTIME_API_MASK 0x00000020 +#define PROF_RUNTIME_TRACE_MASK 0x00000040 +#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 +#define PROF_SCHEDULE_TRACE_MASK 0x00000100 +#define 
PROF_AIVECTORCORE_METRICS_MASK 0x00000200 +#define PROF_SUBTASK_TIME_MASK 0x00000400 + +#define PROF_TRAINING_TRACE_MASK 0x00000800 +#define PROF_HCCL_TRACE_MASK 0x00001000 + +// system profilinig mask +#define PROF_CPU_MASK 0x00010000 +#define PROF_HARDWARE_MEMORY_MASK 0x00020000 +#define PROF_IO_MASK 0x00040000 +#define PROF_INTER_CONNECTION_MASK 0x00080000 +#define PROF_DVPP_MASK 0x00100000 +#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 #define PROF_MODEL_LOAD_MASK 0x8000000000000000 #include -#include - -/** - * @name ProrErrorCode - * @brief error code enum of prof_acl_apis - */ -enum ProfErrorCode { - PROF_ERROR_NONE = 0, // ok - PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr - PROF_ERROR_REPEAT_INIT, // profiling has already been inited - PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string - PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable - PROF_ERROR_FAILURE, // failed to init or start profiling - PROF_ERROR_NOT_INITED, // profiling has not been inited - PROF_ERROR_DEVICE_INVALID, // device id invalid - PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics - PROF_ERROR_REPEAT_START, // profiilng has already been started - PROF_ERROR_NOT_STARTED, // profiling has not been started - PROF_ERROR_REPEAT_SUBSCRIBE, // same model id has already been subscribed - PROF_ERROR_MODEL_ID_INVALID, // model id does not exist or has not been subscribed - PROF_ERROR_API_CONFLICT, // prof ctrl api mode conflicts with subscribe mode -}; - -/** - * @brief transfer profiling config in acl.json to sample config - * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} - * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); - -/** - * @name ProfInit - * 
@brief init profiling - * @param profInitCfg [IN] config of init profiling of json format - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); - -/** - * @name ProfAicoreMetrics - * @brief aicore metrics enum - */ -enum ProfAicoreMetrics { - PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, - PROF_AICORE_PIPELINE = 1, - PROF_AICORE_SYNCHRONIZATION = 2, - PROF_AICORE_MEMORY = 3, - PROF_AICORE_INTERNAL_MEMORY = 4, - PROF_AICORE_STALL = 5, - PROF_AICORE_METRICS_COUNT, - PROF_AICORE_NONE = 0xff, -}; - -/** - * @name ProfConfig - * @brief struct of ProfStart - */ -struct ProfConfig { - uint32_t devNums; // length of device id list - uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list - ProfAicoreMetrics aicoreMetrics; // aicore metric - uint64_t dataTypeConfig; // data type to start profiling -}; - -/** - * @name ProfStartProfiling - * @brief start profiling - * @param profStartCfg [IN] config to start profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); - -/** - * @name ProfStopProfiling - * @brief stop profiling - * @param profStopCfg [IN] config to stop profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); - -/** - * @name ProfFinalize - * @brief finalize profiling task - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfFinalize(); - -/** - * @name ProfGetDataTypeConfig - * @brief get dataTypeConfig started with of one device - * @param deviceId [IN] deviceId to get dataTypeConfig - * @param dataTypeConfig [OUT] result get - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); namespace Msprofiler { namespace Api { -/** - * @brief transfer profiling config in acl.json to sample config - * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} - * @param sampleCfg [OUT] json string for 
GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); - -/** - * @name ProfInit - * @brief init profiling - * @param profInitCfg [IN] config of init profiling of json format - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); - -/** - * @name ProfStartProfiling - * @brief start profiling - * @param profStartCfg [IN] config to start profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); - -/** - * @name ProfStopProfiling - * @brief stop profiling - * @param profStopCfg [IN] config to stop profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); - -/** - * @name ProfFinalize - * @brief finalize profiling task - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfFinalize(); - -/** - * @name ProfGetDataTypeConfig - * @brief get dataTypeConfig started with of one device - * @param deviceId [IN] deviceId to get dataTypeConfig - * @param dataTypeConfig [OUT] result get - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); - -/** - * @name WorkMode - * @brief profiling api work mode - */ -enum WorkMode { - WORK_MODE_OFF, // profiling not at work - WORK_MODE_API_CTRL, // profiling work on api ctrl mode, (ProfInit) - WORK_MODE_SUBSCRIBE, // profiling work on subscribe mode -}; - -/** - * @name ProfGetApiWorkMode - * @brief get profiling api work mode - * @return WorkMode - */ -MSVP_PROF_API WorkMode ProfGetApiWorkMode(); - -/** - * @name ProfSubscribeConfig - * @brief config of subscribe api - */ -struct ProfSubscribeConfig { - bool timeInfo; // subscribe op time - ProfAicoreMetrics aicoreMetrics; // subscribe ai core metrics - void* fd; // pipe fd -}; - -/** - * @name ProfGetDataTypeConfig - * @brief get 
DataTypeConfig of subscribe - * @param profSubscribeConfig [IN] config to subscribe data - * @return DataTypeConfig - */ -MSVP_PROF_API uint64_t ProfGetDataTypeConfig(const ProfSubscribeConfig *profSubscribeConfig); - -/** - * @name ProfModelSubscribe - * @brief subscribe data of one model id - * @param modelId [IN] model id to subscribe data - * @param devId [IN] device id of model - * @param profSubscribeConfig [IN] config to subscribe data - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfModelSubscribe(uint32_t modelId, uint32_t devId, - const ProfSubscribeConfig *profSubscribeConfig); - -/** - * @name ProfIsModelSubscribed - * @brief check if a model id is subscribed - * @param modeiId [IN] modei id to check - * @return true: subscribed, false: not - */ -MSVP_PROF_API bool ProfIsModelSubscribed(uint32_t modelId); - -/** - * @name ProfModelUnSubscribe - * @brief unsubscribe a model id - * @param modeiId [IN] modei id to unsubscribe - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfModelUnSubscribe(uint32_t modelId); - -/** - * @name ProfGetOpDescSize - * @brief get profiling data struct size - * @param opDescSize [OUT] bytes of profiling subscribe data struct - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpDescSize(uint32_t *opDescSize); - -/** - * @name ProfGetOpNum - * @brief get how many op data there are in data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param opNum [OUT] number of op in data - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpNum(const void *data, uint32_t len, uint32_t *opNum); - -/** - * @name ProfGetModelId - * @brief get model id of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return model id - */ -MSVP_PROF_API uint32_t ProfGetModelId(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpType - * @brief get op type of specific part of data - * 
@param data [IN] data read from pipe - * @param len [IN] data length - * @param opType [OUT] op type buffer - * @param opTypeLen [IN] buffer size of param opType - * @param index [IN] index of part(op) - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpType(const void *data, uint32_t len, char *opType, uint32_t opTypeLen, uint32_t index); - -/** - * @name ProfGetOpName - * @brief get op name of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param opType [OUT] op name buffer - * @param opTypeLen [IN] buffer size of param opName - * @param index [IN] index of part(op) - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpName(const void *data, uint32_t len, char *opName, uint32_t opNameLen, uint32_t index); - -/** - * @name ProfGetOpStart - * @brief get op start timestamp of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op start timestamp (us) - */ -MSVP_PROF_API uint64_t ProfGetOpStart(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpEnd - * @brief get op end timestamp of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op end timestamp (us) - */ -MSVP_PROF_API uint64_t ProfGetOpEnd(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpDuration - * @brief get op duration of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op duration (us) - */ -MSVP_PROF_API uint64_t ProfGetOpDuration(const void *data, uint32_t len, uint32_t index); - /** * @name ProfGetOpExecutionTime * @brief get op execution time of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length + * @param data [IN] data read from pipe + * @param len [IN] 
data length * @param index [IN] index of part(op) * @return op execution time (us) */ -MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpCubeOps - * @brief get op cube fops of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op cube fops - */ -MSVP_PROF_API uint64_t ProfGetOpCubeOps(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpVectorOps - * @brief get op vector fops of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op vector fops - */ -MSVP_PROF_API uint64_t ProfGetOpVectorOps(const void *data, uint32_t len, uint32_t index); - -} // namespace Api -} // namespace Msprofiler +uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); +} +} #endif // MSPROFILER_API_PROF_ACL_API_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h new file mode 100644 index 00000000..1299ae59 --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -0,0 +1,132 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MSPROFILER_PROF_CALLBACK_H_ +#define MSPROFILER_PROF_CALLBACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +#include "stddef.h" +#include "stdint.h" + +/** + * @name MsprofErrorCode + * @brief error code + */ +enum MsprofErrorCode { + MSPROF_ERROR_NONE = 0, + MSPROF_ERROR_MEM_NOT_ENOUGH, + MSPROF_ERROR_GET_ENV, + MSPROF_ERROR_CONFIG_INVALID, + MSPROF_ERROR_ACL_JSON_OFF, + MSPROF_ERROR, +}; + +#define MSPROF_ENGINE_MAX_TAG_LEN (31) + +/** + * @name ReporterData + * @brief struct of data to report + */ +struct ReporterData { + char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be writen + int deviceId; // the index of device + size_t dataLen; // the length of send data + unsigned char *data; // the data content +}; + +/** + * @name MsprofReporterModuleId + * @brief module id of data to report + */ +enum MsprofReporterModuleId { + MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS + MSPROF_MODULE_HCCL, // HCCL + MSPROF_MODULE_ACL, // AclModule + MSPROF_MODULE_FRAMEWORK, // Framework + MSPROF_MODULE_RUNTIME // runtime +}; + +/** + * @name MsprofReporterCallbackType + * @brief reporter callback request type + */ +enum MsprofReporterCallbackType { + MSPROF_REPORTER_REPORT = 0, // report data + MSPROF_REPORTER_INIT, // init reporter + MSPROF_REPORTER_UNINIT, // uninit reporter +}; + +/** + * @name MsprofReporterCallback + * @brief callback to start reporter/stop reporter/report date + * @param moduleId [IN] enum MsprofReporterModuleId + * @param type [IN] enum MsprofReporterCallbackType + * @param data [IN] callback data (nullptr on INTI/UNINIT) + * @param len [IN] callback data size (0 on INIT/UNINIT) + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); + + +#define MSPROF_OPTIONS_DEF_LEN_MAX (2048) + +/** + * @name MsprofGeOptions + * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS + */ +struct 
MsprofGeOptions { + char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; + char options[MSPROF_OPTIONS_DEF_LEN_MAX]; +}; + +/** + * @name MsprofCtrlCallbackType + * @brief ctrl callback request type + */ +enum MsprofCtrlCallbackType { + MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env + MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json + MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options + MSPROF_CTRL_FINALIZE // stop profiling +}; + +/** + * @name MsprofCtrlCallback + * @brief callback to start/stop profiling + * @param type [IN] enum MsprofCtrlCallbackType + * @param data [IN] callback data + * @param len [IN] callback data size + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); + +/** + * @name MsprofSetDeviceCallback + * @brief callback to notify set/reset device + * @param devId [IN] device id + * @param isOpenDevice [IN] true: set device, false: reset device + */ +typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_CALLBACK_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index 949011d3..ff91351b 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -26,6 +26,8 @@ #define MSVP_PROF_API __attribute__((visibility("default"))) #endif +#include "prof_callback.h" + /** * @file prof_reporter.h * @defgroup reporter the reporter group @@ -33,20 +35,6 @@ */ namespace Msprof { namespace Engine { -/// the max tag length -#define MSPROF_ENGINE_MAX_TAG_LEN (31) -/** - * @ingroup reporter - * @brief struct ReporterData - * the sturct of the data send to libmsprof - */ -struct ReporterData { - char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen - int deviceId; ///< the physical id of device 
- size_t dataLen; ///< the length of send data - unsigned char *data; ///< the data content -}; - /** * @ingroup reporter * @brief class Reporter