| @@ -3,33 +3,34 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/core) | |||||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}) | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) | ||||
| include_directories(${CMAKE_BINARY_DIR}) | include_directories(${CMAKE_BINARY_DIR}) | ||||
| if (ENABLE_ACL) | |||||
| if(ENABLE_ACL) | |||||
| set(ASCEND_PATH /usr/local/Ascend) | set(ASCEND_PATH /usr/local/Ascend) | ||||
| include_directories(${ASCEND_PATH}/acllib/include) | include_directories(${ASCEND_PATH}/acllib/include) | ||||
| link_directories(${ASCEND_PATH}/acllib/lib64/) | link_directories(${ASCEND_PATH}/acllib/lib64/) | ||||
| find_library(ascendcl acl_dvpp ${ASCEND_PATH}/acllib/lib64) | find_library(ascendcl acl_dvpp ${ASCEND_PATH}/acllib/lib64) | ||||
| endif () | |||||
| endif() | |||||
| if (NOT(CMAKE_SYSTEM_NAME MATCHES "Darwin")) | |||||
| if(NOT(CMAKE_SYSTEM_NAME MATCHES "Darwin")) | |||||
| link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) | link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) | ||||
| else() | else() | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-delete-non-abstract-non-virtual-dtor") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-delete-non-abstract-non-virtual-dtor") | ||||
| endif () | |||||
| endif() | |||||
| if (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| if(CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF") | ||||
| add_compile_definitions(BUILDING_DLL) | add_compile_definitions(BUILDING_DLL) | ||||
| endif() | endif() | ||||
| if (ENABLE_MPI) | |||||
| if(ENABLE_MPI) | |||||
| add_compile_definitions(ENABLE_MPI) | add_compile_definitions(ENABLE_MPI) | ||||
| endif () | |||||
| endif() | |||||
| if(ENABLE_GPU) | if(ENABLE_GPU) | ||||
| find_package(CUDA REQUIRED) | find_package(CUDA REQUIRED) | ||||
| find_package(Threads) | find_package(Threads) | ||||
| if(${CUDA_VERSION} VERSION_LESS ${MS_REQUIRE_CUDA_VERSION}) | if(${CUDA_VERSION} VERSION_LESS ${MS_REQUIRE_CUDA_VERSION}) | ||||
| message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, but only CUDA ${CUDA_VERSION} found.") | |||||
| message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, \ | |||||
| but only CUDA ${CUDA_VERSION} found.") | |||||
| endif() | endif() | ||||
| enable_language(CUDA) | enable_language(CUDA) | ||||
| if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "") | if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "") | ||||
| @@ -40,31 +41,36 @@ if(ENABLE_GPU) | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| if (DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "") | |||||
| if(DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "") | |||||
| set(CUDNN_INCLUDE_DIR $ENV{CUDNN_HOME}/include) | set(CUDNN_INCLUDE_DIR $ENV{CUDNN_HOME}/include) | ||||
| set(CUDNN_LIBRARY_DIR $ENV{CUDNN_HOME}/lib64) | set(CUDNN_LIBRARY_DIR $ENV{CUDNN_HOME}/lib64) | ||||
| find_path(CUDNN_INCLUDE_PATH cudnn.h HINTS ${CUDNN_INCLUDE_DIR} NO_DEFAULT_PATH) | find_path(CUDNN_INCLUDE_PATH cudnn.h HINTS ${CUDNN_INCLUDE_DIR} NO_DEFAULT_PATH) | ||||
| find_library(CUDNN_LIBRARY_PATH "cudnn" HINTS ${CUDNN_LIBRARY_DIR} NO_DEFAULT_PATH) | find_library(CUDNN_LIBRARY_PATH "cudnn" HINTS ${CUDNN_LIBRARY_DIR} NO_DEFAULT_PATH) | ||||
| if (CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to cudnn installation position.") | |||||
| if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to \ | |||||
| cudnn installation position.") | |||||
| endif() | endif() | ||||
| if (CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to cudnn installation position.") | |||||
| if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to \ | |||||
| cudnn installation position.") | |||||
| endif() | endif() | ||||
| else() | else() | ||||
| list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR}) | list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR}) | ||||
| find_path(CUDNN_INCLUDE_PATH cudnn.h PATH_SUFFIXES cuda/inclulde include cuda) | find_path(CUDNN_INCLUDE_PATH cudnn.h PATH_SUFFIXES cuda/inclulde include cuda) | ||||
| find_library(CUDNN_LIBRARY_PATH "cudnn" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu) | find_library(CUDNN_LIBRARY_PATH "cudnn" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu) | ||||
| if (CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put cudnn header file in cuda include path \ | |||||
| or user include path(eg. /usr/local/cuda/include; /usr/local/include; /usr/include), if cudnn library is installed in other position,\ | |||||
| please set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h in {CUDNN_HOME}/include.") | |||||
| if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put \ | |||||
| cudnn header file in cuda include path or user include path(eg. /usr/local/cuda/include; \ | |||||
| /usr/local/include; /usr/include), if cudnn library is installed in other position, please \ | |||||
| set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h \ | |||||
| in {CUDNN_HOME}/include.") | |||||
| endif() | endif() | ||||
| if (CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put cudnn library file in \ | |||||
| cuda library path or user library path(eg. /usr/local/cuda/lib64; /usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib),\ | |||||
| if cudnn library is installed in other position, please set environment variable CUDNN_HOME to cudnn installation position, \ | |||||
| there should be cudnn library file in {CUDNN_HOME}/lib64.") | |||||
| if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND) | |||||
| message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put \ | |||||
| cudnn library file in cuda library path or user library path(eg. /usr/local/cuda/lib64; \ | |||||
| /usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib), if cudnn library is installed in other \ | |||||
| position, please set environment variable CUDNN_HOME to cudnn installation position, there should \ | |||||
| be cudnn library file in {CUDNN_HOME}/lib64.") | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| @@ -102,7 +108,7 @@ if(ENABLE_GPU) | |||||
| cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) | cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) | ||||
| set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) | set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) | ||||
| add_compile_definitions(ENABLE_GPU) | add_compile_definitions(ENABLE_GPU) | ||||
| endif () | |||||
| endif() | |||||
| ## make protobuf files | ## make protobuf files | ||||
| @@ -117,7 +123,13 @@ file(GLOB_RECURSE COMM_PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ps/core/pr | |||||
| ms_protobuf_generate(COMM_PROTO_SRCS COMM_PROTO_HDRS ${COMM_PROTO_IN}) | ms_protobuf_generate(COMM_PROTO_SRCS COMM_PROTO_HDRS ${COMM_PROTO_IN}) | ||||
| list(APPEND MINDSPORE_PROTO_LIST ${COMM_PROTO_SRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${COMM_PROTO_SRCS}) | ||||
| if (ENABLE_DEBUGGER) | |||||
| include_directories("${CMAKE_BINARY_DIR}/profiler/device/common") | |||||
| file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| "profiler/device/common/memory_profiling.proto") | |||||
| ms_protobuf_generate(PROFILER_MEM_PROTO_SRCS PROFILER_MEM_PROTO_HDRS ${PROFILER_PROTO_LIST}) | |||||
| list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_SRCS}) | |||||
| if(ENABLE_DEBUGGER) | |||||
| # debugger: compile proto files | # debugger: compile proto files | ||||
| include_directories("${CMAKE_BINARY_DIR}/debug/debugger") | include_directories("${CMAKE_BINARY_DIR}/debug/debugger") | ||||
| file(GLOB_RECURSE DEBUGGER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_graph.proto") | file(GLOB_RECURSE DEBUGGER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_graph.proto") | ||||
| @@ -126,9 +138,9 @@ if (ENABLE_DEBUGGER) | |||||
| ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST}) | ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST}) | ||||
| list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS}) | ||||
| list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS}) | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_DUMP_PROTO) | |||||
| if(ENABLE_DUMP_PROTO) | |||||
| include_directories(${CMAKE_BINARY_DIR}) | include_directories(${CMAKE_BINARY_DIR}) | ||||
| file(GLOB_RECURSE PROTO_PY RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | file(GLOB_RECURSE PROTO_PY RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | ||||
| @@ -144,9 +156,9 @@ if (ENABLE_DUMP_PROTO) | |||||
| list(APPEND MINDSPORE_PROTO_LIST ${PROTO_SRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${PROTO_SRCS}) | ||||
| list(APPEND MINDSPORE_PROTO_LIST ${PY_SRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${PY_SRCS}) | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_D) | |||||
| if(ENABLE_D) | |||||
| include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu") | include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu") | ||||
| file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto") | file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto") | ||||
| ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) | ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) | ||||
| @@ -159,9 +171,9 @@ if (ENABLE_D) | |||||
| list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) | ||||
| add_compile_definitions(ENABLE_D) | add_compile_definitions(ENABLE_D) | ||||
| endif () | |||||
| endif() | |||||
| if (MINDSPORE_PROTO_LIST) | |||||
| if(MINDSPORE_PROTO_LIST) | |||||
| add_library(proto_input STATIC ${MINDSPORE_PROTO_LIST}) | add_library(proto_input STATIC ${MINDSPORE_PROTO_LIST}) | ||||
| set_target_properties(proto_input PROPERTIES COMPILE_FLAGS "-Wno-unused-variable") | set_target_properties(proto_input PROPERTIES COMPILE_FLAGS "-Wno-unused-variable") | ||||
| endif() | endif() | ||||
| @@ -183,58 +195,58 @@ set(SUB_COMP | |||||
| common debug pybind_api utils vm profiler ps | common debug pybind_api utils vm profiler ps | ||||
| ) | ) | ||||
| foreach (_comp ${SUB_COMP}) | |||||
| foreach(_comp ${SUB_COMP}) | |||||
| add_subdirectory(${_comp}) | add_subdirectory(${_comp}) | ||||
| string(REPLACE "/" "_" sub ${_comp}) | string(REPLACE "/" "_" sub ${_comp}) | ||||
| if (TARGET _mindspore_${sub}_obj) | |||||
| if(TARGET _mindspore_${sub}_obj) | |||||
| list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>) | list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>) | ||||
| add_dependencies(_mindspore_${sub}_obj proto_input) | add_dependencies(_mindspore_${sub}_obj proto_input) | ||||
| endif () | |||||
| endforeach () | |||||
| endif() | |||||
| endforeach() | |||||
| set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME) | set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME) | ||||
| add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) | add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) | ||||
| target_link_libraries(mindspore mindspore_core) | target_link_libraries(mindspore mindspore_core) | ||||
| if (ENABLE_DEBUGGER) | |||||
| if(ENABLE_DEBUGGER) | |||||
| # debugger: link grpc | # debugger: link grpc | ||||
| target_link_libraries(proto_input mindspore::grpc++) | target_link_libraries(proto_input mindspore::grpc++) | ||||
| endif() | endif() | ||||
| target_link_libraries(mindspore securec mindspore::flatbuffers) | target_link_libraries(mindspore securec mindspore::flatbuffers) | ||||
| if (NOT WIN32) | |||||
| if(NOT WIN32) | |||||
| target_link_libraries(mindspore dl) | target_link_libraries(mindspore dl) | ||||
| endif() | endif() | ||||
| if (ENABLE_GE) | |||||
| if(ENABLE_GE) | |||||
| if(ENABLE_TRAIN) | if(ENABLE_TRAIN) | ||||
| target_link_libraries(mindspore ge_runner hccl) | target_link_libraries(mindspore ge_runner hccl) | ||||
| else () | |||||
| else() | |||||
| target_link_libraries(mindspore ge_client) | target_link_libraries(mindspore ge_client) | ||||
| endif () | |||||
| endif() | |||||
| target_link_libraries(mindspore graph tsdclient datatransfer) | target_link_libraries(mindspore graph tsdclient datatransfer) | ||||
| endif() | endif() | ||||
| if (ENABLE_D) | |||||
| if (DEFINED ENV{D_LINK_PATH}) | |||||
| if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") | |||||
| if(ENABLE_D) | |||||
| if(DEFINED ENV{D_LINK_PATH}) | |||||
| if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") | |||||
| MESSAGE("system processor matches aarch64") | MESSAGE("system processor matches aarch64") | ||||
| set(D_LIB_PATH $ENV{D_LINK_PATH}/aarch64) | set(D_LIB_PATH $ENV{D_LINK_PATH}/aarch64) | ||||
| elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") | |||||
| elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") | |||||
| MESSAGE("system processor matches x86_64") | MESSAGE("system processor matches x86_64") | ||||
| set(D_LIB_PATH $ENV{D_LINK_PATH}/x86_64) | set(D_LIB_PATH $ENV{D_LINK_PATH}/x86_64) | ||||
| else () | |||||
| else() | |||||
| MESSAGE("system ${CMAKE_HOST_SYSTEM_PROCESSOR} not support") | MESSAGE("system ${CMAKE_HOST_SYSTEM_PROCESSOR} not support") | ||||
| endif() | endif() | ||||
| else () | |||||
| else() | |||||
| MESSAGE("use system default lib") | MESSAGE("use system default lib") | ||||
| if (DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||||
| if(DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||||
| set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) | set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) | ||||
| else () | |||||
| else() | |||||
| set(ASCEND_PATH /usr/local/Ascend) | set(ASCEND_PATH /usr/local/Ascend) | ||||
| endif () | |||||
| endif() | |||||
| set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) | set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) | ||||
| set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver) | set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver) | ||||
| set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) | set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) | ||||
| @@ -246,8 +258,10 @@ if (ENABLE_D) | |||||
| find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| find_library(CCE_LIB cce ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(CCE_LIB cce ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| find_library(RUNTIME_LIB runtime ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(RUNTIME_LIB runtime ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH}) | |||||
| find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH}) | |||||
| find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} | |||||
| ${ASCEND_DRIVER_BACK_PATH}) | |||||
| find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} | |||||
| ${ASCEND_DRIVER_BACK_PATH}) | |||||
| find_library(PROFILING msprofiler_fwk ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(PROFILING msprofiler_fwk ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| find_library(REGISTER register ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(REGISTER register ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| find_library(PLATFORM platform ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(PLATFORM platform ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| @@ -255,42 +269,48 @@ if (ENABLE_D) | |||||
| # hccl_adpter | # hccl_adpter | ||||
| find_library(HCCL_ADPTER hcom_graph_adaptor ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(HCCL_ADPTER hcom_graph_adaptor ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| find_library(HCCL_RA ra ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(HCCL_RA ra ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| find_library(HCCL_BUILDER hcom_opskernel_builder ${ASCEND_RUNTIME_PATH}/plugin/opskernel ${ASCEND_TOOLKIT_RUNTIME_PATH}/plugin/opskernel) | |||||
| find_library(HCCL_BUILDER hcom_opskernel_builder ${ASCEND_RUNTIME_PATH}/plugin/opskernel | |||||
| ${ASCEND_TOOLKIT_RUNTIME_PATH}/plugin/opskernel) | |||||
| add_library(ms_profile SHARED ${CMAKE_CURRENT_SOURCE_DIR}/runtime/device/ascend/profiling/profiling_callback_register.cc) | |||||
| add_library(ms_profile SHARED | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/device/ascend/profiling/profiling_callback_register.cc) | |||||
| set_target_properties(ms_profile PROPERTIES LINKER_LANGUAGE CXX) | set_target_properties(ms_profile PROPERTIES LINKER_LANGUAGE CXX) | ||||
| target_link_options(ms_profile PRIVATE -Wl,-init,common_log_init) | target_link_options(ms_profile PRIVATE -Wl,-init,common_log_init) | ||||
| target_link_libraries(ms_profile -Wl,--start-group -Wl,--whole-archive ${PROFILING} -Wl,--no-whole-archive mindspore::protobuf -Wl,--end-group) | |||||
| target_link_libraries(ms_profile -Wl,--start-group -Wl,--whole-archive ${PROFILING} -Wl,--no-whole-archive | |||||
| mindspore::protobuf -Wl,--end-group) | |||||
| target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER} | target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER} | ||||
| ${HCCL_ADPTER} ${REGISTER} -Wl,--no-as-needed ${OPTILING} ${HCCL_BUILDER} ${HCCL_RA} ${PLATFORM}) | |||||
| ${HCCL_ADPTER} ${REGISTER} -Wl,--no-as-needed ${OPTILING} ${HCCL_BUILDER} | |||||
| ${HCCL_RA} ${PLATFORM}) | |||||
| target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group) | target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group) | ||||
| elseif (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece -Wl,--end-group) | |||||
| elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| elseif(CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece | |||||
| -Wl,--end-group) | |||||
| elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| target_link_libraries(mindspore -Wl proto_input mindspore::protobuf mindspore::sentencepiece -Wl) | target_link_libraries(mindspore -Wl proto_input mindspore::protobuf mindspore::sentencepiece -Wl) | ||||
| else () | |||||
| else() | |||||
| target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group) | target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group) | ||||
| endif () | |||||
| endif() | |||||
| # set c_expression building | # set c_expression building | ||||
| set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) | set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) | ||||
| set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) | |||||
| set_property(SOURCE "pipeline/jit/init.cc" PROPERTY | |||||
| COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) | |||||
| pybind11_add_module(_c_expression "pipeline/jit/init.cc") | pybind11_add_module(_c_expression "pipeline/jit/init.cc") | ||||
| MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") | MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | |||||
| if(CMAKE_SYSTEM_NAME MATCHES "Linux") | |||||
| target_link_options(_c_expression PRIVATE -Wl,-init,mindspore_log_init) | target_link_options(_c_expression PRIVATE -Wl,-init,mindspore_log_init) | ||||
| set(ORIGIN_PATH $ORIGIN) | set(ORIGIN_PATH $ORIGIN) | ||||
| elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| set_target_properties(_c_expression PROPERTIES MACOSX_RPATH ON) | set_target_properties(_c_expression PROPERTIES MACOSX_RPATH ON) | ||||
| set(ORIGIN_PATH @loader_path) | set(ORIGIN_PATH @loader_path) | ||||
| elseif (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| elseif(CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| set(ORIGIN_PATH $ORIGIN) | set(ORIGIN_PATH $ORIGIN) | ||||
| else () | |||||
| else() | |||||
| MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}") | MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}") | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_D) | |||||
| if(ENABLE_D) | |||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/lib64) | set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/lib64) | ||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64) | set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64) | ||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64) | set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64) | ||||
| @@ -300,45 +320,47 @@ if (ENABLE_D) | |||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/add-ons) | set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/add-ons) | ||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling) | set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling) | ||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling) | set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling) | ||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling) | |||||
| elseif (ENABLE_GPU) | |||||
| set(MINDSPORE_RPATH | |||||
| ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling) | |||||
| elseif(ENABLE_GPU) | |||||
| set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/cuda/lib64) | set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/cuda/lib64) | ||||
| endif () | |||||
| endif() | |||||
| set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH}) | set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH}) | ||||
| set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH}) | set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH}) | ||||
| if (CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| if(CMAKE_SYSTEM_NAME MATCHES "Windows") | |||||
| target_link_libraries(mindspore mindspore::pybind11_module) | target_link_libraries(mindspore mindspore::pybind11_module) | ||||
| target_link_libraries(mindspore mindspore_gvar) | target_link_libraries(mindspore mindspore_gvar) | ||||
| target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) | target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) | ||||
| elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| target_link_libraries(mindspore mindspore::pybind11_module) | target_link_libraries(mindspore mindspore::pybind11_module) | ||||
| target_link_libraries(mindspore mindspore_gvar) | target_link_libraries(mindspore mindspore_gvar) | ||||
| target_link_libraries(_c_expression PRIVATE -Wl,-force_load mindspore -Wl,-noall_load) | target_link_libraries(_c_expression PRIVATE -Wl,-force_load mindspore -Wl,-noall_load) | ||||
| else () | |||||
| if (ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU)) | |||||
| target_link_libraries(mindspore mindspore::pslite proto_input mindspore::protobuf mindspore::event mindspore::event_pthreads ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a) | |||||
| else() | |||||
| if(ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU)) | |||||
| target_link_libraries(mindspore mindspore::pslite proto_input mindspore::protobuf | |||||
| mindspore::event mindspore::event_pthreads ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a) | |||||
| target_link_libraries(mindspore -Wl,--no-as-needed mindspore::event_core ps_cache) | target_link_libraries(mindspore -Wl,--no-as-needed mindspore::event_core ps_cache) | ||||
| if (${ENABLE_IBVERBS} STREQUAL "ON") | |||||
| if(${ENABLE_IBVERBS} STREQUAL "ON") | |||||
| target_link_libraries(mindspore ibverbs rdmacm) | target_link_libraries(mindspore ibverbs rdmacm) | ||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore proto_input -Wl,--no-whole-archive) | target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore proto_input -Wl,--no-whole-archive) | ||||
| target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module) | target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module) | ||||
| target_link_libraries(_c_expression PRIVATE mindspore_gvar) | target_link_libraries(_c_expression PRIVATE mindspore_gvar) | ||||
| if (ENABLE_D) | |||||
| if(ENABLE_D) | |||||
| target_link_libraries(_c_expression PRIVATE -Wl,--no-as-needed ms_profile) | target_link_libraries(_c_expression PRIVATE -Wl,--no-as-needed ms_profile) | ||||
| endif () | |||||
| if (ENABLE_ACL) | |||||
| endif() | |||||
| if(ENABLE_ACL) | |||||
| target_link_libraries(_c_expression PRIVATE -Wl,--no-as-needed graph) | target_link_libraries(_c_expression PRIVATE -Wl,--no-as-needed graph) | ||||
| endif () | |||||
| endif () | |||||
| endif() | |||||
| endif() | |||||
| if (USE_GLOG) | |||||
| if(USE_GLOG) | |||||
| target_link_libraries(_c_expression PRIVATE mindspore::glog) | target_link_libraries(_c_expression PRIVATE mindspore::glog) | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_GPU) | |||||
| if(ENABLE_GPU) | |||||
| message("add gpu lib to c_expression") | message("add gpu lib to c_expression") | ||||
| target_link_libraries(_c_expression PRIVATE gpu_cuda_lib gpu_queue cublas | target_link_libraries(_c_expression PRIVATE gpu_cuda_lib gpu_queue cublas | ||||
| ${CUDA_PATH}/lib64/libcurand.so | ${CUDA_PATH}/lib64/libcurand.so | ||||
| @@ -346,27 +368,27 @@ if (ENABLE_GPU) | |||||
| ${CUDA_PATH}/lib64/libcudart.so | ${CUDA_PATH}/lib64/libcudart.so | ||||
| ${CUDA_PATH}/lib64/stubs/libcuda.so | ${CUDA_PATH}/lib64/stubs/libcuda.so | ||||
| ${CUDA_PATH}/lib64/libcusolver.so) | ${CUDA_PATH}/lib64/libcusolver.so) | ||||
| if (ENABLE_MPI) | |||||
| if(ENABLE_MPI) | |||||
| set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH}) | set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH}) | ||||
| endif () | |||||
| endif () | |||||
| endif() | |||||
| endif() | |||||
| if (CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| if(CMAKE_SYSTEM_NAME MATCHES "Darwin") | |||||
| set(CMAKE_MACOSX_RPATH 1) | set(CMAKE_MACOSX_RPATH 1) | ||||
| set(CMAKE_INSTALL_RPATH "@loader_path/lib;@loader_path") | set(CMAKE_INSTALL_RPATH "@loader_path/lib;@loader_path") | ||||
| set_target_properties(_c_expression PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}") | set_target_properties(_c_expression PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}") | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_CPU) | |||||
| if(ENABLE_CPU) | |||||
| target_link_libraries(_c_expression PRIVATE mindspore::dnnl mindspore::mkldnn) | target_link_libraries(_c_expression PRIVATE mindspore::dnnl mindspore::mkldnn) | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_MINDDATA) | |||||
| if(ENABLE_MINDDATA) | |||||
| add_subdirectory(minddata/mindrecord) | add_subdirectory(minddata/mindrecord) | ||||
| add_subdirectory(minddata/dataset) | add_subdirectory(minddata/dataset) | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_D) | |||||
| if(ENABLE_D) | |||||
| find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) | ||||
| target_link_libraries(_c_expression PRIVATE ${adump_server}) | target_link_libraries(_c_expression PRIVATE ${adump_server}) | ||||
| endif() | endif() | ||||
| @@ -35,6 +35,11 @@ | |||||
| #include "utils/ms_context.h" | #include "utils/ms_context.h" | ||||
| #include "debug/common.h" | #include "debug/common.h" | ||||
| #include "common/thread_pool.h" | #include "common/thread_pool.h" | ||||
| #include "profiler/device/common/memory_profiling.h" | |||||
| using mindspore::profiler::MemoryProfiling; | |||||
| using mindspore::profiler::NodeMemory; | |||||
| using mindspore::profiler::TensorMemory; | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace somas { | namespace somas { | ||||
| @@ -49,6 +54,11 @@ std::map<TensorType, std::string> tensor_type_name_map = {{kCommon, "Common"}, | |||||
| {kRefNodeOutput, "RefNodeOutput"}, | {kRefNodeOutput, "RefNodeOutput"}, | ||||
| {kUnknown, "Unknown"}}; | {kUnknown, "Unknown"}}; | ||||
| std::map<LifeLongType, std::string> life_long_name_map = {{kLifeLongNone, "LifeLongNone"}, | |||||
| {kLifeLongGraphAll, "LifeLongGraphAll"}, | |||||
| {kLifeLongGraphStart, "LifeLongGraphStart"}, | |||||
| {kLifeLongGraphEnd, "LifeLongGraphEnd"}}; | |||||
| bool Somas::Allocate(const session::KernelGraph *graph) { | bool Somas::Allocate(const session::KernelGraph *graph) { | ||||
| auto ret = InitSomasTensors(graph); | auto ret = InitSomasTensors(graph); | ||||
| if (!ret) { | if (!ret) { | ||||
| @@ -1413,5 +1423,43 @@ uint8_t *Somas::GetNodeWorkSpacePtr(const AnfNodePtr &node, size_t index) const | |||||
| } | } | ||||
| return ptr; | return ptr; | ||||
| } | } | ||||
| void Somas::ConvertToProfilingNode(uint32_t graph_id) { | |||||
| #ifdef ENABLE_D | |||||
| auto graph_node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id); | |||||
| if (graph_node == nullptr) { | |||||
| graph_node = MemoryProfiling::GetInstance().AddGraphMemoryNode(graph_id); | |||||
| MS_LOG(INFO) << "Add graph memory node for dynamic memory profiling, graph id is " << graph_id; | |||||
| } | |||||
| for (const auto &tensor : tensors_list_) { | |||||
| TensorMemory tensor_memory; | |||||
| tensor_memory.SetTensorId(tensor->GetId()); | |||||
| tensor_memory.SetAlignedSize(tensor->GetAlignedSize()); | |||||
| tensor_memory.SetType(tensor_type_name_map[tensor->type_]); | |||||
| tensor_memory.SetLifeStart(tensor->lifetime_.start_); | |||||
| tensor_memory.SetLifeEnd(tensor->lifetime_.end_); | |||||
| tensor_memory.SetLifeLong(life_long_name_map[tensor->lifelong_value_]); | |||||
| graph_node->AddTensorMemory(tensor_memory); | |||||
| } | |||||
| for (const auto &node : nodes_list_) { | |||||
| NodeMemory node_memory; | |||||
| std::string name = GetSplitName(node->scope_full_name_); | |||||
| node_memory.SetNodeName(name); | |||||
| node_memory.SetNodeId(node->GetId()); | |||||
| for (const auto &tensor : node->input_tensors_) { | |||||
| node_memory.AddInputTensorId(tensor->GetId()); | |||||
| } | |||||
| for (const auto &tensor : node->output_tensors_) { | |||||
| node_memory.AddOutputTensorId(tensor->GetId()); | |||||
| } | |||||
| for (const auto &tensor : node->workspace_tensors_) { | |||||
| node_memory.AddWorkSpaceTensorId(tensor->GetId()); | |||||
| } | |||||
| graph_node->AddNodeMemory(node_memory); | |||||
| } | |||||
| #endif | |||||
| } | |||||
| } // namespace somas | } // namespace somas | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -54,6 +54,8 @@ class Somas { | |||||
| static bool NodeSort(SomasNodePtr, SomasNodePtr); | static bool NodeSort(SomasNodePtr, SomasNodePtr); | ||||
| std::vector<DynamicBitSet> reuse_matrix_; | std::vector<DynamicBitSet> reuse_matrix_; | ||||
| std::vector<DynamicBitSet> tensor_relation; | |||||
| void ConvertToProfilingNode(uint32_t graph_id); | |||||
| private: | private: | ||||
| // Maps | // Maps | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "runtime/device/ascend/kernel_select_ascend.h" | #include "runtime/device/ascend/kernel_select_ascend.h" | ||||
| #include "runtime/device/ascend/kernel_build_ascend.h" | #include "runtime/device/ascend/kernel_build_ascend.h" | ||||
| #include "runtime/device/ascend/ascend_kernel_runtime.h" | #include "runtime/device/ascend/ascend_kernel_runtime.h" | ||||
| #include "runtime/device/ascend/profiling/profiling_manager.h" | |||||
| #include "backend/optimizer/ascend/ascend_backend_optimization.h" | #include "backend/optimizer/ascend/ascend_backend_optimization.h" | ||||
| #include "backend/optimizer/common/common_backend_optimization.h" | #include "backend/optimizer/common/common_backend_optimization.h" | ||||
| #include "backend/optimizer/ascend/mindir/dropout_unify_mindir.h" | #include "backend/optimizer/ascend/mindir/dropout_unify_mindir.h" | ||||
| @@ -65,6 +66,11 @@ | |||||
| #include "ps/util.h" | #include "ps/util.h" | ||||
| #include "ps/ps_cache/ps_cache_manager.h" | #include "ps/ps_cache/ps_cache_manager.h" | ||||
| #endif | #endif | ||||
| #include "profiler/device/common/memory_profiling.h" | |||||
| using mindspore::device::ascend::ProfilingManager; | |||||
| using mindspore::profiler::MemoryProfiling; | |||||
| static constexpr uint32_t kLabelSwitchLabelId = 2; | static constexpr uint32_t kLabelSwitchLabelId = 2; | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace session { | namespace session { | ||||
| @@ -649,6 +655,15 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) { | |||||
| root_graph->SetInputNodes(); | root_graph->SetInputNodes(); | ||||
| root_graph->SetOptimizerFlag(); | root_graph->SetOptimizerFlag(); | ||||
| DumpAllGraphs(all_graphs); | DumpAllGraphs(all_graphs); | ||||
| // Save memory profiling data to proto file | |||||
| if (ProfilingManager::GetInstance().IsProfiling()) { | |||||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); | |||||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||||
| uint64_t mem_size = runtime_instance->GetAvailableMemMaxSize(); | |||||
| auto instance = MemoryProfiling::GetInstance(); | |||||
| instance.SetDeviceMemSize(mem_size); | |||||
| instance.SaveMemoryProfiling(); | |||||
| } | |||||
| // return the root_graph id to backend | // return the root_graph id to backend | ||||
| auto graph_id = root_graph->graph_id(); | auto graph_id = root_graph->graph_id(); | ||||
| return graph_id; | return graph_id; | ||||
| @@ -1,11 +1,14 @@ | |||||
| if (ENABLE_GPU) | |||||
| if(ENABLE_GPU) | |||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/gpu/*.cc") | file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/gpu/*.cc") | ||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | ||||
| endif () | |||||
| endif() | |||||
| if (ENABLE_D) | |||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||||
| if(ENABLE_D) | |||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc" "device/common/*.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | ||||
| endif () | |||||
| add_dependencies(_mindspore_profiler_obj mindspore::protobuf) | |||||
| endif() | |||||
| @@ -0,0 +1,97 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "profiler/device/common/memory_profiling.h" | |||||
| #include <fstream> | |||||
| #include <memory> | |||||
| #include "utils/log_adapter.h" | |||||
| #include "utils/ms_context.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| std::shared_ptr<GraphMemory> MemoryProfiling::AddGraphMemoryNode(uint32_t graph_id) { | |||||
| std::shared_ptr<GraphMemory> node = std::make_shared<GraphMemory>(graph_id); | |||||
| graph_memory_[graph_id] = node; | |||||
| return node; | |||||
| } | |||||
| std::shared_ptr<GraphMemory> MemoryProfiling::GetGraphMemoryNode(uint32_t graph_id) { | |||||
| auto node = graph_memory_.find(graph_id); | |||||
| if (node != graph_memory_.end()) { | |||||
| return node->second; | |||||
| } | |||||
| return nullptr; | |||||
| } | |||||
| void MemoryProfiling::MemoryToPB() { | |||||
| memory_proto_.set_total_mem(device_mem_size_); | |||||
| for (const auto &graph : graph_memory_) { | |||||
| GraphMemProto *graph_proto = memory_proto_.add_graph_mem(); | |||||
| graph_proto->set_graph_id(graph.second->GetGraphId()); | |||||
| graph_proto->set_static_mem(graph.second->GetStaticMemSize()); | |||||
| // node memory to PB | |||||
| for (const auto &node : graph.second->GetNodeMemory()) { | |||||
| NodeMemProto *node_mem = graph_proto->add_node_mems(); | |||||
| node_mem->set_node_name(node.GetNodeName()); | |||||
| node_mem->set_node_id(node.GetNodeId()); | |||||
| for (const auto &id : node.GetInputTensorId()) { | |||||
| node_mem->add_input_tensor_id(id); | |||||
| } | |||||
| for (const auto &id : node.GetOutputTensorId()) { | |||||
| node_mem->add_output_tensor_id(id); | |||||
| } | |||||
| for (const auto &id : node.GetOutputTensorId()) { | |||||
| node_mem->add_workspace_tensor_id(id); | |||||
| } | |||||
| } | |||||
| // tensor memory to PB | |||||
| for (const auto &node : graph.second->GetTensorMemory()) { | |||||
| TensorMemProto *tensor_mem = graph_proto->add_tensor_mems(); | |||||
| tensor_mem->set_tensor_id(node.GetTensorId()); | |||||
| tensor_mem->set_size(node.GetAlignedSize()); | |||||
| std::string type = node.GetType(); | |||||
| tensor_mem->set_type(type); | |||||
| tensor_mem->set_life_start(node.GetLifeStart()); | |||||
| tensor_mem->set_life_end(node.GetLifeEnd()); | |||||
| std::string life_long = node.GetLifeLong(); | |||||
| tensor_mem->set_life_long(life_long); | |||||
| } | |||||
| } | |||||
| MS_LOG(INFO) << "Memory profiling data to PB end"; | |||||
| return; | |||||
| } | |||||
| void MemoryProfiling::SaveMemoryProfiling() { | |||||
| auto context = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context); | |||||
| std::string dir_path = context->get_param<std::string>(MS_CTX_PROFILING_DIR_PATH); | |||||
| auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||||
| std::string file = dir_path + std::string("/memory_usage_") + std::to_string(device_id) + std::string(".pb"); | |||||
| MemoryToPB(); | |||||
| std::fstream handle(file, std::ios::out | std::ios::trunc | std::ios::binary); | |||||
| if (!memory_proto_.SerializeToOstream(&handle)) { | |||||
| MS_LOG(ERROR) << "Save memory profiling data to file failed"; | |||||
| } | |||||
| handle.close(); | |||||
| MS_LOG(INFO) << "Start save memory profiling data to " << file << " end"; | |||||
| return; | |||||
| } | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,124 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H | |||||
| #define MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H | |||||
| #include "proto/memory_profiling.pb.h" | |||||
| #include <string> | |||||
| #include <map> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "utils/ms_context.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
// Memory profiling record of a single node: its name, its id, and the ids of its
// input, output and workspace tensors (each kept in insertion order).
class NodeMemory {
 public:
  NodeMemory() = default;
  ~NodeMemory() = default;

  void SetNodeName(const std::string &name) { node_name_ = name; }
  void SetNodeId(uint64_t node_id) { node_id_ = node_id; }

  // Each Add* call appends one tensor id to the corresponding list.
  void AddInputTensorId(uint64_t tensor_id) { input_tensor_id_.push_back(tensor_id); }
  void AddOutputTensorId(uint64_t tensor_id) { output_tensor_id_.push_back(tensor_id); }
  void AddWorkSpaceTensorId(uint64_t tensor_id) { workspace_tensor_id_.push_back(tensor_id); }

  // Getters return copies of the stored values.
  std::string GetNodeName() const { return node_name_; }
  uint64_t GetNodeId() const { return node_id_; }
  std::vector<uint64_t> GetInputTensorId() const { return input_tensor_id_; }
  std::vector<uint64_t> GetOutputTensorId() const { return output_tensor_id_; }
  std::vector<uint64_t> GetWorkspaceTensorId() const { return workspace_tensor_id_; }

 private:
  std::string node_name_;
  uint64_t node_id_{0};
  std::vector<uint64_t> input_tensor_id_;
  std::vector<uint64_t> output_tensor_id_;
  std::vector<uint64_t> workspace_tensor_id_;
};
// Memory profiling record of a single tensor: id, aligned size, type name,
// life start/end and life-long name.
class TensorMemory {
 public:
  TensorMemory() = default;
  ~TensorMemory() = default;

  void SetTensorId(uint64_t tensor_id) { tensor_id_ = tensor_id; }
  void SetAlignedSize(uint64_t size) { size_ = size; }
  void SetType(const std::string &type) { type_ = type; }
  void SetLifeStart(uint64_t start) { life_start_ = start; }
  void SetLifeEnd(uint64_t end) { life_end_ = end; }
  void SetLifeLong(const std::string &life_long) { life_long_ = life_long; }

  uint64_t GetTensorId() const { return tensor_id_; }
  uint64_t GetAlignedSize() const { return size_; }
  std::string GetType() const { return type_; }
  uint64_t GetLifeStart() const { return life_start_; }
  uint64_t GetLifeEnd() const { return life_end_; }
  std::string GetLifeLong() const { return life_long_; }

 private:
  uint64_t tensor_id_{0};
  uint64_t size_{0};        // aligned tensor size
  std::string type_;        // see TensorType in somas_tensor.h
  uint64_t life_start_{0};  // the exe node id at which tensor memory allocated
  uint64_t life_end_{0};    // the exe node id at which tensor memory deallocated
  std::string life_long_;   // see LifeLongType in somas_tensor.h
};
| class GraphMemory { | |||||
| public: | |||||
| explicit GraphMemory(uint32_t graph_id) : graph_id_(graph_id), static_mem_size_(0) {} | |||||
| ~GraphMemory() = default; | |||||
| void AddStaticMemorySize(uint32_t size) { static_mem_size_ += size; } | |||||
| void AddNodeMemory(const NodeMemory &node) { node_memory_.emplace_back(node); } | |||||
| void AddTensorMemory(const TensorMemory &node) { tensor_memory_.emplace_back(node); } | |||||
| uint32_t GetGraphId() const { return graph_id_; } | |||||
| uint32_t GetStaticMemSize() const { return static_mem_size_; } | |||||
| std::vector<NodeMemory> GetNodeMemory() const { return node_memory_; } | |||||
| std::vector<TensorMemory> GetTensorMemory() const { return tensor_memory_; } | |||||
| private: | |||||
| uint32_t graph_id_; | |||||
| uint32_t static_mem_size_; | |||||
| std::vector<NodeMemory> node_memory_; | |||||
| std::vector<TensorMemory> tensor_memory_; | |||||
| }; | |||||
| class MemoryProfiling { | |||||
| public: | |||||
| MemoryProfiling() = default; | |||||
| ~MemoryProfiling() = default; | |||||
| static MemoryProfiling &GetInstance() { | |||||
| static MemoryProfiling instance; | |||||
| return instance; | |||||
| } | |||||
| MemoryProto &GetMemProto() { return memory_proto_; } | |||||
| std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id); | |||||
| std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id); | |||||
| void SetDeviceMemSize(uint64_t size) { device_mem_size_ = size; } | |||||
| void MemoryToPB(); | |||||
| void SaveMemoryProfiling(); | |||||
| private: | |||||
| MemoryProto memory_proto_; | |||||
| std::map<uint32_t, std::shared_ptr<GraphMemory>> graph_memory_; | |||||
| uint64_t device_mem_size_; | |||||
| }; | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| #endif | |||||
| @@ -0,0 +1,50 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| package mindspore.profiler; | |||||
| message MemoryProto { | |||||
| repeated GraphMemProto graph_mem = 1; // memory usage of multiple graphs | |||||
| int64 total_mem = 2; // total allocated device memory | |||||
| } | |||||
| message GraphMemProto { | |||||
| int64 graph_id = 1; // graph id | |||||
| int64 static_mem = 2; // size of allocated static memory for current graph | |||||
| repeated NodeMemProto node_mems = 3; // execution nodes | |||||
| repeated TensorMemProto tensor_mems = 4; // all tensors | |||||
| string fp_start = 5; // node name of fp start | |||||
| string bp_end = 6; // node name of bp end | |||||
| } | |||||
| message NodeMemProto { | |||||
| string node_name = 1; // node name | |||||
| int64 node_id = 2; // node id with respect to the execution order | |||||
| repeated int64 input_tensor_id = 3; // input tensor id | |||||
| repeated int64 output_tensor_id = 4; // output tensor id | |||||
| repeated int64 workspace_tensor_id = 5; // workspace tensor id | |||||
| } | |||||
| message TensorMemProto { | |||||
| int64 tensor_id = 1; // tensor id | |||||
| int64 size = 2; // aligned tensor size | |||||
| string type = 3; // tensor type, e.g. Common, OutputOnly | |||||
| int64 life_start = 4; // the exe node id at which tensor memory allocated | |||||
| int64 life_end = 5; // the exe node id at which tensor memory deallocated | |||||
| string life_long = 6; // see LifeLongType enum | |||||
| } | |||||
| @@ -94,8 +94,8 @@ REGISTER_PYBIND_DEFINE(MsContextPy, ([](const py::module *m) { | |||||
| .value("save_graphs_path", MsCtxParam::MS_CTX_SAVE_GRAPHS_PATH) | .value("save_graphs_path", MsCtxParam::MS_CTX_SAVE_GRAPHS_PATH) | ||||
| .value("variable_memory_max_size", MsCtxParam::MS_CTX_VARIABLE_MEMORY_MAX_SIZE) | .value("variable_memory_max_size", MsCtxParam::MS_CTX_VARIABLE_MEMORY_MAX_SIZE) | ||||
| .value("device_id", MsCtxParam::MS_CTX_DEVICE_ID) | .value("device_id", MsCtxParam::MS_CTX_DEVICE_ID) | ||||
| .value("max_call_depth", MsCtxParam::MS_CTX_MAX_CALL_DEPTH); | |||||
| .value("max_call_depth", MsCtxParam::MS_CTX_MAX_CALL_DEPTH) | |||||
| .value("profiling_dir_path", MsCtxParam::MS_CTX_PROFILING_DIR_PATH); | |||||
| (void)py::class_<mindspore::MsContext, std::shared_ptr<mindspore::MsContext>>(*m, "MSContext") | (void)py::class_<mindspore::MsContext, std::shared_ptr<mindspore::MsContext>>(*m, "MSContext") | ||||
| .def_static("get_instance", &mindspore::MsContext::GetInstance, "Get ms context instance.") | .def_static("get_instance", &mindspore::MsContext::GetInstance, "Get ms context instance.") | ||||
| .def("get_param", &mindspore::MsCtxGetParameter, "Get value of specified paramter.") | .def("get_param", &mindspore::MsCtxGetParameter, "Get value of specified paramter.") | ||||
| @@ -895,4 +895,9 @@ void AscendKernelRuntime::KernelLaunchProfiling(const std::string &kernel_name) | |||||
| MS_LOG(EXCEPTION) << "Too many profiling data"; | MS_LOG(EXCEPTION) << "Too many profiling data"; | ||||
| } | } | ||||
| } | } | ||||
| uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const { | |||||
| auto ascend_mem_manager = dynamic_pointer_cast<AscendMemoryManager>(mem_manager_); | |||||
| return ascend_mem_manager->GetDeviceMemSize(); | |||||
| } | |||||
| } // namespace mindspore::device::ascend | } // namespace mindspore::device::ascend | ||||
| @@ -55,6 +55,7 @@ class AscendKernelRuntime : public KernelRuntime { | |||||
| void CreateContext() override; | void CreateContext() override; | ||||
| void *context() const override { return rt_context_; } | void *context() const override { return rt_context_; } | ||||
| void PreInit() override; | void PreInit() override; | ||||
| uint64_t GetAvailableMemMaxSize() const; | |||||
| protected: | protected: | ||||
| DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | ||||
| @@ -18,6 +18,12 @@ | |||||
| #include "runtime/device/ascend/ascend_memory_pool.h" | #include "runtime/device/ascend/ascend_memory_pool.h" | ||||
| #include "utils/ms_context.h" | #include "utils/ms_context.h" | ||||
| #include "runtime/mem.h" | #include "runtime/mem.h" | ||||
| #include "runtime/device/ascend/profiling/profiling_manager.h" | |||||
| #include "profiler/device/common/memory_profiling.h" | |||||
| using mindspore::device::ascend::ProfilingManager; | |||||
| using mindspore::profiler::MemoryProfiling; | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| namespace ascend { | namespace ascend { | ||||
| @@ -44,6 +50,11 @@ void AscendMemoryManager::MallocDeviceMemory() { | |||||
| AscendMemoryPool::GetInstance().Init(device_mem_base_, device_mem_size_, dynamic_mem_offset_); | AscendMemoryPool::GetInstance().Init(device_mem_base_, device_mem_size_, dynamic_mem_offset_); | ||||
| } | } | ||||
| uint64_t AscendMemoryManager::GetDeviceMemSize() { | |||||
| auto mem_size = GetDeviceMemSizeFromContext(); | |||||
| return mem_size == 0 ? kAscendDeviceMemSize : mem_size; | |||||
| } | |||||
| uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { | uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { | ||||
| auto context = MsContext::GetInstance(); | auto context = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context); | MS_EXCEPTION_IF_NULL(context); | ||||
| @@ -88,7 +99,7 @@ void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { | |||||
| return AscendMemoryPool::GetInstance().AllocTensorMem(align_size); | return AscendMemoryPool::GetInstance().AllocTensorMem(align_size); | ||||
| } | } | ||||
| uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem) { | |||||
| uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) { | |||||
| size_t align_size = 0; | size_t align_size = 0; | ||||
| if (communication_mem) { | if (communication_mem) { | ||||
| align_size = GetCommunicationAlignSize(size); | align_size = GetCommunicationAlignSize(size); | ||||
| @@ -96,6 +107,16 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me | |||||
| align_size = GetCommonAlignSize(size); | align_size = GetCommonAlignSize(size); | ||||
| } | } | ||||
| if (ProfilingManager::GetInstance().IsProfiling() && graph_id != kInvalidGraphId) { | |||||
| auto node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id); | |||||
| if (node == nullptr) { | |||||
| node = MemoryProfiling::GetInstance().AddGraphMemoryNode(graph_id); | |||||
| MS_LOG(INFO) << "Add graph memory node for static memory profiling, graph id is " << graph_id; | |||||
| } | |||||
| node->AddStaticMemorySize(align_size); | |||||
| } | |||||
| auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset(); | auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset(); | ||||
| MS_LOG(INFO) << "Malloc Memory: Static, total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_ | MS_LOG(INFO) << "Malloc Memory: Static, total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_ | ||||
| << "] memory pool[" << device_mem_size_ - device_mem_pool_offset << "])" | << "] memory pool[" << device_mem_size_ - device_mem_pool_offset << "])" | ||||
| @@ -139,6 +160,13 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m | |||||
| return device_mem_base_ + offset; | return device_mem_base_ + offset; | ||||
| } | } | ||||
| } | } | ||||
| void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) { | |||||
| MemoryManager::MallocSomasDynamicMem(graph); | |||||
| if (ProfilingManager::GetInstance().IsProfiling()) { | |||||
| somas_reuse_util_ptr_->ConvertToProfilingNode(graph->graph_id()); | |||||
| } | |||||
| } | |||||
| } // namespace ascend | } // namespace ascend | ||||
| } // namespace device | } // namespace device | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -31,9 +31,11 @@ class AscendMemoryManager : public MemoryManager { | |||||
| void ResetDynamicMemory() override; | void ResetDynamicMemory() override; | ||||
| void ClearGlobalIdleMem() override; | void ClearGlobalIdleMem() override; | ||||
| void *MallocMemFromMemPool(size_t size) override; | void *MallocMemFromMemPool(size_t size) override; | ||||
| uint64_t GetDeviceMemSize(); | |||||
| void MallocSomasDynamicMem(const session::KernelGraph *graph); | |||||
| protected: | protected: | ||||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; | |||||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override; | |||||
| uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; | uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; | ||||
| private: | private: | ||||
| @@ -22,7 +22,7 @@ namespace mindspore { | |||||
| namespace device { | namespace device { | ||||
| namespace cpu { | namespace cpu { | ||||
| uint8_t *CPUMemoryManager::MallocStaticMem(size_t size, bool) { | |||||
| uint8_t *CPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) { | |||||
| void *ptr = malloc(size); | void *ptr = malloc(size); | ||||
| if (ptr != nullptr) { | if (ptr != nullptr) { | ||||
| memset_s(ptr, size, 0, size); | memset_s(ptr, size, 0, size); | ||||
| @@ -44,7 +44,7 @@ class CPUMemoryManager : public MemoryManager { | |||||
| void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); | void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); | ||||
| protected: | protected: | ||||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; | |||||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override; | |||||
| uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; | uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; | ||||
| private: | private: | ||||
| @@ -101,7 +101,7 @@ void GPUMemoryManager::FreeDeviceMemory() { | |||||
| GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); | GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); | ||||
| } | } | ||||
| uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) { | |||||
| uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) { | |||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| if (context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) { | if (context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) { | ||||
| @@ -36,7 +36,7 @@ class GPUMemoryManager : public MemoryManager { | |||||
| std::vector<size_t> size_list) override; | std::vector<size_t> size_list) override; | ||||
| protected: | protected: | ||||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; | |||||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override; | |||||
| }; | }; | ||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace device | } // namespace device | ||||
| @@ -360,7 +360,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { | |||||
| auto tensor_size = CountNodeDeviceMemorySize(item, index); | auto tensor_size = CountNodeDeviceMemorySize(item, index); | ||||
| device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); | device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); | ||||
| MS_LOG(DEBUG) << "Malloc static memory for " << item->fullname_with_scope(); | MS_LOG(DEBUG) << "Malloc static memory for " << item->fullname_with_scope(); | ||||
| if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address) == nullptr) { | |||||
| if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph->graph_id()) == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size; | MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Malloc Input for graph " << graph->graph_id() << ", node: " << item->fullname_with_scope() | MS_LOG(INFO) << "Malloc Input for graph " << graph->graph_id() << ", node: " << item->fullname_with_scope() | ||||
| @@ -629,6 +629,10 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||||
| MS_EXCEPTION_IF_NULL(ms_context); | MS_EXCEPTION_IF_NULL(ms_context); | ||||
| std::vector<tensor::TensorPtr> tensors; | std::vector<tensor::TensorPtr> tensors; | ||||
| TensorValueToTensor(node_value, &tensors); | TensorValueToTensor(node_value, &tensors); | ||||
| // Graph id should be passed to record static memory if profiling is enabled. | |||||
| auto kernel_info = static_cast<device::KernelInfo *>(value_node->kernel_info()); | |||||
| MS_EXCEPTION_IF_NULL(kernel_info); | |||||
| uint32_t graph_id = kernel_info->graph_id(); | |||||
| for (const auto &tensor : tensors) { | for (const auto &tensor : tensors) { | ||||
| if (tensor == nullptr) { | if (tensor == nullptr) { | ||||
| MS_LOG(WARNING) << "Tensor is null"; | MS_LOG(WARNING) << "Tensor is null"; | ||||
| @@ -651,7 +655,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||||
| if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) && | if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) && | ||||
| !mem_manager_->MallocMemFromMemPool(address, node_size)) { | !mem_manager_->MallocMemFromMemPool(address, node_size)) { | ||||
| MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << node_size; | MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << node_size; | ||||
| } else if (mem_manager_->MallocMem(kStaticMem, node_size, address) == nullptr) { | |||||
| } else if (mem_manager_->MallocMem(kStaticMem, node_size, address, graph_id) == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << node_size; | MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << node_size; | ||||
| } | } | ||||
| AnfAlgo::SetOutputAddr(address, output_idx, value_node.get()); | AnfAlgo::SetOutputAddr(address, output_idx, value_node.get()); | ||||
| @@ -662,6 +666,8 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||||
| << "node dtype is " << AnfAlgo::GetOutputInferDataType(value_node, output_idx); | << "node dtype is " << AnfAlgo::GetOutputInferDataType(value_node, output_idx); | ||||
| } | } | ||||
| } | } | ||||
| return; | |||||
| } | } | ||||
| void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { | void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { | ||||
| @@ -690,7 +696,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { | |||||
| if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) && | if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) && | ||||
| !mem_manager_->MallocMemFromMemPool(address, tensor_size)) { | !mem_manager_->MallocMemFromMemPool(address, tensor_size)) { | ||||
| MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << tensor_size; | MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << tensor_size; | ||||
| } else if (mem_manager_->MallocMem(kStaticMem, tensor_size, address) == nullptr) { | |||||
| } else if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size; | MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size; | ||||
| } | } | ||||
| AnfAlgo::SetOutputAddr(address, 0, value_node.get()); | AnfAlgo::SetOutputAddr(address, 0, value_node.get()); | ||||
| @@ -100,6 +100,7 @@ class KernelRuntime { | |||||
| } | } | ||||
| virtual void PreInit() {} | virtual void PreInit() {} | ||||
| virtual uint64_t GetAvailableMemMaxSize() const { return 0; } | |||||
| protected: | protected: | ||||
| virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | ||||
| @@ -18,8 +18,10 @@ | |||||
| #include <string> | #include <string> | ||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "utils/ms_context.h" | #include "utils/ms_context.h" | ||||
| using mindspore::memreuse::BestFitMemReuse; | using mindspore::memreuse::BestFitMemReuse; | ||||
| using mindspore::memreuse::MemReuseUtilPtr; | using mindspore::memreuse::MemReuseUtilPtr; | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { | size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { | ||||
| @@ -139,11 +141,11 @@ uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, | |||||
| return MallocDynamicMem(size, false); | return MallocDynamicMem(size, false); | ||||
| } | } | ||||
| uint8_t *MemoryManager::MallocMem(MemType type, size_t size, const DeviceAddressPtr &address) { | |||||
| uint8_t *MemoryManager::MallocMem(MemType type, size_t size, const DeviceAddressPtr &address, uint32_t graph_id) { | |||||
| MS_EXCEPTION_IF_NULL(address); | MS_EXCEPTION_IF_NULL(address); | ||||
| uint8_t *ptr = nullptr; | uint8_t *ptr = nullptr; | ||||
| if (type == kStaticMem) { | if (type == kStaticMem) { | ||||
| ptr = MallocStaticMem(size, false); | |||||
| ptr = MallocStaticMem(size, false, graph_id); | |||||
| address->from_mem_pool_ = true; | address->from_mem_pool_ = true; | ||||
| } else if (type == kDynamicMem) { | } else if (type == kDynamicMem) { | ||||
| ptr = MallocDynamicMem(size, false); | ptr = MallocDynamicMem(size, false); | ||||
| @@ -152,7 +154,7 @@ uint8_t *MemoryManager::MallocMem(MemType type, size_t size, const DeviceAddress | |||||
| return ptr; | return ptr; | ||||
| } | } | ||||
| uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) { | |||||
| uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) { | |||||
| size_t align_size = 0; | size_t align_size = 0; | ||||
| if (communication_mem) { | if (communication_mem) { | ||||
| align_size = GetCommunicationAlignSize(size); | align_size = GetCommunicationAlignSize(size); | ||||
| @@ -44,11 +44,12 @@ class MemoryManager { | |||||
| virtual void ClearGlobalIdleMem() {} | virtual void ClearGlobalIdleMem() {} | ||||
| void MallocReusedDynamicMem(const session::KernelGraph *graph); | void MallocReusedDynamicMem(const session::KernelGraph *graph); | ||||
| void MallocSomasDynamicMem(const session::KernelGraph *graph); | |||||
| virtual void MallocSomasDynamicMem(const session::KernelGraph *graph); | |||||
| uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size, | uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size, | ||||
| const DeviceAddressPtr &address, bool comm_mem); | const DeviceAddressPtr &address, bool comm_mem); | ||||
| uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size); | uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size); | ||||
| virtual uint8_t *MallocMem(MemType type, size_t size, const DeviceAddressPtr &address); | |||||
| virtual uint8_t *MallocMem(MemType type, size_t size, const DeviceAddressPtr &address, | |||||
| uint32_t graph_id = kInvalidGraphId); | |||||
| virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); | virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); | ||||
| virtual void *MallocMemFromMemPool(size_t size); | virtual void *MallocMemFromMemPool(size_t size); | ||||
| @@ -62,7 +63,7 @@ class MemoryManager { | |||||
| size_t GetCommunicationAlignSize(size_t input_size) const; | size_t GetCommunicationAlignSize(size_t input_size) const; | ||||
| protected: | protected: | ||||
| virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); | |||||
| virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId); | |||||
| virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); | virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); | ||||
| uint8_t *device_mem_base_{nullptr}; | uint8_t *device_mem_base_{nullptr}; | ||||
| uint64_t device_mem_size_{0}; | uint64_t device_mem_size_{0}; | ||||
| @@ -73,6 +73,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { | |||||
| set_param<bool>(MS_CTX_ENABLE_GRAPH_KERNEL, false); | set_param<bool>(MS_CTX_ENABLE_GRAPH_KERNEL, false); | ||||
| set_param<bool>(MS_CTX_ENABLE_SPARSE, false); | set_param<bool>(MS_CTX_ENABLE_SPARSE, false); | ||||
| set_param<bool>(MS_CTX_ENABLE_PARALLEL_SPLIT, false); | set_param<bool>(MS_CTX_ENABLE_PARALLEL_SPLIT, false); | ||||
| set_param<std::string>(MS_CTX_PROFILING_DIR_PATH, ""); | |||||
| backend_policy_ = policy_map_[policy]; | backend_policy_ = policy_map_[policy]; | ||||
| } | } | ||||
| @@ -104,6 +104,7 @@ enum MsCtxParam : unsigned { | |||||
| MS_CTX_SAVE_GRAPHS_PATH, | MS_CTX_SAVE_GRAPHS_PATH, | ||||
| MS_CTX_VARIABLE_MEMORY_MAX_SIZE, | MS_CTX_VARIABLE_MEMORY_MAX_SIZE, | ||||
| MS_CTX_PYTHON_EXE_PATH, | MS_CTX_PYTHON_EXE_PATH, | ||||
| MS_CTX_PROFILING_DIR_PATH, | |||||
| MS_CTX_TYPE_STRING_END, | MS_CTX_TYPE_STRING_END, | ||||
| // parameter numbers of each type | // parameter numbers of each type | ||||
| @@ -140,7 +140,8 @@ class Profiler: | |||||
| logger.error(msg) | logger.error(msg) | ||||
| raise ValueError(msg) | raise ValueError(msg) | ||||
| # use context interface to open profiling, for the new mindspore version(after 2020.5.21) | # use context interface to open profiling, for the new mindspore version(after 2020.5.21) | ||||
| context.set_context(enable_profiling=True, profiling_options=profiling_options) | |||||
| context.set_context(enable_profiling=True, profiling_options=profiling_options, | |||||
| profiling_dir_path=self._output_path) | |||||
| base_profiling_container_path = os.path.join(self._output_path, "container") | base_profiling_container_path = os.path.join(self._output_path, "container") | ||||
| container_path = os.path.join(base_profiling_container_path, self._dev_id) | container_path = os.path.join(base_profiling_container_path, self._dev_id) | ||||
| data_path = os.path.join(container_path, "data") | data_path = os.path.join(container_path, "data") | ||||
| @@ -4,12 +4,12 @@ message("build ut testcases...") | |||||
| project(ut) | project(ut) | ||||
| set(PROJECT_DIR "${PROJECT_SOURCE_DIR}/../../..") | set(PROJECT_DIR "${PROJECT_SOURCE_DIR}/../../..") | ||||
| if (ENABLE_DUMP_IR) | |||||
| if(ENABLE_DUMP_IR) | |||||
| add_compile_definitions(ENABLE_DUMP_IR) | add_compile_definitions(ENABLE_DUMP_IR) | ||||
| endif (ENABLE_DUMP_IR) | |||||
| if (ENABLE_D) | |||||
| endif() | |||||
| if(ENABLE_D) | |||||
| add_compile_definitions(ENABLE_D) | add_compile_definitions(ENABLE_D) | ||||
| endif () | |||||
| endif() | |||||
| #add python lib and include for all ut executables; | #add python lib and include for all ut executables; | ||||
| message("PYTHON_INCLUDE_DIRS = ${PYTHON_INCLUDE_DIRS}") | message("PYTHON_INCLUDE_DIRS = ${PYTHON_INCLUDE_DIRS}") | ||||
| @@ -25,13 +25,13 @@ MESSAGE("check ut_test ${CMAKE_BINARY_DIR}") | |||||
| link_directories(${MS_CCSRC_BUILD_PATH}) | link_directories(${MS_CCSRC_BUILD_PATH}) | ||||
| if (ENABLE_MINDDATA) | |||||
| if(ENABLE_MINDDATA) | |||||
| add_definitions(-D ENABLE_MINDDATA) | add_definitions(-D ENABLE_MINDDATA) | ||||
| link_directories(${MS_CCSRC_BUILD_PATH}/minddata/dataset) | link_directories(${MS_CCSRC_BUILD_PATH}/minddata/dataset) | ||||
| link_directories(${MS_CCSRC_BUILD_PATH}/minddata/mindrecord) | link_directories(${MS_CCSRC_BUILD_PATH}/minddata/mindrecord) | ||||
| endif () | |||||
| endif() | |||||
| # fetch ut test files | # fetch ut test files | ||||
| if (ENABLE_MINDDATA) | |||||
| if(ENABLE_MINDDATA) | |||||
| include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image) | include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image) | ||||
| file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | ||||
| ./stub/*.cc | ./stub/*.cc | ||||
| @@ -61,7 +61,7 @@ if (ENABLE_MINDDATA) | |||||
| ./cxx_api/*.cc | ./cxx_api/*.cc | ||||
| ) | ) | ||||
| if (NOT ENABLE_PYTHON) | |||||
| if(NOT ENABLE_PYTHON) | |||||
| set(PYTHON_RELATED_SRCS | set(PYTHON_RELATED_SRCS | ||||
| dataset/filter_op_test.cc | dataset/filter_op_test.cc | ||||
| dataset/voc_op_test.cc | dataset/voc_op_test.cc | ||||
| @@ -69,15 +69,15 @@ if (ENABLE_MINDDATA) | |||||
| dataset/sentence_piece_vocab_op_test.cc | dataset/sentence_piece_vocab_op_test.cc | ||||
| ) | ) | ||||
| list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS}) | list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS}) | ||||
| endif () | |||||
| else () | |||||
| endif() | |||||
| else() | |||||
| file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) | file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) | ||||
| foreach (OBJ ${TEMP_UT_SRCS}) | |||||
| if (NOT ${OBJ} MATCHES "./dataset/" AND NOT ${OBJ} MATCHES "./mindrecord/") | |||||
| foreach(OBJ ${TEMP_UT_SRCS}) | |||||
| if(NOT ${OBJ} MATCHES "./dataset/" AND NOT ${OBJ} MATCHES "./mindrecord/") | |||||
| list(APPEND UT_SRCS ${OBJ}) | list(APPEND UT_SRCS ${OBJ}) | ||||
| endif () | |||||
| endforeach () | |||||
| endif () | |||||
| endif() | |||||
| endforeach() | |||||
| endif() | |||||
| file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | ||||
| "../../../mindspore/ccsrc/pybind_api/*.cc" | "../../../mindspore/ccsrc/pybind_api/*.cc" | ||||
| @@ -133,9 +133,11 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| "../../../mindspore/ccsrc/transform/graph_ir/*.cc" | "../../../mindspore/ccsrc/transform/graph_ir/*.cc" | ||||
| "../../../mindspore/ccsrc/transform/graph_ir/op_declare/*.cc" | "../../../mindspore/ccsrc/transform/graph_ir/op_declare/*.cc" | ||||
| "../../../mindspore/ccsrc/ps/*.cc" | "../../../mindspore/ccsrc/ps/*.cc" | ||||
| "../../../mindspore/ccsrc/profiler/device/common/*.cc" | |||||
| ) | ) | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") | |||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST | |||||
| "../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") | |||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/util.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/util.cc") | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/scheduler.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/scheduler.cc") | ||||
| list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/optimizer_info.cc") | list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/optimizer_info.cc") | ||||
| @@ -154,31 +156,32 @@ add_dependencies(_ut_ut_obj engine-cache-server) | |||||
| add_executable(ut_tests $<TARGET_OBJECTS:_ut_ut_obj> | add_executable(ut_tests $<TARGET_OBJECTS:_ut_ut_obj> | ||||
| $<TARGET_OBJECTS:_ut_mindspore_obj>) | $<TARGET_OBJECTS:_ut_mindspore_obj>) | ||||
| if (ENABLE_GE) | |||||
| if (ENABLE_TRAIN) | |||||
| if(ENABLE_GE) | |||||
| if(ENABLE_TRAIN) | |||||
| target_link_libraries(ut_tests PRIVATE graph ge_runner) | target_link_libraries(ut_tests PRIVATE graph ge_runner) | ||||
| else () | |||||
| else() | |||||
| target_link_libraries(ut_tests PRIVATE graph ge_client) | target_link_libraries(ut_tests PRIVATE graph ge_client) | ||||
| endif () | |||||
| endif() | |||||
| target_link_libraries(mindspore PRIVATE tsdclient) | target_link_libraries(mindspore PRIVATE tsdclient) | ||||
| endif () | |||||
| endif() | |||||
| if (CMAKE_SYSTEM_NAME MATCHES "Linux") | |||||
| target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore::event mindspore::event_pthreads mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl) | |||||
| if (ENABLE_MINDDATA) | |||||
| if(CMAKE_SYSTEM_NAME MATCHES "Linux") | |||||
| target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore::event mindspore::event_pthreads | |||||
| mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl) | |||||
| if(ENABLE_MINDDATA) | |||||
| # AUX_SOURCE_DIRECTORY(LITE_CV_FILES) | # AUX_SOURCE_DIRECTORY(LITE_CV_FILES) | ||||
| # message(STATUS "xxxxxxxxxxxxxxxxx"${LITE_CV_FILES} ) | # message(STATUS "xxxxxxxxxxxxxxxxx"${LITE_CV_FILES} ) | ||||
| # add_library(_live_cv OBJECT ${LITE_CV_FILES}) | # add_library(_live_cv OBJECT ${LITE_CV_FILES}) | ||||
| target_link_libraries(ut_tests PRIVATE _c_dataengine _c_mindrecord) | target_link_libraries(ut_tests PRIVATE _c_dataengine _c_mindrecord) | ||||
| endif () | |||||
| else () | |||||
| endif() | |||||
| else() | |||||
| target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore_gvar ${PYTHON_LIBRARIES}) | target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore_gvar ${PYTHON_LIBRARIES}) | ||||
| endif () | |||||
| if (USE_GLOG) | |||||
| endif() | |||||
| if(USE_GLOG) | |||||
| target_link_libraries(ut_tests PRIVATE mindspore::glog) | target_link_libraries(ut_tests PRIVATE mindspore::glog) | ||||
| endif () | |||||
| endif() | |||||
| target_link_libraries(ut_tests PRIVATE mindspore mindspore_shared_lib securec graph) | target_link_libraries(ut_tests PRIVATE mindspore mindspore_shared_lib securec graph) | ||||