Browse Source

profiler memory

tags/v1.2.0-rc1
yanghaitao1 5 years ago
parent
commit
8d147deb07
25 changed files with 560 additions and 147 deletions
  1. +115
    -93
      mindspore/ccsrc/CMakeLists.txt
  2. +48
    -0
      mindspore/ccsrc/backend/optimizer/somas/somas.cc
  3. +2
    -0
      mindspore/ccsrc/backend/optimizer/somas/somas.h
  4. +15
    -0
      mindspore/ccsrc/backend/session/ascend_session.cc
  5. +10
    -7
      mindspore/ccsrc/profiler/CMakeLists.txt
  6. +97
    -0
      mindspore/ccsrc/profiler/device/common/memory_profiling.cc
  7. +124
    -0
      mindspore/ccsrc/profiler/device/common/memory_profiling.h
  8. +50
    -0
      mindspore/ccsrc/profiler/device/common/memory_profiling.proto
  9. +2
    -2
      mindspore/ccsrc/pybind_api/utils/ms_context_py.cc
  10. +5
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
  11. +1
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
  12. +29
    -1
      mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
  13. +3
    -1
      mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h
  14. +1
    -1
      mindspore/ccsrc/runtime/device/cpu/cpu_memory_manager.cc
  15. +1
    -1
      mindspore/ccsrc/runtime/device/cpu/cpu_memory_manager.h
  16. +1
    -1
      mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc
  17. +1
    -1
      mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h
  18. +9
    -3
      mindspore/ccsrc/runtime/device/kernel_runtime.cc
  19. +1
    -0
      mindspore/ccsrc/runtime/device/kernel_runtime.h
  20. +5
    -3
      mindspore/ccsrc/runtime/device/memory_manager.cc
  21. +4
    -3
      mindspore/ccsrc/runtime/device/memory_manager.h
  22. +1
    -0
      mindspore/core/utils/ms_context.cc
  23. +1
    -0
      mindspore/core/utils/ms_context.h
  24. +2
    -1
      mindspore/profiler/profiling.py
  25. +32
    -29
      tests/ut/cpp/CMakeLists.txt

+ 115
- 93
mindspore/ccsrc/CMakeLists.txt View File

@@ -3,33 +3,34 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/core)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_BINARY_DIR})

if (ENABLE_ACL)
if(ENABLE_ACL)
set(ASCEND_PATH /usr/local/Ascend)
include_directories(${ASCEND_PATH}/acllib/include)
link_directories(${ASCEND_PATH}/acllib/lib64/)
find_library(ascendcl acl_dvpp ${ASCEND_PATH}/acllib/lib64)
endif ()
endif()

if (NOT(CMAKE_SYSTEM_NAME MATCHES "Darwin"))
if(NOT(CMAKE_SYSTEM_NAME MATCHES "Darwin"))
link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-delete-non-abstract-non-virtual-dtor")
endif ()
endif()

if (CMAKE_SYSTEM_NAME MATCHES "Windows")
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF")
add_compile_definitions(BUILDING_DLL)
endif()

if (ENABLE_MPI)
if(ENABLE_MPI)
add_compile_definitions(ENABLE_MPI)
endif ()
endif()

if(ENABLE_GPU)
find_package(CUDA REQUIRED)
find_package(Threads)
if(${CUDA_VERSION} VERSION_LESS ${MS_REQUIRE_CUDA_VERSION})
message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, but only CUDA ${CUDA_VERSION} found.")
message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, \
but only CUDA ${CUDA_VERSION} found.")
endif()
enable_language(CUDA)
if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "")
@@ -40,31 +41,36 @@ if(ENABLE_GPU)
endif()
endif()

if (DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "")
if(DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "")
set(CUDNN_INCLUDE_DIR $ENV{CUDNN_HOME}/include)
set(CUDNN_LIBRARY_DIR $ENV{CUDNN_HOME}/lib64)
find_path(CUDNN_INCLUDE_PATH cudnn.h HINTS ${CUDNN_INCLUDE_DIR} NO_DEFAULT_PATH)
find_library(CUDNN_LIBRARY_PATH "cudnn" HINTS ${CUDNN_LIBRARY_DIR} NO_DEFAULT_PATH)
if (CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to cudnn installation position.")
if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to \
cudnn installation position.")
endif()
if (CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to cudnn installation position.")
if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to \
cudnn installation position.")
endif()
else()
list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR})
find_path(CUDNN_INCLUDE_PATH cudnn.h PATH_SUFFIXES cuda/inclulde include cuda)
find_library(CUDNN_LIBRARY_PATH "cudnn" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu)
if (CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put cudnn header file in cuda include path \
or user include path(eg. /usr/local/cuda/include; /usr/local/include; /usr/include), if cudnn library is installed in other position,\
please set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h in {CUDNN_HOME}/include.")
if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put \
cudnn header file in cuda include path or user include path(eg. /usr/local/cuda/include; \
/usr/local/include; /usr/include), if cudnn library is installed in other position, please \
set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h \
in {CUDNN_HOME}/include.")
endif()
if (CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put cudnn library file in \
cuda library path or user library path(eg. /usr/local/cuda/lib64; /usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib),\
if cudnn library is installed in other position, please set environment variable CUDNN_HOME to cudnn installation position, \
there should be cudnn library file in {CUDNN_HOME}/lib64.")
if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put \
cudnn library file in cuda library path or user library path(eg. /usr/local/cuda/lib64; \
/usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib), if cudnn library is installed in other \
position, please set environment variable CUDNN_HOME to cudnn installation position, there should \
be cudnn library file in {CUDNN_HOME}/lib64.")
endif()
endif()

@@ -102,7 +108,7 @@ if(ENABLE_GPU)
cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
add_compile_definitions(ENABLE_GPU)
endif ()
endif()


## make protobuf files
@@ -117,7 +123,13 @@ file(GLOB_RECURSE COMM_PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ps/core/pr
ms_protobuf_generate(COMM_PROTO_SRCS COMM_PROTO_HDRS ${COMM_PROTO_IN})
list(APPEND MINDSPORE_PROTO_LIST ${COMM_PROTO_SRCS})

if (ENABLE_DEBUGGER)
include_directories("${CMAKE_BINARY_DIR}/profiler/device/common")
file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"profiler/device/common/memory_profiling.proto")
ms_protobuf_generate(PROFILER_MEM_PROTO_SRCS PROFILER_MEM_PROTO_HDRS ${PROFILER_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_SRCS})

if(ENABLE_DEBUGGER)
# debugger: compile proto files
include_directories("${CMAKE_BINARY_DIR}/debug/debugger")
file(GLOB_RECURSE DEBUGGER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_graph.proto")
@@ -126,9 +138,9 @@ if (ENABLE_DEBUGGER)
ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS})
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS})
endif ()
endif()

if (ENABLE_DUMP_PROTO)
if(ENABLE_DUMP_PROTO)
include_directories(${CMAKE_BINARY_DIR})

file(GLOB_RECURSE PROTO_PY RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
@@ -144,9 +156,9 @@ if (ENABLE_DUMP_PROTO)

list(APPEND MINDSPORE_PROTO_LIST ${PROTO_SRCS})
list(APPEND MINDSPORE_PROTO_LIST ${PY_SRCS})
endif ()
endif()

if (ENABLE_D)
if(ENABLE_D)
include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
@@ -159,9 +171,9 @@ if (ENABLE_D)
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})

add_compile_definitions(ENABLE_D)
endif ()
endif()

if (MINDSPORE_PROTO_LIST)
if(MINDSPORE_PROTO_LIST)
add_library(proto_input STATIC ${MINDSPORE_PROTO_LIST})
set_target_properties(proto_input PROPERTIES COMPILE_FLAGS "-Wno-unused-variable")
endif()
@@ -183,58 +195,58 @@ set(SUB_COMP
common debug pybind_api utils vm profiler ps
)

foreach (_comp ${SUB_COMP})
foreach(_comp ${SUB_COMP})
add_subdirectory(${_comp})
string(REPLACE "/" "_" sub ${_comp})
if (TARGET _mindspore_${sub}_obj)
if(TARGET _mindspore_${sub}_obj)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
add_dependencies(_mindspore_${sub}_obj proto_input)
endif ()
endforeach ()
endif()
endforeach()

set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})

target_link_libraries(mindspore mindspore_core)

if (ENABLE_DEBUGGER)
if(ENABLE_DEBUGGER)
# debugger: link grpc
target_link_libraries(proto_input mindspore::grpc++)
endif()

target_link_libraries(mindspore securec mindspore::flatbuffers)

if (NOT WIN32)
if(NOT WIN32)
target_link_libraries(mindspore dl)
endif()

if (ENABLE_GE)
if(ENABLE_GE)
if(ENABLE_TRAIN)
target_link_libraries(mindspore ge_runner hccl)
else ()
else()
target_link_libraries(mindspore ge_client)
endif ()
endif()
target_link_libraries(mindspore graph tsdclient datatransfer)
endif()

if (ENABLE_D)
if (DEFINED ENV{D_LINK_PATH})
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
if(ENABLE_D)
if(DEFINED ENV{D_LINK_PATH})
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
MESSAGE("system processor matches aarch64")
set(D_LIB_PATH $ENV{D_LINK_PATH}/aarch64)
elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
MESSAGE("system processor matches x86_64")
set(D_LIB_PATH $ENV{D_LINK_PATH}/x86_64)
else ()
else()
MESSAGE("system ${CMAKE_HOST_SYSTEM_PROCESSOR} not support")
endif()
else ()
else()
MESSAGE("use system default lib")
if (DEFINED ENV{ASCEND_CUSTOM_PATH})
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH})
else ()
else()
set(ASCEND_PATH /usr/local/Ascend)
endif ()
endif()
set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver)
set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
@@ -246,8 +258,10 @@ if (ENABLE_D)
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(CCE_LIB cce ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(RUNTIME_LIB runtime ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
${ASCEND_DRIVER_BACK_PATH})
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
${ASCEND_DRIVER_BACK_PATH})
find_library(PROFILING msprofiler_fwk ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(REGISTER register ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(PLATFORM platform ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
@@ -255,42 +269,48 @@ if (ENABLE_D)
# hccl_adpter
find_library(HCCL_ADPTER hcom_graph_adaptor ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(HCCL_RA ra ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(HCCL_BUILDER hcom_opskernel_builder ${ASCEND_RUNTIME_PATH}/plugin/opskernel ${ASCEND_TOOLKIT_RUNTIME_PATH}/plugin/opskernel)
find_library(HCCL_BUILDER hcom_opskernel_builder ${ASCEND_RUNTIME_PATH}/plugin/opskernel
${ASCEND_TOOLKIT_RUNTIME_PATH}/plugin/opskernel)

add_library(ms_profile SHARED ${CMAKE_CURRENT_SOURCE_DIR}/runtime/device/ascend/profiling/profiling_callback_register.cc)
add_library(ms_profile SHARED
${CMAKE_CURRENT_SOURCE_DIR}/runtime/device/ascend/profiling/profiling_callback_register.cc)
set_target_properties(ms_profile PROPERTIES LINKER_LANGUAGE CXX)
target_link_options(ms_profile PRIVATE -Wl,-init,common_log_init)
target_link_libraries(ms_profile -Wl,--start-group -Wl,--whole-archive ${PROFILING} -Wl,--no-whole-archive mindspore::protobuf -Wl,--end-group)
target_link_libraries(ms_profile -Wl,--start-group -Wl,--whole-archive ${PROFILING} -Wl,--no-whole-archive
mindspore::protobuf -Wl,--end-group)
target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER}
${HCCL_ADPTER} ${REGISTER} -Wl,--no-as-needed ${OPTILING} ${HCCL_BUILDER} ${HCCL_RA} ${PLATFORM})
${HCCL_ADPTER} ${REGISTER} -Wl,--no-as-needed ${OPTILING} ${HCCL_BUILDER}
${HCCL_RA} ${PLATFORM})
target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group)
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece -Wl,--end-group)
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece
-Wl,--end-group)
elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
target_link_libraries(mindspore -Wl proto_input mindspore::protobuf mindspore::sentencepiece -Wl)
else ()
else()
target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group)
endif ()
endif()

# set c_expression building
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
set_property(SOURCE "pipeline/jit/init.cc" PROPERTY
COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
pybind11_add_module(_c_expression "pipeline/jit/init.cc")

MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_options(_c_expression PRIVATE -Wl,-init,mindspore_log_init)
set(ORIGIN_PATH $ORIGIN)
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
set_target_properties(_c_expression PROPERTIES MACOSX_RPATH ON)
set(ORIGIN_PATH @loader_path)
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
set(ORIGIN_PATH $ORIGIN)
else ()
else()
MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}")
endif ()
endif()

if (ENABLE_D)
if(ENABLE_D)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/lib64)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64)
@@ -300,45 +320,47 @@ if (ENABLE_D)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/add-ons)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
elseif (ENABLE_GPU)
set(MINDSPORE_RPATH
${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
elseif(ENABLE_GPU)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/cuda/lib64)
endif ()
endif()
set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH})
set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})

if (CMAKE_SYSTEM_NAME MATCHES "Windows")
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
target_link_libraries(mindspore mindspore::pybind11_module)
target_link_libraries(mindspore mindspore_gvar)
target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
target_link_libraries(mindspore mindspore::pybind11_module)
target_link_libraries(mindspore mindspore_gvar)
target_link_libraries(_c_expression PRIVATE -Wl,-force_load mindspore -Wl,-noall_load)
else ()
if (ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU))
target_link_libraries(mindspore mindspore::pslite proto_input mindspore::protobuf mindspore::event mindspore::event_pthreads ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
else()
if(ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU))
target_link_libraries(mindspore mindspore::pslite proto_input mindspore::protobuf
mindspore::event mindspore::event_pthreads ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
target_link_libraries(mindspore -Wl,--no-as-needed mindspore::event_core ps_cache)
if (${ENABLE_IBVERBS} STREQUAL "ON")
if(${ENABLE_IBVERBS} STREQUAL "ON")
target_link_libraries(mindspore ibverbs rdmacm)
endif()
endif()
target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore proto_input -Wl,--no-whole-archive)
target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
target_link_libraries(_c_expression PRIVATE mindspore_gvar)
if (ENABLE_D)
if(ENABLE_D)
target_link_libraries(_c_expression PRIVATE -Wl,--no-as-needed ms_profile)
endif ()
if (ENABLE_ACL)
endif()
if(ENABLE_ACL)
target_link_libraries(_c_expression PRIVATE -Wl,--no-as-needed graph)
endif ()
endif ()
endif()
endif()

if (USE_GLOG)
if(USE_GLOG)
target_link_libraries(_c_expression PRIVATE mindspore::glog)
endif ()
endif()

if (ENABLE_GPU)
if(ENABLE_GPU)
message("add gpu lib to c_expression")
target_link_libraries(_c_expression PRIVATE gpu_cuda_lib gpu_queue cublas
${CUDA_PATH}/lib64/libcurand.so
@@ -346,27 +368,27 @@ if (ENABLE_GPU)
${CUDA_PATH}/lib64/libcudart.so
${CUDA_PATH}/lib64/stubs/libcuda.so
${CUDA_PATH}/lib64/libcusolver.so)
if (ENABLE_MPI)
if(ENABLE_MPI)
set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
endif ()
endif ()
endif()
endif()

if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
set(CMAKE_MACOSX_RPATH 1)
set(CMAKE_INSTALL_RPATH "@loader_path/lib;@loader_path")
set_target_properties(_c_expression PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}")
endif ()
endif()

if (ENABLE_CPU)
if(ENABLE_CPU)
target_link_libraries(_c_expression PRIVATE mindspore::dnnl mindspore::mkldnn)
endif ()
endif()

if (ENABLE_MINDDATA)
if(ENABLE_MINDDATA)
add_subdirectory(minddata/mindrecord)
add_subdirectory(minddata/dataset)
endif ()
endif()

if (ENABLE_D)
if(ENABLE_D)
find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
target_link_libraries(_c_expression PRIVATE ${adump_server})
endif()


+ 48
- 0
mindspore/ccsrc/backend/optimizer/somas/somas.cc View File

@@ -35,6 +35,11 @@
#include "utils/ms_context.h"
#include "debug/common.h"
#include "common/thread_pool.h"
#include "profiler/device/common/memory_profiling.h"

using mindspore::profiler::MemoryProfiling;
using mindspore::profiler::NodeMemory;
using mindspore::profiler::TensorMemory;

namespace mindspore {
namespace somas {
@@ -49,6 +54,11 @@ std::map<TensorType, std::string> tensor_type_name_map = {{kCommon, "Common"},
{kRefNodeOutput, "RefNodeOutput"},
{kUnknown, "Unknown"}};

std::map<LifeLongType, std::string> life_long_name_map = {{kLifeLongNone, "LifeLongNone"},
{kLifeLongGraphAll, "LifeLongGraphAll"},
{kLifeLongGraphStart, "LifeLongGraphStart"},
{kLifeLongGraphEnd, "LifeLongGraphEnd"}};

bool Somas::Allocate(const session::KernelGraph *graph) {
auto ret = InitSomasTensors(graph);
if (!ret) {
@@ -1413,5 +1423,43 @@ uint8_t *Somas::GetNodeWorkSpacePtr(const AnfNodePtr &node, size_t index) const
}
return ptr;
}

// Export this graph's somas allocation results into the memory profiler:
// one GraphMemory record holding every tensor (id/size/type/lifetime) and
// every node with the tensor ids it reads, writes and uses as workspace.
// Compiles to a no-op unless built for Ascend (ENABLE_D).
void Somas::ConvertToProfilingNode(uint32_t graph_id) {
#ifdef ENABLE_D
  auto &profiler = MemoryProfiling::GetInstance();
  auto graph_node = profiler.GetGraphMemoryNode(graph_id);
  if (graph_node == nullptr) {
    graph_node = profiler.AddGraphMemoryNode(graph_id);
    MS_LOG(INFO) << "Add graph memory node for dynamic memory profiling, graph id is " << graph_id;
  }

  // Record every somas tensor: id, aligned size, readable type and lifetime.
  for (const auto &tensor : tensors_list_) {
    TensorMemory tensor_memory;
    tensor_memory.SetTensorId(tensor->GetId());
    tensor_memory.SetAlignedSize(tensor->GetAlignedSize());
    tensor_memory.SetType(tensor_type_name_map[tensor->type_]);
    tensor_memory.SetLifeStart(tensor->lifetime_.start_);
    tensor_memory.SetLifeEnd(tensor->lifetime_.end_);
    tensor_memory.SetLifeLong(life_long_name_map[tensor->lifelong_value_]);
    graph_node->AddTensorMemory(tensor_memory);
  }

  // Record every node together with the ids of the tensors it touches.
  for (const auto &node : nodes_list_) {
    NodeMemory node_memory;
    node_memory.SetNodeName(GetSplitName(node->scope_full_name_));
    node_memory.SetNodeId(node->GetId());
    for (const auto &input : node->input_tensors_) {
      node_memory.AddInputTensorId(input->GetId());
    }
    for (const auto &output : node->output_tensors_) {
      node_memory.AddOutputTensorId(output->GetId());
    }
    for (const auto &workspace : node->workspace_tensors_) {
      node_memory.AddWorkSpaceTensorId(workspace->GetId());
    }
    graph_node->AddNodeMemory(node_memory);
  }
#endif
}
} // namespace somas
} // namespace mindspore

+ 2
- 0
mindspore/ccsrc/backend/optimizer/somas/somas.h View File

@@ -54,6 +54,8 @@ class Somas {

static bool NodeSort(SomasNodePtr, SomasNodePtr);
std::vector<DynamicBitSet> reuse_matrix_;
std::vector<DynamicBitSet> tensor_relation;
void ConvertToProfilingNode(uint32_t graph_id);

private:
// Maps


+ 15
- 0
mindspore/ccsrc/backend/session/ascend_session.cc View File

@@ -30,6 +30,7 @@
#include "runtime/device/ascend/kernel_select_ascend.h"
#include "runtime/device/ascend/kernel_build_ascend.h"
#include "runtime/device/ascend/ascend_kernel_runtime.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "backend/optimizer/ascend/ascend_backend_optimization.h"
#include "backend/optimizer/common/common_backend_optimization.h"
#include "backend/optimizer/ascend/mindir/dropout_unify_mindir.h"
@@ -65,6 +66,11 @@
#include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h"
#endif
#include "profiler/device/common/memory_profiling.h"

using mindspore::device::ascend::ProfilingManager;
using mindspore::profiler::MemoryProfiling;

static constexpr uint32_t kLabelSwitchLabelId = 2;
namespace mindspore {
namespace session {
@@ -649,6 +655,15 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
root_graph->SetInputNodes();
root_graph->SetOptimizerFlag();
DumpAllGraphs(all_graphs);
// Save memory profiling data to proto file
if (ProfilingManager::GetInstance().IsProfiling()) {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
uint64_t mem_size = runtime_instance->GetAvailableMemMaxSize();
auto instance = MemoryProfiling::GetInstance();
instance.SetDeviceMemSize(mem_size);
instance.SaveMemoryProfiling();
}
// return the root_graph id to backend
auto graph_id = root_graph->graph_id();
return graph_id;


+ 10
- 7
mindspore/ccsrc/profiler/CMakeLists.txt View File

@@ -1,11 +1,14 @@
if (ENABLE_GPU)
if(ENABLE_GPU)
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/gpu/*.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
endif ()
endif()

if (ENABLE_D)
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
if(ENABLE_D)
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc" "device/common/*.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
endif ()
add_dependencies(_mindspore_profiler_obj mindspore::protobuf)
endif()

+ 97
- 0
mindspore/ccsrc/profiler/device/common/memory_profiling.cc View File

@@ -0,0 +1,97 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "profiler/device/common/memory_profiling.h"
#include <fstream>
#include <memory>
#include "utils/log_adapter.h"
#include "utils/ms_context.h"

namespace mindspore {
namespace profiler {

std::shared_ptr<GraphMemory> MemoryProfiling::AddGraphMemoryNode(uint32_t graph_id) {
std::shared_ptr<GraphMemory> node = std::make_shared<GraphMemory>(graph_id);
graph_memory_[graph_id] = node;
return node;
}

// Look up the GraphMemory record previously added for |graph_id|;
// returns nullptr when the graph has not been registered yet.
std::shared_ptr<GraphMemory> MemoryProfiling::GetGraphMemoryNode(uint32_t graph_id) {
  auto iter = graph_memory_.find(graph_id);
  return (iter == graph_memory_.end()) ? nullptr : iter->second;
}

// Serialize the collected per-graph memory usage into memory_proto_:
// total device memory, then for each graph its static memory size plus
// the per-node and per-tensor records gathered by somas.
void MemoryProfiling::MemoryToPB() {
  memory_proto_.set_total_mem(device_mem_size_);
  for (const auto &graph : graph_memory_) {
    GraphMemProto *graph_proto = memory_proto_.add_graph_mem();
    graph_proto->set_graph_id(graph.second->GetGraphId());
    graph_proto->set_static_mem(graph.second->GetStaticMemSize());
    // node memory to PB
    for (const auto &node : graph.second->GetNodeMemory()) {
      NodeMemProto *node_mem = graph_proto->add_node_mems();
      node_mem->set_node_name(node.GetNodeName());
      node_mem->set_node_id(node.GetNodeId());
      for (const auto &id : node.GetInputTensorId()) {
        node_mem->add_input_tensor_id(id);
      }
      for (const auto &id : node.GetOutputTensorId()) {
        node_mem->add_output_tensor_id(id);
      }
      // Bug fix: workspace ids must come from GetWorkspaceTensorId();
      // the original iterated GetOutputTensorId() a second time, so the
      // proto's workspace_tensor_id duplicated the output tensor ids.
      for (const auto &id : node.GetWorkspaceTensorId()) {
        node_mem->add_workspace_tensor_id(id);
      }
    }
    // tensor memory to PB
    for (const auto &tensor : graph.second->GetTensorMemory()) {
      TensorMemProto *tensor_mem = graph_proto->add_tensor_mems();
      tensor_mem->set_tensor_id(tensor.GetTensorId());
      tensor_mem->set_size(tensor.GetAlignedSize());
      tensor_mem->set_type(tensor.GetType());
      tensor_mem->set_life_start(tensor.GetLifeStart());
      tensor_mem->set_life_end(tensor.GetLifeEnd());
      tensor_mem->set_life_long(tensor.GetLifeLong());
    }
  }
  MS_LOG(INFO) << "Memory profiling data to PB end";
  return;
}

void MemoryProfiling::SaveMemoryProfiling() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
std::string dir_path = context->get_param<std::string>(MS_CTX_PROFILING_DIR_PATH);
auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
std::string file = dir_path + std::string("/memory_usage_") + std::to_string(device_id) + std::string(".pb");

MemoryToPB();

std::fstream handle(file, std::ios::out | std::ios::trunc | std::ios::binary);
if (!memory_proto_.SerializeToOstream(&handle)) {
MS_LOG(ERROR) << "Save memory profiling data to file failed";
}
handle.close();
MS_LOG(INFO) << "Start save memory profiling data to " << file << " end";
return;
}
} // namespace profiler
} // namespace mindspore

+ 124
- 0
mindspore/ccsrc/profiler/device/common/memory_profiling.h View File

@@ -0,0 +1,124 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H
#define MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H

#include "proto/memory_profiling.pb.h"
#include <string>
#include <map>
#include <vector>
#include <memory>
#include "utils/ms_context.h"

namespace mindspore {
namespace profiler {

// Records one execution node's memory footprint: its name and id plus the
// somas tensor ids it consumes (inputs), produces (outputs) and uses as
// scratch space (workspaces).
class NodeMemory {
 public:
  NodeMemory() : node_name_(""), node_id_(0) {}
  ~NodeMemory() = default;

  void SetNodeName(const std::string &name) { node_name_ = name; }
  void SetNodeId(uint64_t node_id) { node_id_ = node_id; }
  void AddInputTensorId(uint64_t node_id) { input_tensor_id_.emplace_back(node_id); }
  void AddOutputTensorId(uint64_t node_id) { output_tensor_id_.emplace_back(node_id); }
  void AddWorkSpaceTensorId(uint64_t node_id) { workspace_tensor_id_.emplace_back(node_id); }
  // Getters return const references to avoid copying the containers on every
  // access (they are iterated during proto serialization); call sites that
  // copied the by-value result still compile unchanged.
  const std::string &GetNodeName() const { return node_name_; }
  uint64_t GetNodeId() const { return node_id_; }
  const std::vector<uint64_t> &GetInputTensorId() const { return input_tensor_id_; }
  const std::vector<uint64_t> &GetOutputTensorId() const { return output_tensor_id_; }
  const std::vector<uint64_t> &GetWorkspaceTensorId() const { return workspace_tensor_id_; }

 private:
  std::string node_name_;
  uint64_t node_id_;
  std::vector<uint64_t> input_tensor_id_;
  std::vector<uint64_t> output_tensor_id_;
  std::vector<uint64_t> workspace_tensor_id_;
};

// Records one somas tensor's memory usage: id, aligned size, type string and
// its lifetime expressed in execution-node ids.
class TensorMemory {
 public:
  TensorMemory() : tensor_id_(0), size_(0), type_(""), life_start_(0), life_end_(0), life_long_("") {}
  ~TensorMemory() = default;

  void SetTensorId(uint64_t tensor_id) { tensor_id_ = tensor_id; }
  void SetAlignedSize(uint64_t size) { size_ = size; }
  void SetType(const std::string &type) { type_ = type; }
  void SetLifeStart(uint64_t start) { life_start_ = start; }
  void SetLifeEnd(uint64_t end) { life_end_ = end; }
  void SetLifeLong(const std::string &life_long) { life_long_ = life_long; }
  uint64_t GetTensorId() const { return tensor_id_; }
  uint64_t GetAlignedSize() const { return size_; }
  // String getters return const references to avoid a copy per access;
  // by-value call sites still compile unchanged.
  const std::string &GetType() const { return type_; }
  uint64_t GetLifeStart() const { return life_start_; }
  uint64_t GetLifeEnd() const { return life_end_; }
  const std::string &GetLifeLong() const { return life_long_; }

 private:
  uint64_t tensor_id_;
  uint64_t size_;         // aligned tensor size
  std::string type_;      // see TensorType in somas_tensor.h
  uint64_t life_start_;   // the exe node id at which tensor memory allocated
  uint64_t life_end_;     // the exe node id at which tensor memory deallocated
  std::string life_long_; // see LifeLongType in somas_tensor.h
};

class GraphMemory {
public:
explicit GraphMemory(uint32_t graph_id) : graph_id_(graph_id), static_mem_size_(0) {}
~GraphMemory() = default;
void AddStaticMemorySize(uint32_t size) { static_mem_size_ += size; }
void AddNodeMemory(const NodeMemory &node) { node_memory_.emplace_back(node); }
void AddTensorMemory(const TensorMemory &node) { tensor_memory_.emplace_back(node); }
uint32_t GetGraphId() const { return graph_id_; }
uint32_t GetStaticMemSize() const { return static_mem_size_; }
std::vector<NodeMemory> GetNodeMemory() const { return node_memory_; }
std::vector<TensorMemory> GetTensorMemory() const { return tensor_memory_; }

private:
uint32_t graph_id_;
uint32_t static_mem_size_;
std::vector<NodeMemory> node_memory_;
std::vector<TensorMemory> tensor_memory_;
};

// Process-wide collector for memory profiling data.  Per-graph records are
// kept in graph_memory_ and serialized into memory_proto_ (see
// memory_profiling.proto) for output.
class MemoryProfiling {
 public:
  MemoryProfiling() = default;
  ~MemoryProfiling() = default;

  // Meyers-singleton accessor.
  static MemoryProfiling &GetInstance() {
    static MemoryProfiling instance;
    return instance;
  }

  MemoryProto &GetMemProto() { return memory_proto_; }
  // Creates and registers a record for graph_id (defined out of line).
  std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
  // Looks up the record for graph_id (defined out of line).  NOTE(review):
  // callers null-check the result, so absent presumably yields nullptr --
  // confirm in memory_profiling.cc.
  std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id);
  void SetDeviceMemSize(uint64_t size) { device_mem_size_ = size; }
  // Converts collected records into memory_proto_ (defined out of line).
  void MemoryToPB();
  // Writes the profiling data out (defined out of line).
  void SaveMemoryProfiling();

 private:
  MemoryProto memory_proto_;
  std::map<uint32_t, std::shared_ptr<GraphMemory>> graph_memory_;
  // Zero-initialized so a read never sees an indeterminate value when
  // SetDeviceMemSize() was not called first (original left this member
  // uninitialized).
  uint64_t device_mem_size_{0};
};
} // namespace profiler
} // namespace mindspore
#endif

+ 50
- 0
mindspore/ccsrc/profiler/device/common/memory_profiling.proto View File

@@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package mindspore.profiler;

// Top-level payload serialized by MemoryProfiling (memory_profiling.h).
// Field numbers are part of the wire format; never reuse or renumber them.
message MemoryProto {
repeated GraphMemProto graph_mem = 1; // memory usage of multiple graphs
int64 total_mem = 2; // total allocated device memory
}

// Memory usage of a single kernel graph.
message GraphMemProto {
int64 graph_id = 1; // graph id
int64 static_mem = 2; // size of allocated static memory for current graph
repeated NodeMemProto node_mems = 3; // execution nodes
repeated TensorMemProto tensor_mems = 4; // all tensors
string fp_start = 5; // node name of fp start
string bp_end = 6; // node name of bp end
}

// One execution node and the tensor ids it touches.
message NodeMemProto {
string node_name = 1; // node name
int64 node_id = 2; // node id with respect to the execution order
repeated int64 input_tensor_id = 3; // input tensor id
repeated int64 output_tensor_id = 4; // output tensor id
repeated int64 workspace_tensor_id = 5; // workspace tensor id
}

// Size and lifetime of one tensor (mirrors TensorMemory in memory_profiling.h).
message TensorMemProto {
int64 tensor_id = 1; // tensor id
int64 size = 2; // aligned tensor size
string type = 3; // tensor type, e.g. Common, OutputOnly
int64 life_start = 4; // the exe node id at which tensor memory allocated
int64 life_end = 5; // the exe node id at which tensor memory deallocated
string life_long = 6; // see LifeLongType enum
}

+ 2
- 2
mindspore/ccsrc/pybind_api/utils/ms_context_py.cc View File

@@ -94,8 +94,8 @@ REGISTER_PYBIND_DEFINE(MsContextPy, ([](const py::module *m) {
.value("save_graphs_path", MsCtxParam::MS_CTX_SAVE_GRAPHS_PATH)
.value("variable_memory_max_size", MsCtxParam::MS_CTX_VARIABLE_MEMORY_MAX_SIZE)
.value("device_id", MsCtxParam::MS_CTX_DEVICE_ID)
.value("max_call_depth", MsCtxParam::MS_CTX_MAX_CALL_DEPTH);
.value("max_call_depth", MsCtxParam::MS_CTX_MAX_CALL_DEPTH)
.value("profiling_dir_path", MsCtxParam::MS_CTX_PROFILING_DIR_PATH);
(void)py::class_<mindspore::MsContext, std::shared_ptr<mindspore::MsContext>>(*m, "MSContext")
.def_static("get_instance", &mindspore::MsContext::GetInstance, "Get ms context instance.")
.def("get_param", &mindspore::MsCtxGetParameter, "Get value of specified paramter.")


+ 5
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc View File

@@ -895,4 +895,9 @@ void AscendKernelRuntime::KernelLaunchProfiling(const std::string &kernel_name)
MS_LOG(EXCEPTION) << "Too many profiling data";
}
}

// Returns the device memory size (bytes) reported by the Ascend memory manager.
// NOTE(review): the dynamic_pointer_cast result is not null-checked; this
// assumes mem_manager_ is always an AscendMemoryManager on this runtime --
// confirm, otherwise this dereferences a null pointer.
uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const {
  auto ascend_mem_manager = dynamic_pointer_cast<AscendMemoryManager>(mem_manager_);
  return ascend_mem_manager->GetDeviceMemSize();
}
} // namespace mindspore::device::ascend

+ 1
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h View File

@@ -55,6 +55,7 @@ class AscendKernelRuntime : public KernelRuntime {
void CreateContext() override;
void *context() const override { return rt_context_; }
void PreInit() override;
uint64_t GetAvailableMemMaxSize() const;

protected:
DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,


+ 29
- 1
mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc View File

@@ -18,6 +18,12 @@
#include "runtime/device/ascend/ascend_memory_pool.h"
#include "utils/ms_context.h"
#include "runtime/mem.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "profiler/device/common/memory_profiling.h"

using mindspore::device::ascend::ProfilingManager;
using mindspore::profiler::MemoryProfiling;

namespace mindspore {
namespace device {
namespace ascend {
@@ -44,6 +50,11 @@ void AscendMemoryManager::MallocDeviceMemory() {
AscendMemoryPool::GetInstance().Init(device_mem_base_, device_mem_size_, dynamic_mem_offset_);
}

// Device memory size in bytes: the value configured via MsContext when set,
// otherwise the built-in default kAscendDeviceMemSize.
uint64_t AscendMemoryManager::GetDeviceMemSize() {
  auto mem_size = GetDeviceMemSizeFromContext();
  // GetDeviceMemSizeFromContext() yields 0 when no size was configured.
  return mem_size == 0 ? kAscendDeviceMemSize : mem_size;
}

uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
@@ -88,7 +99,7 @@ void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
return AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
}

uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem) {
uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) {
size_t align_size = 0;
if (communication_mem) {
align_size = GetCommunicationAlignSize(size);
@@ -96,6 +107,16 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
align_size = GetCommonAlignSize(size);
}

if (ProfilingManager::GetInstance().IsProfiling() && graph_id != kInvalidGraphId) {
auto node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id);
if (node == nullptr) {
node = MemoryProfiling::GetInstance().AddGraphMemoryNode(graph_id);
MS_LOG(INFO) << "Add graph memory node for static memory profiling, graph id is " << graph_id;
}

node->AddStaticMemorySize(align_size);
}

auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
MS_LOG(INFO) << "Malloc Memory: Static, total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
<< "] memory pool[" << device_mem_size_ - device_mem_pool_offset << "])"
@@ -139,6 +160,13 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m
return device_mem_base_ + offset;
}
}

// Performs the base-class SOMAS dynamic allocation, then, when profiling is
// enabled, hands this graph's SOMAS reuse plan to the profiler
// (ConvertToProfilingNode presumably records node/tensor memory for the
// memory profiler -- see somas.cc).
void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
  MemoryManager::MallocSomasDynamicMem(graph);
  if (ProfilingManager::GetInstance().IsProfiling()) {
    somas_reuse_util_ptr_->ConvertToProfilingNode(graph->graph_id());
  }
}
} // namespace ascend
} // namespace device
} // namespace mindspore

+ 3
- 1
mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h View File

@@ -31,9 +31,11 @@ class AscendMemoryManager : public MemoryManager {
void ResetDynamicMemory() override;
void ClearGlobalIdleMem() override;
void *MallocMemFromMemPool(size_t size) override;
uint64_t GetDeviceMemSize();
void MallocSomasDynamicMem(const session::KernelGraph *graph);

protected:
uint8_t *MallocStaticMem(size_t size, bool communication_mem) override;
uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override;
uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override;

private:


+ 1
- 1
mindspore/ccsrc/runtime/device/cpu/cpu_memory_manager.cc View File

@@ -22,7 +22,7 @@ namespace mindspore {
namespace device {
namespace cpu {

uint8_t *CPUMemoryManager::MallocStaticMem(size_t size, bool) {
uint8_t *CPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) {
void *ptr = malloc(size);
if (ptr != nullptr) {
memset_s(ptr, size, 0, size);


+ 1
- 1
mindspore/ccsrc/runtime/device/cpu/cpu_memory_manager.h View File

@@ -44,7 +44,7 @@ class CPUMemoryManager : public MemoryManager {
void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);

protected:
uint8_t *MallocStaticMem(size_t size, bool communication_mem) override;
uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override;
uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override;

private:


+ 1
- 1
mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc View File

@@ -101,7 +101,7 @@ void GPUMemoryManager::FreeDeviceMemory() {
GPUMemoryAllocator::GetInstance().ReleaseDeviceRes();
}

uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) {
uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) {


+ 1
- 1
mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h View File

@@ -36,7 +36,7 @@ class GPUMemoryManager : public MemoryManager {
std::vector<size_t> size_list) override;

protected:
uint8_t *MallocStaticMem(size_t size, bool communication_mem) override;
uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override;
};
} // namespace gpu
} // namespace device


+ 9
- 3
mindspore/ccsrc/runtime/device/kernel_runtime.cc View File

@@ -360,7 +360,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
auto tensor_size = CountNodeDeviceMemorySize(item, index);
device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
MS_LOG(DEBUG) << "Malloc static memory for " << item->fullname_with_scope();
if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address) == nullptr) {
if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph->graph_id()) == nullptr) {
MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size;
}
MS_LOG(INFO) << "Malloc Input for graph " << graph->graph_id() << ", node: " << item->fullname_with_scope()
@@ -629,6 +629,10 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
MS_EXCEPTION_IF_NULL(ms_context);
std::vector<tensor::TensorPtr> tensors;
TensorValueToTensor(node_value, &tensors);
// Graph id should be passed to record static memory if profiling is enabled.
auto kernel_info = static_cast<device::KernelInfo *>(value_node->kernel_info());
MS_EXCEPTION_IF_NULL(kernel_info);
uint32_t graph_id = kernel_info->graph_id();
for (const auto &tensor : tensors) {
if (tensor == nullptr) {
MS_LOG(WARNING) << "Tensor is null";
@@ -651,7 +655,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) &&
!mem_manager_->MallocMemFromMemPool(address, node_size)) {
MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << node_size;
} else if (mem_manager_->MallocMem(kStaticMem, node_size, address) == nullptr) {
} else if (mem_manager_->MallocMem(kStaticMem, node_size, address, graph_id) == nullptr) {
MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << node_size;
}
AnfAlgo::SetOutputAddr(address, output_idx, value_node.get());
@@ -662,6 +666,8 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
<< "node dtype is " << AnfAlgo::GetOutputInferDataType(value_node, output_idx);
}
}

return;
}

void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
@@ -690,7 +696,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER) &&
!mem_manager_->MallocMemFromMemPool(address, tensor_size)) {
MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << tensor_size;
} else if (mem_manager_->MallocMem(kStaticMem, tensor_size, address) == nullptr) {
} else if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) {
MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size;
}
AnfAlgo::SetOutputAddr(address, 0, value_node.get());


+ 1
- 0
mindspore/ccsrc/runtime/device/kernel_runtime.h View File

@@ -100,6 +100,7 @@ class KernelRuntime {
}

virtual void PreInit() {}
virtual uint64_t GetAvailableMemMaxSize() const { return 0; }

protected:
virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,


+ 5
- 3
mindspore/ccsrc/runtime/device/memory_manager.cc View File

@@ -18,8 +18,10 @@
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/ms_context.h"

using mindspore::memreuse::BestFitMemReuse;
using mindspore::memreuse::MemReuseUtilPtr;

namespace mindspore {
namespace device {
size_t MemoryManager::GetCommonAlignSize(size_t input_size) const {
@@ -139,11 +141,11 @@ uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index,
return MallocDynamicMem(size, false);
}

uint8_t *MemoryManager::MallocMem(MemType type, size_t size, const DeviceAddressPtr &address) {
uint8_t *MemoryManager::MallocMem(MemType type, size_t size, const DeviceAddressPtr &address, uint32_t graph_id) {
MS_EXCEPTION_IF_NULL(address);
uint8_t *ptr = nullptr;
if (type == kStaticMem) {
ptr = MallocStaticMem(size, false);
ptr = MallocStaticMem(size, false, graph_id);
address->from_mem_pool_ = true;
} else if (type == kDynamicMem) {
ptr = MallocDynamicMem(size, false);
@@ -152,7 +154,7 @@ uint8_t *MemoryManager::MallocMem(MemType type, size_t size, const DeviceAddress
return ptr;
}

uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) {
uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) {
size_t align_size = 0;
if (communication_mem) {
align_size = GetCommunicationAlignSize(size);


+ 4
- 3
mindspore/ccsrc/runtime/device/memory_manager.h View File

@@ -44,11 +44,12 @@ class MemoryManager {
virtual void ClearGlobalIdleMem() {}

void MallocReusedDynamicMem(const session::KernelGraph *graph);
void MallocSomasDynamicMem(const session::KernelGraph *graph);
virtual void MallocSomasDynamicMem(const session::KernelGraph *graph);
uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size,
const DeviceAddressPtr &address, bool comm_mem);
uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size);
virtual uint8_t *MallocMem(MemType type, size_t size, const DeviceAddressPtr &address);
virtual uint8_t *MallocMem(MemType type, size_t size, const DeviceAddressPtr &address,
uint32_t graph_id = kInvalidGraphId);

virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
virtual void *MallocMemFromMemPool(size_t size);
@@ -62,7 +63,7 @@ class MemoryManager {
size_t GetCommunicationAlignSize(size_t input_size) const;

protected:
virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem);
virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId);
virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
uint8_t *device_mem_base_{nullptr};
uint64_t device_mem_size_{0};


+ 1
- 0
mindspore/core/utils/ms_context.cc View File

@@ -73,6 +73,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
set_param<bool>(MS_CTX_ENABLE_GRAPH_KERNEL, false);
set_param<bool>(MS_CTX_ENABLE_SPARSE, false);
set_param<bool>(MS_CTX_ENABLE_PARALLEL_SPLIT, false);
set_param<std::string>(MS_CTX_PROFILING_DIR_PATH, "");

backend_policy_ = policy_map_[policy];
}


+ 1
- 0
mindspore/core/utils/ms_context.h View File

@@ -104,6 +104,7 @@ enum MsCtxParam : unsigned {
MS_CTX_SAVE_GRAPHS_PATH,
MS_CTX_VARIABLE_MEMORY_MAX_SIZE,
MS_CTX_PYTHON_EXE_PATH,
MS_CTX_PROFILING_DIR_PATH,
MS_CTX_TYPE_STRING_END,

// parameter numbers of each type


+ 2
- 1
mindspore/profiler/profiling.py View File

@@ -140,7 +140,8 @@ class Profiler:
logger.error(msg)
raise ValueError(msg)
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
context.set_context(enable_profiling=True, profiling_options=profiling_options)
context.set_context(enable_profiling=True, profiling_options=profiling_options,
profiling_dir_path=self._output_path)
base_profiling_container_path = os.path.join(self._output_path, "container")
container_path = os.path.join(base_profiling_container_path, self._dev_id)
data_path = os.path.join(container_path, "data")


+ 32
- 29
tests/ut/cpp/CMakeLists.txt View File

@@ -4,12 +4,12 @@ message("build ut testcases...")
project(ut)

set(PROJECT_DIR "${PROJECT_SOURCE_DIR}/../../..")
if (ENABLE_DUMP_IR)
if(ENABLE_DUMP_IR)
add_compile_definitions(ENABLE_DUMP_IR)
endif (ENABLE_DUMP_IR)
if (ENABLE_D)
endif()
if(ENABLE_D)
add_compile_definitions(ENABLE_D)
endif ()
endif()

#add python lib and include for all ut executables;
message("PYTHON_INCLUDE_DIRS = ${PYTHON_INCLUDE_DIRS}")
@@ -25,13 +25,13 @@ MESSAGE("check ut_test ${CMAKE_BINARY_DIR}")

link_directories(${MS_CCSRC_BUILD_PATH})

if (ENABLE_MINDDATA)
if(ENABLE_MINDDATA)
add_definitions(-D ENABLE_MINDDATA)
link_directories(${MS_CCSRC_BUILD_PATH}/minddata/dataset)
link_directories(${MS_CCSRC_BUILD_PATH}/minddata/mindrecord)
endif ()
endif()
# fetch ut test files
if (ENABLE_MINDDATA)
if(ENABLE_MINDDATA)
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image)
file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
./stub/*.cc
@@ -61,7 +61,7 @@ if (ENABLE_MINDDATA)
./cxx_api/*.cc
)

if (NOT ENABLE_PYTHON)
if(NOT ENABLE_PYTHON)
set(PYTHON_RELATED_SRCS
dataset/filter_op_test.cc
dataset/voc_op_test.cc
@@ -69,15 +69,15 @@ if (ENABLE_MINDDATA)
dataset/sentence_piece_vocab_op_test.cc
)
list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS})
endif ()
else ()
endif()
else()
file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc)
foreach (OBJ ${TEMP_UT_SRCS})
if (NOT ${OBJ} MATCHES "./dataset/" AND NOT ${OBJ} MATCHES "./mindrecord/")
foreach(OBJ ${TEMP_UT_SRCS})
if(NOT ${OBJ} MATCHES "./dataset/" AND NOT ${OBJ} MATCHES "./mindrecord/")
list(APPEND UT_SRCS ${OBJ})
endif ()
endforeach ()
endif ()
endif()
endforeach()
endif()

file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/pybind_api/*.cc"
@@ -133,9 +133,11 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/transform/graph_ir/*.cc"
"../../../mindspore/ccsrc/transform/graph_ir/op_declare/*.cc"
"../../../mindspore/ccsrc/ps/*.cc"
"../../../mindspore/ccsrc/profiler/device/common/*.cc"
)

list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/util.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/scheduler.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/optimizer_info.cc")
@@ -154,31 +156,32 @@ add_dependencies(_ut_ut_obj engine-cache-server)
add_executable(ut_tests $<TARGET_OBJECTS:_ut_ut_obj>
$<TARGET_OBJECTS:_ut_mindspore_obj>)

if (ENABLE_GE)
if (ENABLE_TRAIN)
if(ENABLE_GE)
if(ENABLE_TRAIN)
target_link_libraries(ut_tests PRIVATE graph ge_runner)
else ()
else()
target_link_libraries(ut_tests PRIVATE graph ge_client)
endif ()
endif()

target_link_libraries(mindspore PRIVATE tsdclient)
endif ()
endif()

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore::event mindspore::event_pthreads mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl)
if (ENABLE_MINDDATA)
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore::event mindspore::event_pthreads
mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl)
if(ENABLE_MINDDATA)

# AUX_SOURCE_DIRECTORY(LITE_CV_FILES)
# message(STATUS "xxxxxxxxxxxxxxxxx"${LITE_CV_FILES} )
# add_library(_live_cv OBJECT ${LITE_CV_FILES})

target_link_libraries(ut_tests PRIVATE _c_dataengine _c_mindrecord)
endif ()
else ()
endif()
else()
target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore_gvar ${PYTHON_LIBRARIES})
endif ()
if (USE_GLOG)
endif()
if(USE_GLOG)
target_link_libraries(ut_tests PRIVATE mindspore::glog)
endif ()
endif()

target_link_libraries(ut_tests PRIVATE mindspore mindspore_shared_lib securec graph)

Loading…
Cancel
Save