Browse Source

Fix async data dump failure

tags/v1.1.0
caifubi 5 years ago
parent
commit
b17b4abf7d
5 changed files with 34 additions and 3 deletions
  1. +5
    -1
      mindspore/ccsrc/CMakeLists.txt
  2. +12
    -1
      mindspore/ccsrc/backend/session/ascend_session.cc
  3. +1
    -1
      mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
  4. +11
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
  5. +5
    -0
      tests/ut/cpp/stub/runtime/runtime_stub.cc

+ 5
- 1
mindspore/ccsrc/CMakeLists.txt View File

@@ -326,6 +326,11 @@ add_library(inference SHARED
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
-Wl,--whole-archive mindspore -Wl,--no-whole-archive mindspore_gvar mindspore::protobuf)

# When ENABLE_D is set, link the static adump server archive found under
# ${ASCEND_PATH}/fwkacllib/lib64 into both the _c_expression and inference targets.
if (ENABLE_D)
target_link_libraries(_c_expression PRIVATE ${ASCEND_PATH}/fwkacllib/lib64/libadump_server.a)
target_link_libraries(inference PRIVATE ${ASCEND_PATH}/fwkacllib/lib64/libadump_server.a)
endif()

if (ENABLE_CPU)
target_link_libraries(inference PRIVATE mindspore::dnnl mindspore::mkldnn)
endif ()
@@ -339,4 +344,3 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
set_target_properties(inference PROPERTIES MACOSX_RPATH ON)
endif ()


+ 12
- 1
mindspore/ccsrc/backend/session/ascend_session.cc View File

@@ -44,6 +44,7 @@
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/anf_ir_dump.h"
#include "debug/dump_proto.h"
#include "toolchain/adx_datadump_server.h"

namespace mindspore {
namespace session {
@@ -417,6 +418,16 @@ void AscendSession::SelectKernel(const KernelGraph &kernel_graph) const {
MS_LOG(INFO) << "Finish!";
}

// Parses the dump configuration through the DumpJsonParser singleton and, when
// async dump is enabled, starts the Adx data dump server. A non-zero return
// from AdxDataDumpServerInit() is reported through MS_LOG(EXCEPTION).
void DumpInit() {
  auto &parser = DumpJsonParser::GetInstance();
  parser.Parse();
  if (!parser.async_dump_enabled()) {
    // Async dump is off: the dump server is not needed.
    return;
  }
  const int init_status = AdxDataDumpServerInit();
  if (init_status != 0) {
    MS_LOG(EXCEPTION) << "Adx data dump server init failed";
  }
}

void AscendSession::InitRuntimeResource() {
MS_LOG(INFO) << "Start!";
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
@@ -424,7 +435,7 @@ void AscendSession::InitRuntimeResource() {
if (!runtime_instance->Init()) {
MS_LOG(EXCEPTION) << "Kernel runtime init error.";
}
DumpJsonParser::GetInstance().Parse();
DumpInit();
MS_LOG(INFO) << "Finish!";
}



+ 1
- 1
mindspore/ccsrc/debug/data_dump/dump_json_parser.cc View File

@@ -169,7 +169,7 @@ void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) {
}

void DumpJsonParser::ParseE2eDumpSetting(const nlohmann::json &content) {
auto e2e_dump_setting = CheckJsonKeyExist(content, kE2eDumpSettings);
auto e2e_dump_setting = content.find(kE2eDumpSettings);
if (e2e_dump_setting == content.end()) {
MS_LOG(INFO) << "No e2e_dump_settings";
return;


+ 11
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc View File

@@ -40,6 +40,7 @@
#include "runtime/device/ascend/ascend_memory_manager.h"
#include "debug/tensor_load.h"
#include "debug/data_dump/dump_json_parser.h"
#include "toolchain/adx_datadump_server.h"
#include "utils/shape_utils.h"
#ifdef MEM_REUSE_DEBUG
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
@@ -169,6 +170,14 @@ bool AscendKernelRuntime::NeedDestroyHccl() {
return true;
}

// Stops the Adx data dump server when async dump is enabled in the parsed dump
// configuration. A non-zero return from AdxDataDumpServerUnInit() is logged at
// ERROR level.
void AsyncDataDumpUninit() {
  if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
    // Async dump is off: there is nothing to shut down here.
    return;
  }
  const int uninit_status = AdxDataDumpServerUnInit();
  if (uninit_status != 0) {
    MS_LOG(ERROR) << "Adx data dump server uninit failed";
  }
}

void AscendKernelRuntime::ReleaseDeviceRes() {
MS_LOG(INFO) << "Ascend finalize start";
#ifdef ENABLE_DEBUGGER
@@ -184,6 +193,8 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
// release ge runtime
ClearGraphModelMap();

AsyncDataDumpUninit();

auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto ret = rtSetDevice(context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID));


+ 5
- 0
tests/ut/cpp/stub/runtime/runtime_stub.cc View File

@@ -21,6 +21,7 @@
#include "runtime/mem.h"
#include "runtime/rt_model.h"
#include "runtime/stream.h"
#include "toolchain/adx_datadump_server.h"

// Stub: ignores the event and always returns RT_ERROR_NONE.
rtError_t rtEventSynchronize(rtEvent_t event) {
  (void)event;
  return RT_ERROR_NONE;
}

@@ -141,3 +142,7 @@ rtError_t rtSetTaskGenCallback(rtTaskGenCallback callback) { return RT_ERROR_NON
// Stub: ignores all arguments and always returns RT_ERROR_NONE.
RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList) {
  (void)profConfig;
  (void)numsDev;
  (void)deviceList;
  return RT_ERROR_NONE;
}

// Stub: ignores all arguments and always returns RT_ERROR_NONE.
RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList) {
  (void)profConfig;
  (void)numsDev;
  (void)deviceList;
  return RT_ERROR_NONE;
}

// Stub of the Adx data dump server start-up call: does nothing and always
// returns 0.
int AdxDataDumpServerInit() {
  constexpr int kStubResult = 0;
  return kStubResult;
}

// Stub of the Adx data dump server shutdown call: does nothing and always
// returns 0.
int AdxDataDumpServerUnInit() {
  constexpr int kStubResult = 0;
  return kStubResult;
}

Loading…
Cancel
Save