Browse Source

Add device reset; fix runtime failure when deleting the ring buffer

Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
tags/v1.1.0
zhoufeng 5 years ago
parent
commit
ee7e412def
4 changed files with 10 additions and 16 deletions
  1. +0
    -9
      mindspore/ccsrc/cxx_api/graph/ms/ms_graph_impl.cc
  2. +7
    -6
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
  3. +1
    -1
      mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
  4. +2
    -0
      tests/ut/cpp/stub/runtime/runtime_stub.cc

+ 0
- 9
mindspore/ccsrc/cxx_api/graph/ms/ms_graph_impl.cc View File

@@ -116,15 +116,6 @@ Status MsGraphImpl::FinalizeEnv() {
MS_LOG_INFO << "Start finalize env";
session::ExecutorManager::Instance().Clear();
device::KernelRuntimeManager::Instance().ClearRuntimeResource();
auto ms_context = MsContext::GetInstance();
if (ms_context == nullptr) {
MS_LOG(ERROR) << "Get Context failed!";
return FAILED;
}
if (!context::CloseTsd(ms_context)) {
MS_LOG(ERROR) << "CloseTsd failed!";
return FAILED;
}

init_flag_ = false;
MS_LOG(INFO) << "End finalize env";


+ 7
- 6
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc View File

@@ -245,10 +245,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() {

auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto ret = rtSetDevice(context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID));
if (ret != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "Call rtSetDevice, ret[" << static_cast<int>(ret) << "]";
}
uint32_t device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);

if (mem_manager_ != nullptr) {
mem_manager_->FreeDeviceMemory();
@@ -256,7 +253,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() {

(void)DestroySingleOpHccl();
(void)DestroyHccl();
(void)ResetDevice();
(void)ResetDevice(device_id);
(void)ProfilingManager::GetInstance().StopProfiling();
MS_LOG(INFO) << "Ascend finalize end";
}
@@ -729,7 +726,7 @@ bool AscendKernelRuntime::InitDevice() {
return true;
}

bool AscendKernelRuntime::ResetDevice() {
bool AscendKernelRuntime::ResetDevice(uint32_t device_id) {
InnerSetContext();
if (stream_ != nullptr) {
auto ret = rtStreamDestroy(stream_);
@@ -747,6 +744,10 @@ bool AscendKernelRuntime::ResetDevice() {
rt_context_ = nullptr;
}

auto ret = rtDeviceReset(device_id);
if (ret != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "Call rtDeviceReset, ret[" << ret << "]";
}
// set to nullptr as it's not created, only bound to an existing context
rt_context_hccl_ = nullptr;



+ 1
- 1
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h View File

@@ -67,7 +67,7 @@ class AscendKernelRuntime : public KernelRuntime {

private:
bool InitDevice();
bool ResetDevice();
bool ResetDevice(uint32_t device_id);
bool HcclInit();
bool NeedDestroyHccl();
bool DestroyHccl();


+ 2
- 0
tests/ut/cpp/stub/runtime/runtime_stub.cc View File

@@ -37,6 +37,8 @@ rtError_t rtGetDeviceCount(int32_t *count) { return RT_ERROR_NONE; }

// Unit-test stub: the device id is ignored and RT_ERROR_NONE is always returned.
rtError_t rtSetDevice(int32_t device) {
  (void)device;
  return RT_ERROR_NONE;
}

// Unit-test stub: the device id is ignored and RT_ERROR_NONE is always returned.
rtError_t rtDeviceReset(int32_t device) {
  (void)device;
  return RT_ERROR_NONE;
}

// Unit-test stub: no context is created; every argument is ignored and
// RT_ERROR_NONE is always returned. Note that *ctx is left untouched.
rtError_t rtCtxCreate(rtContext_t *ctx, uint32_t flags, int32_t device) {
  (void)ctx;
  (void)flags;
  (void)device;
  return RT_ERROR_NONE;
}

// Unit-test stub: the context argument is ignored and RT_ERROR_NONE is always returned.
rtError_t rtCtxSetCurrent(rtContext_t ctx) {
  (void)ctx;
  return RT_ERROR_NONE;
}


Loading…
Cancel
Save