From 4aecf539e935ce59b5a898f28b0c960412e6e1f0 Mon Sep 17 00:00:00 2001 From: caifubi Date: Tue, 29 Dec 2020 16:12:37 +0800 Subject: [PATCH] Fix GPU sync stream Segmentation fault --- mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc | 6 +++++- mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc index cd666a0148..ee1c52c5bc 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc @@ -39,6 +39,7 @@ void GPUDeviceManager::InitDevice() { cusolverDnSetStream(cusolver_dn_handle_, reinterpret_cast(default_stream())), "Failed to set stream for cusolver dn handle"); CHECK_OP_RET_WITH_EXCEPT(GPUMemoryAllocator::GetInstance().Init(), "Failed to Init gpu memory allocator") + dev_alive_ = true; } void GPUDeviceManager::ReleaseDevice() { @@ -57,6 +58,7 @@ void GPUDeviceManager::ReleaseDevice() { CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnDestroy(cusolver_dn_handle_), "Failed to destroy cusolver dn handle."); } CHECK_OP_RET_WITH_ERROR(GPUMemoryAllocator::GetInstance().Finalize(), "Failed to destroy gpu memory allocator"); + dev_alive_ = false; } bool GPUDeviceManager::CreateStream(DeviceStream *stream) { @@ -89,7 +91,9 @@ const cudnnHandle_t &GPUDeviceManager::GetCudnnHandle() const { return cudnn_han const cublasHandle_t &GPUDeviceManager::GetCublasHandle() const { return cublas_handle_; } const cusolverDnHandle_t &GPUDeviceManager::GetCusolverDnHandle() const { return cusolver_dn_handle_; } -bool GPUDeviceManager::SyncStream(const DeviceStream &stream) const { return CudaDriver::SyncStream(stream); } +bool GPUDeviceManager::SyncStream(const DeviceStream &stream) const { + return dev_alive_ ? CudaDriver::SyncStream(stream) : false; +} bool GPUDeviceManager::CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size) const { return CudaDriver::CopyDeviceMemToHost(dst, src, size); diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h index 68f569c40a..664d5da11f 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h @@ -60,7 +60,7 @@ class GPUDeviceManager { } private: - GPUDeviceManager() : dev_id_init_(false), cur_dev_id_(0) {} + GPUDeviceManager() : dev_id_init_(false), cur_dev_id_(0), dev_alive_(false) {} ~GPUDeviceManager() = default; GPUDeviceManager(const GPUDeviceManager &) = delete; GPUDeviceManager &operator=(const GPUDeviceManager &) = delete; @@ -81,6 +81,7 @@ class GPUDeviceManager { cusolverDnHandle_t cusolver_dn_handle_{nullptr}; bool dev_id_init_; uint32_t cur_dev_id_; + bool dev_alive_; }; } // namespace gpu } // namespace device