From b330766a0f3c1eb1ba73503e6cea2bb8d09005e1 Mon Sep 17 00:00:00 2001
From: wilfChen
Date: Mon, 18 May 2020 18:35:35 +0800
Subject: [PATCH] cuda exception check

---
 .../ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h | 14 +++++++++-----
 .../ccsrc/kernel/gpu/math/matmul_gpu_kernel.h   | 15 ++++++++++-----
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h
index 90609c3be5..03192a36a3 100644
--- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h
@@ -49,11 +49,15 @@ class BiasAddGpuKernel : public GpuKernel {
     T *b_addr = GetDeviceAddress<T>(inputs, 1);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
 
-    const float alpha = 1;
-    const float beta = 0;
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnOpTensor(cudnn_handle_, op_desc_, &alpha, x_desc_, x_addr, &alpha, b_desc_, b_addr,
-                                              &beta, x_desc_, output_addr),
-                                "cudnnOpTensor Add failed");
+    try {
+      const float alpha = 1;
+      const float beta = 0;
+      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnOpTensor(cudnn_handle_, op_desc_, &alpha, x_desc_, x_addr, &alpha, b_desc_,
+                                                b_addr, &beta, x_desc_, output_addr),
+                                  "cudnnOpTensor failed");
+    } catch (const std::exception &e) {
+      MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << " when invoking cudnnOpTensor";
+    }
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
diff --git a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h
index e1e3b92620..2dc164b457 100644
--- a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h
@@ -64,11 +64,16 @@ class MatMulGpuKernel : public GpuKernel {
     auto stride_a = SizeToInt(m_ * k_);
     auto stride_b = SizeToInt(k_ * n_);
     auto stride_c = SizeToInt(m_ * n_);
-    CHECK_CUBLAS_RET_WITH_EXCEPT(
-      cublasGemmStridedBatchedEx(handle_, transpose_x2_, transpose_x1_, SizeToInt(n_), SizeToInt(m_), SizeToInt(k_),
-                                 &alpha, input2_addr, dtype_b_, ldb, stride_b, input1_addr, dtype_a_, lda, stride_a,
-                                 &beta, output_addr, dtype_c_, ldc, stride_c, batch_, CUDA_R_32F, algo_),
-      "cublasSgemm Call Fail");
+
+    try {
+      CHECK_CUBLAS_RET_WITH_EXCEPT(
+        cublasGemmStridedBatchedEx(handle_, transpose_x2_, transpose_x1_, SizeToInt(n_), SizeToInt(m_), SizeToInt(k_),
+                                   &alpha, input2_addr, dtype_b_, ldb, stride_b, input1_addr, dtype_a_, lda, stride_a,
+                                   &beta, output_addr, dtype_c_, ldc, stride_c, batch_, CUDA_R_32F, algo_),
+        "cublasGemmStridedBatchedEx failed");
+    } catch (const std::exception &e) {
+      MS_LOG(EXCEPTION) << "Encountered an exception: " << e.what() << " when invoking cublasGemmStridedBatchedEx";
+    }
    return true;
  }
  bool Init(const CNodePtr &kernel_node) override {
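
Note on the pattern: both hunks wrap the existing CHECK_CUDNN_RET_WITH_EXCEPT / CHECK_CUBLAS_RET_WITH_EXCEPT calls, which raise an exception when the cuDNN or cuBLAS status is not success, in a try/catch and re-report the failure through MS_LOG(EXCEPTION) together with the name of the offending call. Below is a minimal standalone sketch of that control flow only; the CheckStatusWithExcept helper, the FakeCudnnOpTensor stub, and the use of std::runtime_error and std::cerr are illustrative stand-ins for the MindSpore macros, not part of the patch.

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

// Illustrative stand-in for CHECK_CUDNN_RET_WITH_EXCEPT / CHECK_CUBLAS_RET_WITH_EXCEPT:
// throw if a library call did not return its "success" status code.
void CheckStatusWithExcept(int status, const std::string &message) {
  if (status != 0) {  // 0 stands in for CUDNN_STATUS_SUCCESS / CUBLAS_STATUS_SUCCESS
    std::ostringstream oss;
    oss << message << ", status: " << status;
    throw std::runtime_error(oss.str());
  }
}

// Stub for a CUDA library call that reports failure through a status code.
int FakeCudnnOpTensor() { return 3; }

// Mirrors the structure of the patched Launch() bodies: run the checked call
// inside try/catch and re-report the failure with the name of the call.
bool Launch() {
  try {
    CheckStatusWithExcept(FakeCudnnOpTensor(), "cudnnOpTensor failed");
  } catch (const std::exception &e) {
    // The patch uses MS_LOG(EXCEPTION) here; stderr keeps this sketch self-contained.
    std::cerr << "Encountered an exception: " << e.what() << " when invoking cudnnOpTensor" << std::endl;
    return false;
  }
  return true;
}

int main() { return Launch() ? 0 : 1; }

Catching at the call site and logging the name of the failing routine keeps the error message tied to the specific kernel launch, rather than surfacing only the raw status produced deep inside the check macro.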