From 811de4f5d4fa812b2a04f5372e0548a6bd227d40 Mon Sep 17 00:00:00 2001 From: CaoJian Date: Sun, 7 Feb 2021 16:10:02 +0800 Subject: [PATCH] GPU AbsGrad calculation error when input is 0.0 --- .../kernel_compiler/gpu/cuda_impl/broadcast_impl.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index f8080eccf1..c9b7b1dcf2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -180,7 +180,7 @@ template struct AbsGradFunc { __device__ __forceinline__ T operator()(const T &lhs, const T &rhs) { T zero = 0.0; - return lhs < zero ? -rhs : rhs; + return lhs < zero ? -rhs : lhs > zero ? rhs : zero; } }; @@ -188,7 +188,7 @@ template <> struct AbsGradFunc { __device__ __forceinline__ half2 operator()(const half2 &lhs, const half2 &rhs) { half2 zero(0.0, 0.0); - return lhs < zero ? -rhs : rhs; + return lhs < zero ? -rhs : lhs > zero ? rhs : zero; } }; @@ -200,7 +200,7 @@ struct SquaredDifferenceFunc { } }; -// Element-wise Comparation +// Element-wise Comparison template __global__ void ElewiseCmpKernel(const int nums, const T *x0, const T *x1, bool *y) { for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < nums; pos += blockDim.x * gridDim.x) { @@ -305,7 +305,7 @@ template void ElewiseArith(const int &nums, enum BroadcastOpType op, const uint8 template void ElewiseArith(const int &nums, enum BroadcastOpType op, const int64_t *x0, const int64_t *x1, int64_t *y, cudaStream_t stream); -// Broadcast comparation +// Broadcast comparison __device__ __forceinline__ size_t Index(const size_t &index, const size_t &dim) { return dim == 1 ? 0 : index; } template