diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h index 7511a3afb5..dd0b6f91a0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h @@ -46,8 +46,8 @@ class ArgmaxWithValueGpuKernel : public GpuKernel { bool Init(const CNodePtr &kernel_node) override { std::vector shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 1); - int dims = shape.size(); - int axis = static_cast(GetAttr(kernel_node, "axis")); + int64_t dims = shape.size(); + int64_t axis = GetAttr(kernel_node, "axis"); if (axis < 0) { axis += dims; } @@ -59,14 +59,16 @@ class ArgmaxWithValueGpuKernel : public GpuKernel { for (auto x : output_shape) { output_size_ *= x; } - bound_ = shape[axis]; + bound_ = static_cast(shape[axis]); + if (shape[axis] != static_cast(bound_)) { + MS_LOG(EXCEPTION) << "bound's shape is larger than index type and overflows when casting."; + } outerSize_ = 1; - for (int i = axis - 1; i >= 0; i--) { + for (int64_t i = axis - 1; i >= 0; i--) { outerSize_ *= shape[i]; } - innerSize_ = 1; - for (int i = axis + 1; i < dims; i++) { + for (int64_t i = axis + 1; i < dims; i++) { innerSize_ *= shape[i]; } InitSizeLists(); @@ -86,7 +88,7 @@ class ArgmaxWithValueGpuKernel : public GpuKernel { std::vector input_size_list_; std::vector output_size_list_; std::vector workspace_size_list_; - size_t bound_; + S bound_; size_t outerSize_; size_t innerSize_; }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu index 0c4f0198f1..8bafcecf1e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu @@ -18,7 +18,7 @@ #include "runtime/device/gpu/cuda_common.h" #include "include/cuda_fp16.h" template -__global__ void ArgmaxWithValue(const T *input, const size_t bound, size_t outerSize, +__global__ void ArgmaxWithValue(const T *input, const S bound, size_t outerSize, size_t innerSize, S *index, T *output) { for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < outerSize * innerSize; pos += gridDim.x * blockDim.x) { @@ -27,7 +27,7 @@ __global__ void ArgmaxWithValue(const T *input, const size_t bound, size_t outer S idx = 0; size_t InputOffset = x * bound * innerSize + 0 * innerSize + y; T maxData = input[InputOffset]; - for (size_t i = 0; i < bound; i++) { + for (S i = 0; i < bound; i++) { InputOffset = x * bound * innerSize + i * innerSize + y; auto inputData = input[InputOffset]; idx = inputData > maxData ? i : idx; @@ -40,16 +40,16 @@ __global__ void ArgmaxWithValue(const T *input, const size_t bound, size_t outer } template -void CalArgmaxWithValue(const T *input, const size_t bound_, const size_t outerSize_, const size_t innerSize_, +void CalArgmaxWithValue(const T *input, const S bound_, const size_t outerSize_, const size_t innerSize_, S *index, T *output, cudaStream_t cuda_stream) { ArgmaxWithValue<<>>(input, bound_, outerSize_, innerSize_, index, output); return; } -template void CalArgmaxWithValue(const float *input, const size_t bound_, const size_t outerSize_, +template void CalArgmaxWithValue(const float *input, const int bound_, const size_t outerSize_, const size_t innerSize_, int *index, float *output, cudaStream_t cuda_stream); -template void CalArgmaxWithValue(const half *input, const size_t bound_, const size_t outerSize_, +template void CalArgmaxWithValue(const half *input, const int bound_, const size_t outerSize_, const size_t innerSize_, int *index, half *output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh index 9bdcab3eec..2a08365f20 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh @@ -17,6 +17,6 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_ template -void CalArgmaxWithValue(const T *input, const size_t bound_, const size_t outerSize_, const size_t innerSize_, S *index, +void CalArgmaxWithValue(const T *input, const S bound_, const size_t outerSize_, const size_t innerSize_, S *index, T *output, cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_