diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc index 1a88d0863f..e834b9b592 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc @@ -32,7 +32,7 @@ MS_REG_GPU_KERNEL_ONE(Concat, ConcatV2GpuFwdKernel, short) // NOLINT MS_REG_GPU_KERNEL_ONE(Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), - ConcatV2GpuFwdKernel, char) + ConcatV2GpuFwdKernel, uchar) MS_REG_GPU_KERNEL_ONE(Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ConcatV2GpuFwdKernel, bool) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.cc index 141e28daf6..995da56d1a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.cc @@ -34,7 +34,7 @@ MS_REG_GPU_KERNEL_TWO( GatherNdGpuFwdKernel, short, int) // NOLINT MS_REG_GPU_KERNEL_TWO( GatherNd, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt8), - GatherNdGpuFwdKernel, char, int) + GatherNdGpuFwdKernel, uchar, int) MS_REG_GPU_KERNEL_TWO( GatherNd, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), GatherNdGpuFwdKernel, bool, int) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc index 7be294b591..eb6342e9d2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc @@ -27,7 +27,7 @@ MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32). MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), StridedSliceGpuKernel, short) // NOLINT MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), - StridedSliceGpuKernel, char) + StridedSliceGpuKernel, uchar) MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), StridedSliceGpuKernel, bool) } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc index cf28ce0179..58e2c11098 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc @@ -27,7 +27,7 @@ MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), StridedSliceGradGpuKernel, short) // NOLINT MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), - StridedSliceGradGpuKernel, char) + StridedSliceGradGpuKernel, uchar) MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), StridedSliceGradGpuKernel, bool) } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/check_valid_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/check_valid_impl.cu index d8a660139d..ac5180d971 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/check_valid_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/check_valid_impl.cu @@ -37,7 +37,8 @@ __global__ void CheckValidKernel(const size_t size, const T *box, const T *img_m } template -__global__ void CheckValidKernel(const size_t size, const char *box, const char *img_metas, S *valid) { +__global__ void CheckValidKernel(const size_t size, const unsigned char *box, + const unsigned char *img_metas, S *valid) { for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) { const size_t left_x = i * 4; const size_t left_y = i * 4 + 1; @@ -45,10 +46,8 @@ __global__ void CheckValidKernel(const size_t size, const char *box, const char const size_t right_y = i * 4 + 3; S valid_flag = false; - valid_flag |= !((unsigned int)box[left_x] >= 0); - valid_flag |= !((unsigned int)box[left_y] >= 0); - valid_flag |= !((unsigned int)img_metas[0] * (unsigned int)img_metas[2] - 1 >= (unsigned int)box[right_x]); - valid_flag |= !((unsigned int)img_metas[1] * (unsigned int)img_metas[2] - 1 >= (unsigned int)box[right_y]); + valid_flag |= !(img_metas[0] * img_metas[2] >= box[right_x] + 1); + valid_flag |= !(img_metas[1] * img_metas[2] >= box[right_y] + 1); valid[i] = !valid_flag; } @@ -67,5 +66,5 @@ template void CheckValid(const size_t &size, const half *box, const half *img_me cudaStream_t cuda_stream); template void CheckValid(const size_t &size, const short *box, const short *img_metas, bool *valid, // NOLINT cudaStream_t cuda_stream); -template void CheckValid(const size_t &size, const char *box, const char *img_metas, bool *valid, +template void CheckValid(const size_t &size, const unsigned char *box, const unsigned char *img_metas, bool *valid, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu index fe726f9550..032626336e 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu @@ -73,7 +73,7 @@ template void ConcatKernel(const size_t size, const int input_num, cudaStream_t cuda_stream); template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, - int* len_axis, char** inputs, char* output, + int* len_axis, unsigned char** inputs, unsigned char* output, cudaStream_t cuda_stream); template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cu index adfa0adacb..8b9b2ea604 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cu @@ -64,11 +64,12 @@ template void GatherNd(int *input, int *indices, int *output, const si const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices, int *batch_strides, cudaStream_t stream); template void GatherNd(short *input, int *indices, short *output, const size_t &output_dim0, // NOLINT - const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices, - int *batch_strides, cudaStream_t stream); -template void GatherNd(char *input, int *indices, char *output, const size_t &output_dim0, - const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices, - int *batch_strides, cudaStream_t stream); + const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices, + int *batch_strides, cudaStream_t stream); +template void GatherNd(unsigned char *input, int *indices, unsigned char *output, + const size_t &output_dim0, const size_t &output_dim1, + const size_t &indices_dim1, int *batch_indices, int *batch_strides, + cudaStream_t stream); template void GatherNd(bool *input, int *indices, bool *output, const size_t &output_dim0, const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices, int *batch_strides, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu index 140298d44c..3b68941080 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu @@ -180,8 +180,7 @@ template void CalSliceGrad(const size_t input_size, const int *dy, const st const std::vector begin, const std::vector size, int *output, cudaStream_t cuda_stream); -// NOLINTNEXTLINE -template void FillDeviceArray(const size_t input_size, short *addr, const float value, cudaStream_t cuda_stream); +template void FillDeviceArray(const size_t input_size, short *addr, const float value, cudaStream_t cuda_stream); // NOLINT template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, const int l3, const int l4, const int d1, const int d2, const int d3, const int d4, const short *input, short *output, cudaStream_t stream); // NOLINT @@ -189,13 +188,14 @@ template void CalSliceGrad(const size_t input_size, const short *dy, cons const std::vector begin, const std::vector size, short *output, // NOLINT cudaStream_t cuda_stream); -template void FillDeviceArray(const size_t input_size, char *addr, const float value, cudaStream_t cuda_stream); +template void FillDeviceArray(const size_t input_size, unsigned char *addr, const float value, + cudaStream_t cuda_stream); template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, const int l3, const int l4, const int d1, const int d2, const int d3, const int d4, - const char *input, char *output, cudaStream_t stream); -template void CalSliceGrad(const size_t input_size, const char *dy, const std::vector in_shape, - const std::vector begin, const std::vector size, char *output, - cudaStream_t cuda_stream); + const unsigned char *input, unsigned char *output, cudaStream_t stream); +template void CalSliceGrad(const size_t input_size, const unsigned char *dy, + const std::vector in_shape, const std::vector begin, + const std::vector size, unsigned char *output, cudaStream_t cuda_stream); template void FillDeviceArray(const size_t input_size, bool *addr, const float value, cudaStream_t cuda_stream); template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, @@ -215,12 +215,11 @@ template void StridedSlice(const std::vector &input_shape, const std::ve const std::vector &strides, const std::vector &output_shape, const int *input, int *output, cudaStream_t cuda_stream); template void StridedSlice(const std::vector &input_shape, const std::vector &begin, - // NOLINTNEXTLINE - const std::vector &strides, const std::vector &output_shape, const short *input, - short *output, cudaStream_t cuda_stream); // NOLINT + const std::vector &strides, const std::vector &output_shape, + const short *input, short *output, cudaStream_t cuda_stream); // NOLINT template void StridedSlice(const std::vector &input_shape, const std::vector &begin, - const std::vector &strides, const std::vector &output_shape, const char *input, - char *output, cudaStream_t cuda_stream); + const std::vector &strides, const std::vector &output_shape, + const unsigned char *input, unsigned char *output, cudaStream_t cuda_stream); template void StridedSlice(const std::vector &input_shape, const std::vector &begin, const std::vector &strides, const std::vector &output_shape, const bool *input, bool *output, cudaStream_t cuda_stream); @@ -235,12 +234,11 @@ template void StridedSliceGrad(const std::vector &dy_shape, const std::vect const std::vector &strides, const std::vector &dx_shape, const int *dy, int *dx, cudaStream_t cuda_stream); template void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, - // NOLINTNEXTLINE - const std::vector &strides, const std::vector &dx_shape, const short *dy, + const std::vector &strides, const std::vector &dx_shape, const short *dy, // NOLINT short *dx, cudaStream_t cuda_stream); // NOLINT template void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, - const std::vector &strides, const std::vector &dx_shape, const char *dy, - char *dx, cudaStream_t cuda_stream); + const std::vector &strides, const std::vector &dx_shape, + const unsigned char *dy, unsigned char *dx, cudaStream_t cuda_stream); template void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, const std::vector &strides, const std::vector &dx_shape, const bool *dy, bool *dx, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h index f6ea0f0efb..967f143aa2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h @@ -64,6 +64,14 @@ class GpuKernelRegister { } }; +// This is necessary for gpu kernels to support uint8 data type. In cuda, an unsigned, +// 8 bit integral type is represented by an unsigned char, but the MS_REG_GPU_KERNEL +// macros defined below will create compilation errors when datatype T contains a space, +// because the variable created by the macro will also contain a space. So, we solve this +// problem by writing uchar when calling these macros, and expanding uchar after the +// variable has been created. +#define uchar unsigned char + #define MS_REG_GPU_KERNEL(OPNAME, OPCLASS) \ static_assert(std::is_base_of::value, " must be base of GpuKernel"); \ static const GpuKernelRegister g_##OPNAME##_gpu_kernel_reg(#OPNAME, KernelAttr(), []() { return new OPCLASS(); }); @@ -88,7 +96,6 @@ class GpuKernelRegister { static_assert(std::is_base_of>::value, " must be base of GpuKernel"); \ static const GpuKernelRegister g_##OPNAME##_##T##_##S##_gpu_kernel_reg(#OPNAME, ATTR, \ []() { return new OPCLASS(); }); - // register of mixed accuracy kernels which use template and maintain three typename #define MS_REG_GPU_KERNEL_THREE(OPNAME, ATTR, OPCLASS, T, S, G) \ static_assert(std::is_base_of>::value, " must be base of GpuKernel"); \ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/check_valid_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/check_valid_gpu_kernel.cc index a9631307dd..9f6bd669ad 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/check_valid_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/check_valid_gpu_kernel.cc @@ -31,6 +31,6 @@ MS_REG_GPU_KERNEL_TWO( CheckValidGpuKernel, short, bool) // NOLINT MS_REG_GPU_KERNEL_TWO( CheckValid, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeBool), - CheckValidGpuKernel, char, bool) + CheckValidGpuKernel, uchar, bool) } // namespace kernel } // namespace mindspore