diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gatherv2_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gatherv2_gpu_kernel.cc index 7ff7f1d13b..1131d8847a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gatherv2_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gatherv2_gpu_kernel.cc @@ -18,69 +18,54 @@ namespace mindspore { namespace kernel { - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64), GatherV2GpuFwdKernel, double, int) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64), GatherV2GpuFwdKernel, double, int64_t) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), GatherV2GpuFwdKernel, float, int) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32), GatherV2GpuFwdKernel, float, int64_t) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16), GatherV2GpuFwdKernel, half, int) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16), GatherV2GpuFwdKernel, half, int64_t) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), GatherV2GpuFwdKernel, int, int) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), GatherV2GpuFwdKernel, int, int64_t) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt16), 
GatherV2GpuFwdKernel, int16_t, int) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt16), GatherV2GpuFwdKernel, int16_t, int64_t) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt8), GatherV2GpuFwdKernel, int8_t, int) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt8), GatherV2GpuFwdKernel, int8_t, int64_t) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt8), GatherV2GpuFwdKernel, uint8_t, int) - MS_REG_GPU_KERNEL_TWO( Gather, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt8), GatherV2GpuFwdKernel, uint8_t, int64_t) - MS_REG_GPU_KERNEL_TWO(Gather, KernelAttr() .AddInputAttr(kNumberTypeFloat32) @@ -88,7 +73,6 @@ MS_REG_GPU_KERNEL_TWO(Gather, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat32), GatherV2GpuFwdKernel, float, int) - MS_REG_GPU_KERNEL_TWO(Gather, KernelAttr() .AddInputAttr(kNumberTypeFloat32) @@ -96,7 +80,6 @@ MS_REG_GPU_KERNEL_TWO(Gather, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat32), GatherV2GpuFwdKernel, float, int64_t) - MS_REG_GPU_KERNEL_TWO(Gather, KernelAttr() .AddInputAttr(kNumberTypeFloat16) @@ -104,7 +87,6 @@ MS_REG_GPU_KERNEL_TWO(Gather, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat16), GatherV2GpuFwdKernel, half, int) - MS_REG_GPU_KERNEL_TWO(Gather, KernelAttr() .AddInputAttr(kNumberTypeFloat16) @@ -112,17 +94,14 @@ MS_REG_GPU_KERNEL_TWO(Gather, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat16), GatherV2GpuFwdKernel, half, int64_t) - MS_REG_GPU_KERNEL_TWO( SparseGatherV2, 
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), GatherV2GpuFwdKernel, float, int) - MS_REG_GPU_KERNEL_TWO( SparseGatherV2, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16), GatherV2GpuFwdKernel, half, int) - MS_REG_GPU_KERNEL_TWO(SparseGatherV2, KernelAttr() .AddInputAttr(kNumberTypeFloat32) @@ -130,7 +109,6 @@ MS_REG_GPU_KERNEL_TWO(SparseGatherV2, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat32), GatherV2GpuFwdKernel, float, int) - MS_REG_GPU_KERNEL_TWO(SparseGatherV2, KernelAttr() .AddInputAttr(kNumberTypeFloat16) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h index 2d0dba3a70..e195003035 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -207,14 +207,8 @@ class GpuKernel : public KernelMod { MS_EXCEPTION(ValueError) << "cudnnSetTensorNdDescriptor don't support" << shape.size() << "D."; } const int nbDims = shape.size(); - int *dim = new (std::nothrow) int[nbDims]; - if (dim == nullptr) { - MS_LOG(EXCEPTION) << "malloc dim failed."; - } - int *stride = new (std::nothrow) int[nbDims]; - if (stride == nullptr) { - MS_LOG(EXCEPTION) << "malloc stride failed."; - } + std::unique_ptr<int[]> dim = std::make_unique<int[]>(nbDims); + std::unique_ptr<int[]> stride = std::make_unique<int[]>(nbDims); for (int i = 0; i < nbDims; i++) { dim[i] = SizeToInt(shape[i]); @@ -225,13 +219,9 @@ class GpuKernel : public KernelMod { stride[i] = stride[i + 1] * SizeToInt(shape[i + 1]); } - CHECK_CUDNN_RET_WITH_EXCEPT(node, cudnnSetTensorNdDescriptor(descriptor, data_type, nbDims, dim, stride), + CHECK_CUDNN_RET_WITH_EXCEPT(node, + cudnnSetTensorNdDescriptor(descriptor, data_type, nbDims, dim.get(), stride.get()), "cudnnSetTensorNdDescriptor failed"); - - delete[] dim; - dim = nullptr; - delete[] stride; - stride = nullptr; } // choose the suitable datatype for cudnn/cublas diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/squared_difference_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/squared_difference_kernel.cc index 2395d2e1c7..4601492f6d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/squared_difference_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/squared_difference_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,18 +23,15 @@ MS_REG_GPU_KERNEL_ONE( SquaredDifference, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), SquaredDifferenceOpGpuKernel, float) - // fp16 MS_REG_GPU_KERNEL_ONE( SquaredDifference, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), SquaredDifferenceOpGpuKernel, half) - // int32 MS_REG_GPU_KERNEL_ONE( SquaredDifference, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), SquaredDifferenceOpGpuKernel, int) - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h index 9d8f018251..ea34c22813 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -79,7 +79,7 @@ class PoolingGradGpuKernel : public GpuKernel { } bool InitShape(const CNodePtr &kernel_node, int *dimA, int *strideAin, int *dimAy, int *strideAiny, int *dimAdy, - int *strideAdy, int *dimAout, int *strideAout) { + int *strideAdy, int *dimAout, int *strideAout, int nbDims) { auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto input_mask = AnfAlgo::GetInputDeviceShape(kernel_node, 1); auto dout_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); @@ -98,14 +98,14 @@ class PoolingGradGpuKernel : public GpuKernel { } CHECK_TENSOR_SIZE(input_shape); SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format); - SetDimA(input_shape, dimA, 4, data_format); - SetStrideA(input_shape, strideAin, 4, data_format); - SetDimA(input_mask, dimAy, 4, data_format); - SetStrideA(input_mask, strideAiny, 4, data_format); - SetDimA(dout_shape, dimAdy, 4, data_format); - SetStrideA(dout_shape, strideAdy, 4, data_format); - SetDimA(output_shape, dimAout, 4, data_format); - SetStrideA(output_shape, strideAout, 4, data_format); + SetDimA(input_shape, dimA, nbDims, data_format); + SetStrideA(input_shape, strideAin, nbDims, data_format); + SetDimA(input_mask, dimAy, nbDims, data_format); + SetStrideA(input_mask, strideAiny, nbDims, data_format); + SetDimA(dout_shape, dimAdy, nbDims, data_format); + SetStrideA(dout_shape, strideAdy, nbDims, data_format); + SetDimA(output_shape, dimAout, nbDims, data_format); + SetStrideA(output_shape, strideAout, nbDims, data_format); return true; } @@ -124,7 +124,7 @@ class PoolingGradGpuKernel : public GpuKernel { int strideAdy[4]; int dimAout[4]; int strideAout[4]; - if (!InitShape(kernel_node, dimA, strideAin, dimAy, strideAiny, dimAdy, strideAdy, dimAout, strideAout)) { + if (!InitShape(kernel_node, dimA, strideAin, dimAy, strideAiny, dimAdy, strideAdy, dimAout, strideAout, nbDims)) { return true; } CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,