| @@ -18,69 +18,54 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64), | |||
| GatherV2GpuFwdKernel, double, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64), | |||
| GatherV2GpuFwdKernel, double, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | |||
| GatherV2GpuFwdKernel, float, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32), | |||
| GatherV2GpuFwdKernel, float, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16), | |||
| GatherV2GpuFwdKernel, half, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16), | |||
| GatherV2GpuFwdKernel, half, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| GatherV2GpuFwdKernel, int, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), | |||
| GatherV2GpuFwdKernel, int, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt16), | |||
| GatherV2GpuFwdKernel, int16_t, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt16), | |||
| GatherV2GpuFwdKernel, int16_t, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt8), | |||
| GatherV2GpuFwdKernel, int8_t, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt8), | |||
| GatherV2GpuFwdKernel, int8_t, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt8), | |||
| GatherV2GpuFwdKernel, uint8_t, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| Gather, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt8), | |||
| GatherV2GpuFwdKernel, uint8_t, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO(Gather, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| @@ -88,7 +73,6 @@ MS_REG_GPU_KERNEL_TWO(Gather, | |||
| .AddInputAttr(kNumberTypeInt64) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| GatherV2GpuFwdKernel, float, int) | |||
| MS_REG_GPU_KERNEL_TWO(Gather, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| @@ -96,7 +80,6 @@ MS_REG_GPU_KERNEL_TWO(Gather, | |||
| .AddInputAttr(kNumberTypeInt64) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| GatherV2GpuFwdKernel, float, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO(Gather, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| @@ -104,7 +87,6 @@ MS_REG_GPU_KERNEL_TWO(Gather, | |||
| .AddInputAttr(kNumberTypeInt64) | |||
| .AddOutputAttr(kNumberTypeFloat16), | |||
| GatherV2GpuFwdKernel, half, int) | |||
| MS_REG_GPU_KERNEL_TWO(Gather, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| @@ -112,17 +94,14 @@ MS_REG_GPU_KERNEL_TWO(Gather, | |||
| .AddInputAttr(kNumberTypeInt64) | |||
| .AddOutputAttr(kNumberTypeFloat16), | |||
| GatherV2GpuFwdKernel, half, int64_t) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| SparseGatherV2, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | |||
| GatherV2GpuFwdKernel, float, int) | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| SparseGatherV2, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16), | |||
| GatherV2GpuFwdKernel, half, int) | |||
| MS_REG_GPU_KERNEL_TWO(SparseGatherV2, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| @@ -130,7 +109,6 @@ MS_REG_GPU_KERNEL_TWO(SparseGatherV2, | |||
| .AddInputAttr(kNumberTypeInt64) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| GatherV2GpuFwdKernel, float, int) | |||
| MS_REG_GPU_KERNEL_TWO(SparseGatherV2, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -207,14 +207,8 @@ class GpuKernel : public KernelMod { | |||
| MS_EXCEPTION(ValueError) << "cudnnSetTensorNdDescriptor don't support" << shape.size() << "D."; | |||
| } | |||
| const int nbDims = shape.size(); | |||
| int *dim = new (std::nothrow) int[nbDims]; | |||
| if (dim == nullptr) { | |||
| MS_LOG(EXCEPTION) << "malloc dim failed."; | |||
| } | |||
| int *stride = new (std::nothrow) int[nbDims]; | |||
| if (stride == nullptr) { | |||
| MS_LOG(EXCEPTION) << "malloc stride failed."; | |||
| } | |||
| std::unique_ptr<int[]> dim = std::make_unique<int[]>(nbDims); | |||
| std::unique_ptr<int[]> stride = std::make_unique<int[]>(nbDims); | |||
| for (int i = 0; i < nbDims; i++) { | |||
| dim[i] = SizeToInt(shape[i]); | |||
| @@ -225,13 +219,9 @@ class GpuKernel : public KernelMod { | |||
| stride[i] = stride[i + 1] * SizeToInt(shape[i + 1]); | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(node, cudnnSetTensorNdDescriptor(descriptor, data_type, nbDims, dim, stride), | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(node, | |||
| cudnnSetTensorNdDescriptor(descriptor, data_type, nbDims, dim.get(), stride.get()), | |||
| "cudnnSetTensorNdDescriptor failed"); | |||
| delete[] dim; | |||
| dim = nullptr; | |||
| delete[] stride; | |||
| stride = nullptr; | |||
| } | |||
| // choose the suitable datatype for cudnn/cublas | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -23,18 +23,15 @@ MS_REG_GPU_KERNEL_ONE( | |||
| SquaredDifference, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| SquaredDifferenceOpGpuKernel, float) | |||
| // fp16 | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| SquaredDifference, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| SquaredDifferenceOpGpuKernel, half) | |||
| // int32 | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| SquaredDifference, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| SquaredDifferenceOpGpuKernel, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -79,7 +79,7 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool InitShape(const CNodePtr &kernel_node, int *dimA, int *strideAin, int *dimAy, int *strideAiny, int *dimAdy, | |||
| int *strideAdy, int *dimAout, int *strideAout) { | |||
| int *strideAdy, int *dimAout, int *strideAout, int nbDims) { | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| auto input_mask = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| auto dout_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); | |||
| @@ -98,14 +98,14 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| } | |||
| CHECK_TENSOR_SIZE(input_shape); | |||
| SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format); | |||
| SetDimA(input_shape, dimA, 4, data_format); | |||
| SetStrideA(input_shape, strideAin, 4, data_format); | |||
| SetDimA(input_mask, dimAy, 4, data_format); | |||
| SetStrideA(input_mask, strideAiny, 4, data_format); | |||
| SetDimA(dout_shape, dimAdy, 4, data_format); | |||
| SetStrideA(dout_shape, strideAdy, 4, data_format); | |||
| SetDimA(output_shape, dimAout, 4, data_format); | |||
| SetStrideA(output_shape, strideAout, 4, data_format); | |||
| SetDimA(input_shape, dimA, nbDims, data_format); | |||
| SetStrideA(input_shape, strideAin, nbDims, data_format); | |||
| SetDimA(input_mask, dimAy, nbDims, data_format); | |||
| SetStrideA(input_mask, strideAiny, nbDims, data_format); | |||
| SetDimA(dout_shape, dimAdy, nbDims, data_format); | |||
| SetStrideA(dout_shape, strideAdy, nbDims, data_format); | |||
| SetDimA(output_shape, dimAout, nbDims, data_format); | |||
| SetStrideA(output_shape, strideAout, nbDims, data_format); | |||
| return true; | |||
| } | |||
| @@ -124,7 +124,7 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| int strideAdy[4]; | |||
| int dimAout[4]; | |||
| int strideAout[4]; | |||
| if (!InitShape(kernel_node, dimA, strideAin, dimAy, strideAiny, dimAdy, strideAdy, dimAout, strideAout)) { | |||
| if (!InitShape(kernel_node, dimA, strideAin, dimAy, strideAiny, dimAdy, strideAdy, dimAout, strideAout, nbDims)) { | |||
| return true; | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, | |||