From d1fcfaf0f23e19dc305362358d7f568ab735be2a Mon Sep 17 00:00:00 2001
From: danishnxt
Date: Wed, 16 Dec 2020 15:19:13 -0500
Subject: [PATCH] First commit - scatter_nd_int64_update

lint fix - no lint required for previous code

fix int64 templates

removed extra files
---
 .../gpu/arrays/scatter_nd_gpu_kernel.cc | 17 +++++++++++
 .../gpu/cuda_impl/scatter_nd.cu         | 30 ++++++++++++++++---
 mindspore/ops/operations/array_ops.py   |  2 +-
 tests/st/ops/gpu/test_scatter_nd.py     | 14 +++++++
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
index b2b3ee031b..d0ed82890c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
@@ -22,18 +22,35 @@ MS_REG_GPU_KERNEL_TWO(
   ScatterNd,
   KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   ScatterNdGpuFwdKernel, float, int)
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd,
+  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ScatterNdGpuFwdKernel, float, int64_t)
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd,
   KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
   ScatterNdGpuFwdKernel, half, int)
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd,
+  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+  ScatterNdGpuFwdKernel, half, int64_t)
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   ScatterNdGpuFwdKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ScatterNdGpuFwdKernel, int, int64_t)
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
   ScatterNdGpuFwdKernel, short, int)  // NOLINT
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
+  ScatterNdGpuFwdKernel, short, int64_t)  // NOLINT
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
   ScatterNdGpuFwdKernel, uchar, int)
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
+  ScatterNdGpuFwdKernel, uchar, int64_t)
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
index c34cd99084..897f48beb8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
@@ -23,9 +23,9 @@ __global__ void ScatterNdKernel(S *indices, T *update, T *output, const size_t b
                                 const size_t output_size, const size_t indices_dim_0, const size_t indices_dim_1,
                                 S *indices_stride, S *work_shape) {
   int i, j;
-  for (int read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < input_size;
+  for (size_t read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < input_size;
        read_index += blockDim.x * gridDim.x) {
-    int write_index = 0;
+    size_t write_index = 0;
     bool out_bound = false;
 
     i = read_index / block_size;
@@ -51,8 +51,8 @@ void ScatterNd(S *indices, T *update, T *output, const size_t &block_size, const
                const size_t &output_size, const size_t &indices_dim_0, const size_t &indices_dim_1, S *indices_stride,
                S *work_shape, cudaStream_t stream) {
   ScatterNdKernel<<<GET_BLOCKS(output_size), GET_THREADS, 0, stream>>>(indices, update, output, block_size, input_size,
-                                                output_size, indices_dim_0, indices_dim_1,
-                                                indices_stride, work_shape);
+                                                                       output_size, indices_dim_0, indices_dim_1,
+                                                                       indices_stride, work_shape);
   return;
 }
 
@@ -60,21 +60,43 @@ template void ScatterNd(int *indices, float *update, float *output, const size_
                         const size_t &input_size, const size_t &output_size,
                         const size_t &indices_dim_0, const size_t &indices_dim_1,
                         int *indices_stride, int *work_shape, cudaStream_t stream);
+template void ScatterNd(int64_t *indices, float *update, float *output, const size_t &block_size,
+                        const size_t &input_size, const size_t &output_size,
+                        const size_t &indices_dim_0, const size_t &indices_dim_1,
+                        int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream);
 template void ScatterNd(int *indices, half *update, half *output, const size_t &block_size,
                         const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                         const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                         cudaStream_t stream);
+template void ScatterNd(int64_t *indices, half *update, half *output, const size_t &block_size,
+                        const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
+                        const size_t &indices_dim_1, int64_t *indices_stride, int64_t *work_shape,
+                        cudaStream_t stream);
 template void ScatterNd(int *indices, int *update, int *output, const size_t &block_size,
                         const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                         const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                         cudaStream_t stream);
+template void ScatterNd(int64_t *indices, int *update, int *output, const size_t &block_size,
+                        const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
+                        const size_t &indices_dim_1, int64_t *indices_stride, int64_t *work_shape,
+                        cudaStream_t stream);
 // NOLINTNEXTLINE
 template void ScatterNd(int *indices, short *update, short *output, const size_t &block_size,
                         const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                         const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                         cudaStream_t stream);
+// NOLINTNEXTLINE
+template void ScatterNd(int64_t *indices, short *update, short *output, const size_t &block_size,
+                        const size_t &input_size, const size_t &output_size,
+                        const size_t &indices_dim_0, const size_t &indices_dim_1,
+                        int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream);
 template void ScatterNd(int *indices, unsigned char *update, unsigned char *output, const size_t &block_size,
                         const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                         const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                         cudaStream_t stream);
+template void ScatterNd(int64_t *indices, unsigned char *update, unsigned char *output,
+                        const size_t &block_size, const size_t &input_size,
+                        const size_t &output_size, const size_t &indices_dim_0,
+                        const size_t &indices_dim_1, int64_t *indices_stride,
+                        int64_t *work_shape, cudaStream_t stream);
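
Beyond the mechanical int64_t instantiations, the only semantic change in scatter_nd.cu is widening read_index and write_index from int to size_t, so neither the grid-stride loop nor the computed output offset can overflow once 64-bit index tensors and large outputs are in play. For intuition, the addressing scheme implied by the launcher parameters (block_size, indices_stride, work_shape) can be sketched in NumPy. This is an illustrative reconstruction, not the literal device code; the skip on out-of-range rows mirrors the kernel's out_bound flag, and the accumulation of duplicate rows matches the expected values in tests/st/ops/gpu/test_scatter_nd.py:

import numpy as np

def scatter_nd_reference(indices, update, shape):
    """Host-side sketch of ScatterNdKernel's addressing (illustrative only)."""
    indices_dim_1 = indices.shape[1]
    # elements covered by one index row: product of the trailing (non-indexed) dims
    block_size = int(np.prod(shape[indices_dim_1:], dtype=np.int64))
    # row-major stride of each indexed dimension (what indices_stride holds on device)
    strides = [int(np.prod(shape[k + 1:], dtype=np.int64)) for k in range(indices_dim_1)]
    output = np.zeros(int(np.prod(shape)), dtype=update.dtype)
    flat_update = update.reshape(-1)
    for read_index in range(flat_update.size):
        # split read_index into an index row i and an inner offset j
        i, j = divmod(read_index, block_size)
        row = indices[i]
        if np.any(row >= np.array(shape[:indices_dim_1])):
            continue  # the device code flags this via out_bound and skips the write
        write_index = int(np.dot(row, strides)) + j
        output[write_index] += flat_update[read_index]  # duplicate rows accumulate
    return output.reshape(shape)

On the int64 case added to scatternd_positive below, this sketch reproduces the expected [[0., 5.3], [0., 1.1]].
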
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index 02f6ff0d11..86fe6d9c23 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -2984,7 +2984,7 @@ class ScatterNd(PrimitiveWithInfer):
     def __infer__(self, indices, update, shape):
         shp = shape['value']
         validator.check_subclass("update_dtype", update['dtype'], mstype.tensor, self.name)
-        validator.check_tensor_dtype_valid("indices", indices['dtype'], [mstype.int32], self.name)
+        validator.check_tensor_dtype_valid("indices", indices['dtype'], [mstype.int32, mstype.int64], self.name)
         validator.check_value_type("shape", shp, [tuple], self.name)
         for i, x in enumerate(shp):
             validator.check_positive_int(x, f'shape[{i}]', self.name)
diff --git a/tests/st/ops/gpu/test_scatter_nd.py b/tests/st/ops/gpu/test_scatter_nd.py
index 061fb697ae..dfe3d18423 100644
--- a/tests/st/ops/gpu/test_scatter_nd.py
+++ b/tests/st/ops/gpu/test_scatter_nd.py
@@ -48,6 +48,13 @@ def scatternd_positive(nptype):
                        [0., 1.1]]).astype(nptype)
     scatternd_net(arr_indices, arr_update, shape, expect)
+    arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(np.int64)
+    arr_update = np.array([3.2, 1.1, 5.3, -2.2, -1.0]).astype(nptype)
+    shape = (2, 2)
+    expect = np.array([[0., 5.3],
+                       [0., 1.1]]).astype(nptype)
+    scatternd_net(arr_indices, arr_update, shape, expect)
+
 
 
 def scatternd_negative(nptype):
     context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
@@ -58,6 +65,13 @@ def scatternd_negative(nptype):
                        [-21.4, -3.1]]).astype(nptype)
     scatternd_net(arr_indices, arr_update, shape, expect)
+    arr_indices = np.array([[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]]).astype(np.int64)
+    arr_update = np.array([-13.4, -3.1, 5.1, -12.1, -1.0]).astype(nptype)
+    shape = (2, 2)
+    expect = np.array([[0., 0.],
+                       [-21.4, -3.1]]).astype(nptype)
+    scatternd_net(arr_indices, arr_update, shape, expect)
+
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
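
Taken together, the four changes let ScatterNd run end to end on GPU with int64 indices for every registered value dtype (float32, float16, int32, int16, uint8). A minimal usage sketch follows; the ScatterNdNet cell is a hypothetical wrapper written here for illustration, mirroring the structure of scatternd_net in the test file above:

import numpy as np
import mindspore.nn as nn
import mindspore.context as context
import mindspore.ops.operations as P
from mindspore import Tensor

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


class ScatterNdNet(nn.Cell):
    """Hypothetical wrapper, modeled on scatternd_net in tests/st/ops/gpu/test_scatter_nd.py."""
    def __init__(self, shape):
        super(ScatterNdNet, self).__init__()
        self.scatter_nd = P.ScatterNd()
        self.shape = shape

    def construct(self, indices, update):
        return self.scatter_nd(indices, update, self.shape)


# int64 indices were rejected by the int32-only validator before this patch
indices = Tensor(np.array([[0, 1], [1, 1]]).astype(np.int64))
update = Tensor(np.array([3.2, 1.1]).astype(np.float32))
net = ScatterNdNet((2, 2))
print(net(indices, update))
# [[0.  3.2]
#  [0.  1.1]]

Note that the output shape is passed as a constant Python tuple rather than a Tensor; ScatterNd's __infer__ enforces this with check_value_type("shape", shp, [tuple], ...), as visible in the array_ops.py hunk.
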