Add float64 support to the Gather and GatherD gradients

Add float64 support to ScatterNd (for the GatherNd gradient); fix a typo; add a file that was left out.
5 years ago · 1e93aaceeb
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,6 +18,14 @@

 namespace mindspore {
 namespace kernel {
 // float64 registrations for ScatterNd: the first input is int32 or int64, the second input and the output
 // are float64. The trailing macro arguments (double, then int / int64_t) mirror those dtypes.
 // NOTE(review): the first input is presumably the indices tensor (the CUDA launcher takes `S *indices`) - confirm.
 MS_REG_GPU_KERNEL_TWO(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
  ScatterNdGpuFwdKernel, double, int)
 MS_REG_GPU_KERNEL_TWO(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
  ScatterNdGpuFwdKernel, double, int64_t)
 MS_REG_GPU_KERNEL_TWO(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 #include "backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h"
 namespace mindspore {
 namespace kernel {
 // Transpose registrations with a single input and output of the same dtype: float64 -> double, float32 -> float.
 MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
                      TransposeGpuFwdKernel, double)
 MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      TransposeGpuFwdKernel, float)
 MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,6 +18,16 @@

 namespace mindspore {
 namespace kernel {
 // Two-input float64 registrations for UnsortedSegmentSum: the first input and the output are float64, the
 // second input is int32 or int64. The second template argument (int / int64_t) follows the second input.
 MS_REG_GPU_KERNEL_TWO(
  UnsortedSegmentSum,
  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64),
  UnsortedSegmentSumGpuKernel, double, int)

 MS_REG_GPU_KERNEL_TWO(
  UnsortedSegmentSum,
  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
  UnsortedSegmentSumGpuKernel, double, int64_t)

 MS_REG_GPU_KERNEL_TWO(
  UnsortedSegmentSum,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
@@ -39,6 +49,36 @@ MS_REG_GPU_KERNEL_TWO(
  UnsortedSegmentSumGpuKernel, int, int64_t)

 // Re-registration with 3 inputs - dynamic shape mode - sets of Int64/Int32 num segments types
 // The float64 variants below cover every combination of an int32/int64 second input with an int32/int64
 // third input. Only the second input's dtype selects the second template argument (int vs int64_t);
 // the third input's dtype does not change which kernel instantiation is registered.
 MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat64)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt64)
                        .AddOutputAttr(kNumberTypeFloat64),
                      UnsortedSegmentSumGpuKernel, double, int)
 MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat64)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddOutputAttr(kNumberTypeFloat64),
                      UnsortedSegmentSumGpuKernel, double, int)

 // Same pair of third-input dtypes, now with an int64 second input.
 MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat64)
                        .AddInputAttr(kNumberTypeInt64)
                        .AddInputAttr(kNumberTypeInt64)
                        .AddOutputAttr(kNumberTypeFloat64),
                      UnsortedSegmentSumGpuKernel, double, int64_t)
 MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat64)
                        .AddInputAttr(kNumberTypeInt64)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddOutputAttr(kNumberTypeFloat64),
                      UnsortedSegmentSumGpuKernel, double, int64_t)

 MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -56,6 +56,14 @@ void ScatterNd(S *indices, T *update, T *output, const size_t &block_size, const
  return;
 }

 // Explicit instantiations of the ScatterNd launcher for double data with int and int64_t index types.
 // indices, indices_stride and work_shape all use the index type S; update and output use the data type T.
 template void ScatterNd<double, int>(int *indices, double *update, double *output, const size_t &block_size,
                                     const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                     const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                                     cudaStream_t stream);
 template void ScatterNd<double, int64_t>(int64_t *indices, double *update, double *output, const size_t &block_size,
                                         const size_t &input_size, const size_t &output_size,
                                         const size_t &indices_dim_0, const size_t &indices_dim_1,
                                         int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream);
 template void ScatterNd<float, int>(int *indices, float *update, float *output, const size_t &block_size,
                                    const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                    const size_t &indices_dim_1, int *indices_stride, int *work_shape,
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -61,6 +61,9 @@ void CalTranspose(const size_t size, const T *input, const size_t *input_shape,
  return;
 }

 // Explicit instantiations of the CalTranspose launcher for double and float element types.
 // Only the input/output element type differs; the shape and axis arrays are size_t in both.
 template void CalTranspose<double>(const size_t size, const double *input, const size_t *input_shape,
                                   const size_t *input_axis, const size_t shape_size, double *output,
                                   cudaStream_t cuda_stream);
 template void CalTranspose<float>(const size_t size, const float *input, const size_t *input_shape,
                                  const size_t *input_axis, const size_t shape_size, float *output,
                                  cudaStream_t cuda_stream);
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu
@@ -1,5 +1,5 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
 */

 #include "backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"

 template<typename T, typename S>
 __global__ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
@@ -29,7 +30,7 @@ __global__ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t
      continue;
    }
    size_t output_index = i * output_dim1 + k;
    atomicAdd(output_addr + output_index, input_addr[input_index]);
    MsAtomicAdd(output_addr + output_index, input_addr[input_index]);
  }
 }

@@ -42,6 +43,11 @@ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0
  return;
 }

 // Explicit instantiations of the UnsortedSegmentSum launcher; the template arguments are deduced from the
 // pointer types of input_addr/output_addr (T) and ids_addr (S).
 // double data with int and int64_t segment ids:
 template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
                                 double* input_addr, int* ids_addr, double* output_addr, cudaStream_t stream);
 template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
                                 double* input_addr, int64_t* ids_addr, double* output_addr, cudaStream_t stream);

 // float data with int segment ids:
 template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
                                 float* input_addr, int* ids_addr, float* output_addr, cudaStream_t stream);
 template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
--- a/mindspore/core/abstract/prim_arrays.cc
+++ b/mindspore/core/abstract/prim_arrays.cc
@@ -247,7 +247,7 @@ AbstractBasePtr InferImplUnsortedSegmentSum(const AnalysisEnginePtr &, const Pri
  MS_EXCEPTION_IF_NULL(segment_ids);
  MS_EXCEPTION_IF_NULL(segment_ids->shape());
  auto segment_ids_shape = segment_ids->shape()->shape();
  (void)CheckTensorDType(x, {kFloat16, kFloat32, kInt32}, "Input 0 (x) for UnsortedSegmentSum should be %s");
  (void)CheckTensorDType(x, {kFloat16, kFloat32, kFloat64, kInt32}, "Input 0 (x) for UnsortedSegmentSum should be %s");
  (void)CheckTensorDType(segment_ids, {kInt32, kInt64}, "Input 1 (segment_ids) for UnsortedSegmentSum should be %s");
  bool x_is_dyn = (!x->shape()->min_shape().empty() && !x->shape()->max_shape().empty());  // check if dynamic shape
  bool ids_is_dyn = (!segment_ids->shape()->min_shape().empty() && !segment_ids->shape()->max_shape().empty());