diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
index d0ed82890c..cc4079fe97 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,14 @@
 
 namespace mindspore {
 namespace kernel {
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd,
+  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ScatterNdGpuFwdKernel, double, int)
+MS_REG_GPU_KERNEL_TWO(
+  ScatterNd,
+  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ScatterNdGpuFwdKernel, double, int64_t)
 MS_REG_GPU_KERNEL_TWO(
   ScatterNd,
   KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc
index 2afd3638ed..6adcbcd0a2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 #include "backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h"
 namespace mindspore {
 namespace kernel {
+MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                      TransposeGpuFwdKernel, double)
 MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                       TransposeGpuFwdKernel, float)
 MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc
index 03f5096c34..991152df5e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,16 @@
 
 namespace mindspore {
 namespace kernel {
+MS_REG_GPU_KERNEL_TWO(
+  UnsortedSegmentSum,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64),
+  UnsortedSegmentSumGpuKernel, double, int)
+
+MS_REG_GPU_KERNEL_TWO(
+  UnsortedSegmentSum,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
+  UnsortedSegmentSumGpuKernel, double, int64_t)
+
 MS_REG_GPU_KERNEL_TWO(
   UnsortedSegmentSum,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
@@ -39,6 +49,36 @@ MS_REG_GPU_KERNEL_TWO(
   UnsortedSegmentSumGpuKernel, int, int64_t)
 
 // Re-registration with 3 inputs - dynamic shape mode - sets of Int64/Int32 num segments types
+MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeFloat64)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt64)
+                        .AddOutputAttr(kNumberTypeFloat64),
+                      UnsortedSegmentSumGpuKernel, double, int)
+MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeFloat64)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeFloat64),
+                      UnsortedSegmentSumGpuKernel, double, int)
+
+MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeFloat64)
+                        .AddInputAttr(kNumberTypeInt64)
+                        .AddInputAttr(kNumberTypeInt64)
+                        .AddOutputAttr(kNumberTypeFloat64),
+                      UnsortedSegmentSumGpuKernel, double, int64_t)
+MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeFloat64)
+                        .AddInputAttr(kNumberTypeInt64)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeFloat64),
+                      UnsortedSegmentSumGpuKernel, double, int64_t)
+
 MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
                       KernelAttr()
                         .AddInputAttr(kNumberTypeFloat32)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
index 897f48beb8..ab706d7739 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -56,6 +56,14 @@ void ScatterNd(S *indices, T *update, T *output, const size_t &block_size, const
   return;
 }
 
+template void ScatterNd<double, int>(int *indices, double *update, double *output, const size_t &block_size,
+                                     const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
+                                     const size_t &indices_dim_1, int *indices_stride, int *work_shape,
+                                     cudaStream_t stream);
+template void ScatterNd<double, int64_t>(int64_t *indices, double *update, double *output, const size_t &block_size,
+                                         const size_t &input_size, const size_t &output_size,
+                                         const size_t &indices_dim_0, const size_t &indices_dim_1,
+                                         int64_t *indices_stride, int64_t *work_shape, cudaStream_t stream);
 template void ScatterNd<float, int>(int *indices, float *update, float *output, const size_t &block_size,
                                     const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                     const size_t &indices_dim_1, int *indices_stride, int *work_shape,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu
index 7dfc42caa1..b8c713f537 100755
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,6 +61,9 @@ void CalTranspose(const size_t size, const T *input, const size_t *input_shape,
   return;
 }
 
+template void CalTranspose<double>(const size_t size, const double *input, const size_t *input_shape,
+                                   const size_t *input_axis, const size_t shape_size, double *output,
+                                   cudaStream_t cuda_stream);
 template void CalTranspose<float>(const size_t size, const float *input, const size_t *input_shape,
                                   const size_t *input_axis, const size_t shape_size, float *output,
                                   cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu
index 3d299c2352..76d9728808 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 
 #include "backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
 
 template<typename T, typename S>
 __global__ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
@@ -29,7 +30,7 @@ __global__ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t
       continue;
     }
     size_t output_index = i * output_dim1 + k;
-    atomicAdd(output_addr + output_index, input_addr[input_index]);
+    MsAtomicAdd(output_addr + output_index, input_addr[input_index]);
   }
 }
 
@@ -42,6 +43,11 @@ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0
   return;
 }
 
+template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
+                                 double* input_addr, int* ids_addr, double* output_addr, cudaStream_t stream);
+template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
+                                 double* input_addr, int64_t* ids_addr, double* output_addr, cudaStream_t stream);
+
 template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
                                  float* input_addr, int* ids_addr, float* output_addr, cudaStream_t stream);
 template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1,
diff --git a/mindspore/core/abstract/prim_arrays.cc b/mindspore/core/abstract/prim_arrays.cc
index 209182781e..c7f419b09d 100644
--- a/mindspore/core/abstract/prim_arrays.cc
+++ b/mindspore/core/abstract/prim_arrays.cc
@@ -247,7 +247,7 @@ AbstractBasePtr InferImplUnsortedSegmentSum(const AnalysisEnginePtr &, const Pri
   MS_EXCEPTION_IF_NULL(segment_ids);
   MS_EXCEPTION_IF_NULL(segment_ids->shape());
   auto segment_ids_shape = segment_ids->shape()->shape();
-  (void)CheckTensorDType(x, {kFloat16, kFloat32, kInt32}, "Input 0 (x) for UnsortedSegmentSum should be %s");
+  (void)CheckTensorDType(x, {kFloat16, kFloat32, kFloat64, kInt32}, "Input 0 (x) for UnsortedSegmentSum should be %s");
   (void)CheckTensorDType(segment_ids, {kInt32, kInt64}, "Input 1 (segment_ids) for UnsortedSegmentSum should be %s");
   bool x_is_dyn = (!x->shape()->min_shape().empty() && !x->shape()->max_shape().empty());  // check if dynamic shape
   bool ids_is_dyn = (!segment_ids->shape()->min_shape().empty() && !segment_ids->shape()->max_shape().empty());