Browse Source

!10275 Fix GPU ops bugs: register uint32 Split kernel variants (kernel registration and SplitKernel template instantiation) and copy SGD's updated params into the kernel's output tensor.

From: @linqingke
Reviewed-by: @liangchenghui,@oacjiewen
Signed-off-by: @liangchenghui
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
2b8737a908
3 changed files with 12 additions and 0 deletions
  1. +3
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.cc
  2. +3
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu
  3. +6
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sgd_gpu_kernel.h

+ 3
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.cc View File

@@ -27,5 +27,8 @@ MS_REG_GPU_KERNEL_ONE(Split,
// Register the Split forward GPU kernel for float16: one input attr and one
// output attr, both kNumberTypeFloat16, instantiated with element type `half`.
MS_REG_GPU_KERNEL_ONE(
Split, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SplitGpuFwdKernel, half)
// Register the Split forward GPU kernel for uint32 (the registration added by
// this commit), mirroring the float16 registration above with kNumberTypeUInt32.
// NOTE(review): AddAllSameAttr(true) presumably requires all variadic
// inputs/outputs to share this dtype — confirm against KernelAttr's definition.
MS_REG_GPU_KERNEL_ONE(
Split, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
SplitGpuFwdKernel, uint32_t)
} // namespace kernel
} // namespace mindspore

+ 3
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu View File

@@ -48,3 +48,6 @@ template void SplitKernel(const size_t size, const int axis_step, const int all_
// Explicit instantiation of SplitKernel for `half` elements, so callers in
// other translation units can link against it.
template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis,
const int all_size_axis, const half* input, half** outputs,
cudaStream_t cuda_stream);
// Explicit instantiation for `uint32_t` elements (added by this commit),
// matching the new uint32 Split kernel registration in split_gpu_kernel.cc.
template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis,
const int all_size_axis, const uint32_t* input, uint32_t** outputs,
cudaStream_t cuda_stream);

+ 6
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sgd_gpu_kernel.h View File

@@ -42,12 +42,18 @@ class SGDGpuKernel : public GpuKernel {
T *accum = GetDeviceAddress<T>(inputs, 3);
T *momentum = GetDeviceAddress<T>(inputs, 4);
T *stat = GetDeviceAddress<T>(inputs, 5);
T *output_param = GetDeviceAddress<T>(outputs, 0);
SGD(size_, dampening_, weight_decay_, nesterov_, lr, momentum, grad, param, accum, stat,
reinterpret_cast<cudaStream_t>(stream));
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(output_param, param, sizeof(T) * size_, cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream)),
"SGD cudaMemcpyAsync params to outputs failed");
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
dampening_ = GetAttr<float>(kernel_node, "dampening");
weight_decay_ = GetAttr<float>(kernel_node, "weight_decay");
nesterov_ = GetAttr<bool>(kernel_node, "nesterov");


Loading…
Cancel
Save