Browse Source

GPU: update conv and binary-op kernels for auto-mixed-precision

tags/v0.3.0-alpha
VectorSL 5 years ago
parent
commit
ee7a64018c
4 changed files with 28 additions and 7 deletions
  1. +1
    -1
      mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h
  2. +9
    -2
      mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
  3. +9
    -2
      mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
  4. +9
    -2
      mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h

+ 1
- 1
mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h View File

@@ -218,7 +218,7 @@ class BinaryOpGpuKernel : public GpuKernel {
}
}
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, cudnn_data_type_, CUDNN_NOT_PROPAGATE_NAN),
cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN),
"cudnnSetOpTensorDescriptor failed");
return;
}


+ 9
- 2
mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h View File

@@ -142,10 +142,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
}
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
CUDNN_CROSS_CORRELATION, cudnn_data_type_),
CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
"cudnnSetConvolution2dDescriptor failed");
input_descriptor_real = input_desc_;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
"cudnnSetConvolutionMathType failed.")
}
SelectAlgorithm(input_descriptor_real);
InitSizeLists();
return true;
@@ -240,7 +244,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
"cudnnSetTensor4dDescriptor failed");
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_),
dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
"cudnnSetConvolution2dDescriptor failed");
}

@@ -276,6 +280,9 @@ class Conv2dGpuFwdKernel : public GpuKernel {
"cudnnGetConvolutionForwardAlgorithm_v7 failed");
conv_algorithm_ = perf_results.algo;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
conv_algorithm_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
}
}
cudnnHandle_t cudnn_handle_;
cudnnTensorDescriptor_t input_desc_;


+ 9
- 2
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h View File

@@ -141,10 +141,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
}
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
CUDNN_CROSS_CORRELATION, cudnn_data_type_),
CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
"GetConvolution2dDescriptor failed");
x_desc_real = x_desc_;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
"cudnnSetConvolutionMathType failed.")
}
SelectAlgorithm(x_desc_real);
InitSizeLists();
return true;
@@ -239,7 +243,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
"cudnnSetTensor4dDescriptor failed");
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_),
dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
"cudnnSetConvolution2dDescriptor failed");
}
void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
@@ -258,6 +262,9 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
"GetConvolutionBackwardFilterAlgorithm failed");
algo_ = perf_results.algo;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
}
}
void GetFilterShape(const CNodePtr &kernel_node, std::vector<int> *filter_shape) {
auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("filter_sizes")->cast<ValueTuplePtr>()->value();


+ 9
- 2
mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h View File

@@ -142,10 +142,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
}
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
CUDNN_CROSS_CORRELATION, cudnn_data_type_),
CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
"cudnnSetConvolution2dDescriptor failed");
dx_desc_real = dx_desc_;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
"cudnnSetConvolutionMathType failed.")
}
SelectAlgorithm(dx_desc_real);
InitSizeLists();
return true;
@@ -239,7 +243,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
"cudnnSetTensor4dDescriptor failed");
CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_),
dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
"cudnnSetConvolution2dDescriptor failed");
}
void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
@@ -258,6 +262,9 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
"cudnnGetConvolutionBackwardDataAlgorithm_v7 failed");
algo_ = perf_results.algo;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
}
}
void GetInputShape(const CNodePtr &kernel_node, std::vector<int> *input_shape) {
auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("input_sizes")->cast<ValueTuplePtr>()->value();


Loading…
Cancel
Save