gpu fix codex

5 years ago · c9bfc9f40b
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
@@ -80,7 +80,10 @@ class GpuKernel : public KernelMod {
    std::swap((*shape)[2], (*shape)[1]);
  }

  void SetDimA(const std::vector<size_t> &shape, int *dimA, const std::string &format) {
  void SetDimA(const std::vector<size_t> &shape, int *dimA, size_t len, const std::string &format) {
    if (shape.size() != len) {
      MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D.";
    }
    if (format == "NCHW" || format == "DefaultFormat") {
      dimA[0] = SizeToInt(shape[0]);
      dimA[1] = SizeToInt(shape[1]);
@@ -95,7 +98,10 @@ class GpuKernel : public KernelMod {
      MS_LOG(ERROR) << "Unsupported data format " << format;
    }
  }
  void SetStrideA(const std::vector<size_t> &shape, int *strideA, const std::string &format) {
  void SetStrideA(const std::vector<size_t> &shape, int *strideA, size_t len, const std::string &format) {
    if (shape.size() != len) {
      MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D.";
    }
    if (format == "NCHW" || format == "DefaultFormat") {
      strideA[0] = SizeToInt(shape[1] * shape[2] * shape[3]);
      strideA[1] = SizeToInt(shape[2] * shape[3]);
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
@@ -147,13 +147,13 @@ class Conv2dGpuFwdKernel : public GpuKernel {
      if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") {
        auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_),
                             IntToSize(old_width_ + pad_width_)};
        SetDimA(padded_shape, dimA, data_format_);
        SetStrideA(padded_shape, strideApadded, data_format_);
        SetDimA(padded_shape, dimA, 4, data_format_);
        SetStrideA(padded_shape, strideApadded, 4, data_format_);
      } else if (data_format_ == "NHWC") {
        auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_),
                             IntToSize(c_)};
        SetDimA(padded_shape, dimA, data_format_);
        SetStrideA(padded_shape, strideApadded, data_format_);
        SetDimA(padded_shape, dimA, 4, data_format_);
        SetStrideA(padded_shape, strideApadded, 4, data_format_);
      }
      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(padded_desc_, cudnn_data_type_, 4, dimA, strideApadded),
                                  "cudnnSetTensor4dDescriptor failed");
@@ -259,18 +259,18 @@ class Conv2dGpuFwdKernel : public GpuKernel {

  void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,
                 const std::vector<size_t> &output_shape) {
    int nbDims = 4;
    const int nbDims = 4;
    int dimA[4];
    int strideAin[4];
    int dimAout[4];
    int strideAout[4];
    SetDimA(in_shape, dimA, data_format_);
    SetStrideA(in_shape, strideAin, data_format_);
    SetDimA(output_shape, dimAout, data_format_);
    SetStrideA(output_shape, strideAout, data_format_);
    SetDimA(in_shape, dimA, 4, data_format_);
    SetStrideA(in_shape, strideAin, 4, data_format_);
    SetDimA(output_shape, dimAout, 4, data_format_);
    SetStrideA(output_shape, strideAout, 4, data_format_);
    int filterDimA[4];
    // OHWI for NHWC; OIHW for NCHW
    SetDimA(filter_shape, filterDimA, data_format_);
    SetDimA(filter_shape, filterDimA, 4, data_format_);
    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(input_desc_, cudnn_data_type_, nbDims, dimA, strideAin),
                                "cudnnSetTensor4dDescriptor failed");

--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
@@ -148,13 +148,13 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
      if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") {
        auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_),
                             IntToSize(old_width_ + pad_width_)};
        SetDimA(padded_shape, dimA, data_format_);
        SetStrideA(padded_shape, strideApadded, data_format_);
        SetDimA(padded_shape, dimA, 4, data_format_);
        SetStrideA(padded_shape, strideApadded, 4, data_format_);
      } else if (data_format_ == "NHWC") {
        auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_),
                             IntToSize(c_)};
        SetDimA(padded_shape, dimA, data_format_);
        SetStrideA(padded_shape, strideApadded, data_format_);
        SetDimA(padded_shape, dimA, 4, data_format_);
        SetStrideA(padded_shape, strideApadded, 4, data_format_);
      }
      CHECK_CUDNN_RET_WITH_EXCEPT(
        cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
@@ -283,15 +283,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
  }
  void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &filter_shape,
                 const std::vector<size_t> &in_shape) {
    int nbDims = 4;
    const int nbDims = 4;
    int dimA[4];
    int strideAin[4];
    int dimAdy[4];
    int strideAdy[4];
    SetDimA(in_shape, dimA, data_format_);
    SetStrideA(in_shape, strideAin, data_format_);
    SetDimA(dy_shape, dimAdy, data_format_);
    SetStrideA(dy_shape, strideAdy, data_format_);
    SetDimA(in_shape, dimA, 4, data_format_);
    SetStrideA(in_shape, strideAin, 4, data_format_);
    SetDimA(dy_shape, dimAdy, 4, data_format_);
    SetStrideA(dy_shape, strideAdy, 4, data_format_);
    // filter shape always keep OIHW.
    int filterDimA[4] = {SizeToInt(filter_shape[0]), SizeToInt(filter_shape[1]), SizeToInt(filter_shape[2]),
                         SizeToInt(filter_shape[3])};
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
@@ -149,13 +149,13 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
      if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") {
        auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_),
                             IntToSize(old_width_ + pad_width_)};
        SetDimA(padded_shape, dimA, data_format_);
        SetStrideA(padded_shape, strideApadded, data_format_);
        SetDimA(padded_shape, dimA, 4, data_format_);
        SetStrideA(padded_shape, strideApadded, 4, data_format_);
      } else if (data_format_ == "NHWC") {
        auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_),
                             IntToSize(c_)};
        SetDimA(padded_shape, dimA, data_format_);
        SetStrideA(padded_shape, strideApadded, data_format_);
        SetDimA(padded_shape, dimA, 4, data_format_);
        SetStrideA(padded_shape, strideApadded, 4, data_format_);
      }
      CHECK_CUDNN_RET_WITH_EXCEPT(
        cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
@@ -284,17 +284,17 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
  }
  void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &input_shape,
                 const std::vector<size_t> &filter_shape) {
    int nbDims = 4;
    const int nbDims = 4;
    int dimA[4];
    int strideAin[4];
    int dimAdy[4];
    int strideAdy[4];
    int filterDimA[4];
    SetDimA(input_shape, dimA, data_format_);
    SetStrideA(input_shape, strideAin, data_format_);
    SetDimA(dy_shape, dimAdy, data_format_);
    SetStrideA(dy_shape, strideAdy, data_format_);
    SetDimA(filter_shape, filterDimA, data_format_);
    SetDimA(input_shape, dimA, 4, data_format_);
    SetStrideA(input_shape, strideAin, 4, data_format_);
    SetDimA(dy_shape, dimAdy, 4, data_format_);
    SetStrideA(dy_shape, strideAdy, 4, data_format_);
    SetDimA(filter_shape, filterDimA, 4, data_format_);

    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(dy_desc_, cudnn_data_type_, nbDims, dimAdy, strideAdy),
                                "cudnnSetTensorNdDescriptor failed");
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h
@@ -88,15 +88,15 @@ class PoolingGpuFwdKernel : public GpuKernel {
      return true;
    }
    SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
    int nbDims = 4;
    const int nbDims = 4;
    int dimA[4];
    int strideAin[4];
    int dimAout[4];
    int strideAout[4];
    SetDimA(input_shape, dimA, data_format_);
    SetStrideA(input_shape, strideAin, data_format_);
    SetDimA(output_shape, dimAout, data_format_);
    SetStrideA(output_shape, strideAout, data_format_);
    SetDimA(input_shape, dimA, 4, data_format_);
    SetStrideA(input_shape, strideAin, 4, data_format_);
    SetDimA(output_shape, dimAout, 4, data_format_);
    SetStrideA(output_shape, strideAout, 4, data_format_);
    CHECK_CUDNN_RET_WITH_EXCEPT(
      cudnnSetTensorNdDescriptor(input_descriptor_, cudnn_data_type_, nbDims, dimA, strideAin),
      "cudnnSetTensor4dDescriptor failed");
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h
@@ -100,7 +100,7 @@ class PoolingGradGpuKernel : public GpuKernel {
    int windowDimA[2] = {window_height, window_width};
    int paddingA[2] = {0, 0};
    int strideA[2] = {stride_[2], stride_[3]};
    int nbDims = 4;
    const int nbDims = 4;
    int dimA[4];
    int strideAin[4];
    int dimAy[4];
@@ -109,14 +109,14 @@ class PoolingGradGpuKernel : public GpuKernel {
    int strideAdy[4];
    int dimAout[4];
    int strideAout[4];
    SetDimA(input_shape, dimA, data_format_);
    SetStrideA(input_shape, strideAin, data_format_);
    SetDimA(input_mask, dimAy, data_format_);
    SetStrideA(input_mask, strideAiny, data_format_);
    SetDimA(dout_shape, dimAdy, data_format_);
    SetStrideA(dout_shape, strideAdy, data_format_);
    SetDimA(output_shape, dimAout, data_format_);
    SetStrideA(output_shape, strideAout, data_format_);
    SetDimA(input_shape, dimA, 4, data_format_);
    SetStrideA(input_shape, strideAin, 4, data_format_);
    SetDimA(input_mask, dimAy, 4, data_format_);
    SetStrideA(input_mask, strideAiny, 4, data_format_);
    SetDimA(dout_shape, dimAdy, 4, data_format_);
    SetStrideA(dout_shape, strideAdy, 4, data_format_);
    SetDimA(output_shape, dimAout, 4, data_format_);
    SetStrideA(output_shape, strideAout, 4, data_format_);
    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(y_descriptor_, cudnn_data_type_, nbDims, dimAy, strideAiny),
                                "cudnnSetTensor4dDescriptor failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(dy_descriptor_, cudnn_data_type_, nbDims, dimAdy, strideAdy),