Merge pull request !5071 from VectorSL/codextags/v1.0.0
| @@ -80,7 +80,10 @@ class GpuKernel : public KernelMod { | |||
| std::swap((*shape)[2], (*shape)[1]); | |||
| } | |||
| void SetDimA(const std::vector<size_t> &shape, int *dimA, const std::string &format) { | |||
| void SetDimA(const std::vector<size_t> &shape, int *dimA, size_t len, const std::string &format) { | |||
| if (shape.size() != len) { | |||
| MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D."; | |||
| } | |||
| if (format == "NCHW" || format == "DefaultFormat") { | |||
| dimA[0] = SizeToInt(shape[0]); | |||
| dimA[1] = SizeToInt(shape[1]); | |||
| @@ -95,7 +98,10 @@ class GpuKernel : public KernelMod { | |||
| MS_LOG(ERROR) << "Unsupported data format " << format; | |||
| } | |||
| } | |||
| void SetStrideA(const std::vector<size_t> &shape, int *strideA, const std::string &format) { | |||
| void SetStrideA(const std::vector<size_t> &shape, int *strideA, size_t len, const std::string &format) { | |||
| if (shape.size() != len) { | |||
| MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D."; | |||
| } | |||
| if (format == "NCHW" || format == "DefaultFormat") { | |||
| strideA[0] = SizeToInt(shape[1] * shape[2] * shape[3]); | |||
| strideA[1] = SizeToInt(shape[2] * shape[3]); | |||
| @@ -147,13 +147,13 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") { | |||
| auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_), | |||
| IntToSize(old_width_ + pad_width_)}; | |||
| SetDimA(padded_shape, dimA, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, data_format_); | |||
| SetDimA(padded_shape, dimA, 4, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, 4, data_format_); | |||
| } else if (data_format_ == "NHWC") { | |||
| auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_), | |||
| IntToSize(c_)}; | |||
| SetDimA(padded_shape, dimA, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, data_format_); | |||
| SetDimA(padded_shape, dimA, 4, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, 4, data_format_); | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(padded_desc_, cudnn_data_type_, 4, dimA, strideApadded), | |||
| "cudnnSetTensor4dDescriptor failed"); | |||
| @@ -259,18 +259,18 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape, | |||
| const std::vector<size_t> &output_shape) { | |||
| int nbDims = 4; | |||
| const int nbDims = 4; | |||
| int dimA[4]; | |||
| int strideAin[4]; | |||
| int dimAout[4]; | |||
| int strideAout[4]; | |||
| SetDimA(in_shape, dimA, data_format_); | |||
| SetStrideA(in_shape, strideAin, data_format_); | |||
| SetDimA(output_shape, dimAout, data_format_); | |||
| SetStrideA(output_shape, strideAout, data_format_); | |||
| SetDimA(in_shape, dimA, 4, data_format_); | |||
| SetStrideA(in_shape, strideAin, 4, data_format_); | |||
| SetDimA(output_shape, dimAout, 4, data_format_); | |||
| SetStrideA(output_shape, strideAout, 4, data_format_); | |||
| int filterDimA[4]; | |||
| // OHWI for NHWC; OIHW for NCHW | |||
| SetDimA(filter_shape, filterDimA, data_format_); | |||
| SetDimA(filter_shape, filterDimA, 4, data_format_); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(input_desc_, cudnn_data_type_, nbDims, dimA, strideAin), | |||
| "cudnnSetTensor4dDescriptor failed"); | |||
| @@ -148,13 +148,13 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") { | |||
| auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_), | |||
| IntToSize(old_width_ + pad_width_)}; | |||
| SetDimA(padded_shape, dimA, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, data_format_); | |||
| SetDimA(padded_shape, dimA, 4, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, 4, data_format_); | |||
| } else if (data_format_ == "NHWC") { | |||
| auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_), | |||
| IntToSize(c_)}; | |||
| SetDimA(padded_shape, dimA, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, data_format_); | |||
| SetDimA(padded_shape, dimA, 4, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, 4, data_format_); | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded), | |||
| @@ -283,15 +283,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| } | |||
| void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &filter_shape, | |||
| const std::vector<size_t> &in_shape) { | |||
| int nbDims = 4; | |||
| const int nbDims = 4; | |||
| int dimA[4]; | |||
| int strideAin[4]; | |||
| int dimAdy[4]; | |||
| int strideAdy[4]; | |||
| SetDimA(in_shape, dimA, data_format_); | |||
| SetStrideA(in_shape, strideAin, data_format_); | |||
| SetDimA(dy_shape, dimAdy, data_format_); | |||
| SetStrideA(dy_shape, strideAdy, data_format_); | |||
| SetDimA(in_shape, dimA, 4, data_format_); | |||
| SetStrideA(in_shape, strideAin, 4, data_format_); | |||
| SetDimA(dy_shape, dimAdy, 4, data_format_); | |||
| SetStrideA(dy_shape, strideAdy, 4, data_format_); | |||
| // filter shape always keep OIHW. | |||
| int filterDimA[4] = {SizeToInt(filter_shape[0]), SizeToInt(filter_shape[1]), SizeToInt(filter_shape[2]), | |||
| SizeToInt(filter_shape[3])}; | |||
| @@ -149,13 +149,13 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") { | |||
| auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_), | |||
| IntToSize(old_width_ + pad_width_)}; | |||
| SetDimA(padded_shape, dimA, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, data_format_); | |||
| SetDimA(padded_shape, dimA, 4, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, 4, data_format_); | |||
| } else if (data_format_ == "NHWC") { | |||
| auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_), | |||
| IntToSize(c_)}; | |||
| SetDimA(padded_shape, dimA, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, data_format_); | |||
| SetDimA(padded_shape, dimA, 4, data_format_); | |||
| SetStrideA(padded_shape, strideApadded, 4, data_format_); | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded), | |||
| @@ -284,17 +284,17 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| } | |||
| void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &input_shape, | |||
| const std::vector<size_t> &filter_shape) { | |||
| int nbDims = 4; | |||
| const int nbDims = 4; | |||
| int dimA[4]; | |||
| int strideAin[4]; | |||
| int dimAdy[4]; | |||
| int strideAdy[4]; | |||
| int filterDimA[4]; | |||
| SetDimA(input_shape, dimA, data_format_); | |||
| SetStrideA(input_shape, strideAin, data_format_); | |||
| SetDimA(dy_shape, dimAdy, data_format_); | |||
| SetStrideA(dy_shape, strideAdy, data_format_); | |||
| SetDimA(filter_shape, filterDimA, data_format_); | |||
| SetDimA(input_shape, dimA, 4, data_format_); | |||
| SetStrideA(input_shape, strideAin, 4, data_format_); | |||
| SetDimA(dy_shape, dimAdy, 4, data_format_); | |||
| SetStrideA(dy_shape, strideAdy, 4, data_format_); | |||
| SetDimA(filter_shape, filterDimA, 4, data_format_); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(dy_desc_, cudnn_data_type_, nbDims, dimAdy, strideAdy), | |||
| "cudnnSetTensorNdDescriptor failed"); | |||
| @@ -88,15 +88,15 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_); | |||
| int nbDims = 4; | |||
| const int nbDims = 4; | |||
| int dimA[4]; | |||
| int strideAin[4]; | |||
| int dimAout[4]; | |||
| int strideAout[4]; | |||
| SetDimA(input_shape, dimA, data_format_); | |||
| SetStrideA(input_shape, strideAin, data_format_); | |||
| SetDimA(output_shape, dimAout, data_format_); | |||
| SetStrideA(output_shape, strideAout, data_format_); | |||
| SetDimA(input_shape, dimA, 4, data_format_); | |||
| SetStrideA(input_shape, strideAin, 4, data_format_); | |||
| SetDimA(output_shape, dimAout, 4, data_format_); | |||
| SetStrideA(output_shape, strideAout, 4, data_format_); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetTensorNdDescriptor(input_descriptor_, cudnn_data_type_, nbDims, dimA, strideAin), | |||
| "cudnnSetTensor4dDescriptor failed"); | |||
| @@ -100,7 +100,7 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| int windowDimA[2] = {window_height, window_width}; | |||
| int paddingA[2] = {0, 0}; | |||
| int strideA[2] = {stride_[2], stride_[3]}; | |||
| int nbDims = 4; | |||
| const int nbDims = 4; | |||
| int dimA[4]; | |||
| int strideAin[4]; | |||
| int dimAy[4]; | |||
| @@ -109,14 +109,14 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| int strideAdy[4]; | |||
| int dimAout[4]; | |||
| int strideAout[4]; | |||
| SetDimA(input_shape, dimA, data_format_); | |||
| SetStrideA(input_shape, strideAin, data_format_); | |||
| SetDimA(input_mask, dimAy, data_format_); | |||
| SetStrideA(input_mask, strideAiny, data_format_); | |||
| SetDimA(dout_shape, dimAdy, data_format_); | |||
| SetStrideA(dout_shape, strideAdy, data_format_); | |||
| SetDimA(output_shape, dimAout, data_format_); | |||
| SetStrideA(output_shape, strideAout, data_format_); | |||
| SetDimA(input_shape, dimA, 4, data_format_); | |||
| SetStrideA(input_shape, strideAin, 4, data_format_); | |||
| SetDimA(input_mask, dimAy, 4, data_format_); | |||
| SetStrideA(input_mask, strideAiny, 4, data_format_); | |||
| SetDimA(dout_shape, dimAdy, 4, data_format_); | |||
| SetStrideA(dout_shape, strideAdy, 4, data_format_); | |||
| SetDimA(output_shape, dimAout, 4, data_format_); | |||
| SetStrideA(output_shape, strideAout, 4, data_format_); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(y_descriptor_, cudnn_data_type_, nbDims, dimAy, strideAiny), | |||
| "cudnnSetTensor4dDescriptor failed"); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(dy_descriptor_, cudnn_data_type_, nbDims, dimAdy, strideAdy), | |||