Merge pull request !896 from VectorSL/update-tensoraddtags/v0.3.0-alpha
| @@ -86,6 +86,14 @@ class TensorAddGpuFwdKernel : public GpuKernel { | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto input_shapeB = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| if (input_shape != output_shape && input_shapeB != output_shape) { | |||
| MS_LOG(ERROR) << "Double-sided broadcast was not supported in cudnn of cudnnOpTensor:\n" | |||
| "InputA must match the corresponding dimension of the destination tensor outC, and each " | |||
| "dimension of the inputB" | |||
| "must match the corresponding dimension of outC or must be equal to 1."; | |||
| return false; | |||
| } | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_shapeB); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "TensorAddGpuFwdKernel input is null"; | |||
| @@ -46,8 +46,6 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| pad_left_(0), | |||
| n_(0), | |||
| c_(0), | |||
| stride_(1), | |||
| dilation_(0), | |||
| group_(1), | |||
| is_null_input_(false), | |||
| input_size_(0), | |||
| @@ -125,8 +123,8 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| pad_width_ = 0; | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, | |||
| CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[2], stride_[3], dilation_[2], | |||
| dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| input_descriptor_real = input_desc_; | |||
| } | |||
| @@ -226,10 +224,10 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, n_, c_, | |||
| old_height_ + pad_height_, old_width_ + pad_width_), | |||
| "cudnnSetTensor4dDescriptor failed"); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_, | |||
| dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor( | |||
| conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[2], stride_[3], | |||
| dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| } | |||
| void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape, | |||
| @@ -269,22 +267,20 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| } | |||
| } | |||
| void SetStrideAndDilation(const CNodePtr &kernel_node) { | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride"); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation"); | |||
| if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) { | |||
| MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!"; | |||
| stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride"); | |||
| dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation"); | |||
| if (stride_.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "Conv2d's' stride must be 4d!"; | |||
| } | |||
| if (stride_ori[0] != 1 || stride_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!"; | |||
| if (stride_[0] != 1 || stride_[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2d stride only support 1 in N axis and C axis!"; | |||
| } | |||
| if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { | |||
| MS_LOG(EXCEPTION) << "conv2d only support equal dilation, and dilation must be 4d!"; | |||
| if (dilation_.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "Conv2d's dilation must be 4d!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!"; | |||
| if (dilation_[0] != 1 || dilation_[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2d dilation only support 1 in N axis and C axis!"; | |||
| } | |||
| stride_ = stride_ori[2]; | |||
| dilation_ = dilation_ori[2]; | |||
| } | |||
| cudnnHandle_t cudnn_handle_; | |||
| cudnnTensorDescriptor_t input_desc_; | |||
| @@ -307,8 +303,8 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| int pad_left_; | |||
| int n_; | |||
| int c_; | |||
| int stride_; | |||
| int dilation_; | |||
| std::vector<int> stride_; | |||
| std::vector<int> dilation_; | |||
| int group_; | |||
| bool is_null_input_; | |||
| size_t input_size_; | |||
| @@ -46,8 +46,6 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| pad_left_(0), | |||
| n_(0), | |||
| c_(0), | |||
| stride_(1), | |||
| dilation_(0), | |||
| group_(1), | |||
| is_null_input_(false), | |||
| input_size_(0), | |||
| @@ -128,8 +126,8 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| pad_width_ = 0; | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, | |||
| CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[0], stride_[1], dilation_[2], | |||
| dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "GetConvolution2dDescriptor failed"); | |||
| x_desc_real = x_desc_; | |||
| } | |||
| @@ -229,10 +227,10 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, n_, | |||
| c_, old_height_ + pad_height_, old_width_ + pad_width_), | |||
| "cudnnSetTensor4dDescriptor failed"); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_, | |||
| dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor( | |||
| conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[0], stride_[1], | |||
| dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| } | |||
| void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) { | |||
| if (group_ > 1 || CUDNN_MAJOR < 7) { | |||
| @@ -277,19 +275,17 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| "SetTensor4dDescriptor failed"); | |||
| } | |||
| void SetStrideAndDilation(const CNodePtr &kernel_node) { | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride"); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation"); | |||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||
| MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal stride, and stride must be 2d!"; | |||
| stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride"); | |||
| dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation"); | |||
| if (stride_.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's stride must be 2d!"; | |||
| } | |||
| if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { | |||
| MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal dilation, and dilation must be 4d!"; | |||
| if (dilation_.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's dilation must be 4d!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| if (dilation_[0] != 1 || dilation_[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!"; | |||
| } | |||
| stride_ = stride_ori[0]; | |||
| dilation_ = dilation_ori[2]; | |||
| } | |||
| cudnnHandle_t cudnn_handle_; | |||
| cudnnFilterDescriptor_t dw_desc_; | |||
| @@ -312,8 +308,8 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| int pad_left_; | |||
| int n_; | |||
| int c_; | |||
| int stride_; | |||
| int dilation_; | |||
| std::vector<int> stride_; | |||
| std::vector<int> dilation_; | |||
| int group_; | |||
| bool is_null_input_; | |||
| size_t input_size_; | |||
| @@ -46,8 +46,6 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| pad_left_(0), | |||
| n_(0), | |||
| c_(0), | |||
| stride_(1), | |||
| dilation_(0), | |||
| group_(1), | |||
| is_null_input_(false), | |||
| input_size_(0), | |||
| @@ -84,7 +82,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| cudnnConvolutionBackwardData(cudnn_handle_, &alpha, w_desc_, w, dy_desc_, dy, conv_desc_, algo_, work_space, | |||
| workspace_size_, &beta, padded_descriptor_, padded), | |||
| "ConvolutionBackwardData failed"); | |||
| CalPadGrad(padded_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_, | |||
| CalPadGrad(input_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_, | |||
| old_width_ + pad_width_, pad_top_, pad_left_, dx, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| } else { | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| @@ -129,8 +127,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| pad_width_ = 0; | |||
| } | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, | |||
| CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[0], stride_[1], dilation_[2], | |||
| dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| dx_desc_real = dx_desc_; | |||
| } | |||
| @@ -229,10 +227,10 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_, n_, | |||
| c_, old_height_ + pad_height_, old_width_ + pad_width_), | |||
| "cudnnSetTensor4dDescriptor failed"); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT( | |||
| cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_, | |||
| dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor( | |||
| conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[0], stride_[1], | |||
| dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), | |||
| "cudnnSetConvolution2dDescriptor failed"); | |||
| } | |||
| void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) { | |||
| if (group_ > 1 || CUDNN_MAJOR < 7) { | |||
| @@ -275,19 +273,17 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| "SetTensor4dDescriptor failed"); | |||
| } | |||
| void SetStrideAndDilation(const CNodePtr &kernel_node) { | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride"); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation"); | |||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||
| MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal stride, and stride must be 2d!"; | |||
| stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride"); | |||
| dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation"); | |||
| if (stride_.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's stride must be 2d!"; | |||
| } | |||
| if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) { | |||
| MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal dilation, and dilation must be 4d!"; | |||
| if (dilation_.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's dilation must be 4d!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| if (dilation_[0] != 1 || dilation_[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!"; | |||
| } | |||
| stride_ = stride_ori[0]; | |||
| dilation_ = dilation_ori[2]; | |||
| } | |||
| cudnnHandle_t cudnn_handle_; | |||
| cudnnFilterDescriptor_t w_desc_; | |||
| @@ -309,8 +305,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| int pad_left_; | |||
| int n_; | |||
| int c_; | |||
| int stride_; | |||
| int dilation_; | |||
| std::vector<int> stride_; | |||
| std::vector<int> dilation_; | |||
| int group_; | |||
| bool is_null_input_; | |||
| size_t input_size_; | |||