| @@ -66,7 +66,7 @@ class ActivationGpuFwdKernel : public GpuKernel { | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "input"); | |||
| @@ -75,7 +75,7 @@ class ActivationGradGpuKernel : public GpuKernel { | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << node_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "input"); | |||
| @@ -74,7 +74,7 @@ class AdagradGpuKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| update_slots = AnfAlgo::GetNodeAttr<bool>(kernel_node, "update_slots"); | |||
| if (input_num != 4) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 4, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 4, but got " << input_num; | |||
| } | |||
| variable_size_ = sizeof(T); | |||
| accumulation_size_ = sizeof(T); | |||
| @@ -72,7 +72,7 @@ class AdamGpuKernel : public GpuKernel { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| @@ -70,7 +70,7 @@ class AdamWeightDecayGpuKernel : public GpuKernel { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| @@ -76,7 +76,7 @@ class AdaptiveAvgPool2DKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| input_size_ = sizeof(T); | |||
| @@ -66,7 +66,7 @@ class AdaptiveAvgPool2DGradKernel : public GpuKernel { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != kAdaptiveAvgPool2dGradInputNum) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " | |||
| << kAdaptiveAvgPool2dGradInputNum << ", but got " << input_num; | |||
| } | |||
| @@ -54,11 +54,11 @@ class ApplyGradientDescentKernel : public GpuKernel { | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 3, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "var"); | |||
| @@ -105,12 +105,12 @@ class BatchNormGpuKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION) { | |||
| if (input_num != CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " | |||
| << CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM << ", but got " << input_num; | |||
| } | |||
| } else { | |||
| if (input_num != NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " | |||
| << NO_CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION_INPUT_NUM << ", but got " << input_num; | |||
| } | |||
| } | |||
| @@ -136,12 +136,12 @@ class BatchNormGradGpuKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN) { | |||
| if (input_num != CUDNN_BATCHNORM_OPS_BN_INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " | |||
| << CUDNN_BATCHNORM_OPS_BN_INPUT_NUM << ", but got " << input_num; | |||
| } | |||
| } else { | |||
| if (input_num != NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " | |||
| << NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM << ", but got " << input_num; | |||
| } | |||
| } | |||
| @@ -73,11 +73,11 @@ class BCEWithLogitsLossKernel : public GpuKernel { | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 4) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 4, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 4, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| weight_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| @@ -279,11 +279,11 @@ class Conv2dGpuFwdKernel : public GpuKernel { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| @@ -272,11 +272,11 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) { | |||
| @@ -282,11 +282,11 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| void SetPad(const std::vector<int> &input_shape, const CNodePtr &kernel_node) { | |||
| @@ -285,11 +285,11 @@ class Conv3dGpuKernel : public GpuKernel { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| @@ -302,11 +302,11 @@ class Conv3dGradFilterGpuKernel : public GpuKernel { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| @@ -278,11 +278,11 @@ class Conv3dGradInputGpuKernel : public GpuKernel { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| @@ -361,11 +361,11 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| @@ -75,7 +75,7 @@ class Dropout3DGpuFwdKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| @@ -71,7 +71,7 @@ class DropoutGpuFwdKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| @@ -55,7 +55,7 @@ class DropoutGradGpuBwdKernel : public GpuKernel { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| @@ -55,7 +55,7 @@ class FlattenGardGpuBkwKernel : public GpuKernel { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| auto shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| @@ -68,7 +68,7 @@ class FtrlGpuKernel : public GpuKernel { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_NUM << ", but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| @@ -53,7 +53,7 @@ class FusedScaleMomentumGpuKernel : public GpuKernel { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| @@ -53,7 +53,7 @@ class FusedWeightDecayMomentumGpuKernel : public GpuKernel { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| @@ -54,7 +54,7 @@ class FusedWeightDecayScaleMomentumGpuKernel : public GpuKernel { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be " << INPUT_NUM << ", but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| @@ -51,11 +51,11 @@ class HSigmoidKernel : public GpuKernel { | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| @@ -52,11 +52,11 @@ class HSigmoidGradKernel : public GpuKernel { | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| @@ -51,11 +51,11 @@ class HSwishKernel : public GpuKernel { | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| @@ -52,11 +52,11 @@ class HSwishGradKernel : public GpuKernel { | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 2, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| @@ -193,12 +193,12 @@ class Im2ColGpuFwdKernel : public GpuKernel { | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| void SetPad(const std::vector<size_t> &in_shape, const CNodePtr &kernel_node) { | |||
| @@ -109,7 +109,7 @@ class InstanceNormGpuKernel : public GpuKernel { | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 5) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 5, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num; | |||
| } | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (input_shape_.size() != 4) { | |||
| @@ -113,7 +113,7 @@ class InstanceNormGradGpuKernel : public GpuKernel { | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 5) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of input should be 5, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 5, but got " << input_num; | |||
| } | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| @@ -46,10 +46,10 @@ class L2LossGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'L2LossGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -182,11 +182,11 @@ class L2NormalizeGpuKernel : public GpuKernel { | |||
| void CheckIONumber(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be 1, but got " << input_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| void DestroyResource() noexcept { | |||
| @@ -191,12 +191,12 @@ class L2NormalizeGradGpuKernel : public GpuKernel { | |||
| void CheckIONumber(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_SIZE) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of input should be " << INPUT_SIZE << ", but got " | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << INPUT_SIZE << ", but got " | |||
| << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of output should be 1, but got " << output_num; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| void InitResource() override { | |||
| @@ -52,13 +52,13 @@ class LayerNormGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| int begin_norm_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_norm_axis")); | |||
| int begin_params_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_params_axis")); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input_x"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LayerNormGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -71,9 +71,9 @@ class LayerNormGpuKernel : public GpuKernel { | |||
| } | |||
| if (IntToSize(begin_norm_axis) > input_shape.size()) { | |||
| MS_LOG(EXCEPTION) << "For 'LayerNormGpuKernel', begin_norm_axis should be less than or equal to " | |||
| << "the rank of input, but got begin_norm_axis: " << IntToSize(begin_norm_axis) | |||
| << ", rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'begin_norm_axis' should be less than or equal " | |||
| << "to the dimension of input_x, but got begin_norm_axis: " << IntToSize(begin_norm_axis) | |||
| << ", the dimension of input_x: " << input_shape.size(); | |||
| } | |||
| for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) { | |||
| input_row_ *= input_shape[i]; | |||
| @@ -54,13 +54,13 @@ class LayerNormGradGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| int begin_norm_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_norm_axis")); | |||
| int begin_params_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_params_axis")); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LayerNormGradGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -73,9 +73,9 @@ class LayerNormGradGpuKernel : public GpuKernel { | |||
| } | |||
| if (IntToSize(begin_norm_axis) > input_shape.size()) { | |||
| MS_LOG(EXCEPTION) << "For 'LayerNormGradGpuKernel', begin_norm_axis should be less than or equal to " | |||
| << "the rank of input, but got begin_norm_axis: " << IntToSize(begin_norm_axis) | |||
| << ", rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'begin_norm_axis' should be less than or equal " | |||
| << "to the dimension of input, but got begin_norm_axis: " << IntToSize(begin_norm_axis) | |||
| << ", the dimension of input: " << input_shape.size(); | |||
| } | |||
| for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) { | |||
| input_row_ *= input_shape[i]; | |||
| @@ -67,14 +67,14 @@ class LayerNormGradGradGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| int begin_norm_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_norm_axis")); | |||
| int begin_params_axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "begin_params_axis")); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LayerNormGradGradGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -87,9 +87,9 @@ class LayerNormGradGradGpuKernel : public GpuKernel { | |||
| } | |||
| if (IntToSize(begin_norm_axis) > input_shape.size()) { | |||
| MS_LOG(EXCEPTION) << "For 'LayerNormGradGradGpuKernel', begin_norm_axis should be less than or equal to " | |||
| << "the rank of input, but got begin_norm_axis: " << IntToSize(begin_norm_axis) | |||
| << ", rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the value of 'begin_norm_axis' should be less than or equal " | |||
| << "to the dimension of input, but got begin_norm_axis: " << IntToSize(begin_norm_axis) | |||
| << ", the dimension of input: " << input_shape.size(); | |||
| } | |||
| for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) { | |||
| input_row_ *= input_shape[i]; | |||
| @@ -97,9 +97,8 @@ class LocalResponseNormGpuKernel : public GpuKernel { | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| if (!CheckParam(kernel_node)) { | |||
| return false; | |||
| } | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| (void)CheckParam(kernel_node); | |||
| depth_radius_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "depth_radius")); | |||
| bias_ = GetAttr<float>(kernel_node, "bias"); | |||
| @@ -115,14 +114,14 @@ class LocalResponseNormGpuKernel : public GpuKernel { | |||
| InitResource(); | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "LocalResponseNormGpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "tensor shape is " << input_shape.size() << ", LocalResponseNormGpuKernel should be 4D"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| if (use_native_) { | |||
| @@ -149,6 +148,7 @@ class LocalResponseNormGpuKernel : public GpuKernel { | |||
| input_size_ = 0; | |||
| output_size_ = 0; | |||
| is_null_input_ = false; | |||
| kernel_name_ = "LocalResponseNorm"; | |||
| x_desc_ = nullptr; | |||
| y_desc_ = nullptr; | |||
| norm_desc_ = nullptr; | |||
| @@ -211,18 +211,15 @@ class LocalResponseNormGpuKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool CheckParam(const CNodePtr &kernel_node) { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but LocalResponseNormGpuKernel needs 1 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but LocalResponseNormGpuKernel needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| return true; | |||
| } | |||
| void SetCUDNNDescriptors(const std::vector<size_t> &shape, int lrnN, double lrnAlpha) { | |||
| @@ -248,6 +245,7 @@ class LocalResponseNormGpuKernel : public GpuKernel { | |||
| size_t input_size_; | |||
| size_t output_size_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| cudnnTensorDescriptor_t x_desc_; | |||
| cudnnTensorDescriptor_t y_desc_; | |||
| cudnnLRNDescriptor_t norm_desc_; | |||
| @@ -107,9 +107,8 @@ class LocalResponseNormGradGpuKernel : public GpuKernel { | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| if (!CheckParam(kernel_node)) { | |||
| return false; | |||
| } | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| (void)CheckParam(kernel_node); | |||
| depth_radius_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "depth_radius")); | |||
| bias_ = GetAttr<float>(kernel_node, "bias"); | |||
| @@ -125,14 +124,14 @@ class LocalResponseNormGradGpuKernel : public GpuKernel { | |||
| InitResource(); | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "LocalResponseNormGradGpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "tensor shape is " << input_shape.size() << ", LocalResponseNormGradGpuKernel should be 4D"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got " | |||
| << input_shape.size(); | |||
| } | |||
| if (use_native_) { | |||
| @@ -159,6 +158,7 @@ class LocalResponseNormGradGpuKernel : public GpuKernel { | |||
| input_size_ = 0; | |||
| output_size_ = 0; | |||
| is_null_input_ = false; | |||
| kernel_name_ = "LocalResponseNormGrad"; | |||
| dy_desc_ = nullptr; | |||
| x_desc_ = nullptr; | |||
| y_desc_ = nullptr; | |||
| @@ -235,18 +235,15 @@ class LocalResponseNormGradGpuKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool CheckParam(const CNodePtr &kernel_node) { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but LocalResponseNormGradGpuKernel needs 3 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but LocalResponseNormGradGpuKernel needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| return true; | |||
| } | |||
| void SetCUDNNDescriptors(const std::vector<size_t> &shape, int lrnN, double lrnAlpha) { | |||
| @@ -282,6 +279,7 @@ class LocalResponseNormGradGpuKernel : public GpuKernel { | |||
| size_t input_size_; | |||
| size_t output_size_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| cudnnTensorDescriptor_t dy_desc_; | |||
| cudnnTensorDescriptor_t x_desc_; | |||
| cudnnTensorDescriptor_t y_desc_; | |||
| @@ -93,19 +93,19 @@ class LstmGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LstmGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 3) { | |||
| MS_LOG(EXCEPTION) << "For 'LstmGpuKernel', the rank of input should be greater than or equal to 3, " | |||
| << "but got the rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 3, but got " | |||
| << input_shape.size(); | |||
| } | |||
| seq_len_ = SizeToInt(input_shape[0]); | |||
| batch_size_ = SizeToInt(input_shape[1]); | |||
| @@ -155,22 +155,22 @@ class LstmGpuKernel : public GpuKernel { | |||
| "set rnn_desc failed"); | |||
| #endif | |||
| auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| is_null_input_ = CHECK_NULL_INPUT(weight_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "weight"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LstmGpuKernel', weight is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (weight_shape.size() < 3) { | |||
| MS_LOG(EXCEPTION) << "For 'LstmGpuKernel', the rank of weight should be greater than or equal to 3, " | |||
| << "but got the rank of weight: " << weight_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got " | |||
| << weight_shape.size(); | |||
| } | |||
| size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, | |||
| cudnnGetRNNParamsSize(handle_, rnn_desc_, x_desc_[0], &weight_size_, cudnn_data_type_), | |||
| "get weight_size_ failed"); | |||
| if (weight_size != weight_size_) { | |||
| MS_LOG(EXCEPTION) << "weight size: " << weight_size << " error, expect: " << weight_size_ << " ."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of weight should be equal to " << weight_size_ | |||
| << " but got " << weight_size; | |||
| } | |||
| int w_dims[3] = {SizeToInt(weight_size_ / sizeof(T)), 1, 1}; | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, | |||
| @@ -109,19 +109,19 @@ class LstmGradDataGpuKernel : public GpuKernel { | |||
| dropout_ = GetAttr<float>(kernel_node, "dropout"); | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LstmGradDataGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'LstmGradDataGpuKernel', the rank of input should be greater than or equal to 2, " | |||
| << "but got the rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got " | |||
| << input_shape.size(); | |||
| } | |||
| seq_len_ = SizeToInt(input_shape[0]); | |||
| batch_size_ = SizeToInt(input_shape[1]); | |||
| @@ -169,22 +169,22 @@ class LstmGradDataGpuKernel : public GpuKernel { | |||
| "set rnn_desc failed"); | |||
| #endif | |||
| auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "weight"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LstmGradDataGpuKernel', weight is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (weight_shape.size() < 3) { | |||
| MS_LOG(EXCEPTION) << "For 'LstmGradDataGpuKernel', the rank of weight should be greater than or equal to 3, " | |||
| << "but got the rank of weight: " << weight_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got " | |||
| << weight_shape.size(); | |||
| } | |||
| size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, | |||
| cudnnGetRNNParamsSize(handle_, rnn_desc_, dx_desc_[0], &weight_size_, cudnn_data_type_), | |||
| "get weight_size_ failed"); | |||
| if (weight_size != weight_size_) { | |||
| MS_LOG(EXCEPTION) << "weight size: " << weight_size << " error, expect: " << weight_size_ << " ."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of weight should be equal to " << weight_size_ | |||
| << " but got " << weight_size; | |||
| } | |||
| int w_dims[3] = {SizeToInt(weight_size_ / sizeof(T)), 1, 1}; | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, | |||
| @@ -89,19 +89,19 @@ class LstmGradWeightGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LstmGradWeightGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'LstmGradWeightGpuKernel', the rank of input should be greater than or equal to 2, " | |||
| << "but got the rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got " | |||
| << input_shape.size(); | |||
| } | |||
| seq_len_ = SizeToInt(input_shape[0]); | |||
| batch_size_ = SizeToInt(input_shape[1]); | |||
| @@ -143,15 +143,14 @@ class LstmGradWeightGpuKernel : public GpuKernel { | |||
| "set rnn_desc failed"); | |||
| #endif | |||
| auto weight_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(weight_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'LstmGradWeightGpuKernel', weight is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (weight_shape.size() < 3) { | |||
| MS_LOG(EXCEPTION) << "For 'LstmGradWeightGpuKernel', the rank of weight should be greater than or equal to 3, " | |||
| << "but got the rank of weight: " << weight_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got " | |||
| << weight_shape.size(); | |||
| } | |||
| size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); | |||
| @@ -159,7 +158,8 @@ class LstmGradWeightGpuKernel : public GpuKernel { | |||
| cudnnGetRNNParamsSize(handle_, rnn_desc_, x_desc_[0], &weight_size_, cudnn_data_type_), | |||
| "get weight_size_ failed"); | |||
| if (weight_size != weight_size_) { | |||
| MS_LOG(EXCEPTION) << "weight size: " << weight_size << " error, expect: " << weight_size_ << " ."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of weight should be equal to " << weight_size_ | |||
| << " but got " << weight_size; | |||
| } | |||
| int w_dims[3] = {SizeToInt(weight_size_ / sizeof(T)), 1, 1}; | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, | |||
| @@ -68,21 +68,20 @@ class MaxPoolWithArgmaxGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but MaxPoolWithArgmax needs 1 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 2) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but MaxPoolWithArgmax needs 2 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 2, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MaxPoolWithArgmaxGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -95,9 +94,9 @@ class MaxPoolWithArgmaxGpuFwdKernel : public GpuKernel { | |||
| output_size_ *= x; | |||
| } | |||
| if (input_shape.size() < 4 || output_shape.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'MaxPoolWithArgmaxGpuKernel', the rank of input or output should be greater than " | |||
| << "or equal to 4, but got the rank of input: " << input_shape.size() | |||
| << ", the rank of output: " << output_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input and output cannot be less than 4, but " | |||
| << "got the dimension of input: " << input_shape.size() | |||
| << ", the dimension of output: " << output_shape.size(); | |||
| } | |||
| n_ = SizeToInt(input_shape[0]); | |||
| c_ = SizeToInt(input_shape[1]); | |||
| @@ -112,8 +111,8 @@ class MaxPoolWithArgmaxGpuFwdKernel : public GpuKernel { | |||
| (void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (window.size() < 3) { | |||
| MS_LOG(EXCEPTION) << "For 'MaxPoolWithArgmaxGpuKernel', the rank of window should be greater than " | |||
| << "or equal to 3, but got the rank of window: " << window.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the length of 'kernel_size' cannot be less than 3, but got " | |||
| << window.size(); | |||
| } | |||
| window_height_ = window[1]; | |||
| window_width_ = window[2]; | |||
| @@ -122,8 +121,8 @@ class MaxPoolWithArgmaxGpuFwdKernel : public GpuKernel { | |||
| (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (stride.size() < 3) { | |||
| MS_LOG(EXCEPTION) << "For 'MaxPoolWithArgmaxGpuKernel', the rank of stride should be greater than " | |||
| << "or equal to 3, but got the rank of stride: " << stride.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the length of 'strides' cannot be less than 3, but got " | |||
| << stride.size(); | |||
| } | |||
| stride_height_ = stride[1]; | |||
| stride_width_ = stride[2]; | |||
| @@ -61,24 +61,23 @@ class MaxPoolWithArgmaxGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but MaxPoolGradWithArgmax needs 3 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but MaxPoolGradWithArgmax needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto dx_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(index_shape) || | |||
| CHECK_NULL_INPUT(dx_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(x_shape, kernel_name, "x") || CHECK_SHAPE_NULL(dy_shape, kernel_name, "dy") || | |||
| CHECK_SHAPE_NULL(index_shape, kernel_name, "index") || | |||
| CHECK_SHAPE_NULL(dx_shape, kernel_name, "dx"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MaxPoolWithArgmaxGradGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -99,9 +98,8 @@ class MaxPoolWithArgmaxGradGpuKernel : public GpuKernel { | |||
| dx_size_ *= x; | |||
| } | |||
| if (x_shape.size() < 4 || dy_shape.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'MaxPoolWithArgmaxGradGpuKernel', the rank of x or dy should be greater than " | |||
| << "or equal to 4, but got the rank of x: " << x_shape.size() | |||
| << ", the rank of dy: " << dy_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of x and dy cannot be less than 4, but got " | |||
| << "the dimension of x: " << x_shape.size() << ", the dimension of dy: " << dy_shape.size(); | |||
| } | |||
| n_ = SizeToInt(x_shape[0]); | |||
| c_ = SizeToInt(x_shape[1]); | |||
| @@ -62,15 +62,14 @@ class MirrorPadGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but MirrorPad needs 2 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but Pad needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto prim = AnfAlgo::GetCNodePrimitive(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| @@ -84,9 +83,10 @@ class MirrorPadGpuFwdKernel : public GpuKernel { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto padding_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(padding_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input_x") || | |||
| CHECK_SHAPE_NULL(padding_shape, kernel_name, "paddings") || | |||
| CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MirrorPadGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -100,8 +100,8 @@ class MirrorPadGpuFwdKernel : public GpuKernel { | |||
| (void)input_shape.insert(it, 2, 1); // channel padding | |||
| } | |||
| if (input_shape.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'MirrorPadGpuKernel', the rank of input should be greater than or equal to 4, " | |||
| << "but got the rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input_x cannot be less than 4, but " | |||
| << "got the " << input_shape.size(); | |||
| } | |||
| for (auto in_shape : input_shape) { | |||
| @@ -117,8 +117,8 @@ class MirrorPadGpuFwdKernel : public GpuKernel { | |||
| output_size_ = sizeof(T); | |||
| if (output_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'MirrorPadGpuKernel', the rank of output should be greater than or equal to 2, " | |||
| << "but got the rank of output: " << output_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be less than 2, but " | |||
| << "got the " << output_shape.size(); | |||
| } | |||
| for (auto x : output_shape) { | |||
| output_size_ *= x; | |||
| @@ -137,8 +137,9 @@ class MirrorPadGpuFwdKernel : public GpuKernel { | |||
| } | |||
| if (output_shape_[(output_shape_.size() - 2) + 0] > max_width || | |||
| output_shape_[(output_shape_.size() - 2) + 1] > max_width) { | |||
| MS_LOG(ERROR) << "ERROR: Padding value too high for input Tensor on 1 or more dims"; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the output.shape[-1] and output.shape[-2] cannot be greater " | |||
| << "than input_x.shape[-1], but got output.shape: " << CONVERT_VECTOR_TO_STRING(output_shape_) | |||
| << ", input_x.shape: " << CONVERT_VECTOR_TO_STRING(input_shape_); | |||
| } | |||
| InitSizeLists(); | |||
| return true; | |||
| @@ -34,6 +34,7 @@ class MirrorPadGpuBackKernel : public GpuKernel { | |||
| num_paddings_(0), | |||
| mode_(0), | |||
| is_null_input_(false), | |||
| kernel_name_("MirrorPadGrad"), | |||
| input_size_(1), | |||
| output_size_(1), | |||
| workspace_size_(0) {} | |||
| @@ -62,16 +63,8 @@ class MirrorPadGpuBackKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but MirrorPadGrad needs 2 input."; | |||
| return false; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but MirrorPadGrad needs 1 output."; | |||
| return false; | |||
| } | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| (void)CheckParam(kernel_node); | |||
| auto prim = AnfAlgo::GetCNodePrimitive(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| string mode = GetValue<string>(prim->GetAttr("mode")); | |||
| @@ -84,9 +77,10 @@ class MirrorPadGpuBackKernel : public GpuKernel { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto padding_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(padding_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input_x") || | |||
| CHECK_SHAPE_NULL(padding_shape, kernel_name_, "paddings") || | |||
| CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MirrorPadGradGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -99,8 +93,8 @@ class MirrorPadGpuBackKernel : public GpuKernel { | |||
| (void)input_shape.insert(it, 2, 1); // channel padding | |||
| } | |||
| if (input_shape.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'MirrorPadGradGpuKernel', the rank of input should be greater than or equal to 4, " | |||
| << "but got the rank of input: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input_x cannot be less than 4, but " | |||
| << "got the " << input_shape.size(); | |||
| } | |||
| input_size_ = sizeof(T); | |||
| for (auto in_shape : input_shape) { | |||
| @@ -123,8 +117,8 @@ class MirrorPadGpuBackKernel : public GpuKernel { | |||
| (void)output_shape.insert(it, 2, 1); // channel padding | |||
| } | |||
| if (output_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'MirrorPadGradGpuKernel', the rank of output should be greater than or equal to 2, " | |||
| << "but got the rank of output: " << output_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than 2, but " | |||
| << "got the " << output_shape.size(); | |||
| } | |||
| output_size_ = sizeof(T); | |||
| for (auto x : output_shape) { | |||
| @@ -152,8 +146,9 @@ class MirrorPadGpuBackKernel : public GpuKernel { | |||
| } | |||
| if (output_shape_[(output_shape_.size() - 2) + 0] > max_width || | |||
| output_shape_[(output_shape_.size() - 2) + 1] > max_width) { | |||
| MS_LOG(ERROR) << "ERROR: Padding value too high for input Tensor on 1 or more DIMS"; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the output.shape[-1] and output.shape[-2] cannot be greater " | |||
| << "than input_x.shape[-1], but got output.shape: " << CONVERT_VECTOR_TO_STRING(output_shape_) | |||
| << ", input_x.shape: " << CONVERT_VECTOR_TO_STRING(input_shape_); | |||
| } | |||
| InitSizeLists(); | |||
| return true; | |||
| @@ -168,10 +163,22 @@ class MirrorPadGpuBackKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| } | |||
| size_t num_input_; | |||
| int num_paddings_; | |||
| int mode_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| std::vector<int> input_shape_; | |||
| std::vector<int> output_shape_; | |||
| size_t input_size_; | |||
| @@ -55,10 +55,11 @@ class MomentumGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but momentum needs " << INPUT_NUM << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| use_nesterov_ = GetAttr<bool>(kernel_node, "use_nesterov"); | |||
| @@ -71,10 +72,10 @@ class MomentumGpuKernel : public GpuKernel { | |||
| auto variable_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto accumulation_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| is_null_input_ = | |||
| CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(accumulation_shape) || CHECK_NULL_INPUT(gradient_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "variable") || | |||
| CHECK_SHAPE_NULL(accumulation_shape, kernel_name, "accumulation") || | |||
| CHECK_SHAPE_NULL(gradient_shape, kernel_name, "gradient"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'MomentumGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -59,16 +59,16 @@ class NLLLossGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NllLossGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'NllLossGpuKernel', the rank of input cannot less than 2, but got " | |||
| << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of logits cannot be less than 2, but " | |||
| << "got the " << input_shape.size(); | |||
| } | |||
| n_ = static_cast<int>(input_shape[0]); | |||
| c_ = static_cast<int>(input_shape[1]); | |||
| @@ -56,16 +56,16 @@ class NLLLossGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'NllLossGradGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'NllLossGradGpuKernel', the rank of input cannot less than 2, but got " | |||
| << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of logits cannot be less than 2, but " | |||
| << "got the " << input_shape.size(); | |||
| } | |||
| n_ = static_cast<int>(input_shape[0]); | |||
| c_ = static_cast<int>(input_shape[1]); | |||
| @@ -19,6 +19,7 @@ | |||
| #include <iostream> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" | |||
| @@ -75,15 +76,14 @@ class PadGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| if (!CheckIONumber(kernel_node)) { | |||
| return false; | |||
| } | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| (void)CheckIONumber(kernel_node); | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape_) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape_, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'PadGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -93,12 +93,15 @@ class PadGpuFwdKernel : public GpuKernel { | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| std::vector<std::vector<int64_t>> paddings = GetValue<std::vector<std::vector<int64_t>>>(prim->GetAttr("paddings")); | |||
| if (paddings.size() != input_rank_) { | |||
| MS_LOG(EXCEPTION) << "PadGpuFwdKernel: paddings' size must be equal to the rank of the input."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'paddings' should be equal to the dimension of " | |||
| << "input, but got the length of 'paddings': " << paddings.size() | |||
| << " the dimension of input: " << input_rank_; | |||
| } | |||
| for (size_t i = 0; i < paddings.size(); i++) { | |||
| if (paddings[i].size() != 2) { | |||
| MS_LOG(EXCEPTION) << "PadGpuFwdKernel: each element in paddings must have size 2."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of element of 'paddings' should be equal to 2, " | |||
| << "but got the size of paddings[" << i << "]: " << paddings[i].size(); | |||
| } | |||
| flattened_paddings_.push_back(paddings[i][0]); | |||
| flattened_paddings_.push_back(paddings[i][1]); | |||
| @@ -111,14 +114,14 @@ class PadGpuFwdKernel : public GpuKernel { | |||
| output_size_ *= (input_shape_[i] + flattened_paddings_[2 * i] + flattened_paddings_[(2 * i) + 1]); | |||
| } | |||
| if (input_rank_ < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'PadGpuKernel', the rank of input should be greater than or equal to 1, " | |||
| << "but got the rank of input: " << input_rank_; | |||
| if (input_rank_ == 0) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be equal to 0, but " | |||
| << "got " << input_rank_; | |||
| } | |||
| if (output_shape.size() != input_rank_) { | |||
| MS_LOG(EXCEPTION) << "For 'PadGpuKernel', the rank of input should be equal to the rank of output, " | |||
| << "but got the rank of input: " << input_rank_ | |||
| << ", the rank of output: " << output_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input and output should be the same, but " | |||
| << "got the dimension of input: " << input_rank_ | |||
| << ", the dimension of output: " << output_shape.size(); | |||
| } | |||
| strides_.resize(input_rank_); | |||
| strides_[input_rank_ - 1] = 1; | |||
| @@ -136,6 +139,7 @@ class PadGpuFwdKernel : public GpuKernel { | |||
| output_size_ = 0; | |||
| workspace_size_ = 0; | |||
| is_null_input_ = false; | |||
| kernel_name_ = "Pad"; | |||
| flattened_paddings_.clear(); | |||
| input_shape_.clear(); | |||
| strides_.clear(); | |||
| @@ -154,18 +158,15 @@ class PadGpuFwdKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool CheckIONumber(const CNodePtr &kernel_node) { | |||
| void CheckIONumber(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but Pad needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but Pad needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| return true; | |||
| } | |||
| size_t input_rank_; | |||
| @@ -178,6 +179,7 @@ class PadGpuFwdKernel : public GpuKernel { | |||
| size_t output_size_; | |||
| size_t workspace_size_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| @@ -51,6 +51,7 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| c_(0), | |||
| pad_value_(0), | |||
| is_null_input_(false), | |||
| kernel_name_("Pooling"), | |||
| input_size_(0), | |||
| output_size_(0), | |||
| workspace_size_(0) {} | |||
| @@ -76,11 +77,10 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| if (!CheckParam(kernel_node)) { | |||
| return false; | |||
| } | |||
| (void)CheckParam(kernel_node); | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0); | |||
| auto format_attr = GetAttr<std::string>(kernel_node, "format"); | |||
| @@ -89,9 +89,9 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "PoolingGpuFwdKernel input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -160,13 +160,11 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool CheckParam(const CNodePtr &kernel_node) { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but pooling needs 1 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| return true; | |||
| } | |||
| void SetPoolingMode(const CNodePtr &kernel_node) { | |||
| @@ -188,8 +186,8 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| (void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (window.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGpuKernel', the rank of window should be greater than or equal to 4 for 2D, " | |||
| << "but got the rank of window: " << window.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'kernel_size' cannot be less than 4, but got " | |||
| << window.size(); | |||
| } | |||
| int window_height = window[2]; | |||
| int window_width = window[3]; | |||
| @@ -199,8 +197,8 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| int windowDimA[2] = {window_height, window_width}; | |||
| int paddingA[2] = {0, 0}; | |||
| if (stride_.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGpuKernel', the rank of stride_ should be greater than or equal to 4 for 2D, " | |||
| << "but got the rank of stride_: " << stride_.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'strides' cannot be less than 4, but got " | |||
| << stride_.size(); | |||
| } | |||
| int strideA[2] = {stride_[2], stride_[3]}; | |||
| int stride_h = stride_[2]; | |||
| @@ -231,8 +229,8 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| (void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (window.size() < 5) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGpuKernel', the rank of window should be greater than or equal to 5 for 3D, " | |||
| << "but got the rank of window: " << window.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'kernel_size' cannot be less than 5, but got " | |||
| << window.size(); | |||
| } | |||
| int window_depth = window[2]; | |||
| int window_height = window[3]; | |||
| @@ -243,8 +241,8 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| int windowDimA[3] = {window_depth, window_height, window_width}; | |||
| int paddingA[3] = {0, 0, 0}; | |||
| if (stride_.size() < 5) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGpuKernel', the rank of stride_ should be greater than or equal to 5 for 3D, " | |||
| << "but got the rank of stride_: " << stride_.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'strides' cannot be less than 5, but got " | |||
| << stride_.size(); | |||
| } | |||
| int strideA[3] = {stride_[2], stride_[3], stride_[4]}; | |||
| int stride_d = stride_[2]; | |||
| @@ -298,6 +296,7 @@ class PoolingGpuFwdKernel : public GpuKernel { | |||
| int c_; | |||
| float pad_value_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| size_t input_size_; | |||
| size_t output_size_; | |||
| size_t workspace_size_; | |||
| @@ -54,6 +54,7 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| c_(0), | |||
| pad_value_(0), | |||
| is_null_input_(false), | |||
| kernel_name_("PoolingGrad"), | |||
| input_size_(0), | |||
| output_size_(0), | |||
| workspace_size_(0) {} | |||
| @@ -94,10 +95,10 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| data_format = format_attr_; | |||
| } | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_mask) || CHECK_NULL_INPUT(dout_shape) || | |||
| CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(input_mask, kernel_name_, "mask") || | |||
| CHECK_SHAPE_NULL(dout_shape, kernel_name_, "dout") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'PoolingGradGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -119,11 +120,10 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| if (!CheckParam(kernel_node)) { | |||
| return false; | |||
| } | |||
| (void)CheckParam(kernel_node); | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| int nbDims = SizeToInt(input_shape.size()); | |||
| @@ -210,14 +210,12 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| } | |||
| private: | |||
| bool CheckParam(const CNodePtr &kernel_node) { | |||
| void CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but PoolingGradGpuKernel needs " << INPUT_NUM | |||
| << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| return true; | |||
| } | |||
| void SetPad(const CNodePtr &kernel_node) { | |||
| pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode"); | |||
| @@ -229,14 +227,14 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| (void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (window.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGradGpuKernel', the rank of window should be greater than or equal to 4 " | |||
| << "for 2D, but got the rank of window: " << window.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'kernel_size' cannot be less than 4, but got " | |||
| << window.size(); | |||
| } | |||
| int window_height = window[2]; | |||
| int window_width = window[3]; | |||
| if (stride_.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGradGpuKernel', the rank of stride_ should be greater than or equal to 4 " | |||
| << "for 2D, but got the rank of stride_: " << stride_.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'strides' cannot be less than 4, but got " | |||
| << stride_.size(); | |||
| } | |||
| int stride_h = stride_[2]; | |||
| int stride_w = stride_[3]; | |||
| @@ -278,15 +276,15 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| (void)std::transform(window_me.begin(), window_me.end(), std::back_inserter(window), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (window.size() < 5) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGradGpuKernel', the rank of window should be greater than or equal to 5 " | |||
| << "for 3D, but got the rank of window: " << window.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'kernel_size' cannot be less than 5, but got " | |||
| << window.size(); | |||
| } | |||
| int window_depth = window[2]; | |||
| int window_height = window[3]; | |||
| int window_width = window[4]; | |||
| if (stride_.size() < 5) { | |||
| MS_LOG(EXCEPTION) << "For 'PoolingGradGpuKernel', the rank of stride_ should be greater than or equal to 5 " | |||
| << "for 3D, but got the rank of stride_: " << stride_.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'strides' cannot be less than 5, but got " | |||
| << stride_.size(); | |||
| } | |||
| int stride_d = stride_[2]; | |||
| int stride_h = stride_[3]; | |||
| @@ -366,6 +364,7 @@ class PoolingGradGpuKernel : public GpuKernel { | |||
| int c_; | |||
| float pad_value_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| size_t input_size_; | |||
| size_t output_size_; | |||
| size_t workspace_size_; | |||
| @@ -51,23 +51,22 @@ class PReLUGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| ResetResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "PReLU needs 2 inputs, but got " << input_num; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "ReLU should have 1 output, but got " << input_num; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| auto weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(weight_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape, kernel_name, "x") || CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'PReLUGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -86,9 +85,10 @@ class PReLUGpuKernel : public GpuKernel { | |||
| } | |||
| if (weight_shape.size() != 1 || (weight_shape[0] != 1 && weight_shape[0] != channel_num)) { | |||
| MS_LOG(EXCEPTION) << "PReLU requires the rank of weight should be 1, and the elements number should be " | |||
| "1 or channels number " | |||
| << channel_num << ", but got weight shape " << weight_shape; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight should be equal to 1 and " | |||
| << "weight.shape[0] should be equal to 1 or the channel number, but got the dimension of " | |||
| << "weight: " << weight_shape.size() << ", weight.shape[0]: " << weight_shape[0] | |||
| << ", the channel num: " << channel_num; | |||
| } | |||
| weight_length_ = weight_shape[0]; | |||
| InitSizeLists(); | |||
| @@ -54,23 +54,22 @@ class PReLUGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| ResetResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(ERROR) << "ReLUGrad needs 3 inputs, but got " << input_num; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 2) { | |||
| MS_LOG(ERROR) << "ReLUGrad should have 2 outputs, but got " << input_num; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 2, but got " << output_num; | |||
| } | |||
| auto x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| auto weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); | |||
| is_null_input_ = CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(weight_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(x_shape, kernel_name, "x") || CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'PReLUGradGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -89,9 +88,10 @@ class PReLUGradGpuKernel : public GpuKernel { | |||
| } | |||
| if (weight_shape.size() != 1 || (weight_shape[0] != 1 && weight_shape[0] != channel_num)) { | |||
| MS_LOG(EXCEPTION) << "PReLUGrad requires the rank of weight should be 1, and the elements number should be " | |||
| "1 or channels number " | |||
| << channel_num << ", but got weight shape " << weight_shape; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight should be equal to 1 and " | |||
| << "weight.shape[0] should be equal to 1 or the channel number, but got the dimension of " | |||
| << "weight: " << weight_shape.size() << ", weight.shape[0]: " << weight_shape[0] | |||
| << ", the channel num: " << channel_num; | |||
| } | |||
| weight_length_ = weight_shape[0]; | |||
| workspace_size_ = weight_length_ * IntToSize(GET_BLOCKS(input_length_) * GET_THREADS) * sizeof(float); | |||
| @@ -48,15 +48,14 @@ class ReLUGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Argument number is " << input_num << ", but ReLUGpuFwdKernel needs 1."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ReLUGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -51,16 +51,15 @@ class ReluGradGpuFwdKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Argument number is " << input_num << ", but ReluGradGpuKernel needs 2."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ReLUGradGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -51,10 +51,10 @@ class ReluGradV2GpuKernel : public GpuKernel { | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ReLUGradV2GpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -51,10 +51,10 @@ class ReluV2GpuKernel : public GpuKernel { | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ReLUV2GpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -49,28 +49,27 @@ class ResizeBilinearGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but ResizeBilinear needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but ResizeBilinear has 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(input_shape, kernel_name, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ResizeBilinearGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (input_shape.size() != 4 || output_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "For 'ResizeBilinear', the rank of input and output must be 4, but got the rank of input: " | |||
| << input_shape.size() << ", the rank of output: " << output_shape.size(); | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input and output should be equal to 4, but " | |||
| << "got the dimension of input: " << input_shape.size() | |||
| << ", the dimension of output: " << output_shape.size(); | |||
| } | |||
| n_ = SizeToInt(input_shape[0]); | |||
| c_ = SizeToInt(input_shape[1]); | |||
| @@ -56,37 +56,36 @@ class ResizeBilinearGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but ResizeBilinearGrad needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but ResizeBilinearGrad has 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| std::vector<size_t> dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| std::vector<size_t> dx_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(dx_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name, "dy") || CHECK_SHAPE_NULL(x_shape, kernel_name, "x") || | |||
| CHECK_SHAPE_NULL(dx_shape, kernel_name, "dx"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ResizeBilinearGradGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| if (dy_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "Input is " << dy_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of dy should be equal to 4, but got " | |||
| << dy_shape.size(); | |||
| } | |||
| if (x_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "Input is " << x_shape.size() << "-D, but ResizeBilinearGrad supports only 4-D inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of x should be equal to 4, but got " | |||
| << x_shape.size(); | |||
| } | |||
| if (dx_shape.size() != 4) { | |||
| MS_LOG(ERROR) << "For 'ResizeBilinearGradGpuKernel', the rank of output must be 4, but got " << dx_shape.size(); | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of dx should be equal to 4, but got " | |||
| << dx_shape.size(); | |||
| } | |||
| n_ = SizeToInt(dy_shape[0]); | |||
| c_ = SizeToInt(dy_shape[1]); | |||
| @@ -77,9 +77,8 @@ class RMSPropGpuKernel : public GpuKernel { | |||
| epsilon_ = GetAttr<float>(kernel_node, "epsilon"); | |||
| } | |||
| auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, node_name, "var"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'RMSPropGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -64,34 +64,33 @@ class ROIAlignGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| // Get the number of input args | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but ROIAlign needs 2 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| // Get the number of output args | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but ROIAlign needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| // Get the input shapes | |||
| auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(x_shape) || CHECK_NULL_INPUT(rois_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(x_shape, kernel_name, "features") || CHECK_SHAPE_NULL(rois_shape, kernel_name, "rois"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ROIAlignGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| auto x_shape_size = x_shape.size(); | |||
| if (x_shape_size != 4) { | |||
| MS_LOG(ERROR) << "x shape size is " << x_shape_size << ", but should be 4."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of features should be equal to 4, but got " | |||
| << x_shape_size; | |||
| } | |||
| // Get channels, height & width | |||
| @@ -103,8 +102,8 @@ class ROIAlignGpuFwdKernel : public GpuKernel { | |||
| x_size_ = batch_N_ * channels_ * height_ * width_ * sizeof(T); | |||
| if (rois_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'ROIAlignGpuKernel', the rank of rois_shape should be greater than or equal to 2, " | |||
| << "but got the rank of rois_shape: " << rois_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of rois cannot be less than 2, but got " | |||
| << rois_shape.size(); | |||
| } | |||
| // Get rois rows and cols | |||
| roi_rows_ = rois_shape[0]; | |||
| @@ -64,34 +64,32 @@ class ROIAlignGradGpuFwdKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| // Get the number of input args | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but ROIAlignGrad needs 2 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| // Get the number of output args | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but ROIAlignGrad needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| // Get the input shapes | |||
| auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(dy_shape) || CHECK_NULL_INPUT(rois_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(dy_shape, kernel_name, "dy") || CHECK_SHAPE_NULL(rois_shape, kernel_name, "rois"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'ROIAlignGradGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| auto dy_shape_size = dy_shape.size(); | |||
| if (dy_shape_size != 4) { | |||
| MS_LOG(ERROR) << "dy shape size is " << dy_shape_size << ", but should be 4."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of dy should be equal to 4, but got " | |||
| << dy_shape_size; | |||
| } | |||
| // Parse y diff | |||
| @@ -100,8 +98,8 @@ class ROIAlignGradGpuFwdKernel : public GpuKernel { | |||
| dy_size_ = dy_shape_[0] * dy_shape_[1] * dy_shape_[2] * dy_shape_[3] * sizeof(T); | |||
| if (rois_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "For 'ROIAlignGradGpuKernel', the rank of rois_shape should be greater than or equal to 2, " | |||
| << "but got the rank of rois_shape: " << rois_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of rois cannot be less than 2, but got " | |||
| << rois_shape.size(); | |||
| } | |||
| // Get rois rows and cols | |||
| roi_rows_ = rois_shape[0]; | |||
| @@ -120,8 +118,8 @@ class ROIAlignGradGpuFwdKernel : public GpuKernel { | |||
| roi_end_mode_ = 1; | |||
| if (xdiff_shape_.size() < 4) { | |||
| MS_LOG(EXCEPTION) << "For 'ROIAlignGradGpuKernel', the rank of xdiff_shape_ should be greater than or equal to " | |||
| << "4, but got the rank of xdiff_shape_: " << xdiff_shape_.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the length of xdiff_shape cannot be less than 4, but got " | |||
| << xdiff_shape_.size(); | |||
| } | |||
| // Get channels, height & width | |||
| batch_size_ = xdiff_shape_[0]; | |||
| @@ -56,15 +56,15 @@ class SGDGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| dampening_ = GetAttr<float>(kernel_node, "dampening"); | |||
| weight_decay_ = GetAttr<float>(kernel_node, "weight_decay"); | |||
| nesterov_ = GetAttr<bool>(kernel_node, "nesterov"); | |||
| auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "parameters"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SGDGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -52,11 +52,11 @@ class SigmoidCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but SigmoidCrossEntropyWithLogits needs " << INPUT_NUM | |||
| << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| logits_size_ = sizeof(T); | |||
| labels_size_ = sizeof(S); | |||
| @@ -65,9 +65,10 @@ class SigmoidCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name, "logits") || | |||
| CHECK_SHAPE_NULL(labels_shape, kernel_name, "labels") || | |||
| CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SigmoidCrossEntropyWithLogitsGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -51,10 +51,10 @@ class SigmoidCrossEntropyWithLogitsGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but SigmoidCrossEntropyWithLogitsGrad needs 3 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 3, but got " << input_num; | |||
| } | |||
| logits_size_ = sizeof(T); | |||
| labels_size_ = sizeof(S); | |||
| @@ -63,9 +63,10 @@ class SigmoidCrossEntropyWithLogitsGradGpuKernel : public GpuKernel { | |||
| auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape) || CHECK_NULL_INPUT(output_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(logits_shape, kernel_name, "logits") || | |||
| CHECK_SHAPE_NULL(labels_shape, kernel_name, "labels") || | |||
| CHECK_SHAPE_NULL(output_shape, kernel_name, "output"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SigmoidCrossEntropyWithLogitsGradGpuKernel', input or output is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -47,10 +47,10 @@ class SmoothL1LossGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SmoothL1LossGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -48,10 +48,10 @@ class SmoothL1LossGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "logits"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SmoothL1LossGradGpuKernel', input is null."; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <stdint.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh" | |||
| @@ -37,6 +38,7 @@ class SoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| mode_(CUDNN_SOFTMAX_MODE_INSTANCE), | |||
| cudnn_data_type_(CUDNN_DATA_FLOAT), | |||
| is_null_input_(false), | |||
| kernel_name_("SoftmaxCrossEntropyWithLogits"), | |||
| logits_size_(0), | |||
| labels_size_(0), | |||
| output1_size_(0), | |||
| @@ -75,19 +77,16 @@ class SoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num | |||
| << ", but SoftmaxCrossEntropyWithLogitsGpuKernel needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 2) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num | |||
| << ", but SoftmaxCrossEntropyWithLogitsGpuKernel needs 2 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 2, but got " << output_num; | |||
| } | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| @@ -132,9 +131,9 @@ class SoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| void InferInputOutputSize(const CNodePtr &kernel_node) { | |||
| auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(logits_shape, kernel_name_, "logits") || CHECK_SHAPE_NULL(labels_shape, kernel_name_, "labels"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SoftmaxCrossEntropyWithLogitsGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return; | |||
| } | |||
| @@ -165,17 +164,19 @@ class SoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| size_t logits_dim_length = logits_shape.size(); | |||
| size_t labels_dim_length = labels_shape.size(); | |||
| if (logits_dim_length == 0) { | |||
| MS_LOG(EXCEPTION) << "Logits shape cannot be empty"; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits cannot be equal to 0, but got " | |||
| << logits_dim_length; | |||
| } | |||
| if (labels_dim_length != logits_dim_length) { | |||
| MS_LOG(EXCEPTION) << "Labels shape length should be equal to Logits shape length for " | |||
| "SoftmaxCrossEntropyWithLogits, but got Labels " | |||
| "shape length:" | |||
| << labels_dim_length << ", Logits shape length:" << logits_dim_length; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits and labels should be the same, but " | |||
| << "got the dimension of labels: " << labels_dim_length | |||
| << ", the dimension of logits: " << logits_dim_length; | |||
| } | |||
| if (!std::equal(labels_shape.begin(), labels_shape.end(), logits_shape.begin())) { | |||
| MS_LOG(EXCEPTION) << "The shape of labels should be the same as the shape of logits except its last dimension."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of logits and labels should be the same except " | |||
| << "the last dimension, but got the shape of logits: " << CONVERT_VECTOR_TO_STRING(logits_shape) | |||
| << ", the shape of labels: " << CONVERT_VECTOR_TO_STRING(labels_shape); | |||
| } | |||
| } | |||
| @@ -186,6 +187,7 @@ class SoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| cudnnSoftmaxMode_t mode_; | |||
| cudnnDataType_t cudnn_data_type_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| size_t logits_size_; | |||
| size_t labels_size_; | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_SOFTMAX_GPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| @@ -37,6 +38,7 @@ class SoftmaxGpuKernel : public GpuKernel { | |||
| mode_(CUDNN_SOFTMAX_MODE_INSTANCE), | |||
| cudnn_data_type_(CUDNN_DATA_FLOAT), | |||
| is_null_input_(false), | |||
| kernel_name_("Softmax"), | |||
| input_size_(0), | |||
| output_size_(0), | |||
| workspace_size_(0), | |||
| @@ -100,23 +102,21 @@ class SoftmaxGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but softmax needs 1 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but softmax needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "SoftmaxGpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -133,8 +133,8 @@ class SoftmaxGpuKernel : public GpuKernel { | |||
| (void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis), | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| if (axis.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'SoftmaxGpuKernel', the rank of axis should be greater than or equal to 1, " | |||
| << "but got the rank of axis: " << axis.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'axis' cannot be equal to 0, but got " | |||
| << axis.size(); | |||
| } | |||
| InitSizeByAxis(input_shape, axis[0]); | |||
| } | |||
| @@ -217,7 +217,8 @@ class SoftmaxGpuKernel : public GpuKernel { | |||
| transpose_axis_.push_back(0); | |||
| need_transpose_ = true; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but axis(" << axis << ") is invalid."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'axis' should be in range [-" << shape_size_ | |||
| << ", " << shape_size_ << "), but got " << axis; | |||
| } | |||
| height_ = 1; | |||
| @@ -234,7 +235,8 @@ class SoftmaxGpuKernel : public GpuKernel { | |||
| } | |||
| // axis should be -1 with ND | |||
| if (axis_pos != SizeToInt(input_shape.size() - 1)) { | |||
| MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but axis(" << axis << ") is invalid."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'axis' should be equal to -1 or " | |||
| << (input_shape.size() - 1) << ", but got " << axis; | |||
| } | |||
| // squeeze to 2d, then invoke cudnn | |||
| size_t n = 1; | |||
| @@ -260,8 +262,8 @@ class SoftmaxGpuKernel : public GpuKernel { | |||
| } | |||
| if (axis_pos >= input_shape.size()) { | |||
| MS_LOG(EXCEPTION) << "For 'SoftmaxGpuKernel', the axis_pos should be less than the rank of input_shape, " | |||
| << "but got axis_pos: " << axis_pos << ", the rank of input_shape: " << input_shape.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'axis' should be in range [-" | |||
| << input_shape.size() << ", " << input_shape.size() << "), but got " << axis; | |||
| } | |||
| // n keep tracks of squeezed size | |||
| size_t n = 1; | |||
| @@ -299,6 +301,7 @@ class SoftmaxGpuKernel : public GpuKernel { | |||
| cudnnSoftmaxMode_t mode_; | |||
| cudnnDataType_t cudnn_data_type_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| size_t input_size_; | |||
| size_t output_size_; | |||
| size_t workspace_size_; | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_SOFTMAX_GRAD_GPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| @@ -36,6 +37,7 @@ class SoftmaxGradGpuKernel : public GpuKernel { | |||
| mode_(CUDNN_SOFTMAX_MODE_INSTANCE), | |||
| cudnn_data_type_(CUDNN_DATA_FLOAT), | |||
| is_null_input_(false), | |||
| kernel_name_("SoftmaxGrad"), | |||
| input_size_(0), | |||
| output_size_(0), | |||
| workspace_size_(0), | |||
| @@ -103,29 +105,28 @@ class SoftmaxGradGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but softmax grad needs 2 input."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num << ", but softmax grad needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name_, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "SoftmaxGradGpuKernel input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| shape_size_ = input_shape.size(); | |||
| if (shape_size_ != 2) { | |||
| MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but softmax grad only supports 2-D inputs."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be equal to 2, but got " | |||
| << shape_size_; | |||
| } | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (kernel_name == "LogSoftmaxGrad") { | |||
| @@ -139,8 +140,8 @@ class SoftmaxGradGpuKernel : public GpuKernel { | |||
| (void)std::transform(axis_me.begin(), axis_me.end(), std::back_inserter(axis), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (axis.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'SoftmaxGradGpuKernel', the rank of axis should be greater than or equal to 1, " | |||
| << "but got the rank of axis: " << axis.size(); | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'axis' cannot be equal to 0, but got " | |||
| << axis.size(); | |||
| } | |||
| InitSizeByAxis(input_shape, axis[0]); | |||
| } | |||
| @@ -194,7 +195,8 @@ class SoftmaxGradGpuKernel : public GpuKernel { | |||
| transpose_axis_.push_back(1); | |||
| transpose_axis_.push_back(0); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input is " << shape_size_ << "-D, but axis(" << axis << ") is invalid."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the value of 'axis' should be in range [-" << shape_size_ | |||
| << ", " << shape_size_ << "), but got " << axis; | |||
| } | |||
| height_ = 1; | |||
| @@ -210,6 +212,7 @@ class SoftmaxGradGpuKernel : public GpuKernel { | |||
| cudnnSoftmaxMode_t mode_; | |||
| cudnnDataType_t cudnn_data_type_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| size_t input_size_; | |||
| size_t output_size_; | |||
| size_t workspace_size_; | |||
| @@ -47,12 +47,12 @@ class SoftplusGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| InitResource(); | |||
| input_size_ = sizeof(T); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SoftplusGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -48,12 +48,12 @@ class SoftplusGpuGradKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| InitResource(); | |||
| input_size_ = sizeof(T); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| is_null_input_ = CHECK_NULL_INPUT(input_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SoftplusGradGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -62,11 +62,11 @@ class SparseApplyProximalAdagradKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but SparseApplyProximalAdagrad needs " << INPUT_NUM | |||
| << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| variable_size_ = sizeof(T); | |||
| @@ -82,11 +82,12 @@ class SparseApplyProximalAdagradKernel : public GpuKernel { | |||
| auto learning_rate_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(accumulation_shape) || | |||
| CHECK_NULL_INPUT(learning_rate_shape) || CHECK_NULL_INPUT(gradient_shape) || | |||
| CHECK_NULL_INPUT(indices_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "var") || | |||
| CHECK_SHAPE_NULL(accumulation_shape, kernel_name, "accum") || | |||
| CHECK_SHAPE_NULL(learning_rate_shape, kernel_name, "lr") || | |||
| CHECK_SHAPE_NULL(gradient_shape, kernel_name, "grad") || | |||
| CHECK_SHAPE_NULL(indices_shape, kernel_name, "indices"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SparseApplyProximalAdagradGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -65,11 +65,12 @@ class SparseFtrlGpuKernel : public GpuKernel { | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != INPUT_NUM) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but sparse ftrl needs " << INPUT_NUM << " inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be " << INPUT_NUM << ", but got " | |||
| << input_num; | |||
| } | |||
| variable_size_ = sizeof(T); | |||
| @@ -83,11 +84,12 @@ class SparseFtrlGpuKernel : public GpuKernel { | |||
| auto linear_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto gradient_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| is_null_input_ = CHECK_NULL_INPUT(variable_shape) || CHECK_NULL_INPUT(accumulation_shape) || | |||
| CHECK_NULL_INPUT(linear_shape) || CHECK_NULL_INPUT(gradient_shape) || | |||
| CHECK_NULL_INPUT(indices_shape); | |||
| is_null_input_ = CHECK_SHAPE_NULL(variable_shape, kernel_name, "var") || | |||
| CHECK_SHAPE_NULL(accumulation_shape, kernel_name, "accum") || | |||
| CHECK_SHAPE_NULL(linear_shape, kernel_name, "linear") || | |||
| CHECK_SHAPE_NULL(gradient_shape, kernel_name, "grad") || | |||
| CHECK_SHAPE_NULL(indices_shape, kernel_name, "indices"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SparseFTRLGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <stdint.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh" | |||
| @@ -38,6 +39,7 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| cudnn_data_type_(CUDNN_DATA_FLOAT), | |||
| is_grad_(false), | |||
| is_null_input_(false), | |||
| kernel_name_("SparseSoftmaxCrossEntropyWithLogits"), | |||
| logits_size_(0), | |||
| labels_size_(0), | |||
| output_size_(0), | |||
| @@ -79,19 +81,16 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| kernel_node_ = kernel_node; | |||
| InitResource(); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num | |||
| << ", but SparseSoftmaxCrossEntropyWithLogitsGpuKernel needs 2 inputs."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(ERROR) << "Output number is " << output_num | |||
| << ", but SparseSoftmaxCrossEntropyWithLogitsGpuKernel needs 1 output."; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; | |||
| } | |||
| is_grad_ = GetAttr<bool>(kernel_node, "is_grad"); | |||
| cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); | |||
| @@ -136,9 +135,9 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| void InferInputOutputSize(const CNodePtr &kernel_node) { | |||
| auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| is_null_input_ = CHECK_NULL_INPUT(logits_shape) || CHECK_NULL_INPUT(labels_shape); | |||
| is_null_input_ = | |||
| CHECK_SHAPE_NULL(logits_shape, kernel_name_, "logits") || CHECK_SHAPE_NULL(labels_shape, kernel_name_, "labels"); | |||
| if (is_null_input_) { | |||
| MS_LOG(WARNING) << "For 'SparseSoftmaxCrossEntropyWithLogitsGpuKernel', input is null"; | |||
| InitSizeLists(); | |||
| return; | |||
| } | |||
| @@ -168,13 +167,14 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| size_t logits_dim_length = logits_shape.size(); | |||
| size_t labels_dim_length = labels_shape.size(); | |||
| if (labels_dim_length != logits_dim_length - 1) { | |||
| MS_LOG(EXCEPTION) << "Labels shape length should be equal to Logits shape length minus 1 for " | |||
| "SparseSoftmaxCrossEntropyWithLogits, " | |||
| "but got Labels shape length:" | |||
| << labels_dim_length << ", Logits shape length:" << logits_dim_length; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of logits and labels should satisfy this " | |||
| << "equation: len(labels.shape) = len(logits.shape) - 1, but got the dimension of labels: " | |||
| << labels_dim_length << ", the dimension of logits: " << logits_dim_length; | |||
| } | |||
| if (!std::equal(labels_shape.begin(), labels_shape.end(), logits_shape.begin())) { | |||
| MS_LOG(EXCEPTION) << "The shape of labels should be the same as the shape of logits except its last dimension."; | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the shape of logits and labels should be the same except " | |||
| << "the last dimension, but got the shape of logits: " << CONVERT_VECTOR_TO_STRING(logits_shape) | |||
| << ", the shape of labels: " << CONVERT_VECTOR_TO_STRING(labels_shape); | |||
| } | |||
| } | |||
| @@ -186,6 +186,7 @@ class SparseSoftmaxCrossEntropyWithLogitsGpuKernel : public GpuKernel { | |||
| cudnnDataType_t cudnn_data_type_; | |||
| bool is_grad_; | |||
| bool is_null_input_; | |||
| std::string kernel_name_; | |||
| size_t logits_size_; | |||
| size_t labels_size_; | |||
| @@ -216,7 +216,8 @@ inline bool CheckNullInput(const std::vector<size_t> &input_shape) { | |||
| } | |||
| #define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape) | |||
| inline std::string ConvertVectorToString(const std::vector<size_t> &value) { | |||
| template <typename T> | |||
| inline std::string ConvertVectorToString(const std::vector<T> &value) { | |||
| std::stringstream ss; | |||
| ss << "("; | |||
| for (auto it = value.begin(); it != value.end(); it++) { | |||