diff --git a/config/op_info.config b/config/op_info.config
index 9e8c0bff22..7f7f027737 100644
--- a/config/op_info.config
+++ b/config/op_info.config
@@ -147,10 +147,10 @@
 {"op_name": "Conv2DBackpropInput", "inputs": [{"index": 0, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "input_sizes", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "groups", "param_type": "optional", "type": "int", "value": "all"}, {"name": "format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "FracZ"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d_backprop_input_d.so", "compute_cost": 10, "kernel_name": "conv2d_backprop_input_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
 {"op_name": "ConfusionMulGrad", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_mul_grad.so", "compute_cost": 10, "kernel_name": "confusion_mul_grad", "partial_flag": false, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
 {"op_name": "DropoutDoMask", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "keep_prob", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "drop_out_do_mask.so", "compute_cost": 10, "kernel_name": "drop_out_do_mask", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "dynamicFormat"}
-{"op_name": "Gelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"}
-{"op_name": "GeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
-{"op_name": "FastGelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu.so", "compute_cost": 10, "kernel_name": "fast_gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"}
-{"op_name": "FastGeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu_grad.so", "compute_cost": 10, "kernel_name": "fast_gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
+{"op_name": "GeLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"}
+{"op_name": "GeLUGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
+{"op_name": "FastGeLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu.so", "compute_cost": 10, "kernel_name": "fast_gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"}
+{"op_name": "FastGeLUGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu_grad.so", "compute_cost": 10, "kernel_name": "fast_gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
 {"op_name": "MaxPool", "inputs": [{"index": 0, "name": "input_data", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}, {"name": "format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool.so", "compute_cost": 10, "kernel_name": "max_pool", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
 {"op_name": "MaxPoolGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
 {"op_name": "MaxPoolGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""}
diff --git a/mindspore/_extends/graph_kernel/expanders/gelu.py b/mindspore/_extends/graph_kernel/expanders/gelu.py
index 86be18aed9..fe626de831 100644
--- a/mindspore/_extends/graph_kernel/expanders/gelu.py
+++ b/mindspore/_extends/graph_kernel/expanders/gelu.py
@@ -22,7 +22,7 @@ HALF = 0.5
 
 
 def expand_gelu(expand_info):
-    """Gelu expander"""
+    """GeLU expander"""
     # cal formula are:
     # gelu(x) is 0.5 * x * (1.0 + tanh(y))
     # y is sqrt(2.0 / pi) * (x + 0.044715 * x * x * x)
diff --git a/mindspore/_extends/graph_kernel/expanders/gelu_grad.py b/mindspore/_extends/graph_kernel/expanders/gelu_grad.py
index 5e0647634f..0597564095 100644
--- a/mindspore/_extends/graph_kernel/expanders/gelu_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/gelu_grad.py
@@ -23,7 +23,7 @@ HALF = 0.5
 
 
 def expand_gelugrad(expand_info):
-    """GeluGrad expander"""
+    """GeLUGrad expander"""
     # cal formula are:
     # gelu_grad(dy, x) is dy * y'
     # y' is 0.5 * (1.0 + tanh(tanh_para)) + 0.5 * x * (1.0 - tanh(tanh_para) * tanh(para)) * mul_right
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc
index 274e4896c9..add8367dd8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc
@@ -18,14 +18,14 @@
 
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_ONE(GeluGrad,
+MS_REG_GPU_KERNEL_ONE(GeLUGrad,
                       KernelAttr()
                         .AddInputAttr(kNumberTypeFloat32)
                         .AddInputAttr(kNumberTypeFloat32)
                         .AddInputAttr(kNumberTypeFloat32)
                         .AddOutputAttr(kNumberTypeFloat32),
                       GeLUGpuGradKernel, float)
-MS_REG_GPU_KERNEL_ONE(GeluGrad,
+MS_REG_GPU_KERNEL_ONE(GeLUGrad,
                       KernelAttr()
                         .AddInputAttr(kNumberTypeFloat16)
                         .AddInputAttr(kNumberTypeFloat16)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc
index 03cd9a155b..34b7c583d8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc
@@ -18,9 +18,9 @@
 
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+MS_REG_GPU_KERNEL_ONE(GeLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                       GeluGpuKernel, float)
-MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+MS_REG_GPU_KERNEL_ONE(GeLU, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                       GeluGpuKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
index e23655c670..2be93a51fb 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
@@ -701,7 +701,7 @@ FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNo
 std::unordered_set<PrimitivePtr> GetExpandOps() {
   std::unordered_set<PrimitivePtr> expand_ops = {
     prim::kPrimSquare,
-    prim::kPrimGeluGrad,
+    prim::kPrimGeLUGrad,
 #if ENABLE_D
     prim::kPrimTile,
     prim::kPrimSqrtGrad,
@@ -709,7 +709,7 @@ std::unordered_set<PrimitivePtr> GetExpandOps() {
 #elif ENABLE_GPU
     prim::kPrimBiasAdd,
     prim::kPrimBiasAddGrad,
-    prim::kPrimGelu,
+    prim::kPrimGeLU,
     prim::kPrimFusedAdam,
     prim::kPrimFusedAdamWeightDecay,
     prim::kPrimReduceMean,
diff --git a/mindspore/ccsrc/frontend/parallel/dynamic_creator.h b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h
index acf8ecdbd8..ac93c1fbca 100644
--- a/mindspore/ccsrc/frontend/parallel/dynamic_creator.h
+++ b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h
@@ -77,7 +77,7 @@ class RegisterAction {
 
 // operator register
 REGISTER(MatMulInfo);
-REGISTER(GeluInfo);
+REGISTER(GeLUInfo);
 REGISTER(VirtualDatasetInfo);
 REGISTER(BatchParallelInfo);
 REGISTER(TanhInfo);
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h
index 14229928ed..7980433196 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h
@@ -82,12 +82,12 @@ class ActivationOther : public Activation {
   Status GetAttrs() override;
 };
 
-class GeluInfo : public ActivationOther {
+class GeLUInfo : public ActivationOther {
  public:
-  GeluInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
+  GeLUInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
            const PrimitiveAttrs &attrs)
       : ActivationOther(name, inputs_shape, outputs_shape, attrs) {}
-  ~GeluInfo() override = default;
+  ~GeLUInfo() override = default;
 };
 
 class TanhInfo : public ActivationOther {
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h
index 8e25b80030..585a28122e 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h
@@ -182,7 +182,7 @@ constexpr char CONCAT[] = "Concat";
 constexpr char SOFTMAX_CROSS_ENTROPY_WITH_LOGITS[] = "SoftmaxCrossEntropyWithLogits";
 constexpr char SIGMOID_CROSS_ENTROPY_WITH_LOGITS[] = "SigmoidCrossEntropyWithLogits";
 constexpr char MATMUL[] = "MatMul";
-constexpr char GELU[] = "Gelu";
+constexpr char GELU[] = "GeLU";
 constexpr char TANH[] = "Tanh";
 constexpr char RECEIVE[] = "Receive";
 constexpr char SEND[] = "Send";
diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc
index 51c8aff5dd..321e252316 100644
--- a/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc
+++ b/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc
@@ -101,27 +101,27 @@ ATTR_MAP(TanhGrad) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(TanhGrad) = {{0, OUTPUT_DESC(z)}};
 REG_ADPT_DESC(TanhGrad, prim::kPrimTanhGrad->name(), ADPT_DESC(TanhGrad))
 
-// Gelu
+// GeLU (primitive name; the Gelu identifier passed to the macros below is unchanged)
 INPUT_MAP(Gelu) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(Gelu) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(Gelu) = {{0, OUTPUT_DESC(y)}};
-REG_ADPT_DESC(Gelu, prim::kPrimGelu->name(), ADPT_DESC(Gelu))
+REG_ADPT_DESC(Gelu, prim::kPrimGeLU->name(), ADPT_DESC(Gelu))
 
-// GeluGrad
+// GeLUGrad (primitive name; the GeluGrad identifier passed to the macros below is unchanged)
 INPUT_MAP(GeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(y)}};
 ATTR_MAP(GeluGrad) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(GeluGrad) = {{0, OUTPUT_DESC(z)}};
-REG_ADPT_DESC(GeluGrad, prim::kPrimGeluGrad->name(), ADPT_DESC(GeluGrad))
+REG_ADPT_DESC(GeluGrad, prim::kPrimGeLUGrad->name(), ADPT_DESC(GeluGrad))
 
-// FastGelu
+// FastGeLU (primitive name; the FastGelu identifier passed to the macros below is unchanged)
 INPUT_MAP(FastGelu) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(FastGelu) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(FastGelu) = {{0, OUTPUT_DESC(y)}};
-REG_ADPT_DESC(FastGelu, prim::kPrimFastGelu->name(), ADPT_DESC(FastGelu))
+REG_ADPT_DESC(FastGelu, prim::kPrimFastGeLU->name(), ADPT_DESC(FastGelu))
 
-// FastGeluGrad
+// FastGeLUGrad (primitive name; the FastGeluGrad identifier passed to the macros below is unchanged)
 INPUT_MAP(FastGeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}};
 ATTR_MAP(FastGeluGrad) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(FastGeluGrad) = {{0, OUTPUT_DESC(z)}};
-REG_ADPT_DESC(FastGeluGrad, prim::kPrimFastGeluGrad->name(), ADPT_DESC(FastGeluGrad))
+REG_ADPT_DESC(FastGeluGrad, prim::kPrimFastGeLUGrad->name(), ADPT_DESC(FastGeluGrad))
 }  // namespace mindspore::transform
diff --git a/mindspore/core/abstract/infer_functions.h b/mindspore/core/abstract/infer_functions.h
index c9fa606faf..b4798de771 100644
--- a/mindspore/core/abstract/infer_functions.h
+++ b/mindspore/core/abstract/infer_functions.h
@@ -63,13 +63,13 @@ AbstractBasePtr InferImplConv2DBackpropFilter(const AnalysisEnginePtr &, const P
                                               const AbstractBasePtrList &args_spec_list);
 AbstractBasePtr InferImplBiasAddGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                      const AbstractBasePtrList &args_spec_list);
-AbstractBasePtr InferImplGelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+AbstractBasePtr InferImplGeLU(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                               const AbstractBasePtrList &args_spec_list);
-AbstractBasePtr InferImplGeluGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+AbstractBasePtr InferImplGeLUGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                   const AbstractBasePtrList &args_spec_list);
-AbstractBasePtr InferImplFastGelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+AbstractBasePtr InferImplFastGeLU(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                   const AbstractBasePtrList &args_spec_list);
-AbstractBasePtr InferImplFastGeluGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+AbstractBasePtr InferImplFastGeLUGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                       const AbstractBasePtrList &args_spec_list);
 AbstractBasePtr InferImplRelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                               const AbstractBasePtrList &args_spec_list);
diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h
index fc0c06527d..d849a41cf7 100644
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@@ -41,6 +41,10 @@ constexpr auto kTupleGetItem = "TupleGetItem";
 constexpr auto kMakeTuple = "MakeTuple";
 constexpr auto kReturn = "Return";
 constexpr auto kSGD = "SGD";
+constexpr auto kGeLU = "GeLU";                  // name of kPrimGeLU
+constexpr auto kGeLUGrad = "GeLUGrad";          // name of kPrimGeLUGrad
+constexpr auto kFastGeLU = "FastGeLU";          // name of kPrimFastGeLU
+constexpr auto kFastGeLUGrad = "FastGeLUGrad";  // name of kPrimFastGeLUGrad
 
 // Here list all primitives used in backend or some special primitives used by core.
 // Arithmetic
@@ -249,10 +253,10 @@ inline const PrimitivePtr kPrimDropout = std::make_shared<Primitive>("Dropout");
 inline const PrimitivePtr kPrimUniformReal = std::make_shared<Primitive>("UniformReal");
 inline const PrimitivePtr kPrimCudnnUniformReal = std::make_shared<Primitive>("CudnnUniformReal");
 inline const PrimitivePtr kPrimOneHot = std::make_shared<Primitive>("OneHot");
-inline const PrimitivePtr kPrimGelu = std::make_shared<Primitive>("Gelu");
-inline const PrimitivePtr kPrimGeluGrad = std::make_shared<Primitive>("GeluGrad");
-inline const PrimitivePtr kPrimFastGelu = std::make_shared<Primitive>("FastGelu");
-inline const PrimitivePtr kPrimFastGeluGrad = std::make_shared<Primitive>("FastGeluGrad");
+inline const PrimitivePtr kPrimGeLU = std::make_shared<Primitive>(kGeLU);
+inline const PrimitivePtr kPrimGeLUGrad = std::make_shared<Primitive>(kGeLUGrad);
+inline const PrimitivePtr kPrimFastGeLU = std::make_shared<Primitive>(kFastGeLU);
+inline const PrimitivePtr kPrimFastGeLUGrad = std::make_shared<Primitive>(kFastGeLUGrad);
 inline const PrimitivePtr kPrimRelu = std::make_shared<Primitive>("ReLU");
 inline const PrimitivePtr kPrimElu = std::make_shared<Primitive>("ELU");
 inline const PrimitivePtr kPrimRelu6 = std::make_shared<Primitive>("ReLU6");
diff --git a/mindspore/core/ops/gelu.cc b/mindspore/core/ops/gelu.cc
index 3fccb33994..a8ab8f6aa9 100644
--- a/mindspore/core/ops/gelu.cc
+++ b/mindspore/core/ops/gelu.cc
@@ -26,16 +26,16 @@
 namespace mindspore {
 namespace ops {
 namespace {
-abstract::ShapePtr GeluInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
+abstract::ShapePtr GeLUInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
-  auto gelu_prim = primitive->cast<PrimGeluPtr>();
+  auto gelu_prim = primitive->cast<PrimGeLUPtr>();
   MS_EXCEPTION_IF_NULL(gelu_prim);
   auto prim_name = gelu_prim->name();
   auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShape("input_x", input_args[0]->BuildShape(), prim_name);
   return std::make_shared<abstract::Shape>(input_shape);
 }
 
-TypePtr GeluInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
+TypePtr GeLUInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
   for (const auto &item : input_args) {
     MS_EXCEPTION_IF_NULL(item);
   }
@@ -46,13 +46,13 @@ TypePtr GeluInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePt
   return TypeIdToType(infer_type);
 }
 }  // namespace
-AbstractBasePtr GeluInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+AbstractBasePtr GeLUInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                           const std::vector<AbstractBasePtr> &input_args) {
-  return std::make_shared<abstract::AbstractTensor>(GeluInferType(primitive, input_args),
-                                                    GeluInferShape(primitive, input_args)->shape());
+  return std::make_shared<abstract::AbstractTensor>(GeLUInferType(primitive, input_args),
+                                                    GeLUInferShape(primitive, input_args)->shape());
 }
 
-REGISTER_PRIMITIVE_EVAL_IMPL(Gelu, prim::kPrimGelu, GeluInfer);
-REGISTER_PRIMITIVE_C(kNameGelu, Gelu);
+REGISTER_PRIMITIVE_EVAL_IMPL(GeLU, prim::kPrimGeLU, GeLUInfer);
+REGISTER_PRIMITIVE_C(kNameGeLU, GeLU);
 }  // namespace ops
 }  // namespace mindspore
diff --git a/mindspore/core/ops/gelu.h b/mindspore/core/ops/gelu.h
index 0050d851ea..87dc92ada6 100644
--- a/mindspore/core/ops/gelu.h
+++ b/mindspore/core/ops/gelu.h
@@ -25,17 +25,17 @@
 
 namespace mindspore {
 namespace ops {
-constexpr auto kNameGelu = "Gelu";
-class Gelu : public PrimitiveC {
+constexpr auto kNameGeLU = "GeLU";
+class GeLU : public PrimitiveC {
  public:
-  Gelu() : PrimitiveC(kNameGelu) {}
-  ~Gelu() = default;
-  MS_DECLARE_PARENT(Gelu, PrimitiveC);
+  GeLU() : PrimitiveC(kNameGeLU) {}
+  ~GeLU() = default;
+  MS_DECLARE_PARENT(GeLU, PrimitiveC);
   void Init() {}
 };
-AbstractBasePtr GeluInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+AbstractBasePtr GeLUInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                           const std::vector<AbstractBasePtr> &input_args);
-using PrimGeluPtr = std::shared_ptr<Gelu>;
+using PrimGeLUPtr = std::shared_ptr<GeLU>;
 }  // namespace ops
 }  // namespace mindspore
 
diff --git a/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc b/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc
index 698976b054..71f89a3e2e 100644
--- a/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc
@@ -81,7 +81,7 @@ using mindspore::ops::kNameDepthWiseConv2D;
 using mindspore::ops::kNameDiv;
 using mindspore::ops::kNameElu;
 using mindspore::ops::kNameExp;
-using mindspore::ops::kNameGelu;
+using mindspore::ops::kNameGeLU;
 using mindspore::ops::kNameL2Normalize;
 using mindspore::ops::kNameLayerNorm;
 using mindspore::ops::kNameLeakyRelu;
@@ -117,7 +117,7 @@ constexpr auto kNameGatherV2 = "GatherV2";
 constexpr auto kNameTensorAdd = "TensorAdd";
 std::map<std::string, mindspore::ActivationType> activation_map = {
   {ops::kNameAbs, mindspore::ABS},         {ops::kNameElu, mindspore::ELU},
-  {ops::kNameGelu, mindspore::GELU},       {ops::kNameLeakyRelu, mindspore::LEAKY_RELU},
+  {ops::kNameGeLU, mindspore::GELU},       {ops::kNameLeakyRelu, mindspore::LEAKY_RELU},
   {ops::kNameReLU, mindspore::RELU},       {ops::kNameReLU6, mindspore::RELU6},
   {ops::kNameSigmoid, mindspore::SIGMOID}, {ops::kNameTanh, mindspore::TANH}};
 
@@ -413,7 +413,7 @@ REGIST_PRIMITIVE_ADJUST(kNameDiv, MoveAttrMapCommon<ops::DivFusion>)
 REGIST_PRIMITIVE_ADJUST(kNameElu, MoveAttrMapActivation)
 REGIST_PRIMITIVE_ADJUST(kNameExp, MoveAttrMapCommon<ops::ExpFusion>)
 REGIST_PRIMITIVE_ADJUST(kNameGatherV2, MoveAttrMapCommon<ops::Gather>)
-REGIST_PRIMITIVE_ADJUST(kNameGelu, MoveAttrMapActivation)
+REGIST_PRIMITIVE_ADJUST(kNameGeLU, MoveAttrMapActivation)
 REGIST_PRIMITIVE_ADJUST(kNameL2Normalize, MoveAttrMapCommon<ops::L2NormalizeFusion>)
 REGIST_PRIMITIVE_ADJUST(kNameLayerNorm, MoveAttrMapLayerNorm)
 REGIST_PRIMITIVE_ADJUST(kNameLeakyRelu, MoveAttrMapActivation)
diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py
index 4a9ed4abcf..129fd0fd46 100755
--- a/mindspore/ops/_grad/grad_nn_ops.py
+++ b/mindspore/ops/_grad/grad_nn_ops.py
@@ -604,10 +604,10 @@ def get_bprop_tanh(self):
     return bprop
 
 
-@bprop_getters.register(P.Gelu)
+@bprop_getters.register(P.GeLU)
 def get_bprop_gelu(self):
-    """Grad definition for `Gelu` operation."""
-    input_grad = G.GeluGrad()
+    """Grad definition for `GeLU` operation."""
+    input_grad = G.GeLUGrad()
 
     def bprop(x, out, dout):
         dx = input_grad(dout, x, out)
@@ -616,10 +616,10 @@ def get_bprop_gelu(self):
     return bprop
 
 
-@bprop_getters.register(P.FastGelu)
+@bprop_getters.register(P.FastGeLU)
 def get_bprop_fast_gelu(self):
-    """Grad definition for `FastGelu` operation."""
-    input_grad = G.FastGeluGrad()
+    """Grad definition for `FastGeLU` operation."""
+    input_grad = G.FastGeLUGrad()
 
     def bprop(x, out, dout):
         dx = input_grad(dout, x)
diff --git a/mindspore/ops/_op_impl/tbe/fast_gelu.py b/mindspore/ops/_op_impl/tbe/fast_gelu.py
index f108f7af7f..0491641b33 100644
--- a/mindspore/ops/_op_impl/tbe/fast_gelu.py
+++ b/mindspore/ops/_op_impl/tbe/fast_gelu.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 # ============================================================================
 
-"""FastGelu op"""
+"""FastGeLU op"""
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 
-fast_gelu_op_info = TBERegOp("FastGelu") \
+fast_gelu_op_info = TBERegOp("FastGeLU") \
     .fusion_type("ELEMWISE") \
     .async_flag(False) \
     .binfile_name("fast_gelu.so") \
@@ -33,5 +33,5 @@ fast_gelu_op_info = TBERegOp("FastGelu") \
 
 @op_info_register(fast_gelu_op_info)
 def _fast_gelu_tbe():
-    """FastGelu TBE register"""
+    """FastGeLU TBE register"""
     return
diff --git a/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py b/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py
index 18baa6ec48..54fb796438 100644
--- a/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py
+++ b/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 # ============================================================================
 
-"""FastGeluGrad op"""
+"""FastGeLUGrad op"""
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 
-fast_gelu_grad_op_info = TBERegOp("FastGeluGrad") \
+fast_gelu_grad_op_info = TBERegOp("FastGeLUGrad") \
     .fusion_type("ELEMWISE") \
     .async_flag(False) \
     .binfile_name("fast_gelu_grad.so") \
@@ -37,5 +37,5 @@ fast_gelu_grad_op_info = TBERegOp("FastGeluGrad") \
 
 @op_info_register(fast_gelu_grad_op_info)
 def _fast_gelu_grad_tbe():
-    """FastGeluGrad TBE register"""
+    """FastGeLUGrad TBE register"""
     return
diff --git a/mindspore/ops/_op_impl/tbe/gelu.py b/mindspore/ops/_op_impl/tbe/gelu.py
index a539cb8ef3..9c91e53d19 100644
--- a/mindspore/ops/_op_impl/tbe/gelu.py
+++ b/mindspore/ops/_op_impl/tbe/gelu.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 # ============================================================================
 
-"""Gelu op"""
+"""GeLU op"""
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 
-gelu_op_info = TBERegOp("Gelu") \
+gelu_op_info = TBERegOp("GeLU") \
     .fusion_type("ELEMWISE") \
     .async_flag(False) \
     .binfile_name("gelu.so") \
@@ -33,5 +33,5 @@ gelu_op_info = TBERegOp("Gelu") \
 
 @op_info_register(gelu_op_info)
 def _gelu_tbe():
-    """Gelu TBE register"""
+    """GeLU TBE register"""
     return
diff --git a/mindspore/ops/_op_impl/tbe/gelu_grad.py b/mindspore/ops/_op_impl/tbe/gelu_grad.py
index ce62e55071..8d4da1a75a 100644
--- a/mindspore/ops/_op_impl/tbe/gelu_grad.py
+++ b/mindspore/ops/_op_impl/tbe/gelu_grad.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 # ============================================================================
 
-"""GeluGrad op"""
+"""GeLUGrad op"""
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 
-gelu_grad_op_info = TBERegOp("GeluGrad") \
+gelu_grad_op_info = TBERegOp("GeLUGrad") \
     .fusion_type("ELEMWISE") \
     .async_flag(False) \
     .binfile_name("gelu_grad.so") \
@@ -38,5 +38,5 @@ gelu_grad_op_info = TBERegOp("GeluGrad") \
 
 @op_info_register(gelu_grad_op_info)
 def _gelu_grad_tbe():
-    """GeluGrad TBE register"""
+    """GeLUGrad TBE register"""
     return
diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py
index a4d3bfafee..c2c7b5ac12 100644
--- a/mindspore/ops/operations/__init__.py
+++ b/mindspore/ops/operations/__init__.py
@@ -64,7 +64,7 @@ from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, Adam
                      DepthwiseConv2dNative,
                      DropoutDoMask, Dropout,
                      DropoutGenMask, Flatten, FusedBatchNorm, FusedBatchNormEx, BNTrainingReduce, BNTrainingUpdate,
-                     Gelu, FastGelu, Elu,
+                     GeLU, Gelu, FastGeLU, FastGelu, Elu,
                      GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, CTCGreedyDecoder,
                      LogSoftmax,
                      MaxPool, DataFormatDimMap,
@@ -168,7 +168,9 @@ __all__ = [
     'Unpack',
     'Tile',
     'BiasAdd',
+    'GeLU',
     'Gelu',
+    'FastGeLU',
     'FastGelu',
     'Minimum',
     'Maximum',
diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py
index 85e729e856..6dca4b3163 100644
--- a/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/ops/operations/_grad_ops.py
@@ -775,12 +775,12 @@ class BNTrainingUpdateGrad(PrimitiveWithInfer):
         return (batch_mean, batch_variance)
 
 
-class GeluGrad(PrimitiveWithInfer):
-    """Gradients of Gelu operation."""
+class GeLUGrad(PrimitiveWithInfer):
+    """Gradients of GeLU operation."""
 
     @prim_attr_register
     def __init__(self):
-        """Initialize GeluGrad"""
+        """Initialize GeLUGrad"""
 
     def infer_shape(self, y_backprop_shape, x_shape, y_shape):
         return x_shape
@@ -793,12 +793,12 @@ class GeluGrad(PrimitiveWithInfer):
         return x_dtype
 
 
-class FastGeluGrad(PrimitiveWithInfer):
-    """Gradients of FastGelu operation."""
+class FastGeLUGrad(PrimitiveWithInfer):
+    """Gradients of FastGeLU operation."""
 
     @prim_attr_register
     def __init__(self):
-        """init FastGeluGrad"""
+        """Initialize FastGeLUGrad"""
 
     def infer_shape(self, y_backprop_shape, x_shape):
         return x_shape
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 2ab20192c8..65ef0a05ed 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -19,6 +19,7 @@ import math
 import operator
 from functools import reduce, partial
 from mindspore._checkparam import _check_3d_int_or_tuple
+from mindspore import log as logger
 import numpy as np
 from ... import context
 from .. import signature as sig
@@ -2921,7 +2922,7 @@ class OneHot(PrimitiveWithInfer):
                 'value': None}
 
 
-class Gelu(PrimitiveWithInfer):
+class GeLU(PrimitiveWithInfer):
     r"""
     Gaussian Error Linear Units activation function.
 
@@ -2929,7 +2930,7 @@ class Gelu(PrimitiveWithInfer):
     And also please refer to `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
     <https://arxiv.org/abs/1810.04805>`_.
 
-    Gelu is defined as follows:
+    GeLU is defined as follows:
 
     .. math::
         \text{output} = 0.5 * x * (1 + erf(x / \sqrt{2})),
@@ -2937,7 +2938,7 @@ class Gelu(PrimitiveWithInfer):
     where :math:`erf` is the "Gauss error function" .
 
     Inputs:
-        - **input_x** (Tensor) - Input to compute the Gelu with data type of float16 or float32.
+        - **input_x** (Tensor) - Input to compute the GeLU with data type of float16 or float32.
 
     Outputs:
         Tensor, with the same type and shape as input.
@@ -2947,7 +2948,7 @@ class Gelu(PrimitiveWithInfer):
 
     Examples:
         >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
-        >>> gelu = ops.Gelu()
+        >>> gelu = ops.GeLU()
         >>> result = gelu(tensor)
         >>> print(result)
         [0.841192  1.9545976  2.9963627]
@@ -2965,12 +2966,16 @@ class Gelu(PrimitiveWithInfer):
         validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name)
         return input_x
 
+def Gelu():
+    logger.warning("WARN_DEPRECATED: The usage of Gelu is deprecated. Please use GeLU.")
+    return GeLU()
 
-class FastGelu(PrimitiveWithInfer):
+
+class FastGeLU(PrimitiveWithInfer):
     r"""
     Fast Gaussian Error Linear Units activation function.
 
-    FastGelu is defined as follows:
+    FastGeLU is defined as follows:
 
     .. math::
         \text{output} = \frac {x} {1 + \exp(-1.702 * \left| x \right|)} * \exp(0.851 * (x - \left| x \right|)),
@@ -2978,7 +2983,7 @@ class FastGelu(PrimitiveWithInfer):
     where :math:`x` is the element of the input.
 
     Inputs:
-        - **input_x** (Tensor) - Input to compute the FastGelu with data type of float16 or float32.
+        - **input_x** (Tensor) - Input to compute the FastGeLU with data type of float16 or float32.
 
     Outputs:
         Tensor, with the same type and shape as input.
@@ -2988,7 +2993,7 @@ class FastGelu(PrimitiveWithInfer):
 
     Examples:
         >>> tensor = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
-        >>> fast_gelu = P.FastGelu()
+        >>> fast_gelu = ops.FastGeLU()
         >>> output = fast_gelu(tensor)
         >>> print(output)
         [[-1.5420423e-01  3.9955849e+00 -9.7664278e-06]
@@ -3007,6 +3012,10 @@ class FastGelu(PrimitiveWithInfer):
         validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name)
         return input_x
 
+def FastGelu():
+    logger.warning("WARN_DEPRECATED: The usage of FastGelu is deprecated. Please use FastGeLU.")
+    return FastGeLU()
+
 
 class GetNext(PrimitiveWithInfer):
     """
diff --git a/tests/st/ops/gpu/test_gelu_grad_op.py b/tests/st/ops/gpu/test_gelu_grad_op.py
index 975355114e..5eaecf4e08 100644
--- a/tests/st/ops/gpu/test_gelu_grad_op.py
+++ b/tests/st/ops/gpu/test_gelu_grad_op.py
@@ -28,7 +28,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
 class GeluNet(nn.Cell):
     def __init__(self):
         super(GeluNet, self).__init__()
-        self.gelu = P.Gelu()
+        self.gelu = P.GeLU()
 
     def construct(self, x):
         return self.gelu(x)
diff --git a/tests/st/ops/graph_kernel/test_gelu.py b/tests/st/ops/graph_kernel/test_gelu.py
index d5eb67a497..4497d945e3 100644
--- a/tests/st/ops/graph_kernel/test_gelu.py
+++ b/tests/st/ops/graph_kernel/test_gelu.py
@@ -25,7 +25,7 @@ import mindspore.ops.operations._grad_ops as G
 class GeluNet(Cell):
     def __init__(self):
         super(GeluNet, self).__init__()
-        self.gelu = P.Gelu()
+        self.gelu = P.GeLU()
 
     def construct(self, x):
         return self.gelu(x)
@@ -34,7 +34,7 @@ class GeluNet(Cell):
 class GeluGradNet(Cell):
     def __init__(self):
         super(GeluGradNet, self).__init__()
-        self.gelu_grad = G.GeluGrad()
+        self.gelu_grad = G.GeLUGrad()
 
     def construct(self, dy, x, y):
         return self.gelu_grad(dy, x, y)
diff --git a/tests/ut/cpp/ops/test_ops_gelu.cc b/tests/ut/cpp/ops/test_ops_gelu.cc
index f17f36505c..e7c7bcc07d 100644
--- a/tests/ut/cpp/ops/test_ops_gelu.cc
+++ b/tests/ut/cpp/ops/test_ops_gelu.cc
@@ -24,15 +24,15 @@
 
 namespace mindspore {
 namespace ops {
-class TestGelu : public UT::Common {
+class TestGeLU : public UT::Common {
  public:
-  TestGelu() {}
+  TestGeLU() {}
   void SetUp() {}
   void TearDown() {}
 };
 
-TEST_F(TestGelu, test_ops_gelu1) {
-  auto gelu = std::make_shared<Gelu>();
+TEST_F(TestGeLU, test_ops_gelu1) {
+  auto gelu = std::make_shared<GeLU>();
   auto input1 = TensorConstructUtils::CreateOnesTensor(kNumberTypeFloat16, std::vector<int64_t>{1, 2});
   MS_EXCEPTION_IF_NULL(input1);
   auto abstract = gelu->Infer({input1->ToAbstract()});
@@ -56,8 +56,8 @@ TEST_F(TestGelu, test_ops_gelu1) {
   EXPECT_EQ(shape_vec[0], 1);
 }
 
-TEST_F(TestGelu, test_ops_gelu2) {
-  auto gelu = std::make_shared<Gelu>();
+TEST_F(TestGeLU, test_ops_gelu2) {
+  auto gelu = std::make_shared<GeLU>();
   auto input1 = TensorConstructUtils::CreateOnesTensor(kNumberTypeFloat32, std::vector<int64_t>{1, 2});
   MS_EXCEPTION_IF_NULL(input1);
   auto abstract = gelu->Infer({input1->ToAbstract()});
diff --git a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc
index e0e9424ac2..3093a950ce 100644
--- a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc
+++ b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc
@@ -26,18 +26,18 @@
 namespace mindspore {
 namespace parallel {
 
-class GeluInfo;
-using GeluInfoPtr = std::shared_ptr<GeluInfo>;
-GeluInfoPtr gelu;
+class GeLUInfo;
+using GeLUInfoPtr = std::shared_ptr<GeLUInfo>;
+GeLUInfoPtr gelu;
 
-class TestGeluInfo : public UT::Common {
+class TestGeLUInfo : public UT::Common {
  public:
-  TestGeluInfo() {}
+  TestGeLUInfo() {}
   void SetUp();
   void TearDown() {}
 };
 
-void TestGeluInfo::SetUp() {
+void TestGeLUInfo::SetUp() {
   RankList dev_list;
 
   for (int32_t i = 0; i < 130; i++) {
@@ -59,10 +59,10 @@ void TestGeluInfo::SetUp() {
   Shapes inputs_shape = {{2, 4, 8, 16}};
   Shapes outputs_shape = {{2, 4, 8, 16}};
 
-  gelu = std::make_shared<GeluInfo>("gelu_info", inputs_shape, outputs_shape, attr);
+  gelu = std::make_shared<GeLUInfo>("gelu_info", inputs_shape, outputs_shape, attr);
 }
 
-TEST_F(TestGeluInfo, InferDevMatrixShape1) {
+TEST_F(TestGeLUInfo, InferDevMatrixShape1) {
   Strategys inputs = {{2, 4, 1, 16}};
   StrategyPtr strategy = NewStrategy(0, inputs);
 
@@ -73,7 +73,7 @@ TEST_F(TestGeluInfo, InferDevMatrixShape1) {
   ASSERT_EQ(dev_matrix_shape, expect);
 }
 
-TEST_F(TestGeluInfo, InferSliceShape1) {
+TEST_F(TestGeLUInfo, InferSliceShape1) {
   Strategys str = {{2, 4, 1, 16}};
   StrategyPtr strategy = NewStrategy(0, str);
 
@@ -94,7 +94,7 @@ TEST_F(TestGeluInfo, InferSliceShape1) {
   ASSERT_EQ(output_slice_shape, output_slice_shape_expect);
 }
 
-TEST_F(TestGeluInfo, GetTensorLayout1) {
+TEST_F(TestGeLUInfo, GetTensorLayout1) {
   Strategys str = {{2, 4, 1, 16}};
   StrategyPtr strategy = NewStrategy(0, str);
 
@@ -115,7 +115,7 @@ TEST_F(TestGeluInfo, GetTensorLayout1) {
   ASSERT_EQ(output_tensor_map.array(), output_expect);
 }
 
-TEST_F(TestGeluInfo, GetForwardOp1) {
+TEST_F(TestGeLUInfo, GetForwardOp1) {
   Strategys inputs = {{2, 4, 1, 16}};
   StrategyPtr strategy = NewStrategy(0, inputs);
 
@@ -126,7 +126,7 @@ TEST_F(TestGeluInfo, GetForwardOp1) {
   ASSERT_EQ(size, 0);
 }
 
-TEST_F(TestGeluInfo, GetMirrorOPs1) {
+TEST_F(TestGeLUInfo, GetMirrorOPs1) {
   Strategys inputs = {{2, 4, 1, 16}};
   StrategyPtr strategy = NewStrategy(0, inputs);
 
@@ -138,7 +138,7 @@ TEST_F(TestGeluInfo, GetMirrorOPs1) {
   ASSERT_EQ(size, 0);
 }
 
-TEST_F(TestGeluInfo, CheckStrategy1) {
+TEST_F(TestGeLUInfo, CheckStrategy1) {
   // Success: {{2,4,1,16}}
   Strategys inputs = {{2, 2, 8, 16}, {2, 4, 16, 1}};
   StrategyPtr strategy = NewStrategy(0, inputs);
@@ -147,7 +147,7 @@ TEST_F(TestGeluInfo, CheckStrategy1) {
   ASSERT_EQ(ret, FAILED);
 }
 
-TEST_F(TestGeluInfo, CheckStrategy2) {
+TEST_F(TestGeLUInfo, CheckStrategy2) {
   // Success: {{2,4,1,16}}
   Strategys inputs = {{2, 4, 8}};
   StrategyPtr strategy = NewStrategy(0, inputs);
@@ -156,7 +156,7 @@ TEST_F(TestGeluInfo, CheckStrategy2) {
   ASSERT_EQ(ret, FAILED);
 }
 
-TEST_F(TestGeluInfo, CheckStrategy3) {
+TEST_F(TestGeLUInfo, CheckStrategy3) {
   // Success: {{2,4,1,16}}
   Strategys inputs = {{2, 4, 1, 16}};
   StrategyPtr strategy = NewStrategy(0, inputs);
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index 165d266ede..de76a1af22 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -1544,12 +1544,12 @@ test_case_nn_ops = [
         'block': G.BiasAddGrad(),
         'desc_inputs': [[1, 3, 3, 3]],
         'skip': ['backward']}),
-    ('Gelu', {
-        'block': P.Gelu(),
+    ('GeLU', {
+        'block': P.GeLU(),
         'desc_inputs': [[1, 3, 4, 4]],
         'desc_bprop': [[1, 3, 4, 4]]}),
-    ('GeluGrad', {
-        'block': G.GeluGrad(),
+    ('GeLUGrad', {
+        'block': G.GeLUGrad(),
         'desc_inputs': [[2, 2], [2, 2], [2, 2]],
         'desc_bprop': [[2, 2]],
         'skip': ['backward']}),
diff --git a/tests/ut/python/parallel/test_dataset.py b/tests/ut/python/parallel/test_dataset.py
index c4f2e68b5f..968de04430 100644
--- a/tests/ut/python/parallel/test_dataset.py
+++ b/tests/ut/python/parallel/test_dataset.py
@@ -27,7 +27,7 @@ class VirtualDatasetNet(nn.Cell):
         self.virtual_dataset = _VirtualDataset()
         self.matmul1 = P.MatMul()
         self.matmul2 = P.MatMul()
-        self.gelu = P.Gelu()
+        self.gelu = P.GeLU()
 
     def construct(self, x, y, z):
         x, y, z = self.virtual_dataset(x, y, z)
diff --git a/tests/ut/python/parallel/test_virtual_dataset_3_input.py b/tests/ut/python/parallel/test_virtual_dataset_3_input.py
index e1be9e7868..7bf2c877e1 100644
--- a/tests/ut/python/parallel/test_virtual_dataset_3_input.py
+++ b/tests/ut/python/parallel/test_virtual_dataset_3_input.py
@@ -57,7 +57,7 @@ def test_virtual_dataset_3_input():
             self.virtual_dataset = _VirtualDataset().shard(strategy0)
             self.matmul1 = P.MatMul().shard(strategy1)
             self.matmul2 = P.MatMul().shard(strategy2)
-            self.gelu = P.Gelu().shard(strategy3)
+            self.gelu = P.GeLU().shard(strategy3)
 
         def construct(self, x, y, b):
             x, y, b = self.virtual_dataset(x, y, b)
@@ -86,7 +86,7 @@ def test_virtualdataset_cell_3_inputs():
             super().__init__()
             self.matmul1 = P.MatMul().shard(strategy1)
             self.matmul2 = P.MatMul().shard(strategy2)
-            self.gelu = P.Gelu().shard(strategy3)
+            self.gelu = P.GeLU().shard(strategy3)
 
         def construct(self, x, y, b):
             out = self.gelu(self.matmul1(x, y))