diff --git a/config/op_info.config b/config/op_info.config index 9e8c0bff22..7f7f027737 100644 --- a/config/op_info.config +++ b/config/op_info.config @@ -147,10 +147,10 @@ {"op_name": "Conv2DBackpropInput", "inputs": [{"index": 0, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "input_sizes", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "groups", "param_type": "optional", "type": "int", "value": "all"}, {"name": "format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "FracZ"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d_backprop_input_d.so", "compute_cost": 10, "kernel_name": "conv2d_backprop_input_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} {"op_name": "ConfusionMulGrad", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_mul_grad.so", "compute_cost": 10, "kernel_name": "confusion_mul_grad", "partial_flag": false, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} {"op_name": "DropoutDoMask", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "keep_prob", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "drop_out_do_mask.so", "compute_cost": 10, "kernel_name": "drop_out_do_mask", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "dynamicFormat"} -{"op_name": "Gelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} -{"op_name": "GeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} -{"op_name": "FastGelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu.so", "compute_cost": 10, "kernel_name": "fast_gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} -{"op_name": "FastGeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu_grad.so", "compute_cost": 10, "kernel_name": "fast_gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} +{"op_name": "GeLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} +{"op_name": "GeLUGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} +{"op_name": "FastGeLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu.so", "compute_cost": 10, "kernel_name": "fast_gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} +{"op_name": "FastGeLUGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu_grad.so", "compute_cost": 10, "kernel_name": "fast_gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} {"op_name": "MaxPool", "inputs": [{"index": 0, "name": "input_data", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}, {"name": "format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool.so", "compute_cost": 10, "kernel_name": "max_pool", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} {"op_name": "MaxPoolGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} {"op_name": "MaxPoolGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} diff --git a/mindspore/_extends/graph_kernel/expanders/gelu.py b/mindspore/_extends/graph_kernel/expanders/gelu.py index 86be18aed9..fe626de831 100644 --- a/mindspore/_extends/graph_kernel/expanders/gelu.py +++ b/mindspore/_extends/graph_kernel/expanders/gelu.py @@ -22,7 +22,7 @@ HALF = 0.5 def expand_gelu(expand_info): - """Gelu expander""" + """GeLU expander""" # cal formula are: # gelu(x) is 0.5 * x * (1.0 + tanh(y)) # y is sqrt(2.0 / pi) * (x + 0.044715 * x * x * x) diff --git a/mindspore/_extends/graph_kernel/expanders/gelu_grad.py b/mindspore/_extends/graph_kernel/expanders/gelu_grad.py index 5e0647634f..0597564095 100644 --- a/mindspore/_extends/graph_kernel/expanders/gelu_grad.py +++ b/mindspore/_extends/graph_kernel/expanders/gelu_grad.py @@ -23,7 +23,7 @@ HALF = 0.5 def expand_gelugrad(expand_info): - """GeluGrad expander""" + """GeLUGrad expander""" # cal formula are: # gelu_grad(dy, x) is dy * y' # y' is 0.5 * (1.0 + tanh(tanh_para)) + 0.5 * x * (1.0 - tanh(tanh_para) * tanh(para)) * mul_right diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc index 274e4896c9..add8367dd8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc @@ -18,14 +18,14 @@ namespace mindspore { namespace kernel { -MS_REG_GPU_KERNEL_ONE(GeluGrad, +MS_REG_GPU_KERNEL_ONE(GeLUGrad, KernelAttr() .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), GeLUGpuGradKernel, float) -MS_REG_GPU_KERNEL_ONE(GeluGrad, +MS_REG_GPU_KERNEL_ONE(GeLUGrad, KernelAttr() .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeFloat16) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc index 03cd9a155b..34b7c583d8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc @@ -18,9 +18,9 @@ namespace mindspore { namespace kernel { -MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), +MS_REG_GPU_KERNEL_ONE(GeLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), GeluGpuKernel, float) -MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), +MS_REG_GPU_KERNEL_ONE(GeLU, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), GeluGpuKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc index e23655c670..2be93a51fb 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc @@ -701,7 +701,7 @@ FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector GetExpandOps() { std::unordered_set expand_ops = { prim::kPrimSquare, - prim::kPrimGeluGrad, + prim::kPrimGeLUGrad, #if ENABLE_D prim::kPrimTile, prim::kPrimSqrtGrad, @@ -709,7 +709,7 @@ std::unordered_set GetExpandOps() { #elif ENABLE_GPU prim::kPrimBiasAdd, prim::kPrimBiasAddGrad, - prim::kPrimGelu, + prim::kPrimGeLU, prim::kPrimFusedAdam, prim::kPrimFusedAdamWeightDecay, prim::kPrimReduceMean, diff --git a/mindspore/ccsrc/frontend/parallel/dynamic_creator.h b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h index acf8ecdbd8..ac93c1fbca 100644 --- a/mindspore/ccsrc/frontend/parallel/dynamic_creator.h +++ b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h @@ -77,7 +77,7 @@ class RegisterAction { // operator register REGISTER(MatMulInfo); -REGISTER(GeluInfo); +REGISTER(GeLUInfo); REGISTER(VirtualDatasetInfo); REGISTER(BatchParallelInfo); REGISTER(TanhInfo); diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h index 14229928ed..7980433196 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h @@ -82,12 +82,12 @@ class ActivationOther : public Activation { Status GetAttrs() override; }; -class GeluInfo : public ActivationOther { +class GeLUInfo : public ActivationOther { public: - GeluInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, + GeLUInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, const PrimitiveAttrs &attrs) : ActivationOther(name, inputs_shape, outputs_shape, attrs) {} - ~GeluInfo() override = default; + ~GeLUInfo() override = default; }; class TanhInfo : public ActivationOther { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h index 8e25b80030..585a28122e 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h @@ -182,7 +182,7 @@ constexpr char CONCAT[] = "Concat"; constexpr char SOFTMAX_CROSS_ENTROPY_WITH_LOGITS[] = "SoftmaxCrossEntropyWithLogits"; constexpr char SIGMOID_CROSS_ENTROPY_WITH_LOGITS[] = "SigmoidCrossEntropyWithLogits"; constexpr char MATMUL[] = "MatMul"; -constexpr char GELU[] = "Gelu"; +constexpr char GELU[] = "GeLU"; constexpr char TANH[] = "Tanh"; constexpr char RECEIVE[] = "Receive"; constexpr char SEND[] = "Send"; diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc index 51c8aff5dd..321e252316 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc +++ b/mindspore/ccsrc/transform/graph_ir/op_declare/nonlinear_fuc_ops_declare.cc @@ -101,27 +101,27 @@ ATTR_MAP(TanhGrad) = EMPTY_ATTR_MAP; OUTPUT_MAP(TanhGrad) = {{0, OUTPUT_DESC(z)}}; REG_ADPT_DESC(TanhGrad, prim::kPrimTanhGrad->name(), ADPT_DESC(TanhGrad)) -// Gelu +// GeLU INPUT_MAP(Gelu) = {{1, INPUT_DESC(x)}}; ATTR_MAP(Gelu) = EMPTY_ATTR_MAP; OUTPUT_MAP(Gelu) = {{0, OUTPUT_DESC(y)}}; -REG_ADPT_DESC(Gelu, prim::kPrimGelu->name(), ADPT_DESC(Gelu)) +REG_ADPT_DESC(Gelu, prim::kPrimGeLU->name(), ADPT_DESC(Gelu)) -// GeluGrad +// GeLUGrad INPUT_MAP(GeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(y)}}; ATTR_MAP(GeluGrad) = EMPTY_ATTR_MAP; OUTPUT_MAP(GeluGrad) = {{0, OUTPUT_DESC(z)}}; -REG_ADPT_DESC(GeluGrad, prim::kPrimGeluGrad->name(), ADPT_DESC(GeluGrad)) +REG_ADPT_DESC(GeluGrad, prim::kPrimGeLUGrad->name(), ADPT_DESC(GeluGrad)) -// FastGelu +// FastGeLU INPUT_MAP(FastGelu) = {{1, INPUT_DESC(x)}}; ATTR_MAP(FastGelu) = EMPTY_ATTR_MAP; OUTPUT_MAP(FastGelu) = {{0, OUTPUT_DESC(y)}}; -REG_ADPT_DESC(FastGelu, prim::kPrimFastGelu->name(), ADPT_DESC(FastGelu)) +REG_ADPT_DESC(FastGelu, prim::kPrimFastGeLU->name(), ADPT_DESC(FastGelu)) -// FastGeluGrad +// FastGeLUGrad INPUT_MAP(FastGeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}}; ATTR_MAP(FastGeluGrad) = EMPTY_ATTR_MAP; OUTPUT_MAP(FastGeluGrad) = {{0, OUTPUT_DESC(z)}}; -REG_ADPT_DESC(FastGeluGrad, prim::kPrimFastGeluGrad->name(), ADPT_DESC(FastGeluGrad)) +REG_ADPT_DESC(FastGeluGrad, prim::kPrimFastGeLUGrad->name(), ADPT_DESC(FastGeluGrad)) } // namespace mindspore::transform diff --git a/mindspore/core/abstract/infer_functions.h b/mindspore/core/abstract/infer_functions.h index c9fa606faf..b4798de771 100644 --- a/mindspore/core/abstract/infer_functions.h +++ b/mindspore/core/abstract/infer_functions.h @@ -63,13 +63,13 @@ AbstractBasePtr InferImplConv2DBackpropFilter(const AnalysisEnginePtr &, const P const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplBiasAddGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); -AbstractBasePtr InferImplGelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive, +AbstractBasePtr InferImplGeLU(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); -AbstractBasePtr InferImplGeluGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, +AbstractBasePtr InferImplGeLUGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); -AbstractBasePtr InferImplFastGelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive, +AbstractBasePtr InferImplFastGeLU(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); -AbstractBasePtr InferImplFastGeluGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, +AbstractBasePtr InferImplFastGeLUGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); AbstractBasePtr InferImplRelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h index fc0c06527d..d849a41cf7 100644 --- a/mindspore/core/base/core_ops.h +++ b/mindspore/core/base/core_ops.h @@ -41,6 +41,10 @@ constexpr auto kTupleGetItem = "TupleGetItem"; constexpr auto kMakeTuple = "MakeTuple"; constexpr auto kReturn = "Return"; constexpr auto kSGD = "SGD"; +constexpr auto kGeLU = "GeLU"; +constexpr auto kGeLUGrad = "GeLUGrad"; +constexpr auto kFastGeLU = "FastGeLU"; +constexpr auto kFastGeLUGrad = "FastGeLUGrad"; // Here list all primitives used in backend or some special primitives used by core. // Arithmetic @@ -249,10 +253,10 @@ inline const PrimitivePtr kPrimDropout = std::make_shared("Dropout"); inline const PrimitivePtr kPrimUniformReal = std::make_shared("UniformReal"); inline const PrimitivePtr kPrimCudnnUniformReal = std::make_shared("CudnnUniformReal"); inline const PrimitivePtr kPrimOneHot = std::make_shared("OneHot"); -inline const PrimitivePtr kPrimGelu = std::make_shared("Gelu"); -inline const PrimitivePtr kPrimGeluGrad = std::make_shared("GeluGrad"); -inline const PrimitivePtr kPrimFastGelu = std::make_shared("FastGelu"); -inline const PrimitivePtr kPrimFastGeluGrad = std::make_shared("FastGeluGrad"); +inline const PrimitivePtr kPrimGeLU = std::make_shared(kGeLU); +inline const PrimitivePtr kPrimGeLUGrad = std::make_shared(kGeLUGrad); +inline const PrimitivePtr kPrimFastGeLU = std::make_shared(kFastGeLU); +inline const PrimitivePtr kPrimFastGeLUGrad = std::make_shared(kFastGeLUGrad); inline const PrimitivePtr kPrimRelu = std::make_shared("ReLU"); inline const PrimitivePtr kPrimElu = std::make_shared("ELU"); inline const PrimitivePtr kPrimRelu6 = std::make_shared("ReLU6"); diff --git a/mindspore/core/ops/gelu.cc b/mindspore/core/ops/gelu.cc index 3fccb33994..a8ab8f6aa9 100644 --- a/mindspore/core/ops/gelu.cc +++ b/mindspore/core/ops/gelu.cc @@ -26,16 +26,16 @@ namespace mindspore { namespace ops { namespace { -abstract::ShapePtr GeluInferShape(const PrimitivePtr &primitive, const std::vector &input_args) { +abstract::ShapePtr GeLUInferShape(const PrimitivePtr &primitive, const std::vector &input_args) { MS_EXCEPTION_IF_NULL(primitive); - auto gelu_prim = primitive->cast(); + auto gelu_prim = primitive->cast(); MS_EXCEPTION_IF_NULL(gelu_prim); auto prim_name = gelu_prim->name(); auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShape("input_x", input_args[0]->BuildShape(), prim_name); return std::make_shared(input_shape); } -TypePtr GeluInferType(const PrimitivePtr &prim, const std::vector &input_args) { +TypePtr GeLUInferType(const PrimitivePtr &prim, const std::vector &input_args) { for (const auto &item : input_args) { MS_EXCEPTION_IF_NULL(item); } @@ -46,13 +46,13 @@ TypePtr GeluInferType(const PrimitivePtr &prim, const std::vector &input_args) { - return std::make_shared(GeluInferType(primitive, input_args), - GeluInferShape(primitive, input_args)->shape()); + return std::make_shared(GeLUInferType(primitive, input_args), + GeLUInferShape(primitive, input_args)->shape()); } -REGISTER_PRIMITIVE_EVAL_IMPL(Gelu, prim::kPrimGelu, GeluInfer); -REGISTER_PRIMITIVE_C(kNameGelu, Gelu); +REGISTER_PRIMITIVE_EVAL_IMPL(GeLU, prim::kPrimGeLU, GeLUInfer); +REGISTER_PRIMITIVE_C(kNameGeLU, GeLU); } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/gelu.h b/mindspore/core/ops/gelu.h index 0050d851ea..87dc92ada6 100644 --- a/mindspore/core/ops/gelu.h +++ b/mindspore/core/ops/gelu.h @@ -25,17 +25,17 @@ namespace mindspore { namespace ops { -constexpr auto kNameGelu = "Gelu"; -class Gelu : public PrimitiveC { +constexpr auto kNameGeLU = "GeLU"; +class GeLU : public PrimitiveC { public: - Gelu() : PrimitiveC(kNameGelu) {} - ~Gelu() = default; - MS_DECLARE_PARENT(Gelu, PrimitiveC); + GeLU() : PrimitiveC(kNameGeLU) {} + ~GeLU() = default; + MS_DECLARE_PARENT(GeLU, PrimitiveC); void Init() {} }; -AbstractBasePtr GeluInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, +AbstractBasePtr GeLUInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, const std::vector &input_args); -using PrimGeluPtr = std::shared_ptr; +using PrimGeLUPtr = std::shared_ptr; } // namespace ops } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc b/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc index 698976b054..71f89a3e2e 100644 --- a/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc +++ b/mindspore/lite/tools/optimizer/graph/primitive_adjust_pass.cc @@ -81,7 +81,7 @@ using mindspore::ops::kNameDepthWiseConv2D; using mindspore::ops::kNameDiv; using mindspore::ops::kNameElu; using mindspore::ops::kNameExp; -using mindspore::ops::kNameGelu; +using mindspore::ops::kNameGeLU; using mindspore::ops::kNameL2Normalize; using mindspore::ops::kNameLayerNorm; using mindspore::ops::kNameLeakyRelu; @@ -117,7 +117,7 @@ constexpr auto kNameGatherV2 = "GatherV2"; constexpr auto kNameTensorAdd = "TensorAdd"; std::map activation_map = { {ops::kNameAbs, mindspore::ABS}, {ops::kNameElu, mindspore::ELU}, - {ops::kNameGelu, mindspore::GELU}, {ops::kNameLeakyRelu, mindspore::LEAKY_RELU}, + {ops::kNameGeLU, mindspore::GELU}, {ops::kNameLeakyRelu, mindspore::LEAKY_RELU}, {ops::kNameReLU, mindspore::RELU}, {ops::kNameReLU6, mindspore::RELU6}, {ops::kNameSigmoid, mindspore::SIGMOID}, {ops::kNameTanh, mindspore::TANH}}; @@ -413,7 +413,7 @@ REGIST_PRIMITIVE_ADJUST(kNameDiv, MoveAttrMapCommon) REGIST_PRIMITIVE_ADJUST(kNameElu, MoveAttrMapActivation) REGIST_PRIMITIVE_ADJUST(kNameExp, MoveAttrMapCommon) REGIST_PRIMITIVE_ADJUST(kNameGatherV2, MoveAttrMapCommon) -REGIST_PRIMITIVE_ADJUST(kNameGelu, MoveAttrMapActivation) +REGIST_PRIMITIVE_ADJUST(kNameGeLU, MoveAttrMapActivation) REGIST_PRIMITIVE_ADJUST(kNameL2Normalize, MoveAttrMapCommon) REGIST_PRIMITIVE_ADJUST(kNameLayerNorm, MoveAttrMapLayerNorm) REGIST_PRIMITIVE_ADJUST(kNameLeakyRelu, MoveAttrMapActivation) diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index 4a9ed4abcf..129fd0fd46 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -604,10 +604,10 @@ def get_bprop_tanh(self): return bprop -@bprop_getters.register(P.Gelu) +@bprop_getters.register(P.GeLU) def get_bprop_gelu(self): - """Grad definition for `Gelu` operation.""" - input_grad = G.GeluGrad() + """Grad definition for `GeLU` operation.""" + input_grad = G.GeLUGrad() def bprop(x, out, dout): dx = input_grad(dout, x, out) @@ -616,10 +616,10 @@ def get_bprop_gelu(self): return bprop -@bprop_getters.register(P.FastGelu) +@bprop_getters.register(P.FastGeLU) def get_bprop_fast_gelu(self): - """Grad definition for `FastGelu` operation.""" - input_grad = G.FastGeluGrad() + """Grad definition for `FastGeLU` operation.""" + input_grad = G.FastGeLUGrad() def bprop(x, out, dout): dx = input_grad(dout, x) diff --git a/mindspore/ops/_op_impl/tbe/fast_gelu.py b/mindspore/ops/_op_impl/tbe/fast_gelu.py index f108f7af7f..0491641b33 100644 --- a/mindspore/ops/_op_impl/tbe/fast_gelu.py +++ b/mindspore/ops/_op_impl/tbe/fast_gelu.py @@ -13,10 +13,10 @@ # limitations under the License. # ============================================================================ -"""FastGelu op""" +"""FastGeLU op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -fast_gelu_op_info = TBERegOp("FastGelu") \ +fast_gelu_op_info = TBERegOp("FastGeLU") \ .fusion_type("ELEMWISE") \ .async_flag(False) \ .binfile_name("fast_gelu.so") \ @@ -33,5 +33,5 @@ fast_gelu_op_info = TBERegOp("FastGelu") \ @op_info_register(fast_gelu_op_info) def _fast_gelu_tbe(): - """FastGelu TBE register""" + """FastGeLU TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py b/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py index 18baa6ec48..54fb796438 100644 --- a/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py +++ b/mindspore/ops/_op_impl/tbe/fast_gelu_grad.py @@ -13,10 +13,10 @@ # limitations under the License. # ============================================================================ -"""FastGeluGrad op""" +"""FastGeLUGrad op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -fast_gelu_grad_op_info = TBERegOp("FastGeluGrad") \ +fast_gelu_grad_op_info = TBERegOp("FastGeLUGrad") \ .fusion_type("ELEMWISE") \ .async_flag(False) \ .binfile_name("fast_gelu_grad.so") \ @@ -37,5 +37,5 @@ fast_gelu_grad_op_info = TBERegOp("FastGeluGrad") \ @op_info_register(fast_gelu_grad_op_info) def _fast_gelu_grad_tbe(): - """FastGeluGrad TBE register""" + """FastGeLUGrad TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/gelu.py b/mindspore/ops/_op_impl/tbe/gelu.py index a539cb8ef3..9c91e53d19 100644 --- a/mindspore/ops/_op_impl/tbe/gelu.py +++ b/mindspore/ops/_op_impl/tbe/gelu.py @@ -13,10 +13,10 @@ # limitations under the License. # ============================================================================ -"""Gelu op""" +"""GeLU op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -gelu_op_info = TBERegOp("Gelu") \ +gelu_op_info = TBERegOp("GeLU") \ .fusion_type("ELEMWISE") \ .async_flag(False) \ .binfile_name("gelu.so") \ @@ -33,5 +33,5 @@ gelu_op_info = TBERegOp("Gelu") \ @op_info_register(gelu_op_info) def _gelu_tbe(): - """Gelu TBE register""" + """GeLU TBE register""" return diff --git a/mindspore/ops/_op_impl/tbe/gelu_grad.py b/mindspore/ops/_op_impl/tbe/gelu_grad.py index ce62e55071..8d4da1a75a 100644 --- a/mindspore/ops/_op_impl/tbe/gelu_grad.py +++ b/mindspore/ops/_op_impl/tbe/gelu_grad.py @@ -13,10 +13,10 @@ # limitations under the License. # ============================================================================ -"""GeluGrad op""" +"""GeLUGrad op""" from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType -gelu_grad_op_info = TBERegOp("GeluGrad") \ +gelu_grad_op_info = TBERegOp("GeLUGrad") \ .fusion_type("ELEMWISE") \ .async_flag(False) \ .binfile_name("gelu_grad.so") \ @@ -38,5 +38,5 @@ gelu_grad_op_info = TBERegOp("GeluGrad") \ @op_info_register(gelu_grad_op_info) def _gelu_grad_tbe(): - """GeluGrad TBE register""" + """GeLUGrad TBE register""" return diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index a4d3bfafee..c2c7b5ac12 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -64,7 +64,7 @@ from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, Adam DepthwiseConv2dNative, DropoutDoMask, Dropout, DropoutGenMask, Flatten, FusedBatchNorm, FusedBatchNormEx, BNTrainingReduce, BNTrainingUpdate, - Gelu, FastGelu, Elu, + GeLU, Gelu, FastGeLU, FastGelu, Elu, GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, CTCGreedyDecoder, LogSoftmax, MaxPool, DataFormatDimMap, @@ -168,7 +168,9 @@ __all__ = [ 'Unpack', 'Tile', 'BiasAdd', + 'GeLU', 'Gelu', + 'FastGeLU', 'FastGelu', 'Minimum', 'Maximum', diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index 85e729e856..6dca4b3163 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -775,12 +775,12 @@ class BNTrainingUpdateGrad(PrimitiveWithInfer): return (batch_mean, batch_variance) -class GeluGrad(PrimitiveWithInfer): - """Gradients of Gelu operation.""" +class GeLUGrad(PrimitiveWithInfer): + """Gradients of GeLU operation.""" @prim_attr_register def __init__(self): - """Initialize GeluGrad""" + """Initialize GeLUGrad""" def infer_shape(self, y_backprop_shape, x_shape, y_shape): return x_shape @@ -793,12 +793,12 @@ class GeluGrad(PrimitiveWithInfer): return x_dtype -class FastGeluGrad(PrimitiveWithInfer): - """Gradients of FastGelu operation.""" +class FastGeLUGrad(PrimitiveWithInfer): + """Gradients of FastGeLU operation.""" @prim_attr_register def __init__(self): - """init FastGeluGrad""" + """init FastGeLUGrad""" def infer_shape(self, y_backprop_shape, x_shape): return x_shape diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 2ab20192c8..65ef0a05ed 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -19,6 +19,7 @@ import math import operator from functools import reduce, partial from mindspore._checkparam import _check_3d_int_or_tuple +from mindspore import log as logger import numpy as np from ... import context from .. import signature as sig @@ -2921,7 +2922,7 @@ class OneHot(PrimitiveWithInfer): 'value': None} -class Gelu(PrimitiveWithInfer): +class GeLU(PrimitiveWithInfer): r""" Gaussian Error Linear Units activation function. @@ -2929,7 +2930,7 @@ class Gelu(PrimitiveWithInfer): And also please refer to `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding `_. - Gelu is defined as follows: + GeLU is defined as follows: .. math:: \text{output} = 0.5 * x * (1 + erf(x / \sqrt{2})), @@ -2937,7 +2938,7 @@ class Gelu(PrimitiveWithInfer): where :math:`erf` is the "Gauss error function" . Inputs: - - **input_x** (Tensor) - Input to compute the Gelu with data type of float16 or float32. + - **input_x** (Tensor) - Input to compute the GeLU with data type of float16 or float32. Outputs: Tensor, with the same type and shape as input. @@ -2947,7 +2948,7 @@ class Gelu(PrimitiveWithInfer): Examples: >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) - >>> gelu = ops.Gelu() + >>> gelu = ops.GeLU() >>> result = gelu(tensor) >>> print(result) [0.841192 1.9545976 2.9963627] @@ -2965,12 +2966,16 @@ class Gelu(PrimitiveWithInfer): validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name) return input_x +def Gelu(): + logger.warning("WARN_DEPRECATED: The usage of Gelu is deprecated. Please use GeLU.") + return GeLU() -class FastGelu(PrimitiveWithInfer): + +class FastGeLU(PrimitiveWithInfer): r""" Fast Gaussian Error Linear Units activation function. - FastGelu is defined as follows: + FastGeLU is defined as follows: .. math:: \text{output} = \frac {x} {1 + \exp(-1.702 * \left| x \right|)} * \exp(0.851 * (x - \left| x \right|)), @@ -2978,7 +2983,7 @@ class FastGelu(PrimitiveWithInfer): where :math:`x` is the element of the input. Inputs: - - **input_x** (Tensor) - Input to compute the FastGelu with data type of float16 or float32. + - **input_x** (Tensor) - Input to compute the FastGeLU with data type of float16 or float32. Outputs: Tensor, with the same type and shape as input. @@ -2988,7 +2993,7 @@ class FastGelu(PrimitiveWithInfer): Examples: >>> tensor = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32) - >>> fast_gelu = P.FastGelu() + >>> fast_gelu = P.FastGeLU() >>> output = fast_gelu(tensor) >>> print(output) [[-1.5420423e-01 3.9955849e+00 -9.7664278e-06] @@ -3007,6 +3012,10 @@ class FastGelu(PrimitiveWithInfer): validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name) return input_x +def FastGelu(): + logger.warning("WARN_DEPRECATED: The usage of FastGelu is deprecated. Please use FastGeLU.") + return FastGeLU() + class GetNext(PrimitiveWithInfer): """ diff --git a/tests/st/ops/gpu/test_gelu_grad_op.py b/tests/st/ops/gpu/test_gelu_grad_op.py index 975355114e..5eaecf4e08 100644 --- a/tests/st/ops/gpu/test_gelu_grad_op.py +++ b/tests/st/ops/gpu/test_gelu_grad_op.py @@ -28,7 +28,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="GPU") class GeluNet(nn.Cell): def __init__(self): super(GeluNet, self).__init__() - self.gelu = P.Gelu() + self.gelu = P.GeLU() def construct(self, x): return self.gelu(x) diff --git a/tests/st/ops/graph_kernel/test_gelu.py b/tests/st/ops/graph_kernel/test_gelu.py index d5eb67a497..4497d945e3 100644 --- a/tests/st/ops/graph_kernel/test_gelu.py +++ b/tests/st/ops/graph_kernel/test_gelu.py @@ -25,7 +25,7 @@ import mindspore.ops.operations._grad_ops as G class GeluNet(Cell): def __init__(self): super(GeluNet, self).__init__() - self.gelu = P.Gelu() + self.gelu = P.GeLU() def construct(self, x): return self.gelu(x) @@ -34,7 +34,7 @@ class GeluNet(Cell): class GeluGradNet(Cell): def __init__(self): super(GeluGradNet, self).__init__() - self.gelu_grad = G.GeluGrad() + self.gelu_grad = G.GeLUGrad() def construct(self, dy, x, y): return self.gelu_grad(dy, x, y) diff --git a/tests/ut/cpp/ops/test_ops_gelu.cc b/tests/ut/cpp/ops/test_ops_gelu.cc index f17f36505c..e7c7bcc07d 100644 --- a/tests/ut/cpp/ops/test_ops_gelu.cc +++ b/tests/ut/cpp/ops/test_ops_gelu.cc @@ -24,15 +24,15 @@ namespace mindspore { namespace ops { -class TestGelu : public UT::Common { +class TestGeLU : public UT::Common { public: - TestGelu() {} + TestGeLU() {} void SetUp() {} void TearDown() {} }; -TEST_F(TestGelu, test_ops_gelu1) { - auto gelu = std::make_shared(); +TEST_F(TestGeLU, test_ops_gelu1) { + auto gelu = std::make_shared(); auto input1 = TensorConstructUtils::CreateOnesTensor(kNumberTypeFloat16, std::vector{1, 2}); MS_EXCEPTION_IF_NULL(input1); auto abstract = gelu->Infer({input1->ToAbstract()}); @@ -56,8 +56,8 @@ TEST_F(TestGelu, test_ops_gelu1) { EXPECT_EQ(shape_vec[0], 1); } -TEST_F(TestGelu, test_ops_gelu2) { - auto gelu = std::make_shared(); +TEST_F(TestGeLU, test_ops_gelu2) { + auto gelu = std::make_shared(); auto input1 = TensorConstructUtils::CreateOnesTensor(kNumberTypeFloat32, std::vector{1, 2}); MS_EXCEPTION_IF_NULL(input1); auto abstract = gelu->Infer({input1->ToAbstract()}); diff --git a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc index e0e9424ac2..3093a950ce 100644 --- a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc @@ -26,18 +26,18 @@ namespace mindspore { namespace parallel { -class GeluInfo; -using GeluInfoPtr = std::shared_ptr; -GeluInfoPtr gelu; +class GeLUInfo; +using GeLUInfoPtr = std::shared_ptr; +GeLUInfoPtr gelu; -class TestGeluInfo : public UT::Common { +class TestGeLUInfo : public UT::Common { public: - TestGeluInfo() {} + TestGeLUInfo() {} void SetUp(); void TearDown() {} }; -void TestGeluInfo::SetUp() { +void TestGeLUInfo::SetUp() { RankList dev_list; for (int32_t i = 0; i < 130; i++) { @@ -59,10 +59,10 @@ void TestGeluInfo::SetUp() { Shapes inputs_shape = {{2, 4, 8, 16}}; Shapes outputs_shape = {{2, 4, 8, 16}}; - gelu = std::make_shared("gelu_info", inputs_shape, outputs_shape, attr); + gelu = std::make_shared("gelu_info", inputs_shape, outputs_shape, attr); } -TEST_F(TestGeluInfo, InferDevMatrixShape1) { +TEST_F(TestGeLUInfo, InferDevMatrixShape1) { Strategys inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -73,7 +73,7 @@ TEST_F(TestGeluInfo, InferDevMatrixShape1) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestGeluInfo, InferSliceShape1) { +TEST_F(TestGeLUInfo, InferSliceShape1) { Strategys str = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -94,7 +94,7 @@ TEST_F(TestGeluInfo, InferSliceShape1) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestGeluInfo, GetTensorLayout1) { +TEST_F(TestGeLUInfo, GetTensorLayout1) { Strategys str = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -115,7 +115,7 @@ TEST_F(TestGeluInfo, GetTensorLayout1) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestGeluInfo, GetForwardOp1) { +TEST_F(TestGeLUInfo, GetForwardOp1) { Strategys inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -126,7 +126,7 @@ TEST_F(TestGeluInfo, GetForwardOp1) { ASSERT_EQ(size, 0); } -TEST_F(TestGeluInfo, GetMirrorOPs1) { +TEST_F(TestGeLUInfo, GetMirrorOPs1) { Strategys inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -138,7 +138,7 @@ TEST_F(TestGeluInfo, GetMirrorOPs1) { ASSERT_EQ(size, 0); } -TEST_F(TestGeluInfo, CheckStrategy1) { +TEST_F(TestGeLUInfo, CheckStrategy1) { // Success: {{2,4,1,16}} Strategys inputs = {{2, 2, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -147,7 +147,7 @@ TEST_F(TestGeluInfo, CheckStrategy1) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestGeluInfo, CheckStrategy2) { +TEST_F(TestGeLUInfo, CheckStrategy2) { // Success: {{2,4,1,16}} Strategys inputs = {{2, 4, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -156,7 +156,7 @@ TEST_F(TestGeluInfo, CheckStrategy2) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestGeluInfo, CheckStrategy3) { +TEST_F(TestGeLUInfo, CheckStrategy3) { // Success: {{2,4,1,16}} Strategys inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 165d266ede..de76a1af22 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -1544,12 +1544,12 @@ test_case_nn_ops = [ 'block': G.BiasAddGrad(), 'desc_inputs': [[1, 3, 3, 3]], 'skip': ['backward']}), - ('Gelu', { - 'block': P.Gelu(), + ('GeLU', { + 'block': P.GeLU(), 'desc_inputs': [[1, 3, 4, 4]], 'desc_bprop': [[1, 3, 4, 4]]}), - ('GeluGrad', { - 'block': G.GeluGrad(), + ('GeLUGrad', { + 'block': G.GeLUGrad(), 'desc_inputs': [[2, 2], [2, 2], [2, 2]], 'desc_bprop': [[2, 2]], 'skip': ['backward']}), diff --git a/tests/ut/python/parallel/test_dataset.py b/tests/ut/python/parallel/test_dataset.py index c4f2e68b5f..968de04430 100644 --- a/tests/ut/python/parallel/test_dataset.py +++ b/tests/ut/python/parallel/test_dataset.py @@ -27,7 +27,7 @@ class VirtualDatasetNet(nn.Cell): self.virtual_dataset = _VirtualDataset() self.matmul1 = P.MatMul() self.matmul2 = P.MatMul() - self.gelu = P.Gelu() + self.gelu = P.GeLU() def construct(self, x, y, z): x, y, z = self.virtual_dataset(x, y, z) diff --git a/tests/ut/python/parallel/test_virtual_dataset_3_input.py b/tests/ut/python/parallel/test_virtual_dataset_3_input.py index e1be9e7868..7bf2c877e1 100644 --- a/tests/ut/python/parallel/test_virtual_dataset_3_input.py +++ b/tests/ut/python/parallel/test_virtual_dataset_3_input.py @@ -57,7 +57,7 @@ def test_virtual_dataset_3_input(): self.virtual_dataset = _VirtualDataset().shard(strategy0) self.matmul1 = P.MatMul().shard(strategy1) self.matmul2 = P.MatMul().shard(strategy2) - self.gelu = P.Gelu().shard(strategy3) + self.gelu = P.GeLU().shard(strategy3) def construct(self, x, y, b): x, y, b = self.virtual_dataset(x, y, b) @@ -86,7 +86,7 @@ def test_virtualdataset_cell_3_inputs(): super().__init__() self.matmul1 = P.MatMul().shard(strategy1) self.matmul2 = P.MatMul().shard(strategy2) - self.gelu = P.Gelu().shard(strategy3) + self.gelu = P.GeLU().shard(strategy3) def construct(self, x, y, b): out = self.gelu(self.matmul1(x, y))