| @@ -147,10 +147,10 @@ | |||
| {"op_name": "Conv2DBackpropInput", "inputs": [{"index": 0, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "input_sizes", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "groups", "param_type": "optional", "type": "int", "value": "all"}, {"name": "format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "FracZ"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d_backprop_input_d.so", "compute_cost": 10, "kernel_name": "conv2d_backprop_input_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "ConfusionMulGrad", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_mul_grad.so", "compute_cost": 10, "kernel_name": "confusion_mul_grad", "partial_flag": false, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "DropoutDoMask", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "keep_prob", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "drop_out_do_mask.so", "compute_cost": 10, "kernel_name": "drop_out_do_mask", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "dynamicFormat"} | |||
| {"op_name": "Gelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} | |||
| {"op_name": "GeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "FastGelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu.so", "compute_cost": 10, "kernel_name": "fast_gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} | |||
| {"op_name": "FastGeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu_grad.so", "compute_cost": 10, "kernel_name": "fast_gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "GeLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} | |||
| {"op_name": "GeLUGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "FastGeLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu.so", "compute_cost": 10, "kernel_name": "fast_gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "formatAgnostic"} | |||
| {"op_name": "FastGeLUGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fast_gelu_grad.so", "compute_cost": 10, "kernel_name": "fast_gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "MaxPool", "inputs": [{"index": 0, "name": "input_data", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}, {"name": "format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool.so", "compute_cost": 10, "kernel_name": "max_pool", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "MaxPoolGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "MaxPoolGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| @@ -22,7 +22,7 @@ HALF = 0.5 | |||
| def expand_gelu(expand_info): | |||
| """Gelu expander""" | |||
| """GeLU expander""" | |||
| # The calculation formulas are: | |||
| # gelu(x) is 0.5 * x * (1.0 + tanh(y)) | |||
| # y is sqrt(2.0 / pi) * (x + 0.044715 * x * x * x) | |||
| @@ -23,7 +23,7 @@ HALF = 0.5 | |||
| def expand_gelugrad(expand_info): | |||
| """GeluGrad expander""" | |||
| """GeLUGrad expander""" | |||
| # The calculation formulas are: | |||
| # gelu_grad(dy, x) is dy * y' | |||
| # y' is 0.5 * (1.0 + tanh(tanh_para)) + 0.5 * x * (1.0 - tanh(tanh_para) * tanh(tanh_para)) * mul_right | |||
| @@ -18,14 +18,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_ONE(GeluGrad, | |||
| MS_REG_GPU_KERNEL_ONE(GeLUGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| GeLUGpuGradKernel, float) | |||
| MS_REG_GPU_KERNEL_ONE(GeluGrad, | |||
| MS_REG_GPU_KERNEL_ONE(GeLUGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| @@ -18,9 +18,9 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| MS_REG_GPU_KERNEL_ONE(GeLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| GeluGpuKernel, float) | |||
| MS_REG_GPU_KERNEL_ONE(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| MS_REG_GPU_KERNEL_ONE(GeLU, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| GeluGpuKernel, half) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -701,7 +701,7 @@ FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNo | |||
| std::unordered_set<PrimitivePtr> GetExpandOps() { | |||
| std::unordered_set<PrimitivePtr> expand_ops = { | |||
| prim::kPrimSquare, | |||
| prim::kPrimGeluGrad, | |||
| prim::kPrimGeLUGrad, | |||
| #if ENABLE_D | |||
| prim::kPrimTile, | |||
| prim::kPrimSqrtGrad, | |||
| @@ -709,7 +709,7 @@ std::unordered_set<PrimitivePtr> GetExpandOps() { | |||
| #elif ENABLE_GPU | |||
| prim::kPrimBiasAdd, | |||
| prim::kPrimBiasAddGrad, | |||
| prim::kPrimGelu, | |||
| prim::kPrimGeLU, | |||
| prim::kPrimFusedAdam, | |||
| prim::kPrimFusedAdamWeightDecay, | |||
| prim::kPrimReduceMean, | |||
| @@ -77,7 +77,7 @@ class RegisterAction { | |||
| // operator register | |||
| REGISTER(MatMulInfo); | |||
| REGISTER(GeluInfo); | |||
| REGISTER(GeLUInfo); | |||
| REGISTER(VirtualDatasetInfo); | |||
| REGISTER(BatchParallelInfo); | |||
| REGISTER(TanhInfo); | |||
| @@ -82,12 +82,12 @@ class ActivationOther : public Activation { | |||
| Status GetAttrs() override; | |||
| }; | |||
| class GeluInfo : public ActivationOther { | |||
| class GeLUInfo : public ActivationOther { | |||
| public: | |||
| GeluInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, | |||
| GeLUInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, | |||
| const PrimitiveAttrs &attrs) | |||
| : ActivationOther(name, inputs_shape, outputs_shape, attrs) {} | |||
| ~GeluInfo() override = default; | |||
| ~GeLUInfo() override = default; | |||
| }; | |||
| class TanhInfo : public ActivationOther { | |||
| @@ -182,7 +182,7 @@ constexpr char CONCAT[] = "Concat"; | |||
| constexpr char SOFTMAX_CROSS_ENTROPY_WITH_LOGITS[] = "SoftmaxCrossEntropyWithLogits"; | |||
| constexpr char SIGMOID_CROSS_ENTROPY_WITH_LOGITS[] = "SigmoidCrossEntropyWithLogits"; | |||
| constexpr char MATMUL[] = "MatMul"; | |||
| constexpr char GELU[] = "Gelu"; | |||
| constexpr char GELU[] = "GeLU"; | |||
| constexpr char TANH[] = "Tanh"; | |||
| constexpr char RECEIVE[] = "Receive"; | |||
| constexpr char SEND[] = "Send"; | |||
| @@ -101,27 +101,27 @@ ATTR_MAP(TanhGrad) = EMPTY_ATTR_MAP; | |||
| OUTPUT_MAP(TanhGrad) = {{0, OUTPUT_DESC(z)}}; | |||
| REG_ADPT_DESC(TanhGrad, prim::kPrimTanhGrad->name(), ADPT_DESC(TanhGrad)) | |||
| // Gelu | |||
| // GeLU | |||
| INPUT_MAP(Gelu) = {{1, INPUT_DESC(x)}}; | |||
| ATTR_MAP(Gelu) = EMPTY_ATTR_MAP; | |||
| OUTPUT_MAP(Gelu) = {{0, OUTPUT_DESC(y)}}; | |||
| REG_ADPT_DESC(Gelu, prim::kPrimGelu->name(), ADPT_DESC(Gelu)) | |||
| REG_ADPT_DESC(Gelu, prim::kPrimGeLU->name(), ADPT_DESC(Gelu)) | |||
| // GeluGrad | |||
| // GeLUGrad | |||
| INPUT_MAP(GeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(y)}}; | |||
| ATTR_MAP(GeluGrad) = EMPTY_ATTR_MAP; | |||
| OUTPUT_MAP(GeluGrad) = {{0, OUTPUT_DESC(z)}}; | |||
| REG_ADPT_DESC(GeluGrad, prim::kPrimGeluGrad->name(), ADPT_DESC(GeluGrad)) | |||
| REG_ADPT_DESC(GeluGrad, prim::kPrimGeLUGrad->name(), ADPT_DESC(GeluGrad)) | |||
| // FastGelu | |||
| // FastGeLU | |||
| INPUT_MAP(FastGelu) = {{1, INPUT_DESC(x)}}; | |||
| ATTR_MAP(FastGelu) = EMPTY_ATTR_MAP; | |||
| OUTPUT_MAP(FastGelu) = {{0, OUTPUT_DESC(y)}}; | |||
| REG_ADPT_DESC(FastGelu, prim::kPrimFastGelu->name(), ADPT_DESC(FastGelu)) | |||
| REG_ADPT_DESC(FastGelu, prim::kPrimFastGeLU->name(), ADPT_DESC(FastGelu)) | |||
| // FastGeluGrad | |||
| // FastGeLUGrad | |||
| INPUT_MAP(FastGeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}}; | |||
| ATTR_MAP(FastGeluGrad) = EMPTY_ATTR_MAP; | |||
| OUTPUT_MAP(FastGeluGrad) = {{0, OUTPUT_DESC(z)}}; | |||
| REG_ADPT_DESC(FastGeluGrad, prim::kPrimFastGeluGrad->name(), ADPT_DESC(FastGeluGrad)) | |||
| REG_ADPT_DESC(FastGeluGrad, prim::kPrimFastGeLUGrad->name(), ADPT_DESC(FastGeluGrad)) | |||
| } // namespace mindspore::transform | |||
| @@ -63,13 +63,13 @@ AbstractBasePtr InferImplConv2DBackpropFilter(const AnalysisEnginePtr &, const P | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplBiasAddGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplGelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| AbstractBasePtr InferImplGeLU(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplGeluGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| AbstractBasePtr InferImplGeLUGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplFastGelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| AbstractBasePtr InferImplFastGeLU(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplFastGeluGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| AbstractBasePtr InferImplFastGeLUGrad(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplRelu(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| @@ -41,6 +41,10 @@ constexpr auto kTupleGetItem = "TupleGetItem"; | |||
| constexpr auto kMakeTuple = "MakeTuple"; | |||
| constexpr auto kReturn = "Return"; | |||
| constexpr auto kSGD = "SGD"; | |||
| constexpr auto kGeLU = "GeLU"; | |||
| constexpr auto kGeLUGrad = "GeLUGrad"; | |||
| constexpr auto kFastGeLU = "FastGeLU"; | |||
| constexpr auto kFastGeLUGrad = "FastGeLUGrad"; | |||
| // Here list all primitives used in backend or some special primitives used by core. | |||
| // Arithmetic | |||
| @@ -249,10 +253,10 @@ inline const PrimitivePtr kPrimDropout = std::make_shared<Primitive>("Dropout"); | |||
| inline const PrimitivePtr kPrimUniformReal = std::make_shared<Primitive>("UniformReal"); | |||
| inline const PrimitivePtr kPrimCudnnUniformReal = std::make_shared<Primitive>("CudnnUniformReal"); | |||
| inline const PrimitivePtr kPrimOneHot = std::make_shared<Primitive>("OneHot"); | |||
| inline const PrimitivePtr kPrimGelu = std::make_shared<Primitive>("Gelu"); | |||
| inline const PrimitivePtr kPrimGeluGrad = std::make_shared<Primitive>("GeluGrad"); | |||
| inline const PrimitivePtr kPrimFastGelu = std::make_shared<Primitive>("FastGelu"); | |||
| inline const PrimitivePtr kPrimFastGeluGrad = std::make_shared<Primitive>("FastGeluGrad"); | |||
| inline const PrimitivePtr kPrimGeLU = std::make_shared<Primitive>(kGeLU); | |||
| inline const PrimitivePtr kPrimGeLUGrad = std::make_shared<Primitive>(kGeLUGrad); | |||
| inline const PrimitivePtr kPrimFastGeLU = std::make_shared<Primitive>(kFastGeLU); | |||
| inline const PrimitivePtr kPrimFastGeLUGrad = std::make_shared<Primitive>(kFastGeLUGrad); | |||
| inline const PrimitivePtr kPrimRelu = std::make_shared<Primitive>("ReLU"); | |||
| inline const PrimitivePtr kPrimElu = std::make_shared<Primitive>("ELU"); | |||
| inline const PrimitivePtr kPrimRelu6 = std::make_shared<Primitive>("ReLU6"); | |||
| @@ -26,16 +26,16 @@ | |||
| namespace mindspore { | |||
| namespace ops { | |||
| namespace { | |||
| abstract::ShapePtr GeluInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { | |||
| abstract::ShapePtr GeLUInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| auto gelu_prim = primitive->cast<PrimGeluPtr>(); | |||
| auto gelu_prim = primitive->cast<PrimGeLUPtr>(); | |||
| MS_EXCEPTION_IF_NULL(gelu_prim); | |||
| auto prim_name = gelu_prim->name(); | |||
| auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShape("input_x", input_args[0]->BuildShape(), prim_name); | |||
| return std::make_shared<abstract::Shape>(input_shape); | |||
| } | |||
| TypePtr GeluInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) { | |||
| TypePtr GeLUInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) { | |||
| for (const auto &item : input_args) { | |||
| MS_EXCEPTION_IF_NULL(item); | |||
| } | |||
| @@ -46,13 +46,13 @@ TypePtr GeluInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePt | |||
| return TypeIdToType(infer_type); | |||
| } | |||
| } // namespace | |||
| AbstractBasePtr GeluInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| AbstractBasePtr GeLUInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const std::vector<AbstractBasePtr> &input_args) { | |||
| return std::make_shared<abstract::AbstractTensor>(GeluInferType(primitive, input_args), | |||
| GeluInferShape(primitive, input_args)->shape()); | |||
| return std::make_shared<abstract::AbstractTensor>(GeLUInferType(primitive, input_args), | |||
| GeLUInferShape(primitive, input_args)->shape()); | |||
| } | |||
| REGISTER_PRIMITIVE_EVAL_IMPL(Gelu, prim::kPrimGelu, GeluInfer); | |||
| REGISTER_PRIMITIVE_C(kNameGelu, Gelu); | |||
| REGISTER_PRIMITIVE_EVAL_IMPL(GeLU, prim::kPrimGeLU, GeLUInfer); | |||
| REGISTER_PRIMITIVE_C(kNameGeLU, GeLU); | |||
| } // namespace ops | |||
| } // namespace mindspore | |||
| @@ -25,17 +25,17 @@ | |||
| namespace mindspore { | |||
| namespace ops { | |||
| constexpr auto kNameGelu = "Gelu"; | |||
| class Gelu : public PrimitiveC { | |||
| constexpr auto kNameGeLU = "GeLU"; | |||
| class GeLU : public PrimitiveC { | |||
| public: | |||
| Gelu() : PrimitiveC(kNameGelu) {} | |||
| ~Gelu() = default; | |||
| MS_DECLARE_PARENT(Gelu, PrimitiveC); | |||
| GeLU() : PrimitiveC(kNameGeLU) {} | |||
| ~GeLU() = default; | |||
| MS_DECLARE_PARENT(GeLU, PrimitiveC); | |||
| void Init() {} | |||
| }; | |||
| AbstractBasePtr GeluInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| AbstractBasePtr GeLUInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const std::vector<AbstractBasePtr> &input_args); | |||
| using PrimGeluPtr = std::shared_ptr<Gelu>; | |||
| using PrimGeLUPtr = std::shared_ptr<GeLU>; | |||
| } // namespace ops | |||
| } // namespace mindspore | |||
| @@ -81,7 +81,7 @@ using mindspore::ops::kNameDepthWiseConv2D; | |||
| using mindspore::ops::kNameDiv; | |||
| using mindspore::ops::kNameElu; | |||
| using mindspore::ops::kNameExp; | |||
| using mindspore::ops::kNameGelu; | |||
| using mindspore::ops::kNameGeLU; | |||
| using mindspore::ops::kNameL2Normalize; | |||
| using mindspore::ops::kNameLayerNorm; | |||
| using mindspore::ops::kNameLeakyRelu; | |||
| @@ -117,7 +117,7 @@ constexpr auto kNameGatherV2 = "GatherV2"; | |||
| constexpr auto kNameTensorAdd = "TensorAdd"; | |||
| std::map<std::string, mindspore::ActivationType> activation_map = { | |||
| {ops::kNameAbs, mindspore::ABS}, {ops::kNameElu, mindspore::ELU}, | |||
| {ops::kNameGelu, mindspore::GELU}, {ops::kNameLeakyRelu, mindspore::LEAKY_RELU}, | |||
| {ops::kNameGeLU, mindspore::GELU}, {ops::kNameLeakyRelu, mindspore::LEAKY_RELU}, | |||
| {ops::kNameReLU, mindspore::RELU}, {ops::kNameReLU6, mindspore::RELU6}, | |||
| {ops::kNameSigmoid, mindspore::SIGMOID}, {ops::kNameTanh, mindspore::TANH}}; | |||
| @@ -413,7 +413,7 @@ REGIST_PRIMITIVE_ADJUST(kNameDiv, MoveAttrMapCommon<ops::DivFusion>) | |||
| REGIST_PRIMITIVE_ADJUST(kNameElu, MoveAttrMapActivation) | |||
| REGIST_PRIMITIVE_ADJUST(kNameExp, MoveAttrMapCommon<ops::ExpFusion>) | |||
| REGIST_PRIMITIVE_ADJUST(kNameGatherV2, MoveAttrMapCommon<ops::Gather>) | |||
| REGIST_PRIMITIVE_ADJUST(kNameGelu, MoveAttrMapActivation) | |||
| REGIST_PRIMITIVE_ADJUST(kNameGeLU, MoveAttrMapActivation) | |||
| REGIST_PRIMITIVE_ADJUST(kNameL2Normalize, MoveAttrMapCommon<ops::L2NormalizeFusion>) | |||
| REGIST_PRIMITIVE_ADJUST(kNameLayerNorm, MoveAttrMapLayerNorm) | |||
| REGIST_PRIMITIVE_ADJUST(kNameLeakyRelu, MoveAttrMapActivation) | |||
| @@ -604,10 +604,10 @@ def get_bprop_tanh(self): | |||
| return bprop | |||
| @bprop_getters.register(P.Gelu) | |||
| @bprop_getters.register(P.GeLU) | |||
| def get_bprop_gelu(self): | |||
| """Grad definition for `Gelu` operation.""" | |||
| input_grad = G.GeluGrad() | |||
| """Grad definition for `GeLU` operation.""" | |||
| input_grad = G.GeLUGrad() | |||
| def bprop(x, out, dout): | |||
| dx = input_grad(dout, x, out) | |||
| @@ -616,10 +616,10 @@ def get_bprop_gelu(self): | |||
| return bprop | |||
| @bprop_getters.register(P.FastGelu) | |||
| @bprop_getters.register(P.FastGeLU) | |||
| def get_bprop_fast_gelu(self): | |||
| """Grad definition for `FastGelu` operation.""" | |||
| input_grad = G.FastGeluGrad() | |||
| """Grad definition for `FastGeLU` operation.""" | |||
| input_grad = G.FastGeLUGrad() | |||
| def bprop(x, out, dout): | |||
| dx = input_grad(dout, x) | |||
| @@ -13,10 +13,10 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FastGelu op""" | |||
| """FastGeLU op""" | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
| fast_gelu_op_info = TBERegOp("FastGelu") \ | |||
| fast_gelu_op_info = TBERegOp("FastGeLU") \ | |||
| .fusion_type("ELEMWISE") \ | |||
| .async_flag(False) \ | |||
| .binfile_name("fast_gelu.so") \ | |||
| @@ -33,5 +33,5 @@ fast_gelu_op_info = TBERegOp("FastGelu") \ | |||
| @op_info_register(fast_gelu_op_info) | |||
| def _fast_gelu_tbe(): | |||
| """FastGelu TBE register""" | |||
| """FastGeLU TBE register""" | |||
| return | |||
| @@ -13,10 +13,10 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FastGeluGrad op""" | |||
| """FastGeLUGrad op""" | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
| fast_gelu_grad_op_info = TBERegOp("FastGeluGrad") \ | |||
| fast_gelu_grad_op_info = TBERegOp("FastGeLUGrad") \ | |||
| .fusion_type("ELEMWISE") \ | |||
| .async_flag(False) \ | |||
| .binfile_name("fast_gelu_grad.so") \ | |||
| @@ -37,5 +37,5 @@ fast_gelu_grad_op_info = TBERegOp("FastGeluGrad") \ | |||
| @op_info_register(fast_gelu_grad_op_info) | |||
| def _fast_gelu_grad_tbe(): | |||
| """FastGeluGrad TBE register""" | |||
| """FastGeLUGrad TBE register""" | |||
| return | |||
| @@ -13,10 +13,10 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Gelu op""" | |||
| """GeLU op""" | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
| gelu_op_info = TBERegOp("Gelu") \ | |||
| gelu_op_info = TBERegOp("GeLU") \ | |||
| .fusion_type("ELEMWISE") \ | |||
| .async_flag(False) \ | |||
| .binfile_name("gelu.so") \ | |||
| @@ -33,5 +33,5 @@ gelu_op_info = TBERegOp("Gelu") \ | |||
| @op_info_register(gelu_op_info) | |||
| def _gelu_tbe(): | |||
| """Gelu TBE register""" | |||
| """GeLU TBE register""" | |||
| return | |||
| @@ -13,10 +13,10 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """GeluGrad op""" | |||
| """GeLUGrad op""" | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
| gelu_grad_op_info = TBERegOp("GeluGrad") \ | |||
| gelu_grad_op_info = TBERegOp("GeLUGrad") \ | |||
| .fusion_type("ELEMWISE") \ | |||
| .async_flag(False) \ | |||
| .binfile_name("gelu_grad.so") \ | |||
| @@ -38,5 +38,5 @@ gelu_grad_op_info = TBERegOp("GeluGrad") \ | |||
| @op_info_register(gelu_grad_op_info) | |||
| def _gelu_grad_tbe(): | |||
| """GeluGrad TBE register""" | |||
| """GeLUGrad TBE register""" | |||
| return | |||
| @@ -64,7 +64,7 @@ from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, Adam | |||
| DepthwiseConv2dNative, | |||
| DropoutDoMask, Dropout, | |||
| DropoutGenMask, Flatten, FusedBatchNorm, FusedBatchNormEx, BNTrainingReduce, BNTrainingUpdate, | |||
| Gelu, FastGelu, Elu, | |||
| GeLU, Gelu, FastGeLU, FastGelu, Elu, | |||
| GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, CTCGreedyDecoder, | |||
| LogSoftmax, | |||
| MaxPool, DataFormatDimMap, | |||
| @@ -168,7 +168,9 @@ __all__ = [ | |||
| 'Unpack', | |||
| 'Tile', | |||
| 'BiasAdd', | |||
| 'GeLU', | |||
| 'Gelu', | |||
| 'FastGeLU', | |||
| 'FastGelu', | |||
| 'Minimum', | |||
| 'Maximum', | |||
| @@ -775,12 +775,12 @@ class BNTrainingUpdateGrad(PrimitiveWithInfer): | |||
| return (batch_mean, batch_variance) | |||
| class GeluGrad(PrimitiveWithInfer): | |||
| """Gradients of Gelu operation.""" | |||
| class GeLUGrad(PrimitiveWithInfer): | |||
| """Gradients of GeLU operation.""" | |||
| @prim_attr_register | |||
| def __init__(self): | |||
| """Initialize GeluGrad""" | |||
| """Initialize GeLUGrad""" | |||
| def infer_shape(self, y_backprop_shape, x_shape, y_shape): | |||
| return x_shape | |||
| @@ -793,12 +793,12 @@ class GeluGrad(PrimitiveWithInfer): | |||
| return x_dtype | |||
| class FastGeluGrad(PrimitiveWithInfer): | |||
| """Gradients of FastGelu operation.""" | |||
| class FastGeLUGrad(PrimitiveWithInfer): | |||
| """Gradients of FastGeLU operation.""" | |||
| @prim_attr_register | |||
| def __init__(self): | |||
| """init FastGeluGrad""" | |||
| """init FastGeLUGrad""" | |||
| def infer_shape(self, y_backprop_shape, x_shape): | |||
| return x_shape | |||
| @@ -19,6 +19,7 @@ import math | |||
| import operator | |||
| from functools import reduce, partial | |||
| from mindspore._checkparam import _check_3d_int_or_tuple | |||
| from mindspore import log as logger | |||
| import numpy as np | |||
| from ... import context | |||
| from .. import signature as sig | |||
| @@ -2921,7 +2922,7 @@ class OneHot(PrimitiveWithInfer): | |||
| 'value': None} | |||
| class Gelu(PrimitiveWithInfer): | |||
| class GeLU(PrimitiveWithInfer): | |||
| r""" | |||
| Gaussian Error Linear Units activation function. | |||
| @@ -2929,7 +2930,7 @@ class Gelu(PrimitiveWithInfer): | |||
| And also please refer to `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding | |||
| <https://arxiv.org/abs/1810.04805>`_. | |||
| Gelu is defined as follows: | |||
| GeLU is defined as follows: | |||
| .. math:: | |||
| \text{output} = 0.5 * x * (1 + erf(x / \sqrt{2})), | |||
| @@ -2937,7 +2938,7 @@ class Gelu(PrimitiveWithInfer): | |||
| where :math:`erf` is the "Gauss error function" . | |||
| Inputs: | |||
| - **input_x** (Tensor) - Input to compute the Gelu with data type of float16 or float32. | |||
| - **input_x** (Tensor) - Input to compute the GeLU with data type of float16 or float32. | |||
| Outputs: | |||
| Tensor, with the same type and shape as input. | |||
| @@ -2947,7 +2948,7 @@ class Gelu(PrimitiveWithInfer): | |||
| Examples: | |||
| >>> tensor = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32) | |||
| >>> gelu = ops.Gelu() | |||
| >>> gelu = ops.GeLU() | |||
| >>> result = gelu(tensor) | |||
| >>> print(result) | |||
| [0.841192 1.9545976 2.9963627] | |||
| @@ -2965,12 +2966,16 @@ class Gelu(PrimitiveWithInfer): | |||
| validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name) | |||
| return input_x | |||
| def Gelu(): | |||
| logger.warning("WARN_DEPRECATED: The usage of Gelu is deprecated. Please use GeLU.") | |||
| return GeLU() | |||
| class FastGelu(PrimitiveWithInfer): | |||
| class FastGeLU(PrimitiveWithInfer): | |||
| r""" | |||
| Fast Gaussian Error Linear Units activation function. | |||
| FastGelu is defined as follows: | |||
| FastGeLU is defined as follows: | |||
| .. math:: | |||
| \text{output} = \frac {x} {1 + \exp(-1.702 * \left| x \right|)} * \exp(0.851 * (x - \left| x \right|)), | |||
| @@ -2978,7 +2983,7 @@ class FastGelu(PrimitiveWithInfer): | |||
| where :math:`x` is the element of the input. | |||
| Inputs: | |||
| - **input_x** (Tensor) - Input to compute the FastGelu with data type of float16 or float32. | |||
| - **input_x** (Tensor) - Input to compute the FastGeLU with data type of float16 or float32. | |||
| Outputs: | |||
| Tensor, with the same type and shape as input. | |||
| @@ -2988,7 +2993,7 @@ class FastGelu(PrimitiveWithInfer): | |||
| Examples: | |||
| >>> tensor = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32) | |||
| >>> fast_gelu = P.FastGelu() | |||
| >>> fast_gelu = P.FastGeLU() | |||
| >>> output = fast_gelu(tensor) | |||
| >>> print(output) | |||
| [[-1.5420423e-01 3.9955849e+00 -9.7664278e-06] | |||
| @@ -3007,6 +3012,10 @@ class FastGelu(PrimitiveWithInfer): | |||
| validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name) | |||
| return input_x | |||
| def FastGelu(): | |||
| logger.warning("WARN_DEPRECATED: The usage of FastGelu is deprecated. Please use FastGeLU.") | |||
| return FastGeLU() | |||
| class GetNext(PrimitiveWithInfer): | |||
| """ | |||
| @@ -28,7 +28,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="GPU") | |||
| class GeluNet(nn.Cell): | |||
| def __init__(self): | |||
| super(GeluNet, self).__init__() | |||
| self.gelu = P.Gelu() | |||
| self.gelu = P.GeLU() | |||
| def construct(self, x): | |||
| return self.gelu(x) | |||
| @@ -25,7 +25,7 @@ import mindspore.ops.operations._grad_ops as G | |||
| class GeluNet(Cell): | |||
| def __init__(self): | |||
| super(GeluNet, self).__init__() | |||
| self.gelu = P.Gelu() | |||
| self.gelu = P.GeLU() | |||
| def construct(self, x): | |||
| return self.gelu(x) | |||
| @@ -34,7 +34,7 @@ class GeluNet(Cell): | |||
| class GeluGradNet(Cell): | |||
| def __init__(self): | |||
| super(GeluGradNet, self).__init__() | |||
| self.gelu_grad = G.GeluGrad() | |||
| self.gelu_grad = G.GeLUGrad() | |||
| def construct(self, dy, x, y): | |||
| return self.gelu_grad(dy, x, y) | |||
| @@ -24,15 +24,15 @@ | |||
| namespace mindspore { | |||
| namespace ops { | |||
| class TestGelu : public UT::Common { | |||
| class TestGeLU : public UT::Common { | |||
| public: | |||
| TestGelu() {} | |||
| TestGeLU() {} | |||
| void SetUp() {} | |||
| void TearDown() {} | |||
| }; | |||
| TEST_F(TestGelu, test_ops_gelu1) { | |||
| auto gelu = std::make_shared<Gelu>(); | |||
| TEST_F(TestGeLU, test_ops_gelu1) { | |||
| auto gelu = std::make_shared<GeLU>(); | |||
| auto input1 = TensorConstructUtils::CreateOnesTensor(kNumberTypeFloat16, std::vector<int64_t>{1, 2}); | |||
| MS_EXCEPTION_IF_NULL(input1); | |||
| auto abstract = gelu->Infer({input1->ToAbstract()}); | |||
| @@ -56,8 +56,8 @@ TEST_F(TestGelu, test_ops_gelu1) { | |||
| EXPECT_EQ(shape_vec[0], 1); | |||
| } | |||
| TEST_F(TestGelu, test_ops_gelu2) { | |||
| auto gelu = std::make_shared<Gelu>(); | |||
| TEST_F(TestGeLU, test_ops_gelu2) { | |||
| auto gelu = std::make_shared<GeLU>(); | |||
| auto input1 = TensorConstructUtils::CreateOnesTensor(kNumberTypeFloat32, std::vector<int64_t>{1, 2}); | |||
| MS_EXCEPTION_IF_NULL(input1); | |||
| auto abstract = gelu->Infer({input1->ToAbstract()}); | |||
| @@ -26,18 +26,18 @@ | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| class GeluInfo; | |||
| using GeluInfoPtr = std::shared_ptr<GeluInfo>; | |||
| GeluInfoPtr gelu; | |||
| class GeLUInfo; | |||
| using GeLUInfoPtr = std::shared_ptr<GeLUInfo>; | |||
| GeLUInfoPtr gelu; | |||
| class TestGeluInfo : public UT::Common { | |||
| class TestGeLUInfo : public UT::Common { | |||
| public: | |||
| TestGeluInfo() {} | |||
| TestGeLUInfo() {} | |||
| void SetUp(); | |||
| void TearDown() {} | |||
| }; | |||
| void TestGeluInfo::SetUp() { | |||
| void TestGeLUInfo::SetUp() { | |||
| RankList dev_list; | |||
| for (int32_t i = 0; i < 130; i++) { | |||
| @@ -59,10 +59,10 @@ void TestGeluInfo::SetUp() { | |||
| Shapes inputs_shape = {{2, 4, 8, 16}}; | |||
| Shapes outputs_shape = {{2, 4, 8, 16}}; | |||
| gelu = std::make_shared<GeluInfo>("gelu_info", inputs_shape, outputs_shape, attr); | |||
| gelu = std::make_shared<GeLUInfo>("gelu_info", inputs_shape, outputs_shape, attr); | |||
| } | |||
| TEST_F(TestGeluInfo, InferDevMatrixShape1) { | |||
| TEST_F(TestGeLUInfo, InferDevMatrixShape1) { | |||
| Strategys inputs = {{2, 4, 1, 16}}; | |||
| StrategyPtr strategy = NewStrategy(0, inputs); | |||
| @@ -73,7 +73,7 @@ TEST_F(TestGeluInfo, InferDevMatrixShape1) { | |||
| ASSERT_EQ(dev_matrix_shape, expect); | |||
| } | |||
| TEST_F(TestGeluInfo, InferSliceShape1) { | |||
| TEST_F(TestGeLUInfo, InferSliceShape1) { | |||
| Strategys str = {{2, 4, 1, 16}}; | |||
| StrategyPtr strategy = NewStrategy(0, str); | |||
| @@ -94,7 +94,7 @@ TEST_F(TestGeluInfo, InferSliceShape1) { | |||
| ASSERT_EQ(output_slice_shape, output_slice_shape_expect); | |||
| } | |||
| TEST_F(TestGeluInfo, GetTensorLayout1) { | |||
| TEST_F(TestGeLUInfo, GetTensorLayout1) { | |||
| Strategys str = {{2, 4, 1, 16}}; | |||
| StrategyPtr strategy = NewStrategy(0, str); | |||
| @@ -115,7 +115,7 @@ TEST_F(TestGeluInfo, GetTensorLayout1) { | |||
| ASSERT_EQ(output_tensor_map.array(), output_expect); | |||
| } | |||
| TEST_F(TestGeluInfo, GetForwardOp1) { | |||
| TEST_F(TestGeLUInfo, GetForwardOp1) { | |||
| Strategys inputs = {{2, 4, 1, 16}}; | |||
| StrategyPtr strategy = NewStrategy(0, inputs); | |||
| @@ -126,7 +126,7 @@ TEST_F(TestGeluInfo, GetForwardOp1) { | |||
| ASSERT_EQ(size, 0); | |||
| } | |||
| TEST_F(TestGeluInfo, GetMirrorOPs1) { | |||
| TEST_F(TestGeLUInfo, GetMirrorOPs1) { | |||
| Strategys inputs = {{2, 4, 1, 16}}; | |||
| StrategyPtr strategy = NewStrategy(0, inputs); | |||
| @@ -138,7 +138,7 @@ TEST_F(TestGeluInfo, GetMirrorOPs1) { | |||
| ASSERT_EQ(size, 0); | |||
| } | |||
| TEST_F(TestGeluInfo, CheckStrategy1) { | |||
| TEST_F(TestGeLUInfo, CheckStrategy1) { | |||
| // Success: {{2,4,1,16}} | |||
| Strategys inputs = {{2, 2, 8, 16}, {2, 4, 16, 1}}; | |||
| StrategyPtr strategy = NewStrategy(0, inputs); | |||
| @@ -147,7 +147,7 @@ TEST_F(TestGeluInfo, CheckStrategy1) { | |||
| ASSERT_EQ(ret, FAILED); | |||
| } | |||
| TEST_F(TestGeluInfo, CheckStrategy2) { | |||
| TEST_F(TestGeLUInfo, CheckStrategy2) { | |||
| // Success: {{2,4,1,16}} | |||
| Strategys inputs = {{2, 4, 8}}; | |||
| StrategyPtr strategy = NewStrategy(0, inputs); | |||
| @@ -156,7 +156,7 @@ TEST_F(TestGeluInfo, CheckStrategy2) { | |||
| ASSERT_EQ(ret, FAILED); | |||
| } | |||
| TEST_F(TestGeluInfo, CheckStrategy3) { | |||
| TEST_F(TestGeLUInfo, CheckStrategy3) { | |||
| // Success: {{2,4,1,16}} | |||
| Strategys inputs = {{2, 4, 1, 16}}; | |||
| StrategyPtr strategy = NewStrategy(0, inputs); | |||
| @@ -1544,12 +1544,12 @@ test_case_nn_ops = [ | |||
| 'block': G.BiasAddGrad(), | |||
| 'desc_inputs': [[1, 3, 3, 3]], | |||
| 'skip': ['backward']}), | |||
| ('Gelu', { | |||
| 'block': P.Gelu(), | |||
| ('GeLU', { | |||
| 'block': P.GeLU(), | |||
| 'desc_inputs': [[1, 3, 4, 4]], | |||
| 'desc_bprop': [[1, 3, 4, 4]]}), | |||
| ('GeluGrad', { | |||
| 'block': G.GeluGrad(), | |||
| ('GeLUGrad', { | |||
| 'block': G.GeLUGrad(), | |||
| 'desc_inputs': [[2, 2], [2, 2], [2, 2]], | |||
| 'desc_bprop': [[2, 2]], | |||
| 'skip': ['backward']}), | |||
| @@ -27,7 +27,7 @@ class VirtualDatasetNet(nn.Cell): | |||
| self.virtual_dataset = _VirtualDataset() | |||
| self.matmul1 = P.MatMul() | |||
| self.matmul2 = P.MatMul() | |||
| self.gelu = P.Gelu() | |||
| self.gelu = P.GeLU() | |||
| def construct(self, x, y, z): | |||
| x, y, z = self.virtual_dataset(x, y, z) | |||
| @@ -57,7 +57,7 @@ def test_virtual_dataset_3_input(): | |||
| self.virtual_dataset = _VirtualDataset().shard(strategy0) | |||
| self.matmul1 = P.MatMul().shard(strategy1) | |||
| self.matmul2 = P.MatMul().shard(strategy2) | |||
| self.gelu = P.Gelu().shard(strategy3) | |||
| self.gelu = P.GeLU().shard(strategy3) | |||
| def construct(self, x, y, b): | |||
| x, y, b = self.virtual_dataset(x, y, b) | |||
| @@ -86,7 +86,7 @@ def test_virtualdataset_cell_3_inputs(): | |||
| super().__init__() | |||
| self.matmul1 = P.MatMul().shard(strategy1) | |||
| self.matmul2 = P.MatMul().shard(strategy2) | |||
| self.gelu = P.Gelu().shard(strategy3) | |||
| self.gelu = P.GeLU().shard(strategy3) | |||
| def construct(self, x, y, b): | |||
| out = self.gelu(self.matmul1(x, y)) | |||