Browse Source

[graph kernel] add expander ops.

pull/14604/head
chenlei_autodiff 5 years ago
parent
commit
13fbfca6b9
23 changed files with 419 additions and 94 deletions
  1. +4
    -2
      mindspore/_extends/graph_kernel/expanders/__init__.py
  2. +31
    -32
      mindspore/_extends/graph_kernel/expanders/batchnorm.py
  3. +17
    -16
      mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py
  4. +7
    -5
      mindspore/_extends/graph_kernel/expanders/bias_add.py
  5. +48
    -0
      mindspore/_extends/graph_kernel/expanders/expand_dims.py
  6. +49
    -0
      mindspore/_extends/graph_kernel/expanders/squeeze.py
  7. +0
    -1
      mindspore/_extends/graph_kernel/model/model.py
  8. +0
    -7
      mindspore/_extends/graph_kernel/model/op_infer.py
  9. +2
    -0
      mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc
  10. +12
    -11
      mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
  11. +1
    -2
      tests/st/ops/graph_kernel/test_batchnorm.py
  12. +1
    -2
      tests/st/ops/graph_kernel/test_batchnorm_grad.py
  13. +1
    -2
      tests/st/ops/graph_kernel/test_clip_by_norm_no_div_sum.py
  14. +51
    -0
      tests/st/ops/graph_kernel/test_expand_dims.py
  15. +1
    -2
      tests/st/ops/graph_kernel/test_fuse.py
  16. +2
    -4
      tests/st/ops/graph_kernel/test_matmul.py
  17. +68
    -0
      tests/st/ops/graph_kernel/test_reduce_max.py
  18. +68
    -0
      tests/st/ops/graph_kernel/test_reduce_min.py
  19. +1
    -2
      tests/st/ops/graph_kernel/test_relu.py
  20. +1
    -2
      tests/st/ops/graph_kernel/test_relu_grad.py
  21. +1
    -2
      tests/st/ops/graph_kernel/test_sqrt_grad.py
  22. +52
    -0
      tests/st/ops/graph_kernel/test_squeeze.py
  23. +1
    -2
      tests/st/ops/graph_kernel/test_tile.py

+ 4
- 2
mindspore/_extends/graph_kernel/expanders/__init__.py View File

@@ -15,14 +15,15 @@
"""expanders init"""

from .assign_add import AssignAdd
from .batchnorm import BatchNorm
from .batchnorm_grad import BatchNormGrad
from .bias_add import BiasAdd
from .bias_add_grad import BiasAddGrad
from .clip_by_norm_no_div_sum import ClipByNormNoDivSum
from .dropout_grad import DropoutGrad
from .expand_dims import ExpandDims
from .fused_adam import FusedAdam
from .fused_adam_weight_decay import FusedAdamWeightDecay
from .batchnorm import BatchNorm
from .batchnorm_grad import BatchNormGrad
from .gelu import GeLU
from .gelu_grad import GeLUGrad
from .gkdropout import GkDropout
@@ -43,6 +44,7 @@ from .sigmoid_cross_entropy_with_logits_grad import SigmoidCrossEntropyWithLogit
from .softmax_cross_entropy_with_logits import SoftmaxCrossEntropyWithLogits
from .sqrt_grad import SqrtGrad
from .square import Square
from .squeeze import Squeeze
from .tanh_grad import TanhGrad
from .tile import Tile
from .lamb_apply_optimizer_assign import LambApplyOptimizerAssign


+ 31
- 32
mindspore/_extends/graph_kernel/expanders/batchnorm.py View File

@@ -15,6 +15,7 @@
"""generate json desc for BatchNorm"""
from mindspore._extends.graph_kernel.model.model import DataFormat as DF
from ._utils import Expander, ExpanderInfoValidator as VLD
from .expand_dims import ExpandDims


@VLD.add_format(DF.NHWC, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
@@ -30,19 +31,19 @@ class BatchNorm(Expander):
input_offset = self.inputs[2]
input_mean = self.inputs[3]
input_variance = self.inputs[4]
epsilon_v = graph_builder.value(input_scale.dtype, self.attrs['epsilon'], input_scale.data_format)
epsilon_v = graph_builder.value(input_scale.dtype, self.attrs['epsilon'])

if self.attrs['is_training']:
reduce_axis = ()
shape_x = input_x.shape
if input_x.data_format == "NHWC":
if input_x.data_format == DF.NHWC:
reduce_axis = (0, 1, 2)
num = shape_x[0] * shape_x[1] * shape_x[2]
else:
reduce_axis = (0, 2, 3)
num = shape_x[0] * shape_x[2] * shape_x[3]
num_rec = 1.0 / num
num_rec_v = graph_builder.value(input_scale.dtype, num_rec, input_scale.data_format)
num_rec_v = graph_builder.value(input_scale.dtype, num_rec)

# compute mean value of input_x
mean_sum = graph_builder.emit(
@@ -50,9 +51,9 @@ class BatchNorm(Expander):
mean_muls = graph_builder.emit('Mul', [mean_sum, num_rec_v])

# compute variance of input_x
if not input_x.data_format == "NHWC":
mean_muls_expand = graph_builder.emit('ExpandDims', [mean_muls], attrs={'axis': 1})
mean_muls_expand = graph_builder.emit('ExpandDims', [mean_muls_expand], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
mean_muls_expand = graph_builder.emit(
'Reshape', [mean_muls], attrs={'shape': ExpandDims.infer_shape(mean_muls.shape, [-1, -1])})
else:
mean_muls_expand = mean_muls
var_sub = graph_builder.emit('Sub', [input_x, mean_muls_expand])
@@ -62,37 +63,37 @@ class BatchNorm(Expander):

# y_sqrt_rec means 1 / sqrt(variance + epsilon), which is calculated in backward pass
scalar_one = 1.0
scalar_one_v = graph_builder.value(input_scale.dtype, scalar_one, input_scale.data_format)
scalar_one_v = graph_builder.value(input_scale.dtype, scalar_one)
y_add = graph_builder.emit('Add', [var_mul, epsilon_v])
y_sqrt = graph_builder.emit('Sqrt', [y_add])
y_sqrt_rec = graph_builder.emit('RealDiv', [scalar_one_v, y_sqrt])

# compute res_y
tmp_sub = graph_builder.emit('Sub', [input_x, mean_muls_expand])
if not input_x.data_format == "NHWC":
y_sqrt_rec_expand = graph_builder.emit('ExpandDims', [y_sqrt_rec], attrs={'axis': 1})
y_sqrt_rec_expand = graph_builder.emit('ExpandDims', [y_sqrt_rec_expand], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
y_sqrt_rec_expand = graph_builder.emit(
'Reshape', [y_sqrt_rec], attrs={'shape': ExpandDims.infer_shape(y_sqrt_rec.shape, [-1, -1])})
else:
y_sqrt_rec_expand = y_sqrt_rec
y_norm = graph_builder.emit('Mul', [tmp_sub, y_sqrt_rec_expand])
if not input_x.data_format == "NHWC":
input_scale_expand = graph_builder.emit('ExpandDims', [input_scale], attrs={'axis': 1})
input_scale_expand = graph_builder.emit('ExpandDims', [input_scale_expand], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
input_scale_expand = graph_builder.emit(
'Reshape', [input_scale], attrs={'shape': ExpandDims.infer_shape(input_scale.shape, [-1, -1])})
else:
input_scale_expand = input_scale
res_y_mul = graph_builder.emit('Mul', [input_scale_expand, y_norm])
if not input_x.data_format == "NHWC":
input_offset_expand = graph_builder.emit('ExpandDims', [input_offset], attrs={'axis': 1})
input_offset_expand = graph_builder.emit('ExpandDims', [input_offset_expand], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
input_offset_expand = graph_builder.emit(
'Reshape', [input_offset], attrs={'shape': ExpandDims.infer_shape(input_offset.shape, [-1, -1])})
else:
input_offset_expand = input_offset
res_y = graph_builder.emit('Add', [res_y_mul, input_offset_expand])

# compute mean_res
momentum_sub = scalar_one - self.attrs['momentum']
momentum_v_sub = graph_builder.value(input_scale.dtype, momentum_sub, input_scale.data_format)
momentum_v_sub = graph_builder.value(input_scale.dtype, momentum_sub)
new_running_mean_tmp = graph_builder.emit('Mul', [momentum_v_sub, input_mean])
momentum_v = graph_builder.value(input_scale.dtype, self.attrs['momentum'], input_scale.data_format)
momentum_v = graph_builder.value(input_scale.dtype, self.attrs['momentum'])
current_mean_tmp = graph_builder.emit('Mul', [momentum_v, mean_muls])
updated_moving_mean = graph_builder.emit('Add', [new_running_mean_tmp, current_mean_tmp])
mean_res = graph_builder.emit(
@@ -100,7 +101,7 @@ class BatchNorm(Expander):

# variance_res is calculated by sample variance, and need to multiply by num / (num - 1)
var_num = float(num) / (num - 1)
var_num_v = graph_builder.value(input_scale.dtype, var_num, input_scale.data_format)
var_num_v = graph_builder.value(input_scale.dtype, var_num)
var_mul_update = graph_builder.emit('Mul', [var_num_v, var_mul])
new_running_var_tmp = graph_builder.emit('Mul', [momentum_v_sub, input_variance])
current_var_tmp = graph_builder.emit('Mul', [momentum_v, var_mul_update])
@@ -109,24 +110,22 @@ class BatchNorm(Expander):
'InplaceAssign', [input_variance, updated_moving_variance, updated_moving_variance],
attrs={'fake_output': True})

# compute reserve, just return a C shape tensor
reserve = graph_builder.emit('Add', [input_offset, scalar_one_v])
return res_y, mean_res, variance_res, mean_muls, y_sqrt_rec, reserve
return res_y, mean_res, variance_res, mean_muls, y_sqrt_rec
# infer mode
if not input_x.data_format == "NHWC":
input_mean = graph_builder.emit('ExpandDims', [input_mean], attrs={'axis': 1})
input_mean = graph_builder.emit('ExpandDims', [input_mean], attrs={'axis': 2})
input_scale = graph_builder.emit('ExpandDims', [input_scale], attrs={'axis': 1})
input_scale = graph_builder.emit('ExpandDims', [input_scale], attrs={'axis': 2})
input_offset = graph_builder.emit('ExpandDims', [input_offset], attrs={'axis': 1})
input_offset = graph_builder.emit('ExpandDims', [input_offset], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
input_mean = graph_builder.emit(
'Reshape', [input_mean], attrs={'shape': ExpandDims.infer_shape(input_mean.shape, [-1, -1])})
input_scale = graph_builder.emit(
'Reshape', [input_scale], attrs={'shape': ExpandDims.infer_shape(input_scale.shape, [-1, -1])})
input_offset = graph_builder.emit(
'Reshape', [input_offset], attrs={'shape': ExpandDims.infer_shape(input_offset.shape, [-1, -1])})
x_sub = graph_builder.emit('Sub', [input_x, input_mean])
x_sub_mul = graph_builder.emit('Mul', [input_scale, x_sub])
var_add = graph_builder.emit('Add', [epsilon_v, input_variance])
var_add_sqrt = graph_builder.emit('Sqrt', [var_add])
if not input_x.data_format == "NHWC":
var_add_sqrt = graph_builder.emit('ExpandDims', [var_add_sqrt], attrs={'axis': 1})
var_add_sqrt = graph_builder.emit('ExpandDims', [var_add_sqrt], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
var_add_sqrt = graph_builder.emit(
'Reshape', [var_add_sqrt], attrs={'shape': ExpandDims.infer_shape(var_add_sqrt.shape, [-1, -1])})
x_div = graph_builder.emit('RealDiv', [x_sub_mul, var_add_sqrt])
res_y = graph_builder.emit('Add', [input_offset, x_div])
return res_y, var_add, var_add, var_add, var_add

+ 17
- 16
mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py View File

@@ -15,6 +15,7 @@
"""generate json desc for BatchNormGrad"""
from mindspore._extends.graph_kernel.model.model import DataFormat as DF
from ._utils import Expander, ExpanderInfoValidator as VLD
from .expand_dims import ExpandDims

@VLD.add_format(DF.NHWC, DF.NHWC, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
@VLD.add_format(DF.NCHW, DF.NCHW, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
@@ -32,7 +33,7 @@ class BatchNormGrad(Expander):

reduce_axis = ()
shape_x = input_x.shape
if input_x.data_format == "NHWC":
if input_x.data_format == DF.NHWC:
reduce_axis = (0, 1, 2)
num = shape_x[0] * shape_x[1] * shape_x[2]
else:
@@ -44,28 +45,28 @@ class BatchNormGrad(Expander):
if input_dy.dtype == 'float16':
input_dy = graph_builder.emit('Cast', [input_dy], attrs={'dst_type': 'float32'})
num_rec = -1.0 / num
num_rec_v = graph_builder.value(input_scale.dtype, num_rec, input_scale.data_format)
num_rec_v = graph_builder.value(input_scale.dtype, num_rec)
dbeta = graph_builder.emit('ReduceSum', [input_dy], attrs={'reduce_axis': reduce_axis, 'keep_dims': False})

# in training input_save_inv_variance means 1 / sqrt(variance + epsilon), which is calculated in forward pass
if self.attrs['is_training']:
inv_variance = input_save_inv_variance
else:
epsilon_v = graph_builder.value(input_scale.dtype, self.attrs['epsilon'], input_scale.data_format)
epsilon_v = graph_builder.value(input_scale.dtype, self.attrs['epsilon'])
var_add = graph_builder.emit('Add', [input_save_inv_variance, epsilon_v])
sqrt_var_eps = graph_builder.emit('Sqrt', [var_add])
scalar_one = 1.0
scalar_one_v = graph_builder.value(input_scale.dtype, scalar_one, input_scale.data_format)
scalar_one_v = graph_builder.value(input_scale.dtype, scalar_one)
inv_variance = graph_builder.emit('RealDiv', [scalar_one_v, sqrt_var_eps])

# compute dgamma
if not input_x.data_format == "NHWC":
input_save_mean = graph_builder.emit('ExpandDims', [input_save_mean], attrs={'axis': 1})
input_save_mean = graph_builder.emit('ExpandDims', [input_save_mean], attrs={'axis': 2})
inv_variance = graph_builder.emit('ExpandDims', [inv_variance], attrs={'axis': 1})
inv_variance = graph_builder.emit('ExpandDims', [inv_variance], attrs={'axis': 2})
input_scale = graph_builder.emit('ExpandDims', [input_scale], attrs={'axis': 1})
input_scale = graph_builder.emit('ExpandDims', [input_scale], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
input_save_mean = graph_builder.emit(
'Reshape', [input_save_mean], attrs={'shape': ExpandDims.infer_shape(input_save_mean.shape, [-1, -1])})
inv_variance = graph_builder.emit(
'Reshape', [inv_variance], attrs={'shape': ExpandDims.infer_shape(inv_variance.shape, [-1, -1])})
input_scale = graph_builder.emit(
'Reshape', [input_scale], attrs={'shape': ExpandDims.infer_shape(input_scale.shape, [-1, -1])})
x_sub_mean = graph_builder.emit('Sub', [input_x, input_save_mean])
x_div = graph_builder.emit('Mul', [x_sub_mean, inv_variance])
dgamma_param = graph_builder.emit('Mul', [input_dy, x_div])
@@ -75,11 +76,11 @@ class BatchNormGrad(Expander):
# compute dx
if self.attrs['is_training']:
tmp_b = graph_builder.emit('Mul', [num_rec_v, dbeta])
if not input_x.data_format == "NHWC":
dgamma_expand = graph_builder.emit('ExpandDims', [dgamma], attrs={'axis': 1})
dgamma_expand = graph_builder.emit('ExpandDims', [dgamma_expand], attrs={'axis': 2})
tmp_b = graph_builder.emit('ExpandDims', [tmp_b], attrs={'axis': 1})
tmp_b = graph_builder.emit('ExpandDims', [tmp_b], attrs={'axis': 2})
if input_x.data_format in (DF.DEFAULT, DF.NCHW):
dgamma_expand = graph_builder.emit(
'Reshape', [dgamma], attrs={'shape': ExpandDims.infer_shape(dgamma.shape, [-1, -1])})
tmp_b = graph_builder.emit(
'Reshape', [tmp_b], attrs={'shape': ExpandDims.infer_shape(tmp_b.shape, [-1, -1])})
else:
dgamma_expand = dgamma
x_sub_mean_dgamma_mul = graph_builder.emit('Mul', [x_div, dgamma_expand])


+ 7
- 5
mindspore/_extends/graph_kernel/expanders/bias_add.py View File

@@ -15,6 +15,7 @@
"""generate json desc for bias_add"""
from mindspore._extends.graph_kernel.model.model import DataFormat as DF
from ._utils import Expander, ExpanderInfoValidator as VLD
from .expand_dims import ExpandDims


@VLD.add_format(DF.DEFAULT, DF.DEFAULT)
@@ -27,18 +28,19 @@ class BiasAdd(Expander):
input_x, input_y = self.inputs

if input_x.data_format == DF.NCHW:
input_y_expand = graph_builder.emit('ExpandDims', [input_y], attrs={'axis': 1})
input_y_expand = graph_builder.emit('ExpandDims', [input_y_expand], attrs={'axis': 2})
input_y_expand = graph_builder.emit(
'Reshape', [input_y], attrs={'shape': ExpandDims.infer_shape(input_y.shape, [1, 2])})
result = graph_builder.emit('Add', [input_x, input_y_expand])
elif input_x.data_format == DF.DEFAULT:
if len(input_x.shape) == 2:
result = graph_builder.emit('Add', [input_x, input_y])
elif len(input_x.shape) == 3:
input_y_expand = graph_builder.emit('ExpandDims', [input_y], attrs={'axis': 1})
input_y_expand = graph_builder.emit(
'Reshape', [input_y], attrs={'shape': ExpandDims.infer_shape(input_y.shape, 1)})
result = graph_builder.emit('Add', [input_x, input_y_expand])
else: # len == 4
input_y_expand = graph_builder.emit('ExpandDims', [input_y], attrs={'axis': 1})
input_y_expand = graph_builder.emit('ExpandDims', [input_y_expand], attrs={'axis': 2})
input_y_expand = graph_builder.emit(
'Reshape', [input_y], attrs={'shape': ExpandDims.infer_shape(input_y.shape, [1, 2])})
result = graph_builder.emit('Add', [input_x, input_y_expand])
else: # NHWC
result = graph_builder.emit('Add', [input_x, input_y])


+ 48
- 0
mindspore/_extends/graph_kernel/expanders/expand_dims.py View File

@@ -0,0 +1,48 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===========================================================================
"""generate json desc for expand_dims"""
from ._utils import Expander, ExpanderInfoValidator as VLD


@VLD.check_attrs('axis')
class ExpandDims(Expander):
    """ExpandDims expander: lowers ExpandDims to a single Reshape op."""

    def _expand(self, graph_builder):
        input_x = self.inputs[0]
        # ExpandDims is a pure layout change, so it maps to one Reshape
        # with the statically inferred output shape.
        shape = self.infer_shape(input_x.shape, self.attrs['axis'])
        result = graph_builder.emit('Reshape', [input_x], attrs={'shape': shape})

        return result

    @staticmethod
    def infer_shape(shape, axis):
        """Infer the output shape after inserting size-1 dimensions.

        `axis` may be an int or a list/tuple of ints; entries are applied in
        order, so each later entry indexes into the already-expanded shape.
        `shape` may be a list or a tuple; a new list is always returned and
        the caller's sequence is never mutated.
        """
        def insert_axis(shape, axis):
            # Valid ExpandDims range is [-len(shape) - 1, len(shape)].
            if not isinstance(axis, int) or axis > len(shape) or axis < -len(shape) - 1:
                raise ValueError("invalid dim for ExpandDims")
            if axis >= 0:
                shape.insert(axis, 1)
            else:
                shape.insert(axis + len(shape) + 1, 1)
            return shape
        # list(...) (not shape[:]) so tuple inputs also work — tuple slices
        # have no insert().
        out_shape = list(shape)
        if isinstance(axis, int):
            return insert_axis(out_shape, axis)
        if isinstance(axis, (list, tuple)):
            for i in axis:
                out_shape = insert_axis(out_shape, i)
            return out_shape
        raise ValueError("invalid dim for ExpandDims")

+ 49
- 0
mindspore/_extends/graph_kernel/expanders/squeeze.py View File

@@ -0,0 +1,49 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===========================================================================
"""generate json desc for squeeze"""
from ._utils import Expander, ExpanderInfoValidator as VLD


@VLD.check_attrs('axis')
class Squeeze(Expander):
    """Squeeze expander: lowers Squeeze to a single Reshape op."""

    def _expand(self, graph_builder):
        input_x = self.inputs[0]
        out_shape = self.infer_shape(input_x.shape, self.attrs['axis'])
        result = graph_builder.emit('Reshape', [input_x], attrs={'shape': out_shape})

        return result

    @staticmethod
    def infer_shape(shape, axis):
        """Infer the output shape after removing the size-1 axes in `axis`.

        An empty `axis` removes every size-1 dimension; otherwise only the
        listed dimensions are removed. Negative axis values are accepted and
        normalized. A fully squeezed shape collapses to [1], never [].
        """
        def squeeze_axis(shape, axis):
            if not axis:
                out_shape = [d for d in shape if d != 1]
            else:
                ndim = len(shape)
                # Normalize negative indices so they compare correctly with
                # the non-negative positions produced by enumerate().
                axes = set(a + ndim if a < 0 else a for a in axis)
                out_shape = [dim for idx, dim in enumerate(shape) if idx not in axes]
            if not out_shape:
                out_shape = [1]
            return out_shape
        if isinstance(shape, (list, tuple)):
            if isinstance(axis, int):
                axis = [axis]
            if isinstance(axis, (list, tuple)):
                return squeeze_axis(shape, axis)
        raise ValueError("Invalid axis for Squeeze.")

+ 0
- 1
mindspore/_extends/graph_kernel/model/model.py View File

@@ -176,7 +176,6 @@ class PrimLib:
'ReduceMin': Prim(REDUCE),
'Assign': Prim(ELEMWISE),
'Tanh': Prim(ELEMWISE),
'ExpandDims': Prim(RESHAPE),
'InplaceAssign': Prim(ELEMWISE),
'@ReduceInit': Prim(ELEMWISE),
'Reshape': Prim(RESHAPE),


+ 0
- 7
mindspore/_extends/graph_kernel/model/op_infer.py View File

@@ -174,13 +174,6 @@ class Reshape(_Reshape):
return self.attrs["shape"]


class ExpandDims(_Reshape):
def _infer_shape(self):
shape = list(self.inputs[0].shape)
shape.insert(self.attrs["axis"], 1)
return shape


class Cast(_Elemwise):
def _infer_type(self):
return self.attrs["dst_type"]


+ 2
- 0
mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc View File

@@ -49,6 +49,7 @@ std::vector<PrimitivePtr> GetExpandOps() {
prim::kPrimAssignAdd,
prim::kPrimLayerNorm,
prim::kPrimLayerNormGrad,
prim::kPrimExpandDims,
#if ENABLE_D
prim::kPrimTile,
prim::kPrimSqrtGrad,
@@ -76,6 +77,7 @@ std::vector<PrimitivePtr> GetExpandOps() {
prim::kPrimSigmoidCrossEntropyWithLogits,
prim::kPrimSigmoidCrossEntropyWithLogitsGrad,
prim::kPrimSoftmaxCrossEntropyWithLogits,
prim::kPrimSqueeze,
#endif
};
const auto &flags = context::GraphKernelFlags::GetInstance();


+ 12
- 11
mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc View File

@@ -581,19 +581,20 @@ std::string ExtractGraphKernelName(const AnfNodePtrList &cnodes, const string &p
std::vector<PrimitivePtr> GetFusibleOpList() {
#if ENABLE_D
std::vector<PrimitivePtr> fusible_basic_ops = {
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimAdd,
prim::kPrimExpandDims, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimAddN,
prim::kPrimEqual, prim::kPrimReciprocal, prim::kPrimTanh, prim::kPrimReshape, prim::kPrimTranspose,
prim::kPrimCast, prim::kPrimRealDiv, prim::kPrimMatMul, prim::kPrimAssign, prim::kPrimReduceSum};
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimAdd,
prim::kPrimCast, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimAddN,
prim::kPrimEqual, prim::kPrimReciprocal, prim::kPrimTanh, prim::kPrimReshape, prim::kPrimTranspose,
prim::kPrimRealDiv, prim::kPrimMatMul, prim::kPrimAssign, prim::kPrimReduceSum};
#elif ENABLE_GPU
std::vector<PrimitivePtr> fusible_basic_ops = {
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimAdd,
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimAddN,
prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect, prim::kPrimGreater,
prim::kPrimCast, prim::kPrimReduceSum, prim::kPrimTanh, prim::kPrimReshape, prim::kPrimTranspose,
prim::kPrimAssign, prim::kPrimExpandDims, prim::kPrimLess, prim::kPrimLessEqual, prim::kPrimGreaterEqual};
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimAdd,
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimAddN,
prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect, prim::kPrimGreater,
prim::kPrimCast, prim::kPrimReduceSum, prim::kPrimTanh, prim::kPrimReshape, prim::kPrimTranspose,
prim::kPrimAssign, prim::kPrimLessEqual, prim::kPrimGreaterEqual, prim::kPrimReduceMax, prim::kPrimReduceMin,
prim::kPrimLess};
#else
std::vector<PrimitivePtr> fusible_basic_ops;
#endif


+ 1
- 2
tests/st/ops/graph_kernel/test_batchnorm.py View File

@@ -34,8 +34,7 @@ class Net(nn.Cell):


def get_output(x, weight, bias, moving_mean, moving_var, is_training, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net(Tensor(weight), Tensor(bias), Tensor(moving_mean), Tensor(moving_var), is_training)
output = net(Tensor(x))
return output, net.mean, net.variance


+ 1
- 2
tests/st/ops/graph_kernel/test_batchnorm_grad.py View File

@@ -32,8 +32,7 @@ class Net(nn.Cell):

def get_output(input_dy, input_x, input_scale, input_save_mean, input_save_inv_variance, input_reverse,
is_training, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net(is_training)
output = net(input_dy, input_x, input_scale, input_save_mean, input_save_inv_variance, input_reverse)
return output


+ 1
- 2
tests/st/ops/graph_kernel/test_clip_by_norm_no_div_sum.py View File

@@ -37,8 +37,7 @@ class ClipByNormNoDivSum(nn.Cell):


def get_output(x0, x1, x2, x3, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = ClipByNormNoDivSum()
output = net(x0, x1, x2, x3)
return output


+ 51
- 0
tests/st/ops/graph_kernel/test_expand_dims.py View File

@@ -0,0 +1,51 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class Net(nn.Cell):
    """Minimal cell wrapping P.ExpandDims for the comparison test."""

    def __init__(self):
        super().__init__()
        self.expand_dims = P.ExpandDims()

    def construct(self, x, dim):
        return self.expand_dims(x, dim)


def get_output(x, axis, enable_graph_kernel=False):
    """Run ExpandDims on (x, axis) with graph kernel toggled as requested."""
    context.set_context(enable_graph_kernel=enable_graph_kernel)
    return Net()(x, axis)


def test_expand_dims(shape, dtype, axis):
    """Check the fused ExpandDims result against the unfused baseline."""
    data = Tensor(np.random.normal(0, 10, shape).astype(dtype))
    baseline = get_output(data, axis, enable_graph_kernel=False)
    fused = get_output(data, axis, enable_graph_kernel=True)
    assert np.allclose(baseline.asnumpy(), fused.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_expand_dims_gpu():
    """Run the ExpandDims comparison test on GPU in graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    test_expand_dims((2, 3), np.float16, 2)

+ 1
- 2
tests/st/ops/graph_kernel/test_fuse.py View File

@@ -41,8 +41,7 @@ class Net(Cell):


def get_output(i0, i1, i2, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net()
output = net(i0, i1, i2)
return output


+ 2
- 4
tests/st/ops/graph_kernel/test_matmul.py View File

@@ -39,15 +39,13 @@ class Net1(Cell):
return self.add(res, bias)

def get_output(i0, i1, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True, save_graphs=False)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net()
output = net(i0, i1)
return output

def get_output1(i0, i1, i2, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True, save_graphs=False)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net1()
output = net(i0, i1, i2)
return output


+ 68
- 0
tests/st/ops/graph_kernel/test_reduce_max.py View File

@@ -0,0 +1,68 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class ReduceMax(nn.Cell):
    """Minimal cell wrapping P.ReduceMax for the comparison test."""

    def __init__(self, keep_dims):
        super().__init__()
        self.reduce_max = P.ReduceMax(keep_dims)

    def construct(self, x, axis):
        return self.reduce_max(x, axis)


def get_output(x, axis, keep_dims, enable_graph_kernel=False):
    """Run ReduceMax on (x, axis) with graph kernel toggled as requested."""
    context.set_context(enable_graph_kernel=enable_graph_kernel)
    return ReduceMax(keep_dims)(x, axis)


def test_reduce_max():
    """Compare fused vs. unfused ReduceMax over several shape/axis configs.

    The original three copy-pasted stanzas are folded into one loop over
    (shape, axis, keep_dims) cases; each case is checked independently.
    """
    cases = [
        ([2, 3, 4, 4], 3, True),
        ([2, 3, 4, 4], 3, False),
        ([2, 3, 1, 4], 2, True),
    ]
    for shape, axis, keep_dims in cases:
        x = Tensor(np.random.normal(0, 1, shape).astype(np.float32))
        expect = get_output(x, axis, keep_dims, False)
        output = get_output(x, axis, keep_dims, True)
        assert np.allclose(expect.asnumpy(), output.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_reduce_max_gpu():
    """Run the ReduceMax comparison test on GPU in graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    test_reduce_max()

+ 68
- 0
tests/st/ops/graph_kernel/test_reduce_min.py View File

@@ -0,0 +1,68 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class ReduceMin(nn.Cell):
    """Minimal cell wrapping P.ReduceMin for the comparison test."""

    def __init__(self, keep_dims):
        super().__init__()
        self.reduce_min = P.ReduceMin(keep_dims)

    def construct(self, x, axis):
        return self.reduce_min(x, axis)


def get_output(x, axis, keep_dims, enable_graph_kernel=False):
    """Run ReduceMin on (x, axis) with graph kernel toggled as requested."""
    context.set_context(enable_graph_kernel=enable_graph_kernel)
    return ReduceMin(keep_dims)(x, axis)


def test_reduce_min():
    """Compare fused vs. unfused ReduceMin over several shape/axis configs.

    The original three copy-pasted stanzas are folded into one loop over
    (shape, axis, keep_dims) cases; each case is checked independently.
    """
    cases = [
        ([2, 3, 4, 4], 3, True),
        ([2, 3, 4, 4], 3, False),
        ([2, 3, 1, 4], 2, True),
    ]
    for shape, axis, keep_dims in cases:
        x = Tensor(np.random.normal(0, 1, shape).astype(np.float32))
        expect = get_output(x, axis, keep_dims, False)
        output = get_output(x, axis, keep_dims, True)
        assert np.allclose(expect.asnumpy(), output.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_reduce_min_gpu():
    """Run the ReduceMin comparison test on GPU in graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    test_reduce_min()

+ 1
- 2
tests/st/ops/graph_kernel/test_relu.py View File

@@ -30,8 +30,7 @@ class Net(nn.Cell):


def get_output(x, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net()
output = net(x)
return output


+ 1
- 2
tests/st/ops/graph_kernel/test_relu_grad.py View File

@@ -30,8 +30,7 @@ class Net(nn.Cell):


def get_output(y_backprop, x, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net()
output = net(y_backprop, x)
return output


+ 1
- 2
tests/st/ops/graph_kernel/test_sqrt_grad.py View File

@@ -29,8 +29,7 @@ class Net(nn.Cell):


def get_output(x, dout, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net()
output = net(x, dout)
return output


+ 52
- 0
tests/st/ops/graph_kernel/test_squeeze.py View File

@@ -0,0 +1,52 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class Net(nn.Cell):
    """Minimal cell wrapping P.Squeeze(axis) for the comparison test."""

    def __init__(self, axis):
        super().__init__()
        self.squeeze = P.Squeeze(axis)

    def construct(self, x):
        return self.squeeze(x)


def get_output(x, axis=(), enable_graph_kernel=False):
    """Run Squeeze(axis) on x with graph kernel toggled as requested."""
    context.set_context(enable_graph_kernel=enable_graph_kernel)
    return Net(axis)(x)


def test_squeeze(shape, dtype, axis=()):
    """Check the fused Squeeze result against the unfused baseline."""
    data = Tensor(np.random.normal(0, 10, shape).astype(dtype))
    baseline = get_output(data, axis, False)
    fused = get_output(data, axis, True)
    assert np.allclose(baseline.asnumpy(), fused.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_squeeze_gpu():
    """Run the Squeeze comparison test on GPU in graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    test_squeeze((1, 16, 1, 1), np.int32)
    test_squeeze((1, 16, 1, 1), np.float32, (0, 2))

+ 1
- 2
tests/st/ops/graph_kernel/test_tile.py View File

@@ -30,8 +30,7 @@ class Net(nn.Cell):


def get_output(x, multiples, enable_graph_kernel=False):
if enable_graph_kernel:
context.set_context(enable_graph_kernel=True)
context.set_context(enable_graph_kernel=enable_graph_kernel)
net = Net(multiples)
output = net(x)
return output


Loading…
Cancel
Save