!19854 clearing the untrusted code of quantization

Merge pull request !19854 from Erpim/master
4 years ago · 5cde059ef3
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_learned_scale_quant_perchannel_grad_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_learned_scale_quant_perchannel_grad_gpu_kernel.cc
@@ -42,7 +42,6 @@ const std::vector<size_t> &FakeLearnedScaleQuantPerChannelGradGpuKernel::GetWork
 bool FakeLearnedScaleQuantPerChannelGradGpuKernel::Init(const CNodePtr &kernel_node) {
  kernel_node_ = kernel_node;
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);

  if (input_num != 4) {
    MS_LOG(EXCEPTION) << "Input number is " << input_num
                      << ", but FakeLearnedScaleQuantPerChannelGrad GpuKernel OP needs 4 input.";
@@ -109,7 +108,7 @@ bool FakeLearnedScaleQuantPerChannelGradGpuKernel::Launch(const std::vector<Addr

  if (global_step_ >= quant_delay_) {
    CHECK_CUDA_RET_WITH_ERROR(kernel_node_,
                              cudaMemcpyAsync(grad_alpha, alpha_no_grad, sizeof(float) * num_channels_,
                              cudaMemcpyAsync(grad_alpha, alpha_no_grad, sizeof(float) * kChannelLen,
                                              cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
                              "Copy gpu memory failed");
    CalLSQNudgePerChannel(input, quant_num_, input_alpha, input_quant_max, input_div_alpha, input_quant, neg_trunc_,
@@ -118,7 +117,7 @@ bool FakeLearnedScaleQuantPerChannelGradGpuKernel::Launch(const std::vector<Addr
                                           neg_trunc_, num_channels_, reinterpret_cast<cudaStream_t>(stream_ptr));
  } else {
    CHECK_CUDA_RET_WITH_ERROR(kernel_node_,
                              cudaMemcpyAsync(grad_alpha, alpha_no_grad, sizeof(float) * num_channels_,
                              cudaMemcpyAsync(grad_alpha, alpha_no_grad, sizeof(float) * kChannelLen,
                                              cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
                              "Copy gpu memory failed");
    CHECK_CUDA_RET_WITH_ERROR(kernel_node_,
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_learned_scale_quant_perlayer_grad_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_learned_scale_quant_perlayer_grad_gpu_kernel.cc
@@ -36,7 +36,6 @@ const std::vector<size_t> &FakeLearnedScaleQuantPerLayerGradGpuKernel::GetWorksp
 bool FakeLearnedScaleQuantPerLayerGradGpuKernel::Init(const CNodePtr &kernel_node) {
  kernel_node_ = kernel_node;
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);

  if (input_num != 4) {
    MS_LOG(EXCEPTION) << "Input number is " << input_num
                      << ", but FakeLearnedScaleQuantPerLayerGrad GpuKernel OP needs 4 input.";
--- a/mindspore/ccsrc/backend/optimizer/ascend/mindir/fake_learned_scale_quant_grad_unify_mindir.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/mindir/fake_learned_scale_quant_grad_unify_mindir.cc
@@ -29,7 +29,7 @@ namespace mindspore {
 namespace opt {
 namespace {
 void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
                                     std::vector<AnfNodePtr> *lsq_perlayer_grad_d_outputs) {
                                     std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
  const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
@@ -58,7 +58,7 @@ void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &

 void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
                                          const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
                                          std::vector<AnfNodePtr> *lsq_perlayer_reduce_grad_outputs) {
                                          std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
  MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad_outputs);
@@ -86,7 +86,7 @@ void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNode
 }

 void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
                                       std::vector<AnfNodePtr> *lsq_perchannel_grad_d_outputs) {
                                       std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
  const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
@@ -116,7 +116,7 @@ void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr

 void CreateOutputsOfLSQPerChannelReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
                                            const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
                                            std::vector<AnfNodePtr> *lsq_perchannel_reduce_grad_outputs) {
                                            std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad_outputs);
--- a/mindspore/compression/quant/qat.py
+++ b/mindspore/compression/quant/qat.py
@@ -30,7 +30,7 @@ from ...nn.layer import quant
 from ...ops import functional as F
 from ..common import QuantDtype
 from .quantizer import Quantizer, OptimizeOption
 from .quant_utils import compute_KL_threshold
 from .quant_utils import compute_kl_threshold


 __all__ = ["QuantizationAwareTraining", "create_quant_config"]
@@ -281,7 +281,8 @@ class QuantizationAwareTraining(Quantizer):
                                                mode=self.mode)
        self.eps = 1e-5

    def _convert_op_name(self, name):
    @staticmethod
    def _convert_op_name(name):
        pattern = re.compile(r'([A-Z]{1})')
        name_new = re.sub(pattern, r'_\1', name).lower()
        if name_new[0] == '_':
@@ -382,7 +383,7 @@ class QuantizationAwareTraining(Quantizer):
                scale_factor = (subcell.batchnorm.gamma.data.asnumpy() /
                                np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps))
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
            min_init, max_init = self._KL_init(subcell_weight_para, self.weight_dtype)
            min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype)
        self.quant_config = self.quant_config._replace(
            weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))

@@ -485,7 +486,7 @@ class QuantizationAwareTraining(Quantizer):
                scale_factor = (subcell.batchnorm.gamma.data.asnumpy() /
                                np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps))
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
            min_init, max_init = self._KL_init(subcell_weight_para, self.weight_dtype)
            min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype)
        self.quant_config = self.quant_config._replace(
            weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))

@@ -533,16 +534,16 @@ class QuantizationAwareTraining(Quantizer):
                                  quant_dtype=self.act_dtype)
        raise ValueError("Unsupported activation in auto quant: ", act_class)

    def _KL_init(self, subcell_weight_para, weight_dtype):
    def _kl_init(self, subcell_weight_para, weight_dtype):
        """
        Calculate the value of max_init and min_init with compute_KL_threshold.
        Calculate the value of max_init and min_init with compute_kl_threshold.
        """
        if self.weight_channel:
            max_init = [compute_KL_threshold(weight_para_each, weight_dtype)
            max_init = [compute_kl_threshold(weight_para_each, weight_dtype)
                        for weight_para_each in subcell_weight_para]
            min_init = [-x for x in max_init]
        else:
            max_init = [compute_KL_threshold(subcell_weight_para, weight_dtype)]
            max_init = [compute_kl_threshold(subcell_weight_para, weight_dtype)]
            min_init = [-x for x in max_init]
        return min_init, max_init

@@ -567,15 +568,17 @@ class QuantizationAwareTraining(Quantizer):
            raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
                             "optimize_option.")

        self.quantizable_idx = []
        quantizable_idx = []
        pass_cell = None
        for i, cell_and_name in enumerate(network.cells_and_names()):
            cell = cell_and_name[1]
            if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)) and cell is not pass_cell:
                self.quantizable_idx.append(i)
                quantizable_idx.append(i)

        assert len(self.quantizable_idx) == len(strategy)
        quantizable_layer_bit_dict = {idx: bit for idx, bit in zip(self.quantizable_idx, strategy)}
        if len(quantizable_idx) != len(strategy):
            raise ValueError("The dimension of quantifiable layers is not consistent with that of strategy.")

        quantizable_layer_bit_dict = {idx: bit for idx, bit in zip(quantizable_idx, strategy)}
        type_map = {
            QuantDtype.INT2.num_bits: QuantDtype.INT2,
            QuantDtype.INT3.num_bits: QuantDtype.INT3,
@@ -587,7 +590,7 @@ class QuantizationAwareTraining(Quantizer):
        }
        for i, cell_and_name in enumerate(network.cells_and_names()):
            cell = cell_and_name[1]
            if i not in self.quantizable_idx:
            if i not in quantizable_idx:
                continue
            else:
                if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)):
@@ -598,7 +601,7 @@ class QuantizationAwareTraining(Quantizer):
                            scale_factor = (cell.conv.gamma.data.asnumpy() /
                                            np.sqrt(cell.conv.moving_variance.data.asnumpy() + self.eps))
                            subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
                        min_init, max_init = self._KL_init(subcell_weight_para, cell.weight_dtype)
                        min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype)
                        cell.conv.fake_quant_weight.reset(quant_dtype=cell.weight_dtype,
                                                          min_init=min_init,
                                                          max_init=max_init)
@@ -608,7 +611,7 @@ class QuantizationAwareTraining(Quantizer):
                            scale_factor = (cell.dense.gamma.data.asnumpy() /
                                            np.sqrt(cell.dense.moving_variance.data.asnumpy() + self.eps))
                            subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
                        min_init, max_init = self._KL_init(subcell_weight_para, cell.weight_dtype)
                        min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype)
                        cell.dense.fake_quant_weight.reset(quant_dtype=cell.weight_dtype,
                                                           min_init=min_init,
                                                           max_init=max_init)
--- a/mindspore/compression/quant/quant_utils.py
+++ b/mindspore/compression/quant/quant_utils.py
@@ -222,7 +222,7 @@ def without_fold_batchnorm(weight, cell_quant):
    return weight, bias


 def compute_KL_threshold(data, bitwidth):
 def compute_kl_threshold(data, bitwidth):
    r"""
    Using KL-J Distance to calculate the clip threshold.

@@ -232,20 +232,18 @@ def compute_KL_threshold(data, bitwidth):
    Outputs:
        Tensor with Shape 1. Threshold to calculate the data.
    """
    bitwidth = bitwidth.num_bits
    data_min = 0
    data_max = np.abs(data).max()
    if data_max < 1e-5:
        return 1e-5
    hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(data_min, data_max), density=True)
    hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(0, data_max), density=True)
    # For the sake of high efficiency, we limit the maximum number of bins to 1024 in `sqrt` mode, If it exceeds the
    # largest size, turn to use the default bins config.
    largest_bin_size = 1024
    if hist.shape[0] > largest_bin_size:
        hist, bin_edges = np.histogram(np.abs(data), range=(data_min, data_max), density=True)
        hist, bin_edges = np.histogram(np.abs(data), range=(0, data_max), density=True)
    hist = hist / np.sum(hist)
    cumsum = np.cumsum(hist)
    bit_pow_range = pow(2, int(bitwidth) - 1)
    bit_pow_range = pow(2, int(bitwidth.num_bits) - 1)
    threshold = []
    scaling_factor = []
    kl = []
@@ -349,11 +347,11 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_param
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)

            if cell.fake_quant_weight.per_channel:
                max_init = [compute_KL_threshold(weight_para_each, cell.fake_quant_weight.quant_dtype)
                max_init = [compute_kl_threshold(weight_para_each, cell.fake_quant_weight.quant_dtype)
                            for weight_para_each in subcell_weight_para]
                min_init = [-x for x in max_init]
            else:
                max_init = [compute_KL_threshold(subcell_weight_para, cell.fake_quant_weight.quant_dtype)]
                max_init = [compute_kl_threshold(subcell_weight_para, cell.fake_quant_weight.quant_dtype)]
                min_init = [-x for x in max_init]

            cell.fake_quant_weight.reset(quant_dtype=cell.fake_quant_weight.quant_dtype,
--- a/mindspore/compression/quant/quantizer.py
+++ b/mindspore/compression/quant/quantizer.py
@@ -57,4 +57,8 @@ class Quantizer(ABC):

    @abstractmethod
    def quantize(self, network):
        pass
        """
        Quant API to convert input network to a quantization aware training network
        Args:
            network (Cell): network to be quantized.
        """
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -1535,6 +1535,7 @@ class ActQuant(_QuantActivation):
                                                      quant_dtype=quant_dtype,
                                                      neg_trunc=self.neg_trunc,
                                                      narrow_range=self.narrow_range)

    def construct(self, x):
        if self.fake_before:
            x = self.fake_quant_act_before(x)
--- a/mindspore/ops/_grad/grad_quant_ops.py
+++ b/mindspore/ops/_grad/grad_quant_ops.py
@@ -125,7 +125,7 @@ def get_bprop_batchnorm_fold2(self):


@bprop_getters.register(Q.BatchNormFoldD)
 def get_bprop_BatchNormFold(self):
 def get_bprop_batchnormfold(self):
    """Generate bprop for BatchNormFold for Ascend"""
    op = Q.BatchNormFoldGradD(self.epsilon, self.is_training, self.freeze_bn)

@@ -137,7 +137,7 @@ def get_bprop_BatchNormFold(self):


@bprop_getters.register(P.BNTrainingReduce)
 def get_bprop_BNTrainingReduce(self):
 def get_bprop_bn_training_reduce(self):
    """Generate bprop for BNTrainingReduce for Ascend"""

    def bprop(x, out, dout):
--- a/mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py
+++ b/mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py
@@ -57,6 +57,43 @@ def _batchnorm_fold_tbe():
    return


 def _batchnorm_fold_compute(x_input, x_sum, x_square_sum, mean, variance, momentum, epsilon):
    """_batchnorm_fold_compute"""
    shape_x = te.lang.cce.util.shape_to_list(x_input.shape)
    num = shape_x[0] * shape_x[2] * shape_x[3]
    num_rec = 1.0 / num

    # compute the mean of x
    batch_mean = te.lang.cce.vmuls(x_sum, num_rec)

    # compute the variance of x
    variance_div = te.lang.cce.vmuls(x_square_sum, num_rec)
    mean_square = te.lang.cce.vmul(batch_mean, batch_mean)
    batch_var_biased = te.lang.cce.vsub(variance_div, mean_square)
    batch_std = te.lang.cce.vsqrt(te.lang.cce.vadds(batch_var_biased, epsilon))
    if num == 1:
        batch_var_scaler = 0.0
    else:
        batch_var_scaler = float(num) / (num - 1)
    batch_var_unbiased = te.lang.cce.vmuls(batch_var_biased, batch_var_scaler)

    factor = 1.0 - momentum
    factor_reverse = momentum
    mean_mul = te.lang.cce.vmuls(batch_mean, factor)
    mean_mul_rev = te.lang.cce.vmuls(mean, factor_reverse)
    mean_updated = te.lang.cce.vadd(mean_mul, mean_mul_rev)

    var_mul = te.lang.cce.vmuls(batch_var_unbiased, factor)
    var_mul_rev = te.lang.cce.vmuls(variance, factor_reverse)
    variance_updated = te.lang.cce.vadd(var_mul, var_mul_rev)

    y = te.lang.cce.vadds(x_input, 0.0)
    running_mean = te.lang.cce.vadds(mean, 0.0)
    running_std = te.lang.cce.vsqrt(te.lang.cce.vadds(variance, epsilon))
    res = [y, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated]
    return res


@util.check_input_type(dict, dict, dict, dict, dict,
                       dict, dict, dict, dict, dict, dict, dict,
                       float, float, bool, int, str, str)
@@ -108,39 +145,7 @@ def batchnorm_fold(x, x_sum, x_square_sum, mean, variance,
    mean = tvm.placeholder(shape_mean, name="mean", dtype=dtype_mean.lower())
    variance = tvm.placeholder(shape_mean, name="variance", dtype=dtype_variance.lower())

    shape_x = te.lang.cce.util.shape_to_list(x_input.shape)
    num = shape_x[0] * shape_x[2] * shape_x[3]
    num_rec = 1.0 / num

    # compute the mean of x
    batch_mean = te.lang.cce.vmuls(x_sum, num_rec)

    # compute the variance of x
    variance_div = te.lang.cce.vmuls(x_square_sum, num_rec)
    mean_square = te.lang.cce.vmul(batch_mean, batch_mean)
    batch_var_biased = te.lang.cce.vsub(variance_div, mean_square)
    batch_std = te.lang.cce.vsqrt(te.lang.cce.vadds(batch_var_biased, epsilon))
    if num == 1:
        batch_var_scaler = 0.0
    else:
        batch_var_scaler = float(num) / (num - 1)
    batch_var_unbiased = te.lang.cce.vmuls(batch_var_biased, batch_var_scaler)

    factor = 1.0 - momentum
    factor_reverse = momentum
    mean_mul = te.lang.cce.vmuls(batch_mean, factor)
    mean_mul_rev = te.lang.cce.vmuls(mean, factor_reverse)
    mean_updated = te.lang.cce.vadd(mean_mul, mean_mul_rev)

    var_mul = te.lang.cce.vmuls(batch_var_unbiased, factor)
    var_mul_rev = te.lang.cce.vmuls(variance, factor_reverse)
    variance_updated = te.lang.cce.vadd(var_mul, var_mul_rev)

    y = te.lang.cce.vadds(x_input, 0.0)
    running_mean = te.lang.cce.vadds(mean, 0.0)
    running_std = te.lang.cce.vsqrt(te.lang.cce.vadds(variance, epsilon))
    res = [y, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated]

    res = _batchnorm_fold_compute(x_input, x_sum, x_square_sum, mean, variance, momentum, epsilon)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)
    config = {"name": kernel_name,
--- a/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py
+++ b/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py
@@ -21,6 +21,7 @@ from te.platform.cce_build import build_config
 from topi import generic
 from topi.cce import util
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 from impl.bn_training_reduce import bn_training_reduce_schedule_nd

 SHAPE_SIZE_LIMIT = 2147483648

@@ -81,9 +82,8 @@ def batchnorm_fold2_grad_reduce(dout, x, dout_reduce, dout_x_reduce, kernel_name
    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape)
    util.check_shape_size(shape, SHAPE_SIZE_LIMIT)
    check_list = ["float16", "float32"]
    inp_dtype = x.get("dtype").lower()
    if not inp_dtype in check_list:
    if not inp_dtype in ["float16", "float32"]:
        raise RuntimeError("Dtype of input only support float16, float32")
    dout_t = tvm.placeholder(shape, name="dout", dtype=inp_dtype)
    x_t = tvm.placeholder(shape, name="x", dtype=inp_dtype)
@@ -100,7 +100,7 @@ def batchnorm_fold2_grad_reduce(dout, x, dout_reduce, dout_x_reduce, kernel_name

        te.lang.cce.cce_build_code(sch, config)
        return
    from impl.bn_training_reduce import bn_training_reduce_schedule_nd

    sch, tensor_list = bn_training_reduce_schedule_nd(res_list)
    with build_config:
        tvm.build(sch, tensor_list, "cce", name=kernel_name)
--- a/mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py
+++ b/mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py
@@ -95,12 +95,12 @@ def batchnorm_fold_grad(d_batch_mean, d_batch_std, x, batch_mean, batch_std, dx,
    dtype_mean = d_batch_mean.get("dtype").lower()
    if format_data == "NC1HWC0":
        if len(shape_x) != 5:
            raise RuntimeError("batchnorm_fold only support shape 5D"
            raise RuntimeError("batchnorm_fold grad only support shape 5D"
                               "when input format is NC1HWC0")
        shape_mean = (1, shape_x[1], 1, 1, shape_x[4])
    elif format_data == "NCHW":
        if len(shape_x) < 2 or len(shape_x) > 4:
            raise RuntimeError("batchnorm_fold only support shape 2D to 4D")
            raise RuntimeError("batchnorm_fold grad only support shape 2D to 4D")
        if shape_x[1] != shape_mean[0]:
            raise RuntimeError("data_format is NCHW, shape_bias must"
                               "be equal to the second axis of shape_x")
--- a/mindspore/ops/_op_impl/_custom_op/correction_mul.py
+++ b/mindspore/ops/_op_impl/_custom_op/correction_mul.py
@@ -66,9 +66,8 @@ def correction_mul(x, batch_std, running_std, y, channel, kernel_name="correctio
    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape)
    util.check_shape_size(shape, SHAPE_SIZE_LIMIT)
    check_list = ["float16", "float32"]
    inp_dtype = x.get("dtype").lower()
    if not inp_dtype in check_list:
    if not inp_dtype in ["float16", "float32"]:
        raise RuntimeError("Dtype of input only support float16, float32")

    x_t = tvm.placeholder(shape, name="x", dtype=inp_dtype)
--- a/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py
+++ b/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py
@@ -71,10 +71,9 @@ def fake_learned_scale_quant_perchannel_compute(input_data, alpha_data, quant_ma
    return res


@util.check_input_type(dict, dict, dict, dict, bool, int, str)
 def fake_learned_scale_quant_perchannel(input_x, alpha, quant_max, out, neg_trunc, channel_axis,
                                        kernel_name="fake_learned_scale_quant_perchannel"):
    """FakeLearnedScaleQuantPerChannel"""
 def fake_learned_scale_quant_perchannel_param(input_x, alpha, quant_max, channel_axis,
                                              kernel_name="fake_learned_scale_quant_perchannel"):
    """Get and check FakeLearnedScaleQuantPerChannel parameters"""
    input_shape = input_x.get("shape")
    input_x_shape_ = input_x.get("ori_shape")
    input_x_format = input_x.get("format")
@@ -113,6 +112,16 @@ def fake_learned_scale_quant_perchannel(input_x, alpha, quant_max, out, neg_trun
    input_data = tvm.placeholder(input_shape, name="x", dtype=input_dtype)
    alpha_data = tvm.placeholder(shape_c, name="alpha_data", dtype=alpha_dtype)
    quant_max_data = tvm.placeholder(quant_max_shape, name="quant_max_data", dtype=quant_max_dtype)
    return input_data, alpha_data, quant_max_data


@util.check_input_type(dict, dict, dict, dict, bool, int, str)
 def fake_learned_scale_quant_perchannel(input_x, alpha, quant_max, out, neg_trunc, channel_axis,
                                        kernel_name="fake_learned_scale_quant_perchannel"):
    """FakeLearnedScaleQuantPerChannel"""
    input_data, alpha_data, quant_max_data = \
        fake_learned_scale_quant_perchannel_param(input_x, alpha, quant_max, channel_axis, kernel_name)

    res = fake_learned_scale_quant_perchannel_compute(input_data, alpha_data, quant_max_data, neg_trunc, kernel_name)

    with tvm.target.cce():
--- a/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py
+++ b/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py
@@ -58,6 +58,36 @@ def _fake_learned_scale_quant_perchannel_grad_d_tbe():
    return


 def _sign_function(dtype, input_div_alpha):
    """sign function imp"""
    if dtype == "float32":
        data_min = tvm.const(SCALAR_MIN_FP32, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP32, dtype=dtype)
    elif dtype == "float16":
        data_min = tvm.const(SCALAR_MIN_FP16, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP16, dtype=dtype)
    else:
        data_min = tvm.const(1, dtype=dtype)
        neg_data_min = tvm.const(-1, dtype=dtype)
    vmax = te.lang.cce.vmaxs(input_div_alpha, neg_data_min)
    vmin = te.lang.cce.vmins(vmax, data_min)
    if dtype == "float32":
        # max num of float32 is 2**126
        max_support_fp32 = tvm.const(2 ** 62, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp32)
        res_mul2 = te.lang.cce.vmuls(res_mul1, max_support_fp32)
        sign = te.lang.cce.vmuls(res_mul2, tvm.const(2 ** 2, dtype=dtype))
    elif dtype == "float16":
        # max num of float16 is 2**24
        # but cce can only support 2**12, so use 12/12 to adaptor 24
        max_support_fp16 = tvm.const(2 ** 12, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp16)
        sign = te.lang.cce.vmuls(res_mul1, max_support_fp16)
    else:
        sign = vmin
    return sign


@fusion_manager.register("fake_learned_scale_quant_perchannel_grad_d")
 def fake_learned_scale_quant_perchannel_grad_d_compute(dout, input_data, alpha_data, quant_max_data, neg_trunc,
                                                       kernel_name="fake_learned_scale_quant_perchannel_grad_d"):
@@ -86,7 +116,6 @@ def fake_learned_scale_quant_perchannel_grad_d_compute(dout, input_data, alpha_d
    tensor_one = tvm.const(1.0, input_div_alpha.dtype)
    tensor_one = te.lang.cce.broadcast(tensor_one, shape)

    #out_of_bounds = te.lang.cce.vcmpsel(te.lang.cce.vabs(input_div_alpha), 1.0, 'gt', 1.0, 0.0)
    out_of_upper_bounds = te.lang.cce.vcmpsel(input_div_alpha, 1.0, 'gt', 1.0, 0.0)
    if neg_trunc:
        out_of_lower_bounds = te.lang.cce.vcmpsel(input_div_alpha, 0.0, 'lt', 1.0, 0.0)
@@ -96,32 +125,7 @@ def fake_learned_scale_quant_perchannel_grad_d_compute(dout, input_data, alpha_d

    dx = te.lang.cce.vmul(dx, te.lang.cce.vsub(tensor_one, out_of_bounds))

    # sign function imp
    if dtype == "float32":
        data_min = tvm.const(SCALAR_MIN_FP32, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP32, dtype=dtype)
    elif dtype == "float16":
        data_min = tvm.const(SCALAR_MIN_FP16, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP16, dtype=dtype)
    else:
        data_min = tvm.const(1, dtype=dtype)
        neg_data_min = tvm.const(-1, dtype=dtype)
    vmax = te.lang.cce.vmaxs(input_div_alpha, neg_data_min)
    vmin = te.lang.cce.vmins(vmax, data_min)
    if dtype == "float32":
        # max num of float32 is 2**126
        max_support_fp32 = tvm.const(2 ** 62, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp32)
        res_mul2 = te.lang.cce.vmuls(res_mul1, max_support_fp32)
        sign = te.lang.cce.vmuls(res_mul2, tvm.const(2 ** 2, dtype=dtype))
    elif dtype == "float16":
        # max num of float16 is 2**24
        # but cce can only support 2**12, so use 12/12 to adaptor 24
        max_support_fp16 = tvm.const(2 ** 12, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp16)
        sign = te.lang.cce.vmuls(res_mul1, max_support_fp16)
    else:
        sign = vmin
    sign = _sign_function(dtype, input_div_alpha)

    # The following lines are equivalent to :
    #     dalpha_each = dout * sign                             if out of bounds
@@ -136,10 +140,9 @@ def fake_learned_scale_quant_perchannel_grad_d_compute(dout, input_data, alpha_d
    return [dx, dalpha_each]


@util.check_input_type(dict, dict, dict, dict, dict, dict, bool, int, str)
 def fake_learned_scale_quant_perchannel_grad_d(dout, input_x, alpha, quant_max, dx, dalpha, neg_trunc,
                                               channel_axis, kernel_name="fake_learned_scale_quant_perchannel_grad_d"):
    """FakeLearnedScaleQuantPerChannelGradD"""
 def fake_learned_scale_quant_perchannel_grad_d_param(input_x, alpha, quant_max, channel_axis,
                                                     kernel_name="fake_learned_scale_quant_perchannel_grad_d"):
    """Get and check FakeLearnedScaleQuantPerChannelGradD parameters"""
    input_shape = input_x.get("shape")
    input_x_shape_ = input_x.get("ori_shape")
    input_x_format = input_x.get("format")
@@ -179,6 +182,16 @@ def fake_learned_scale_quant_perchannel_grad_d(dout, input_x, alpha, quant_max,
    input_data = tvm.placeholder(input_shape, name="x", dtype=input_dtype)
    alpha_data = tvm.placeholder(shape_c, name="alpha_data", dtype=alpha_dtype)
    quant_max_data = tvm.placeholder(quant_max_shape, name="quant_max_data", dtype=quant_max_dtype)
    return dout_data, input_data, alpha_data, quant_max_data


@util.check_input_type(dict, dict, dict, dict, dict, dict, bool, int, str)
 def fake_learned_scale_quant_perchannel_grad_d(dout, input_x, alpha, quant_max, dx, dalpha, neg_trunc,
                                               channel_axis, kernel_name="fake_learned_scale_quant_perchannel_grad_d"):
    """FakeLearnedScaleQuantPerChannelGradD"""
    dout_data, input_data, alpha_data, quant_max_data = \
        fake_learned_scale_quant_perchannel_grad_d_param(input_x, alpha, quant_max, channel_axis, kernel_name)

    res = fake_learned_scale_quant_perchannel_grad_d_compute(dout_data, input_data, alpha_data, quant_max_data,
                                                             neg_trunc, kernel_name)

--- a/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py
+++ b/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py
@@ -71,10 +71,9 @@ def fake_learned_scale_quant_perlayer_compute(input_data, alpha_data, quant_max_
    return res


@util.check_input_type(dict, dict, dict, dict, bool, str)
 def fake_learned_scale_quant_perlayer(input_x, alpha, quant_max, out, neg_trunc,
                                      kernel_name="fake_learned_scale_quant_perlayer"):
    """FakeLearnedScaleQuantPerLayer"""
 def fake_learned_scale_quant_perlayer_param(input_x, alpha, quant_max,
                                            kernel_name="fake_learned_scale_quant_perlayer"):
    """Get and check FakeLearnedScaleQuantPerLayer parameters"""
    input_shape = input_x.get("shape")
    input_dtype = input_x.get("dtype")
    alpha_shape = alpha.get("ori_shape")
@@ -105,6 +104,16 @@ def fake_learned_scale_quant_perlayer(input_x, alpha, quant_max, out, neg_trunc,
    input_data = tvm.placeholder(input_shape, name="x", dtype=input_dtype)
    alpha_data = tvm.placeholder(alpha_shape, name="alpha_data", dtype=alpha_dtype)
    quant_max_data = tvm.placeholder(quant_max_shape, name="quant_max_data", dtype=quant_max_dtype)
    return input_data, alpha_data, quant_max_data


@util.check_input_type(dict, dict, dict, dict, bool, str)
 def fake_learned_scale_quant_perlayer(input_x, alpha, quant_max, out, neg_trunc,
                                      kernel_name="fake_learned_scale_quant_perlayer"):
    """FakeLearnedScaleQuantPerLayer"""
    input_data, alpha_data, quant_max_data = \
        fake_learned_scale_quant_perlayer_param(input_x, alpha, quant_max, kernel_name)

    res = fake_learned_scale_quant_perlayer_compute(input_data, alpha_data, quant_max_data, neg_trunc, kernel_name)

    with tvm.target.cce():
--- a/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py
+++ b/mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py
@@ -59,6 +59,36 @@ def _fake_learned_scale_quant_perlayer_grad_d_tbe():
    return


 def _sign_function(dtype, input_div_alpha):
    """sign function imp"""
    if dtype == "float32":
        data_min = tvm.const(SCALAR_MIN_FP32, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP32, dtype=dtype)
    elif dtype == "float16":
        data_min = tvm.const(SCALAR_MIN_FP16, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP16, dtype=dtype)
    else:
        data_min = tvm.const(1, dtype=dtype)
        neg_data_min = tvm.const(-1, dtype=dtype)
    vmax = te.lang.cce.vmaxs(input_div_alpha, neg_data_min)
    vmin = te.lang.cce.vmins(vmax, data_min)
    if dtype == "float32":
        # max num of float32 is 2**126
        max_support_fp32 = tvm.const(2 ** 62, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp32)
        res_mul2 = te.lang.cce.vmuls(res_mul1, max_support_fp32)
        sign = te.lang.cce.vmuls(res_mul2, tvm.const(2 ** 2, dtype=dtype))
    elif dtype == "float16":
        # max num of float16 is 2**24
        # but cce can only support 2**12, so use 12/12 to adaptor 24
        max_support_fp16 = tvm.const(2 ** 12, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp16)
        sign = te.lang.cce.vmuls(res_mul1, max_support_fp16)
    else:
        sign = vmin
    return sign


@fusion_manager.register("fake_learned_scale_quant_perlayer_grad_d")
 def fake_learned_scale_quant_perlayer_grad_d_compute(dout, input_data, alpha_data, quant_max_data, neg_trunc,
                                                     kernel_name="fake_learned_scale_quant_perlayer_grad_d"):
@@ -87,7 +117,6 @@ def fake_learned_scale_quant_perlayer_grad_d_compute(dout, input_data, alpha_dat
    tensor_one = tvm.const(1.0, input_div_alpha.dtype)
    tensor_one = te.lang.cce.broadcast(tensor_one, shape)

    #out_of_bounds = te.lang.cce.vcmpsel(te.lang.cce.vabs(input_div_alpha), 1.0, 'gt', 1.0, 0.0)
    out_of_upper_bounds = te.lang.cce.vcmpsel(input_div_alpha, 1.0, 'gt', 1.0, 0.0)
    if neg_trunc:
        out_of_lower_bounds = te.lang.cce.vcmpsel(input_div_alpha, 0.0, 'lt', 1.0, 0.0)
@@ -97,32 +126,7 @@ def fake_learned_scale_quant_perlayer_grad_d_compute(dout, input_data, alpha_dat

    dx = te.lang.cce.vmul(dx, te.lang.cce.vsub(tensor_one, out_of_bounds))

    # sign function imp
    if dtype == "float32":
        data_min = tvm.const(SCALAR_MIN_FP32, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP32, dtype=dtype)
    elif dtype == "float16":
        data_min = tvm.const(SCALAR_MIN_FP16, dtype=dtype)
        neg_data_min = tvm.const(NEG_SCALAR_MIN_FP16, dtype=dtype)
    else:
        data_min = tvm.const(1, dtype=dtype)
        neg_data_min = tvm.const(-1, dtype=dtype)
    vmax = te.lang.cce.vmaxs(input_div_alpha, neg_data_min)
    vmin = te.lang.cce.vmins(vmax, data_min)
    if dtype == "float32":
        # max num of float32 is 2**126
        max_support_fp32 = tvm.const(2 ** 62, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp32)
        res_mul2 = te.lang.cce.vmuls(res_mul1, max_support_fp32)
        sign = te.lang.cce.vmuls(res_mul2, tvm.const(2 ** 2, dtype=dtype))
    elif dtype == "float16":
        # max num of float16 is 2**24
        # but cce can only support 2**12, so use 12/12 to adaptor 24
        max_support_fp16 = tvm.const(2 ** 12, dtype=dtype)
        res_mul1 = te.lang.cce.vmuls(vmin, max_support_fp16)
        sign = te.lang.cce.vmuls(res_mul1, max_support_fp16)
    else:
        sign = vmin
    sign = _sign_function(dtype, input_div_alpha)

    # The following lines are equivalent to :
    #     dalpha_each = dout * sign                             if out of bounds
@@ -137,10 +141,9 @@ def fake_learned_scale_quant_perlayer_grad_d_compute(dout, input_data, alpha_dat
    return [dx, dalpha_each]


@util.check_input_type(dict, dict, dict, dict, dict, dict, bool, str)
 def fake_learned_scale_quant_perlayer_grad_d(dout, input_x, alpha, quant_max, dx, dalpha, neg_trunc,
                                             kernel_name="fake_learned_scale_quant_perlayer_grad_d"):
    """FakeLearnedScaleQuantPerLayerGradD"""
 def fake_learned_scale_quant_perlayer_grad_d_param(input_x, alpha, quant_max,
                                                   kernel_name="fake_learned_scale_quant_perlayer_grad_d"):
    """Get and check FakeLearnedScaleQuantPerLayerGradD parameters"""
    input_shape = input_x.get("shape")
    input_dtype = input_x.get("dtype")
    alpha_shape = alpha.get("ori_shape")
@@ -172,6 +175,15 @@ def fake_learned_scale_quant_perlayer_grad_d(dout, input_x, alpha, quant_max, dx
    input_data = tvm.placeholder(input_shape, name="x", dtype=input_dtype)
    alpha_data = tvm.placeholder(alpha_shape, name="alpha_data", dtype=alpha_dtype)
    quant_max_data = tvm.placeholder(quant_max_shape, name="quant_max_data", dtype=quant_max_dtype)
    return dout_data, input_data, alpha_data, quant_max_data

@util.check_input_type(dict, dict, dict, dict, dict, dict, bool, str)
 def fake_learned_scale_quant_perlayer_grad_d(dout, input_x, alpha, quant_max, dx, dalpha, neg_trunc,
                                             kernel_name="fake_learned_scale_quant_perlayer_grad_d"):
    """FakeLearnedScaleQuantPerLayerGradD"""
    dout_data, input_data, alpha_data, quant_max_data = \
        fake_learned_scale_quant_perlayer_grad_d_param(input_x, alpha, quant_max, kernel_name)

    res = fake_learned_scale_quant_perlayer_grad_d_compute(dout_data, input_data, alpha_data, quant_max_data,
                                                           neg_trunc, kernel_name)

--- a/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py
+++ b/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py
@@ -86,11 +86,9 @@ def fake_quant_perchannel_compute(x, min_val, max_val, y, quant_min, quant_max,
    return res


@util.check_input_type(dict, dict, dict, dict, bool, bool, int, int, str)
 def fake_quant_perchannel(x, min_val, max_val, y,
                          symmetric, narrow_range, num_bits, channel_axis,
                          kernel_name="fake_quant_perchannel"):
    """FakeQuantPerChannel"""
 def fake_quant_perchannel_param(x, min_val, max_val, channel_axis,
                                kernel_name="fake_quant_perchannel"):
    """Get and check fake_quant_perchannel parameters"""
    x_shape = x.get("shape")
    x_shape_ = x.get("ori_shape")
    x_format = x.get("format")
@@ -120,15 +118,25 @@ def fake_quant_perchannel(x, min_val, max_val, y,
    util.check_dtype_rule(min_dtype, check_list)
    util.check_dtype_rule(max_dtype, check_list)

    shape_c = [1] * len(x_shape)
    shape_c[channel_axis_] = min_val.get("ori_shape")[0]
    if x_format == "NC1HWC0" and channel_axis_ == 1:
        shape_c = min_val.get("shape")
    return x_shape, shape_c, x_dtype


@util.check_input_type(dict, dict, dict, dict, bool, bool, int, int, str)
 def fake_quant_perchannel(x, min_val, max_val, y,
                          symmetric, narrow_range, num_bits, channel_axis,
                          kernel_name="fake_quant_perchannel"):
    """FakeQuantPerChannel"""
    quant_min = 0
    quant_max = 2 ** num_bits - 1
    if narrow_range:
        quant_min = quant_min + 1

    shape_c = [1] * len(x_shape)
    shape_c[channel_axis_] = min_val.get("ori_shape")[0]
    if x_format == "NC1HWC0" and channel_axis_ == 1:
        shape_c = min_val.get("shape")
    x_shape, shape_c, x_dtype = fake_quant_perchannel_param(x, min_val, max_val,
                                                            channel_axis, kernel_name)
    input_data = tvm.placeholder(x_shape, name="x", dtype=x_dtype)
    min_data = tvm.placeholder(shape_c, name="min_val", dtype=x_dtype)
    max_data = tvm.placeholder(shape_c, name="max_val", dtype=x_dtype)
--- a/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py
+++ b/mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py
@@ -110,11 +110,9 @@ def fake_quant_perchannel_grad_compute(dout, x, min_val, max_val, quant_min, qua
    return res


@util.check_input_type(dict, dict, dict, dict, dict, bool, bool, int, int, str)
 def fake_quant_perchannel_grad(dout, x, min_val, max_val, dx,
                               symmetric, narrow_range, num_bits, channel_axis,
                               kernel_name="fake_quant_perchannel_grad"):
    """FakeQuantPerChannelGrad"""
 def fake_quant_perchannel_grad_param(x, min_val, max_val, channel_axis,
                                     kernel_name="fake_quant_perchannel_grad"):
    """Get and check FakeQuantPerChannelGrad parameters"""
    x_shape = x.get("shape")
    x_shape_ = x.get("ori_shape")
    x_format = x.get("format")
@@ -144,6 +142,18 @@ def fake_quant_perchannel_grad(dout, x, min_val, max_val, dx,
    util.check_dtype_rule(min_dtype, check_list)
    util.check_dtype_rule(max_dtype, check_list)

    shape_c = [1] * len(x_shape)
    shape_c[channel_axis_] = min_val.get("ori_shape")[0]
    if x_format == "NC1HWC0" and channel_axis_ == 1:
        shape_c = min_val.get("shape")
    return x_shape, shape_c, x_dtype


@util.check_input_type(dict, dict, dict, dict, dict, bool, bool, int, int, str)
 def fake_quant_perchannel_grad(dout, x, min_val, max_val, dx,
                               symmetric, narrow_range, num_bits, channel_axis,
                               kernel_name="fake_quant_perchannel_grad"):
    """FakeQuantPerChannelGrad"""
    if symmetric:
        quant_min = 0 - 2 ** (num_bits - 1)
        quant_max = 2 ** (num_bits - 1) - 1
@@ -153,10 +163,8 @@ def fake_quant_perchannel_grad(dout, x, min_val, max_val, dx,
    if narrow_range:
        quant_min = quant_min + 1

    shape_c = [1] * len(x_shape)
    shape_c[channel_axis_] = min_val.get("ori_shape")[0]
    if x_format == "NC1HWC0" and channel_axis_ == 1:
        shape_c = min_val.get("shape")
    x_shape, shape_c, x_dtype = fake_quant_perchannel_grad_param(x, min_val, max_val,
                                                                 channel_axis, kernel_name)
    dout_data = tvm.placeholder(x_shape, name="dout", dtype=x_dtype)
    input_data = tvm.placeholder(x_shape, name="x", dtype=x_dtype)
    min_data = tvm.placeholder(shape_c, name="min_val", dtype=x_dtype)
--- a/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py
+++ b/mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py
@@ -82,7 +82,6 @@ def minmax_update_perchannel(x, min_val, max_val, min_up, max_up,
                             kernel_name="minmax_update_perchannel"):
    """MinMaxUpdatePerChannel op"""
    x_shape = x.get("ori_shape")
    x_format = x.get("format")
    x_dtype = x.get("dtype")
    min_shape = min_val.get("ori_shape")
    min_dtype = min_val.get("dtype")
--- a/mindspore/ops/bprop_mindir/Identity_bprop.mindir
+++ b/mindspore/ops/bprop_mindir/Identity_bprop.mindir
@@ -6,4 +6,4 @@
 bprop.10:x*
 bprop.10:out*
 
bprop.10:dout2
 bprop.10:[CNode]12:2:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb365c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c0606bdbf14ec1b2b2d86ab82b5eb2ac71f1d3d0ba743f7cee45a1d9a0a2d82ac414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260
 bprop.10:[CNode]12:2:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260
--- a/mindspore/ops/bprop_mindir/ReLU_bprop.mindir
+++ b/mindspore/ops/bprop_mindir/ReLU_bprop.mindir
@@ -8,4 +8,4 @@
 	bprop.2:x*
 bprop.2:out*
 bprop.2:dout2
 bprop.2:[CNode]4:3:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb365c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c0606bdbf14ec1b2b2d86ab82b5eb2ac71f1d3d0ba743f7cee45a1d9a0a2d82ac414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260
 bprop.2:[CNode]4:3:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260