|
|
|
@@ -15,6 +15,7 @@ |
|
|
|
"""Quantization aware training.""" |
|
|
|
|
|
|
|
from functools import partial |
|
|
|
from collections import namedtuple |
|
|
|
import numpy as np |
|
|
|
from mindspore import nn |
|
|
|
import mindspore.common.dtype as mstype |
|
|
|
@@ -34,7 +35,7 @@ from ...ops.operations import _quant_ops as Q |
|
|
|
__all__ = [ |
|
|
|
'Conv2dBnAct', |
|
|
|
'DenseBnAct', |
|
|
|
'FakeQuantWithMinMax', |
|
|
|
'FakeQuantWithMinMaxObserver', |
|
|
|
'Conv2dBnFoldQuant', |
|
|
|
'Conv2dBnWithoutFoldQuant', |
|
|
|
'Conv2dQuant', |
|
|
|
@@ -422,14 +423,14 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
"""Initialize FakeQuantWithMinMax layer""" |
|
|
|
"""Initialize FakeQuantWithMinMaxObserver""" |
|
|
|
super(FakeQuantWithMinMaxObserver, self).__init__(quant_dtype=quant_dtype, per_channel=per_channel, |
|
|
|
symmetric=symmetric, narrow_range=narrow_range, |
|
|
|
num_channels=num_channels) |
|
|
|
Validator.check_type("min_init", min_init, [int, float]) |
|
|
|
Validator.check_type("max_init", max_init, [int, float]) |
|
|
|
Validator.check("min_init", min_init, "max_init", max_init, rel=Rel.LT) |
|
|
|
Validator.check_integer('quant_delay', quant_delay, 0, Rel.GE) |
|
|
|
Validator.check_non_negative_int(quant_delay, 'quant_delay') |
|
|
|
self.min_init = min_init |
|
|
|
self.max_init = max_init |
|
|
|
self.quant_dtype = quant_dtype |
|
|
|
@@ -498,119 +499,9 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): |
|
|
|
return out |
|
|
|
|
|
|
|
|
|
|
|
class FakeQuantWithMinMax(Cell): |
|
|
|
r""" |
|
|
|
Quantization aware op. This OP provides the fake quantization observer function on data with min and max. |
|
|
|
|
|
|
|
Args: |
|
|
|
min_init (int, float): The initialized min value. Default: -6. |
|
|
|
max_init (int, float): The initialized max value. Default: 6. |
|
|
|
ema (bool): Whether the Exponential Moving Average algorithm is used to update min and max. Default: False.
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
channel_axis (int): Quantization by channel axis. Default: 1. |
|
|
|
num_channels (int): Declares the channel size of min and max. Default: 1.
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of FakeQuantWithMinMax. |
|
|
|
|
|
|
|
Outputs: |
|
|
|
Tensor, with the same type and shape as the `x`. |
|
|
|
|
|
|
|
Examples: |
|
|
|
>>> fake_quant = FakeQuantWithMinMax() |
|
|
|
>>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) |
|
|
|
>>> result = fake_quant(input_x) |
|
|
|
""" |
|
|
|
|
|
|
|
def __init__(self, |
|
|
|
min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=False, |
|
|
|
ema_decay=0.999, |
|
|
|
per_channel=False, |
|
|
|
channel_axis=1, |
|
|
|
num_channels=1, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
"""Initialize FakeQuantWithMinMax layer""" |
|
|
|
super(FakeQuantWithMinMax, self).__init__() |
|
|
|
Validator.check_type("min_init", min_init, [int, float]) |
|
|
|
Validator.check_type("max_init", max_init, [int, float]) |
|
|
|
Validator.check("min_init", min_init, "max_init", max_init, rel=Rel.LT) |
|
|
|
Validator.check_non_negative_int(quant_delay, 'quant_delay') |
|
|
|
self.min_init = min_init |
|
|
|
self.max_init = max_init |
|
|
|
self.num_bits = num_bits |
|
|
|
self.ema = ema |
|
|
|
self.ema_decay = ema_decay |
|
|
|
self.per_channel = per_channel |
|
|
|
self.num_channels = num_channels |
|
|
|
self.channel_axis = channel_axis |
|
|
|
self.quant_delay = quant_delay |
|
|
|
self.symmetric = symmetric |
|
|
|
self.narrow_range = narrow_range |
|
|
|
self.is_ascend = context.get_context('device_target') == "Ascend" |
|
|
|
|
|
|
|
# init tensor min and max for fake quant op |
|
|
|
if self.per_channel: |
|
|
|
min_array = np.array([self.min_init] * self.num_channels).astype(np.float32) |
|
|
|
max_array = np.array([self.max_init] * self.num_channels).astype(np.float32) |
|
|
|
else: |
|
|
|
min_array = np.array([self.min_init]).astype(np.float32) |
|
|
|
max_array = np.array([self.max_init]).astype(np.float32) |
|
|
|
self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False) |
|
|
|
self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False) |
|
|
|
|
|
|
|
# init fake quant relative op |
|
|
|
if self.per_channel: |
|
|
|
quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis) |
|
|
|
ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis) |
|
|
|
else: |
|
|
|
quant_fun = Q.FakeQuantPerLayer |
|
|
|
ema_fun = Q.MinMaxUpdatePerLayer |
|
|
|
|
|
|
|
self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay) |
|
|
|
if self.is_ascend: |
|
|
|
self.fake_quant_train = quant_fun(num_bits=self.num_bits, |
|
|
|
symmetric=self.symmetric, |
|
|
|
narrow_range=self.narrow_range, |
|
|
|
quant_delay=self.quant_delay) |
|
|
|
self.fake_quant_infer = self.fake_quant_train |
|
|
|
else: |
|
|
|
quant_fun = partial(quant_fun, |
|
|
|
ema=self.ema, |
|
|
|
ema_decay=ema_decay, |
|
|
|
num_bits=self.num_bits, |
|
|
|
symmetric=self.symmetric, |
|
|
|
narrow_range=self.narrow_range, |
|
|
|
quant_delay=self.quant_delay) |
|
|
|
self.fake_quant_train = quant_fun(training=True) |
|
|
|
self.fake_quant_infer = quant_fun(training=False) |
|
|
|
|
|
|
|
def extend_repr(self): |
|
|
|
s = 'num_bits={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \ |
|
|
|
'quant_delay={}, min_init={}, max_init={}'.format(self.num_bits, self.symmetric, self.narrow_range, |
|
|
|
self.ema, self.ema_decay, self.per_channel, |
|
|
|
self.channel_axis, self.num_channels, self.quant_delay, |
|
|
|
self.min_init, self.max_init) |
|
|
|
return s |
|
|
|
QuantConfig = namedtuple("QuantConfig", ['weight', 'activation']) |
|
|
|
|
|
|
|
def construct(self, x): |
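# Training: refresh the running min/max with the EMA update op and write them back
# into the `minq`/`maxq` parameters before fake-quantizing; inference reuses the
# stored min/max unchanged.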
|
|
|
if self.training: |
|
|
|
min_up, max_up = self.ema_update(x, self.minq, self.maxq) |
|
|
|
P.Assign()(self.minq, min_up) |
|
|
|
P.Assign()(self.maxq, max_up) |
|
|
|
out = self.fake_quant_train(x, self.minq, self.maxq) |
|
|
|
else: |
|
|
|
out = self.fake_quant_infer(x, self.minq, self.maxq) |
|
|
|
return out |
|
|
|
quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activation=FakeQuantWithMinMaxObserver) |
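# Illustration only (not part of the original module): a custom QuantConfig can be
# assembled with functools.partial to hand differently configured observers to the
# *Quant cells defined below. The keyword values here are assumptions chosen for
# demonstration, for example:
#
#     custom_config = QuantConfig(
#         weight=partial(FakeQuantWithMinMaxObserver, per_channel=True, symmetric=True),
#         activation=partial(FakeQuantWithMinMaxObserver, ema=True, ema_decay=0.999))
#     conv = Conv2dBnFoldQuant(3, 16, kernel_size=3, quant_config=custom_config)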
|
|
|
|
|
|
|
|
|
|
|
class Conv2dBnFoldQuant(Cell): |
|
|
|
@@ -641,12 +532,9 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
mean vector. Default: 'zeros'. |
|
|
|
var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the |
|
|
|
variance vector. Default: 'ones'. |
|
|
|
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMax op. Default: True. |
|
|
|
per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): The Quantization delay parameters according to the global step. Default: 0. |
|
|
|
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
freeze_bn (int): The global step at which the quantization freezes the BatchNorm op. Default: 100000.
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -680,11 +568,8 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
mean_init='zeros', |
|
|
|
var_init='ones', |
|
|
|
fake=True, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0, |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8, |
|
|
|
freeze_bn=100000): |
|
|
|
"""Initialize Conv2dBnFoldQuant layer""" |
|
|
|
super(Conv2dBnFoldQuant, self).__init__() |
|
|
|
@@ -699,13 +584,10 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
self.eps = eps |
|
|
|
self.momentum = momentum |
|
|
|
self.has_bias = has_bias |
|
|
|
self.quant_delay = quant_delay |
|
|
|
self.freeze_bn = freeze_bn |
|
|
|
self.fake = fake |
|
|
|
self.num_bits = num_bits |
|
|
|
self.per_channel = per_channel |
|
|
|
self.symmetric = symmetric |
|
|
|
self.narrow_range = narrow_range |
|
|
|
self.quant_config = quant_config |
|
|
|
self.quant_dtype = quant_dtype |
|
|
|
self.is_gpu = context.get_context('device_target') == "GPU" |
|
|
|
|
|
|
|
# initialize convolution op and Parameter |
|
|
|
@@ -745,16 +627,12 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
requires_grad=False) |
|
|
|
|
|
|
|
# initialize fake ops |
|
|
|
self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, |
|
|
|
self.fake_quant_weight = quant_config.weight(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=False, |
|
|
|
per_channel=per_channel, |
|
|
|
channel_axis=channel_axis, |
|
|
|
num_channels=out_channels, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn) |
|
|
|
self.correct_mul = Q.CorrectionMul(channel_axis) |
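# BatchNormFoldCell supplies the per-channel batch/running statistics used to fold
# the BatchNorm into the convolution, and CorrectionMul rescales the weight by the
# matching per-channel factor before the weight observer is applied.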
|
|
|
if context.get_context('device_target') == "Ascend": |
|
|
|
@@ -777,7 +655,7 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
self.pad_mode, self.padding, self.dilation, |
|
|
|
self.group, |
|
|
|
self.fake, self.freeze_bn, self.momentum, |
|
|
|
self.quant_delay) |
|
|
|
self.fake_quant_weight.quant_delay) |
|
|
|
return s |
|
|
|
|
|
|
|
def construct(self, x): |
|
|
|
@@ -836,11 +714,8 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. |
|
|
|
Default: 'normal'. |
|
|
|
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. |
|
|
|
per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. |
|
|
|
@@ -868,11 +743,8 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
momentum=0.997, |
|
|
|
weight_init='normal', |
|
|
|
bias_init='zeros', |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(Conv2dBnWithoutFoldQuant, self).__init__() |
|
|
|
if isinstance(kernel_size, int): |
|
|
|
self.kernel_size = (kernel_size, kernel_size) |
|
|
|
@@ -886,7 +758,6 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
self.pad_mode = pad_mode |
|
|
|
self.padding = padding |
|
|
|
self.group = group |
|
|
|
self.quant_delay = quant_delay |
|
|
|
|
|
|
|
self.bias_add = P.BiasAdd() |
|
|
|
if Validator.check_bool(has_bias): |
|
|
|
@@ -917,16 +788,12 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
weight_shape = [out_channels, in_channels // group, *self.kernel_size] |
|
|
|
channel_axis = 0 |
|
|
|
self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') |
|
|
|
self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, |
|
|
|
self.fake_quant_weight = quant_config.weight(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=False, |
|
|
|
per_channel=per_channel, |
|
|
|
channel_axis=channel_axis, |
|
|
|
num_channels=out_channels, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
quant_dtype=quant_dtype) |
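# Unlike Conv2dBnFoldQuant, the BatchNorm below is kept as a separate op and its
# statistics are never folded into the convolution weights.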
|
|
|
self.batchnorm = BatchNorm2d(out_channels, eps=eps, momentum=momentum) |
|
|
|
|
|
|
|
def construct(self, x): |
|
|
|
@@ -942,7 +809,7 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
'pad_mode={}, padding={}, dilation={}, group={}, ' \ |
|
|
|
'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, |
|
|
|
self.pad_mode, self.padding, self.dilation, self.group, |
|
|
|
self.has_bias, self.quant_delay) |
|
|
|
self.has_bias, self.fake_quant_weight.quant_delay) |
|
|
|
return s |
|
|
|
|
|
|
|
|
|
|
|
@@ -966,11 +833,8 @@ class Conv2dQuant(Cell): |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. |
|
|
|
Default: 'normal'. |
|
|
|
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. |
|
|
|
per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. |
|
|
|
@@ -996,11 +860,8 @@ class Conv2dQuant(Cell): |
|
|
|
has_bias=False, |
|
|
|
weight_init='normal', |
|
|
|
bias_init='zeros', |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(Conv2dQuant, self).__init__() |
|
|
|
if isinstance(kernel_size, int): |
|
|
|
self.kernel_size = (kernel_size, kernel_size) |
|
|
|
@@ -1014,7 +875,6 @@ class Conv2dQuant(Cell): |
|
|
|
self.pad_mode = pad_mode |
|
|
|
self.padding = padding |
|
|
|
self.group = group |
|
|
|
self.quant_delay = quant_delay |
|
|
|
|
|
|
|
weight_shape = [out_channels, in_channels // group, *self.kernel_size] |
|
|
|
self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') |
|
|
|
@@ -1033,16 +893,12 @@ class Conv2dQuant(Cell): |
|
|
|
stride=self.stride, |
|
|
|
dilation=self.dilation, |
|
|
|
group=self.group) |
|
|
|
self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, |
|
|
|
self.fake_quant_weight = quant_config.weight(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=False, |
|
|
|
per_channel=per_channel, |
|
|
|
channel_axis=0, |
|
|
|
num_channels=out_channels, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
|
|
|
|
def construct(self, x): |
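# Fake-quantize the weight first; the convolution (and bias add, if enabled) then
# runs on the simulated-quantized weight.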
|
|
|
weight = self.fake_quant_weight(self.weight) |
|
|
|
@@ -1056,7 +912,7 @@ class Conv2dQuant(Cell): |
|
|
|
'pad_mode={}, padding={}, dilation={}, group={}, ' \ |
|
|
|
'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, |
|
|
|
self.pad_mode, self.padding, self.dilation, self.group, |
|
|
|
self.has_bias, self.quant_delay) |
|
|
|
self.has_bias, self.fake_quant_weight.quant_delay) |
|
|
|
return s |
|
|
|
|
|
|
|
|
|
|
|
@@ -1075,11 +931,8 @@ class DenseQuant(Cell): |
|
|
|
same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. |
|
|
|
has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. |
|
|
|
activation (str): The activation function applied to the output of the layer, e.g. 'relu'. Default: None.
|
|
|
per_channel (bool): FakeQuantWithMinMax Parameters. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. |
|
|
|
@@ -1093,19 +946,15 @@ class DenseQuant(Cell): |
|
|
|
>>> result = dense_quant(input_x) |
|
|
|
""" |
|
|
|
|
|
|
|
def __init__( |
|
|
|
self, |
|
|
|
in_channels, |
|
|
|
out_channels, |
|
|
|
weight_init='normal', |
|
|
|
bias_init='zeros', |
|
|
|
has_bias=True, |
|
|
|
activation=None, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
def __init__(self, |
|
|
|
in_channels, |
|
|
|
out_channels, |
|
|
|
weight_init='normal', |
|
|
|
bias_init='zeros', |
|
|
|
has_bias=True, |
|
|
|
activation=None, |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(DenseQuant, self).__init__() |
|
|
|
self.in_channels = Validator.check_positive_int(in_channels) |
|
|
|
self.out_channels = Validator.check_positive_int(out_channels) |
|
|
|
@@ -1132,16 +981,12 @@ class DenseQuant(Cell): |
|
|
|
|
|
|
|
self.activation = get_activation(activation) |
|
|
|
self.activation_flag = self.activation is not None |
|
|
|
self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, |
|
|
|
self.fake_quant_weight = quant_config.weight(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=False, |
|
|
|
per_channel=per_channel, |
|
|
|
channel_axis=0, |
|
|
|
num_channels=out_channels, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
|
|
|
|
def construct(self, x): |
|
|
|
"""Use operators to construct the Dense layer.""" |
|
|
|
@@ -1179,16 +1024,13 @@ class ActQuant(_QuantActivation): |
|
|
|
Quantization aware training activation function. |
|
|
|
|
|
|
|
Adds the fake quant op to the end of the activation op, by which the output of the activation op will be truncated.
|
|
|
Please check `FakeQuantWithMinMax` for more details. |
|
|
|
Please check `FakeQuantWithMinMaxObserver` or other observers for more details.
|
|
|
|
|
|
|
Args: |
|
|
|
activation (Cell): Activation cell class. |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of ActQuant.
|
|
|
@@ -1205,21 +1047,14 @@ class ActQuant(_QuantActivation): |
|
|
|
def __init__(self, |
|
|
|
activation, |
|
|
|
ema_decay=0.999, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(ActQuant, self).__init__() |
|
|
|
self.fake_quant_act = FakeQuantWithMinMax(min_init=0, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=False, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
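# The observer above is applied to the output of the wrapped activation in
# construct, truncating it to the learned range.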
|
|
|
self.act = activation |
|
|
|
|
|
|
|
def construct(self, x): |
|
|
|
@@ -1240,11 +1075,8 @@ class LeakyReLUQuant(_QuantActivation): |
|
|
|
Args: |
|
|
|
activation (Cell): Activation cell class. |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of LeakyReLUQuant. |
|
|
|
@@ -1261,30 +1093,19 @@ class LeakyReLUQuant(_QuantActivation): |
|
|
|
def __init__(self, |
|
|
|
activation, |
|
|
|
ema_decay=0.999, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(LeakyReLUQuant, self).__init__() |
|
|
|
self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act_before = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
self.fake_quant_act_after = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
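# One observer fake-quantizes the input before the LeakyReLU and the other
# fake-quantizes its output; both track their ranges with EMA.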
|
|
|
if issubclass(activation.__class__, nn.LeakyReLU): |
|
|
|
self.act = activation |
|
|
|
else: |
|
|
|
@@ -1309,11 +1130,8 @@ class HSwishQuant(_QuantActivation): |
|
|
|
Args: |
|
|
|
activation (Cell): Activation cell class. |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of HSwishQuant. |
|
|
|
@@ -1330,30 +1148,19 @@ class HSwishQuant(_QuantActivation): |
|
|
|
def __init__(self, |
|
|
|
activation, |
|
|
|
ema_decay=0.999, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(HSwishQuant, self).__init__() |
|
|
|
self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act_before = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
self.fake_quant_act_after = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
if issubclass(activation.__class__, nn.HSwish): |
|
|
|
self.act = activation |
|
|
|
else: |
|
|
|
@@ -1378,11 +1185,8 @@ class HSigmoidQuant(_QuantActivation): |
|
|
|
Args: |
|
|
|
activation (Cell): Activation cell class. |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of HSigmoidQuant. |
|
|
|
@@ -1399,30 +1203,19 @@ class HSigmoidQuant(_QuantActivation): |
|
|
|
def __init__(self, |
|
|
|
activation, |
|
|
|
ema_decay=0.999, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(HSigmoidQuant, self).__init__() |
|
|
|
self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act_before = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
self.fake_quant_act_after = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
if issubclass(activation.__class__, nn.HSigmoid): |
|
|
|
self.act = activation |
|
|
|
else: |
|
|
|
@@ -1446,11 +1239,8 @@ class TensorAddQuant(Cell): |
|
|
|
|
|
|
|
Args: |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of TensorAddQuant. |
|
|
|
@@ -1467,21 +1257,14 @@ class TensorAddQuant(Cell): |
|
|
|
|
|
|
|
def __init__(self, |
|
|
|
ema_decay=0.999, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(TensorAddQuant, self).__init__() |
|
|
|
self.fake_quant_act = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
self.add = P.TensorAdd() |
|
|
|
|
|
|
|
def construct(self, x1, x2): |
|
|
|
@@ -1498,11 +1281,8 @@ class MulQuant(Cell): |
|
|
|
|
|
|
|
Args: |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
num_bits (int): The bit number of quantization, supporting 4 and 8 bits. Default: 8.
|
|
|
symmetric (bool): The quantization algorithm is symmetric or not. Default: False. |
|
|
|
narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
quant_config (QuantConfig): Configures the observer types of weight and activation. Default: quant_config_default.
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of MulQuant. |
|
|
|
@@ -1510,25 +1290,23 @@ class MulQuant(Cell): |
|
|
|
Outputs: |
|
|
|
Tensor, with the same type and shape as the `x`. |
|
|
|
|
|
|
|
Examples: |
|
|
|
>>> mul_quant = nn.MulQuant() |
|
|
|
>>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) |
|
|
|
>>> input_y = Tensor(np.random.randint(-2, 2, (2, 3)), mindspore.float32) |
|
|
|
>>> result = mul_quant(input_x, input_y) |
|
|
|
""" |
|
|
|
|
|
|
|
def __init__(self, |
|
|
|
ema_decay=0.999, |
|
|
|
per_channel=False, |
|
|
|
num_bits=8, |
|
|
|
symmetric=False, |
|
|
|
narrow_range=False, |
|
|
|
quant_delay=0): |
|
|
|
quant_config=quant_config_default, |
|
|
|
quant_dtype=QuantDtype.INT8): |
|
|
|
super(MulQuant, self).__init__() |
|
|
|
self.fake_quant_act = FakeQuantWithMinMax(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
per_channel=per_channel, |
|
|
|
num_bits=num_bits, |
|
|
|
symmetric=symmetric, |
|
|
|
narrow_range=narrow_range, |
|
|
|
quant_delay=quant_delay) |
|
|
|
self.fake_quant_act = quant_config.activation(min_init=-6, |
|
|
|
max_init=6, |
|
|
|
ema=True, |
|
|
|
ema_decay=ema_decay, |
|
|
|
quant_dtype=quant_dtype) |
|
|
|
self.mul = P.Mul() |
|
|
|
|
|
|
|
def construct(self, x1, x2): |
|
|
|
|