Browse Source

api_modify

tags/v1.3.0
zhang__sss 4 years ago
parent
commit
9626c3a1ae
5 changed files with 335 additions and 141 deletions
  1. +4
    -1
      mindspore/compression/common/constant.py
  2. +66
    -37
      mindspore/compression/quant/qat.py
  3. +8
    -6
      mindspore/compression/quant/quant_utils.py
  4. +1
    -1
      mindspore/compression/quant/quantizer.py
  5. +256
    -96
      mindspore/nn/layer/quant.py

+ 4
- 1
mindspore/compression/common/constant.py View File

@@ -107,10 +107,13 @@ class QuantDtype(enum.Enum):
Get the num bits of the QuantDtype member.

Returns:
int, the num bits of the QuantDtype member
int, the num bits of the QuantDtype member.

Examples:
>>> from mindspore.compression.common import QuantDtype
>>> quant_dtype = QuantDtype.INT8
>>> num_bits = quant_dtype.num_bits
>>> print(num_bits)
8
"""
return self._value

+ 66
- 37
mindspore/compression/quant/qat.py View File

@@ -49,25 +49,31 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
Args:
quant_observer (Union[Observer, list, tuple]): The observer type to do quantization. The first element
represents weights and second element represents data flow.
Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver)
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized during
eval. The first element represents weights and second element represents data flow. Default: (0, 0)
Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver).
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
during train and eval. The first element represents weights and second element represents data flow.
Default: (0, 0).
quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. The first
element represents weights and second element represents data flow.
Default: (QuantDtype.INT8, QuantDtype.INT8)
Default: (QuantDtype.INT8, QuantDtype.INT8).
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
then base on per channel otherwise base on per layer. The first element represents weights
and second element represents data flow, and second element must be `False` now. Default: (False, False)
and second element represents data flow, and second element must be `False` now.
Default: (False, False).
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
base on symmetric otherwise base on asymmetric. The first element represents weights and second
element represents data flow. Default: (False, False)
element represents data flow. Default: (False, False).
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
The first element represents weights and the second element represents data flow. Default: (False, False)
The first element represents weights and the second element represents data flow.
Default: (False, False).
mode (String): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
Default: ("DEFAULT")
Default: ("DEFAULT").

Returns:
QuantConfig, Contains the observer type of weight and activation.
QuantConfig, contains the observer type of weight and activation.

Raises:
ValueError: If the second element of `per_channel` is not `False`.
"""
if per_channel[-1]:
raise ValueError("Arg 'per_channel' second element must be 'False'.")
@@ -136,31 +142,47 @@ class QuantizationAwareTraining(Quantizer):
Quantizer for quantization aware training.

Args:
bn_fold (bool): Flag to used bn fold ops for simulation inference operation. Default: True.
freeze_bn (int): Number of steps after which BatchNorm OP parameters used total mean and variance. Default: 1e7.
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized during
eval. The first element represents weights and second element represents data flow. Default: (0, 0)
bn_fold (bool): Whether to use bn fold ops for simulation inference operation. Default: True.
freeze_bn (int): Number of steps after which BatchNorm OP parameters fixed to global mean and variance.
Default: 1e7.
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
during train and eval. The first element represents weights and second element represents data flow.
Default: (0, 0).
quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. The first
element represents weights and second element represents data flow. It is necessary to consider the
precision support of hardware devices in the practical quantization infer scenario.
Default: (QuantDtype.INT8, QuantDtype.INT8)
Default: (QuantDtype.INT8, QuantDtype.INT8).
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
then base on per channel otherwise base on per layer. The first element represents weights
and second element represents data flow, and second element must be `False` now. Default: (False, False)
then base on per channel otherwise base on per layer. The first element represents weights and second
element represents data flow, and second element must be `False` now. Default: (False, False).
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
base on symmetric otherwise base on asymmetric. The first element represents weights and second
element represents data flow. Default: (False, False)
element represents data flow. Default: (False, False).
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
The first element represents weights and the second element represents data flow. Default: (False, False)
optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently only
support QAT and LEARNED_SCALE (Note that, if both QAT and LEARNED_SCALE are configured, LEARNED_SCALE has
a higher priority. LEARNED_SCALE currently only work under some constraints, which includes: freeze_bn=0,
quant_delay=0, symmetric=Ture, narrow_range=True, More specifically, for operators such as ReLu and ReLu6,
which only have positive values, we add a negative truncation to optimize this scenario, and narrow_range
will automatically match to False). Default: OptimizeOption.QAT
one_conv_fold (bool): Flag to used one conv bn fold ops for simulation inference operation. Default: True.
The first element represents weights and the second element represents data flow.
Default: (False, False).
optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently
only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured,
`LEARNED_SCALE` has a higher priority. `LEARNED_SCALE` currently only works under some constraints, which
include: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True. More specifically, for operators
such as ReLU and ReLU6, which only have positive values, we add a negative truncation to optimize this
scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT.
one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True.

Raises:
TypeError: If the element of `quant_delay` or `freeze_bn` is not int.
TypeError: If `bn_fold`, `one_conv_fold` or the element of `per_channel`, `symmetric`, `narrow_range`
is not bool.
TypeError: If the element of `quant_dtype` is not `QuantDtype`.
ValueError: If the length of `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` or `narrow_range` is
not less than 2.
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `freeze_bn` is not equal to 0.
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `symmetric` is not (True, True).
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `narrow_range` is not (True, True).
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `quant_delay` is not (0, 0).

Examples:
>>> from mindspore.compression.quant import QuantizationAwareTraining
>>> class LeNet5(nn.Cell):
... def __init__(self, num_class=10, channel=1):
... super(LeNet5, self).__init__()
@@ -267,15 +289,19 @@ class QuantizationAwareTraining(Quantizer):

def quantize(self, network):
"""
Quant API to convert input network to a quantization aware training network
Quant API to convert input network to a quantization aware training network.

Note:
Please refer to the Examples of class: `mindspore.compression.quant.QuantizationAwareTraining`.

Args:
network (Cell): network to be quantized.

Examples:
>>> net = Net()
>>> quantizer = QuantizationAwareTraining()
>>> net_qat = quantizer.quantize(net)
Returns:
Cell, a quantization aware training network.

Raises:
KeyError: If the `device_target` set in context is not in `support_device`.
"""
support_device = ["Ascend", "GPU"]
if context.get_context('device_target') not in support_device:
@@ -532,14 +558,17 @@ class QuantizationAwareTraining(Quantizer):
Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE`
optimize_option.

Inputs:
network (Cell): input network
strategy (List): the quantization strategy for layers that need to be quantified (eg. [[8], [8],
..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the
convolution layer is supported.
Args:
network (Cell): Input network.
strategy (list): The quantization strategy for layers that need to be quantized (e.g. [[8], [8],
..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the
convolution layer is supported.

Returns:
Cell, a network with mixed bit strategy configured.

Outputs:
network (Cell)
Raises:
ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`.
"""
if OptimizeOption.LEARNED_SCALE not in self.optimize_option:
raise ValueError("The `set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "


+ 8
- 6
mindspore/compression/quant/quant_utils.py View File

@@ -350,12 +350,14 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_param
Load fp32 model parameters into quantization model.

Args:
quant_model(Cell): quantization model.
params_dict(dict): parameter dict that stores fp32 parameters.
quant_new_params(list): parameters that exist in quantitative network but not in unquantitative network.

Returns:
None
quant_model(Cell): Quantization model.
params_dict(dict): Parameter dict that stores fp32 parameters.
quant_new_params(list): Parameters that exist in quantization network but not in non-quantization network.

Raises:
TypeError: If `quant_new_params` is not None and is not list.
ValueError: If there are parameters in the `quant_model` that are neither in `params_dict`
nor in `quant_new_params`.
"""
if quant_new_params is not None and not isinstance(quant_new_params, list):
raise TypeError("quant_new_params must be list or None.")


+ 1
- 1
mindspore/compression/quant/quantizer.py View File

@@ -24,7 +24,7 @@ __all__ = ["OptimizeOption"]

class OptimizeOption(Enum):
r"""
An enum for the model quantization optimize option, currently only support `QAT`.
An enum for the model quantization optimize option, currently only support `QAT` and `LEARNED_SCALE`.
"""
# using quantization aware training
QAT = "QAT"


+ 256
- 96
mindspore/nn/layer/quant.py View File

@@ -345,17 +345,22 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
Raises:
TypeError: If `min_init` or `max_init` is not int, float or list.
TypeError: If `quant_delay` is not an int.
TypeError: If `min_init` is not less than `max_init`.
TypeError: If `quant_delay` is not greater than or equal to 0.
ValueError: If `quant_delay` is less than 0.
ValueError: If `min_init` is not less than `max_init`.
ValueError: If `mode` is neither `DEFAULT` nor `LEARNED_SCALE`.
ValueError: If `mode` is `LEARNED_SCALE` and `symmetric` is not `True`.
ValueError: If `mode` is `LEARNED_SCALE`, and `narrow_range` is not `True` unless when `neg_trunc` is `True`.

Supported Platforms:
``Ascend`` ``GPU``

Examples:
>>> import mindspore
>>> from mindspore import Tensor
>>> fake_quant = nn.FakeQuantWithMinMaxObserver()
>>> input = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
>>> output = fake_quant(input)
>>> print(output)
>>> input_data = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
>>> result = fake_quant(input_data)
>>> print(result)
[[ 0.9882355 1.9764705 0.9882355]
[-1.9764705 0. -0.9882355]]
"""
@@ -434,7 +439,8 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
self.fake_quant_infer = quant_fun(training=False)
elif self.mode == "LEARNED_SCALE":
if not self.symmetric:
raise ValueError("The 'LEARNED_SCALE' mode only support symmetric quant, please set symmetric to True.")
raise ValueError("The 'LEARNED_SCALE' mode only support symmetric quant, "
"please set symmetric to True.")
if self.neg_trunc:
min_array = self._get_init_array(0)
if self.narrow_range:
@@ -555,21 +561,33 @@ class Conv2dBnFoldQuantOneConv(Cell):
operation folded construct.

This part is a more detailed overview of Conv2d operation. For more details about Quantization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
please refer to the implementation of subclass of :class:`_Observer`, for example,
:class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

.. math::
w_{q}=quant(\frac{w}{\sqrt{var_{G}+\epsilon}}*\gamma )

b=\frac{-\mu _{G} }{\sqrt{var_{G}+\epsilon }}*\gamma +\beta

y=w_{q}\times x+b

where :math:`quant` is the continuous execution of quant and dequant, you can refer to the implementation of
subclass of :class:`_Observer`, for example, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
:math:`\mu_{G}` and :math:`var_{G}` represent the global mean and variance respectively.

Args:
in_channels (int): The number of input channel :math:`C_{in}`.
out_channels (int): The number of output channel :math:`C_{out}`.
kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
stride (int): Specifies stride for all spatial dimensions with the same value.
kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window.
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
padding (int): Implicit paddings on both sides of the input. Default: 0.
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. Default: 0.
eps (float): Parameters for Batch Normalization. Default: 1e-5.
momentum (float): Parameters for Batch Normalization op. Default: 0.997.
dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
group (int): Splits filter into groups, `in_channels` and `out_channels` must be
divisible by the number of groups. Default: 1.
has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
has_bias (bool): Specifies whether the layer uses a bias vector, which is temporarily invalid. Default: False.
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
convolution kernel. Default: 'normal'.
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
@@ -595,23 +613,29 @@ class Conv2dBnFoldQuantOneConv(Cell):
Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

Raises:
TypeError: If `in_channels`, `out_channels`, `stride`, `padding` or `dilation` is not an int.
TypeError: If `has_bias` is not a bool.
ValueError: If `in_channels` or `out_channels` `stride`, `padding` or `dilation` is less than 1.
TypeError: If `in_channels`, `out_channels` or `group` is not an int.
TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
TypeError: If `has_bias` or `fake` is not a bool.
TypeError: If `data_format` is not a string.
ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
ValueError: If `padding` is less than 0.
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

Supported Platforms:
``Ascend`` ``GPU``

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> conv2d_bnfold = nn.Conv2dBnFoldQuantOneConv(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
... quant_config=qconfig)
>>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mindspore.float32)
>>> result = conv2d_bnfold(input)
>>> output = result.shape
>>> print(output)
(2, 6, 2, 2)
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> conv2d_bnfold = nn.Conv2dBnFoldQuantOneConv(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
... weight_init="ones", quant_config=qconfig)
>>> input_data = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32)
>>> result = conv2d_bnfold(input_data)
>>> print(result)
[[[[5.9296875, 13.8359375]
[11.859375, 17.78125]]]]
"""

def __init__(self,
@@ -641,14 +665,31 @@ class Conv2dBnFoldQuantOneConv(Cell):
self.out_channels = Validator.check_positive_int(out_channels)
self.kernel_size = twice(kernel_size)
self.stride = twice(stride)
self.pad_mode = pad_mode
self.padding = padding
self.dilation = twice(dilation)
self.group = group
for kernel_size_elem in self.kernel_size:
Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
for stride_elem in self.stride:
Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
for dilation_elem in self.dilation:
Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
if pad_mode not in ('valid', 'same', 'pad'):
raise ValueError('Attr \'pad_mode\' of \'Conv2dBnFoldQuant\' Op passed '
+ str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
self.pad_mode = pad_mode
if isinstance(padding, int):
Validator.check_non_negative_int(padding, 'padding', self.cls_name)
self.padding = padding
elif isinstance(padding, tuple):
for pad in padding:
Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
self.padding = padding
else:
raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))
self.group = Validator.check_positive_int(group)
self.eps = eps
self.momentum = 1 - momentum
self.has_bias = has_bias
self.fake = fake
self.fake = Validator.check_bool(fake)
self.quant_config = quant_config
self.quant_dtype = quant_dtype
data_format = 'NCHW'
@@ -757,18 +798,31 @@ class Conv2dBnFoldQuant(Cell):
2D convolution with Batch Normalization operation folded construct.

This part is a more detailed overview of Conv2d operation. For more details about Quantization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
please refer to the implementation of subclass of :class:`_Observer`, for example,
:class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

.. math::
y = x\times w+ b

w_{q}=quant(\frac{w}{\sqrt{Var[y]+\epsilon}}*\gamma )

y_{out}= w_{q}\times x+\frac{b-E[y]}{\sqrt{Var[y]+\epsilon}}*\gamma +\beta

where :math:`quant` is the continuous execution of quant and dequant, you can refer to the implementation of
subclass of :class:`_Observer`, for example, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Two convolution
and Batch Normalization operation are used here, the purpose of the first convolution and Batch Normalization
is to count the mean `E[y]` and variance `Var[y]` of current batch output for quantization.

Args:
in_channels (int): The number of input channel :math:`C_{in}`.
out_channels (int): The number of output channel :math:`C_{out}`.
kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
stride (int): Specifies stride for all spatial dimensions with the same value.
kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window.
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
padding (int): Implicit paddings on both sides of the input. Default: 0.
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. Default: 0.
eps (float): Parameters for Batch Normalization. Default: 1e-5.
momentum (float): Parameters for Batch Normalization op. Default: 0.997.
dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
group (int): Splits filter into groups, `in_channels` and `out_channels` must be
divisible by the number of groups. Default: 1.
has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
@@ -799,22 +853,29 @@ class Conv2dBnFoldQuant(Cell):
Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

Raises:
TypeError: If `in_channels`, `out_channels`, `stride`, `padding` or `dilation` is not an int.
TypeError: If `has_bias` is not a bool.
ValueError: If `in_channels` or `out_channels` `stride`, `padding` or `dilation` is less than 1.
TypeError: If `in_channels`, `out_channels` or `group` is not an int.
TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
TypeError: If `has_bias` or `fake` is not a bool.
ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
ValueError: If `padding` is less than 0.
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
ValueError: If `device_target` in context is neither `Ascend` nor `GPU`.

Supported Platforms:
``Ascend`` ``GPU``

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> conv2d_bnfold = nn.Conv2dBnFoldQuant(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
... quant_config=qconfig)
>>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mindspore.float32)
>>> output = conv2d_bnfold(input)
>>> print(output.shape)
(2, 6, 2, 2)
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> conv2d_bnfold = nn.Conv2dBnFoldQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
... weight_init="ones", quant_config=qconfig)
>>> input_data = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32)
>>> result = conv2d_bnfold(input_data)
>>> print(result)
[[[[5.9296875, 13.8359375]
[11.859375, 17.78125]]]]
"""

def __init__(self,
@@ -845,15 +906,32 @@ class Conv2dBnFoldQuant(Cell):
self.out_channels = Validator.check_positive_int(out_channels)
self.kernel_size = twice(kernel_size)
self.stride = twice(stride)
self.pad_mode = pad_mode
self.padding = padding
self.dilation = twice(dilation)
self.group = group
for kernel_size_elem in self.kernel_size:
Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
for stride_elem in self.stride:
Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
for dilation_elem in self.dilation:
Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
if pad_mode not in ('valid', 'same', 'pad'):
raise ValueError('Attr \'pad_mode\' of \'Conv2dBnFoldQuant\' Op passed '
+ str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
self.pad_mode = pad_mode
if isinstance(padding, int):
Validator.check_non_negative_int(padding, 'padding', self.cls_name)
self.padding = padding
elif isinstance(padding, tuple):
for pad in padding:
Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
self.padding = padding
else:
raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))
self.group = Validator.check_positive_int(group)
self.eps = eps
self.momentum = momentum
self.has_bias = has_bias
self.freeze_bn = freeze_bn
self.fake = fake
self.fake = Validator.check_bool(fake)
self.quant_config = quant_config
self.quant_dtype = quant_dtype
self.is_gpu = context.get_context('device_target') == "GPU"
@@ -951,16 +1029,25 @@ class Conv2dBnWithoutFoldQuant(Cell):
2D convolution and batchnorm without fold with fake quantized construct.

This part is a more detailed overview of Conv2d operation. For more details about Quantization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
please refer to the implementation of subclass of :class:`_Observer`, for example,
:class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

.. math::
y =x\times quant(w)+ b

y_{bn} =\frac{y-E[y] }{\sqrt{Var[y]+ \epsilon } } *\gamma + \beta

where :math:`quant` is the continuous execution of quant and dequant, you can refer to the implementation of
subclass of :class:`_Observer`, for example, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

Args:
in_channels (int): The number of input channel :math:`C_{in}`.
out_channels (int): The number of output channel :math:`C_{out}`.
kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1.
kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window.
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
padding (int): Implicit paddings on both sides of the input. Default: 0.
dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. Default: 0.
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
group (int): Splits filter into groups, `in_channels` and `out_channels` must be
divisible by the number of groups. Default: 1.
has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
@@ -983,14 +1070,26 @@ class Conv2dBnWithoutFoldQuant(Cell):
Supported Platforms:
``Ascend`` ``GPU``

Raises:
TypeError: If `in_channels`, `out_channels` or `group` is not an int.
TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
TypeError: If `has_bias` is not a bool.
ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
ValueError: If `padding` is less than 0.
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> conv2d_no_bnfold = nn.Conv2dBnWithoutFoldQuant(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
... quant_config=qconfig)
>>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mstype.float32)
>>> output = conv2d_no_bnfold(input)
>>> print(output.shape)
(2, 6, 2, 2)
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> conv2d_no_bnfold = nn.Conv2dBnWithoutFoldQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
... weight_init='ones', quant_config=qconfig)
>>> input_data = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32)
>>> result = conv2d_no_bnfold(input_data)
>>> print(result)
[[[[5.929658 13.835868]
[11.859316 17.78116]]]]
"""

def __init__(self,
@@ -1016,9 +1115,26 @@ class Conv2dBnWithoutFoldQuant(Cell):
self.kernel_size = twice(kernel_size)
self.stride = twice(stride)
self.dilation = twice(dilation)
for kernel_size_elem in self.kernel_size:
Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
for stride_elem in self.stride:
Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
for dilation_elem in self.dilation:
Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
if pad_mode not in ('valid', 'same', 'pad'):
raise ValueError('Attr \'pad_mode\' of \'Conv2dBnWithoutFoldQuant\' Op passed '
+ str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
self.pad_mode = pad_mode
self.padding = padding
self.group = group
if isinstance(padding, int):
Validator.check_non_negative_int(padding, 'padding', self.cls_name)
self.padding = padding
elif isinstance(padding, tuple):
for pad in padding:
Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
self.padding = padding
else:
raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))
self.group = Validator.check_positive_int(group)

self.bias_add = P.BiasAdd()
if Validator.check_bool(has_bias):
@@ -1065,16 +1181,17 @@ class Conv2dQuant(Cell):
2D convolution with fake quantized operation layer.

This part is a more detailed overview of Conv2d operation. For more details about Quantization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
please refer to the implementation of subclass of :class:`_Observer`, for example,
:class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

Args:
in_channels (int): The number of input channel :math:`C_{in}`.
out_channels (int): The number of output channel :math:`C_{out}`.
kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1.
kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window.
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
padding (int): Implicit paddings on both sides of the input. Default: 0.
dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. Default: 0.
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
group (int): Splits filter into groups, `in_channels` and `out_channels` must be
divisible by the number of groups. Default: 1.
has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
@@ -1093,22 +1210,28 @@ class Conv2dQuant(Cell):
Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

Raises:
TypeError: If `in_channels`, `out_channels`, `stride`, `padding` or `dilation` is not an int.
TypeError: If `in_channels`, `out_channels` or `group` is not an int.
TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
TypeError: If `has_bias` is not a bool.
ValueError: If `in_channels` or `out_channels` `stride`, `padding` or `dilation` is less than 1.
ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
ValueError: If `padding` is less than 0.
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

Supported Platforms:
``Ascend`` ``GPU``

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> conv2d_quant = nn.Conv2dQuant(1, 6, kernel_size= (2, 2), stride=(1, 1), pad_mode="valid",
... quant_config=qconfig)
>>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mindspore.float32)
>>> output = conv2d_quant(input)
>>> print(output.shape)
(2, 6, 2, 2)
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> conv2d_quant = nn.Conv2dQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
... weight_init='ones', quant_config=qconfig)
>>> input_data = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32)
>>> result = conv2d_quant(input_data)
>>> print(result)
[[[[5.9296875 13.8359375]
[11.859375 17.78125]]]]
"""

def __init__(self,
@@ -1132,9 +1255,26 @@ class Conv2dQuant(Cell):
self.kernel_size = twice(kernel_size)
self.stride = twice(stride)
self.dilation = twice(dilation)
for kernel_size_elem in self.kernel_size:
Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
for stride_elem in self.stride:
Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
for dilation_elem in self.dilation:
Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
if pad_mode not in ('valid', 'same', 'pad'):
raise ValueError('Attr \'pad_mode\' of \'Conv2dQuant\' Op passed '
+ str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
self.pad_mode = pad_mode
self.padding = padding
self.group = group
if isinstance(padding, int):
Validator.check_non_negative_int(padding, 'padding', self.cls_name)
self.padding = padding
elif isinstance(padding, tuple):
for pad in padding:
Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
self.padding = padding
else:
raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))
self.group = Validator.check_positive_int(group)

weight_shape = [out_channels, in_channels // group, *self.kernel_size]
self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
@@ -1180,7 +1320,8 @@ class DenseQuant(Cell):
The fully connected layer with fake quantized operation.

This part is a more detailed overview of Dense operation. For more details about Quantization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
please refer to the implementation of the subclasses of :class:`_Observer`, for example,
:class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

Args:
in_channels (int): The dimension of the input space.
@@ -1206,19 +1347,27 @@ class DenseQuant(Cell):
Raises:
TypeError: If `in_channels`, `out_channels` is not an int.
TypeError: If `has_bias` is not a bool.
TypeError: If `activation` is not str, Cell and Primitive.
ValueError: If `in_channels` or `out_channels` is less than 1.
ValueError: If the dims of `weight_init` is not equal to 2 or the first element of `weight_init` is not equal
to `out_channels` or the second element of `weight_init` is not equal to `in_channels`.
ValueError: If the dims of `bias_init` is not equal to 1 or the element of `bias_init` is not equal
to `out_channels`.

Supported Platforms:
``Ascend`` ``GPU``

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> dense_quant = nn.DenseQuant(3, 6, quant_config=qconfig)
>>> input = Tensor(np.random.randint(-2, 2, (2, 3)), mindspore.float32)
>>> result = dense_quant(input)
>>> output = result.shape
>>> print(output)
(2, 6)
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> dense_quant = nn.DenseQuant(2, 1, weight_init='ones', quant_config=qconfig)
>>> input_data = Tensor(np.array([[1, 5], [3, 4]]), mindspore.float32)
>>> result = dense_quant(input_data)
>>> print(result)
[[5.929413]
[6.9176483]]
"""

def __init__(self,
@@ -1298,9 +1447,9 @@ class ActQuant(_QuantActivation):
r"""
Quantization aware training activation function.

Add the fake quantized operation to the end of activation operation, by which the output of activation operation
will be truncated. For more detials about Quantilization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
Add the fake quantized operation to the end of activation operation, by which the output of activation
operation will be truncated. For more details about Quantization, please refer to the implementation
of the subclasses of :class:`_Observer`, for example, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

Args:
activation (Cell): Activation cell.
@@ -1326,11 +1475,14 @@ class ActQuant(_QuantActivation):
``Ascend`` ``GPU``

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> act_quant = nn.ActQuant(nn.ReLU(), quant_config=qconfig)
>>> input = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32)
>>> output = act_quant(input)
>>> print(output)
>>> input_data = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32)
>>> result = act_quant(input_data)
>>> print(result)
[[0.9882355 1.9764705 0. ]
[0. 0. 0. ]]
"""
@@ -1373,7 +1525,8 @@ class TensorAddQuant(Cell):
Adds fake quantized operation after TensorAdd operation.

This part is a more detailed overview of TensorAdd operation. For more details about Quantization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
please refer to the implementation of the subclasses of :class:`_Observer`, for example,
:class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

Args:
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
@@ -1396,7 +1549,10 @@ class TensorAddQuant(Cell):
``Ascend`` ``GPU``

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> add_quant = nn.TensorAddQuant(quant_config=qconfig)
>>> input_x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
>>> input_x2 = Tensor(np.ones((2, 3)), mindspore.float32)
@@ -1429,12 +1585,13 @@ class MulQuant(Cell):
Adds fake quantized operation after `Mul` operation.

This part is a more detailed overview of `Mul` operation. For more details about Quantization,
please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
please refer to the implementation of the subclasses of :class:`_Observer`, for example,
:class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

Args:
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
generated by compression.quant.create_quant_config method.
generated by `compression.quant.create_quant_config` method.
Default: both set to default :class:`FakeQuantWithMinMaxObserver`.
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

@@ -1452,7 +1609,10 @@ class MulQuant(Cell):
``Ascend`` ``GPU``

Examples:
>>> qconfig = compression.quant.create_quant_config()
>>> import mindspore
>>> from mindspore.compression import quant
>>> from mindspore import Tensor
>>> qconfig = quant.create_quant_config()
>>> mul_quant = nn.MulQuant(quant_config=qconfig)
>>> input_x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
>>> input_x2 = Tensor(np.ones((2, 3)) * 2, mindspore.float32)


Loading…
Cancel
Save