modified the quant API document description

4 years ago · ce2c5d5fce
--- a/mindspore/compression/quant/qat.py
+++ b/mindspore/compression/quant/qat.py
@@ -47,8 +47,9 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
    Configure the observer type of weights and data flow with quant params.

    Args:
        quant_observer (Union[Observer, list, tuple]): The observer type to do quantization. The first element
            represents weights and second element represents data flow.
        quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element
            applies to weights and second applies to data flow. Currently, only
            :class:`FakeQuantWithMinMaxObserver` supported.
            Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver).
        quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
            during train and eval. The first element represents weights and second element represents data flow.
@@ -66,7 +67,7 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
        narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
            The first element represents weights and the second element represents data flow.
            Default: (False, False).
        mode (String): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
        mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
            Default: ("DEFAULT").

    Returns:
@@ -545,7 +546,7 @@ class QuantizationAwareTraining(Quantizer):
            min_init = [-x for x in max_init]
        return min_init, max_init

    def set_mixed_bits(self, network, strategy):
    def _set_mixed_bits(self, network, strategy):
        r"""
        Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE`
        optimize_option.
@@ -563,7 +564,7 @@ class QuantizationAwareTraining(Quantizer):
            ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`.
        """
        if OptimizeOption.LEARNED_SCALE not in self.optimize_option:
            raise ValueError("The `set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
            raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
                             "optimize_option.")

        self.quantizable_idx = []
--- a/mindspore/compression/quant/quant_utils.py
+++ b/mindspore/compression/quant/quant_utils.py
@@ -337,11 +337,8 @@ def query_quant_layers(network):
    quantization layers are queried before graph compile optimization in the graph mode, thus some redundant
    quantized layers may appear, which do not exist in practical execution.

    Input:
    Args:
        network (Cell): input network

    Returns:
        None
    """
    network = Validator.check_isinstance("network", network, nn.Cell)
    tplt = "{0:60}\t{1:10}"
@@ -359,7 +356,8 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_param
    Args:
        quant_model(Cell): Quantization model.
        params_dict(dict): Parameter dict that stores fp32 parameters.
        quant_new_params(list): Parameters that exist in quantization network but not in non-quantization network.
        quant_new_params(list): Parameters that exist in quantization network but not in non-quantization
            network. Default: None.

    Raises:
        TypeError: If `quant_new_params` is not None and is not list.
--- a/mindspore/nn/layer/combined.py
+++ b/mindspore/nn/layer/combined.py
@@ -61,7 +61,7 @@ class Conv2dBnAct(Cell):
            Initializer and string are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        has_bn (bool): Specifies whether to use batchnorm or not. Default: False.
        momentum (float): Momentum for moving average for batchnorm, must be [0, 1]. Default:0.9
        momentum (float): Momentum for moving average for batchnorm, must be [0, 1]. Default: 0.997.
        eps (float): Term added to the denominator to improve numerical stability for batchnorm, should be greater
            than 0. Default: 1e-5.
        activation (Union[str, Cell, Primitive]): Specifies activation type. The optional values are as following:
@@ -69,6 +69,7 @@ class Conv2dBnAct(Cell):
            'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
        alpha (float): Slope of the activation function at x < 0 for LeakyReLU. Default: 0.2.
        after_fake(bool): Determine whether there must be a fake quantization operation after Conv2dBnAct.
            Default: True.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. The data type is float32.
@@ -170,6 +171,7 @@ class DenseBnAct(Cell):
            'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
        alpha (float): Slope of the activation function at x < 0 for LeakyReLU. Default: 0.2.
        after_fake(bool): Determine whether there must be a fake quantization operation after DenseBnAct.
            Default: True.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. The data type is float32.
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -294,14 +294,14 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):

    .. math::

        \frac{\partial \ output}{\partial \ maxq} & = \left\{\begin{matrix}
        \frac{\partial \ output}{\partial \ maxq} = \left\{\begin{matrix}
        -\frac{X}{maxq}+\left \lfloor \frac{X}{maxq} \right \rceil \qquad if\quad bound_{lower}< \frac{X}{maxq}< 1\\
        -1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\le bound_{lower}\\
         1  \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\ge  1 \qquad \quad
        \end{matrix}\right. \\

        bound_{lower}=
        \end{align}\left\{\begin{matrix}
        \left\{\begin{matrix}
         0\qquad \quad if\quad neg\_trunc\\
        -1\qquad if\quad otherwise
        \end{matrix}\right.
@@ -336,7 +336,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
        narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
        quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
        neg_trunc (bool): Whether the quantization algorithm uses negative truncation or not. Default: False.
        mode (string): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
        mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
            Default: ("DEFAULT")
    Inputs:
        - **x** (Tensor) - The input of FakeQuantWithMinMaxObserver. The input dimension is preferably 2D or 4D.
@@ -565,7 +565,7 @@ class Conv2dBnFoldQuantOneConv(Cell):

    This part is a more detailed overview of Conv2d operation. For more details about Quantization,
    please refer to the implementation of subclass of :class:`_Observer`, for example,
    class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
    :class:`FakeQuantWithMinMaxObserver`.

    .. math::
        w_{q}=quant(\frac{w}{\sqrt{var_{G}+\epsilon}}*\gamma )
@@ -585,11 +585,11 @@ class Conv2dBnFoldQuantOneConv(Cell):
        stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0.
        eps (float): Parameters for Batch Normalization. Default: 1e-5.
        momentum (float): Parameters for Batch Normalization op. Default: 0.997.
        dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1.
        eps (float): Parameters for Batch Normalization. Default: 1e-5.
        momentum (float): Parameters for Batch Normalization op. Default: 0.997.
        has_bias (bool): Specifies whether the layer uses a bias vector, which is temporarily invalid. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            convolution kernel. Default: 'normal'.
@@ -604,9 +604,10 @@ class Conv2dBnFoldQuantOneConv(Cell):
        var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            variance vector. Default: 'ones'.
        fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by compression.quant.create_quant_config method.
            Default: both set to default FakeQuantWithMinMaxObserver.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

    Inputs:
@@ -802,7 +803,7 @@ class Conv2dBnFoldQuant(Cell):

    This part is a more detailed overview of Conv2d operation. For more details about Quantization,
    please refer to the implementation of subclass of :class:`_Observer`, for example,
    class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
    :class:`FakeQuantWithMinMaxObserver`.

    .. math::
        y = x\times w+  b
@@ -823,11 +824,11 @@ class Conv2dBnFoldQuant(Cell):
        stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
        padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0.
        eps (float): Parameters for Batch Normalization. Default: 1e-5.
        momentum (float): Parameters for Batch Normalization op. Default: 0.997.
        dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by the number of groups. Default: 1.
        eps (float): Parameters for Batch Normalization. Default: 1e-5.
        momentum (float): Parameters for Batch Normalization op. Default: 0.997.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            convolution kernel. Default: 'normal'.
@@ -842,9 +843,10 @@ class Conv2dBnFoldQuant(Cell):
        var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            variance vector. Default: 'ones'.
        fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by compression.quant.create_quant_config method.
            Default: both set to default FakeQuantWithMinMaxObserver.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
        freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step.
            Default: 100000.
@@ -1059,9 +1061,10 @@ class Conv2dBnWithoutFoldQuant(Cell):
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by compression.quant.create_quant_config method.
            Default: both set to default FakeQuantWithMinMaxObserver.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

    Inputs:
@@ -1202,9 +1205,10 @@ class Conv2dQuant(Cell):
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
            Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by compression.quant.create_quant_config method.
            Default: both set to default FakeQuantWithMinMaxObserver.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

    Inputs:
@@ -1339,9 +1343,10 @@ class DenseQuant(Cell):
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
        activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer,
            e.g. 'relu'. Default: None.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by compression.quant.create_quant_config method.
            Default: both set to default FakeQuantWithMinMaxObserver.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

    Inputs:
@@ -1464,9 +1469,10 @@ class ActQuant(_QuantActivation):
        ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
        ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
        fake_before (bool): Whether add fake quantized operation before activation. Default: False.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by compression.quant.create_quant_config method.
            Default: both set to default FakeQuantWithMinMaxObserver.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

    Inputs:
@@ -1550,9 +1556,10 @@ class TensorAddQuant(Cell):

    Args:
        ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by compression.quant.create_quant_config method.
            Default: both set to default FakeQuantWithMinMaxObserver.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

    Inputs:
@@ -1612,9 +1619,10 @@ class MulQuant(Cell):

    Args:
        ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
        quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
            generated by `compression.quant.create_quant_config` method.
            Default: both set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
            activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
            and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
            Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
        quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.

    Inputs: