|
|
|
@@ -44,25 +44,25 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ |
|
|
|
narrow_range=(False, False), |
|
|
|
mode="DEFAULT"): |
|
|
|
r""" |
|
|
|
Config the observer type of weights and data flow with quant params. |
|
|
|
Config the observer type of weights and data flow with quant parameters. |
|
|
|
|
|
|
|
Args: |
|
|
|
quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element |
|
|
|
applies to weights and second applies to data flow. Currently, only |
|
|
|
applies to weights and the second applies to data flow. Currently, only |
|
|
|
:class:`FakeQuantWithMinMaxObserver` is supported.
|
|
|
Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver). |
|
|
|
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized |
|
|
|
during train and eval. The first element represents weights and second element represents data flow. |
|
|
|
during train and eval. The first element represents weights and the second element represents data flow. |
|
|
|
Default: (0, 0). |
|
|
|
quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. The first |
|
|
|
element represents weights and second element represents data flow. |
|
|
|
quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first |
|
|
|
element represents weights and the second element represents data flow. |
|
|
|
Default: (QuantDtype.INT8, QuantDtype.INT8). |
|
|
|
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` |
|
|
|
then base on per channel otherwise base on per layer. The first element represents weights |
|
|
|
and second element represents data flow, and second element must be `False` now. |
|
|
|
then base on per channel, otherwise base on per layer. The first element represents weights |
|
|
|
and the second element represents data flow, and the second element must be `False` now. |
|
|
|
Default: (False, False). |
|
|
|
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then |
|
|
|
base on symmetric otherwise base on asymmetric. The first element represents weights and second |
|
|
|
base on symmetric, otherwise base on asymmetric. The first element represents weights and the second |
|
|
|
element represents data flow. Default: (False, False). |
|
|
|
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. |
|
|
|
The first element represents weights and the second element represents data flow. |
|
|
|
@@ -147,17 +147,17 @@ class QuantizationAwareTraining(Quantizer): |
|
|
|
freeze_bn (int): Number of steps after which BatchNorm OP parameters are fixed to global mean and variance.
|
|
|
Default: 1e7. |
|
|
|
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized |
|
|
|
during train and eval. The first element represents weights and second element represents data flow. |
|
|
|
during train and eval. The first element represents weights and the second element represents data flow. |
|
|
|
Default: (0, 0). |
|
|
|
quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. The first |
|
|
|
element represents weights and second element represents data flow. It is necessary to consider the |
|
|
|
quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first |
|
|
|
element represents weights and the second element represents data flow. It is necessary to consider the |
|
|
|
precision support of hardware devices in the practical quantization inference scenario.
|
|
|
Default: (QuantDtype.INT8, QuantDtype.INT8). |
|
|
|
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` |
|
|
|
then base on per channel otherwise base on per layer. The first element represents weights and second |
|
|
|
element represents data flow, and second element must be `False` now. Default: (False, False). |
|
|
|
then base on per channel, otherwise base on per layer. The first element represents weights and the |
|
|
|
second element represents data flow, and the second element must be `False` now. Default: (False, False). |
|
|
|
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then |
|
|
|
base on symmetric otherwise base on asymmetric. The first element represents weights and second |
|
|
|
base on symmetric, otherwise base on asymmetric. The first element represents weights and the second |
|
|
|
element represents data flow. Default: (False, False). |
|
|
|
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. |
|
|
|
The first element represents weights and the second element represents data flow. |
|
|
|
@@ -165,8 +165,8 @@ class QuantizationAwareTraining(Quantizer): |
|
|
|
optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently |
|
|
|
only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured, |
|
|
|
`LEARNED_SCALE` has a higher priority. `LEARNED_SCALE` currently only works under some constraints, which
|
|
|
includes: freeze_bn=0, quant_delay=0, symmetric=Ture, narrow_range=True, More specifically, for operators |
|
|
|
such as ReLu and ReLu6, which only have positive values, we add a negative truncation to optimize this |
|
|
|
includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True. More specifically, for operators
|
|
|
such as ReLU and ReLU6, which only have positive values, we add a negative truncation to optimize this
|
|
|
scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT. |
|
|
|
one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True. |
|
|
|
|
|
|
|
|