|
|
|
@@ -294,14 +294,14 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): |
|
|
|
|
|
|
|
.. math:: |
|
|
|
|
|
|
|
\frac{\partial \ output}{\partial \ maxq} & = \left\{\begin{matrix} |
|
|
|
\frac{\partial \ output}{\partial \ maxq} = \left\{\begin{matrix} |
|
|
|
-\frac{X}{maxq}+\left \lfloor \frac{X}{maxq} \right \rceil \qquad if\quad bound_{lower}< \frac{X}{maxq}< 1\\ |
|
|
|
-1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\le bound_{lower}\\ |
|
|
|
1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\ge 1 \qquad \quad |
|
|
|
\end{matrix}\right. \\ |
|
|
|
|
|
|
|
bound_{lower}= |
|
|
|
\end{align}\left\{\begin{matrix} |
|
|
|
\left\{\begin{matrix} |
|
|
|
0\qquad \quad if\quad neg\_trunc\\ |
|
|
|
-1\qquad if\quad otherwise |
|
|
|
\end{matrix}\right. |
|
|
|
@@ -336,7 +336,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): |
|
|
|
narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. |
|
|
|
quant_delay (int): Quantization delay parameters according to the global step. Default: 0. |
|
|
|
neg_trunc (bool): Whether the quantization algorithm uses negative truncation or not. Default: False. |
|
|
|
mode (string): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported. |
|
|
|
mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported. |
|
|
|
Default: ("DEFAULT") |
|
|
|
Inputs: |
|
|
|
- **x** (Tensor) - The input of FakeQuantWithMinMaxObserver. The input dimension is preferably 2D or 4D. |
|
|
|
@@ -565,7 +565,7 @@ class Conv2dBnFoldQuantOneConv(Cell): |
|
|
|
|
|
|
|
This part is a more detailed overview of Conv2d operation. For more details about Quantization, |
|
|
|
please refer to the implementation of a subclass of :class:`_Observer`, for example, |
|
|
|
class:`mindspore.nn.FakeQuantWithMinMaxObserver`. |
|
|
|
:class:`FakeQuantWithMinMaxObserver`. |
|
|
|
|
|
|
|
.. math:: |
|
|
|
w_{q}=quant(\frac{w}{\sqrt{var_{G}+\epsilon}}*\gamma ) |
|
|
|
@@ -585,11 +585,11 @@ class Conv2dBnFoldQuantOneConv(Cell): |
|
|
|
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. |
|
|
|
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". |
|
|
|
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. |
|
|
|
eps (float): Parameters for Batch Normalization. Default: 1e-5. |
|
|
|
momentum (float): Parameters for Batch Normalization op. Default: 0.997. |
|
|
|
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. |
|
|
|
group (int): Splits filter into groups, `in_channels` and `out_channels` must be |
|
|
|
divisible by the number of groups. Default: 1. |
|
|
|
eps (float): Parameters for Batch Normalization. Default: 1e-5. |
|
|
|
momentum (float): Parameters for Batch Normalization op. Default: 0.997. |
|
|
|
has_bias (bool): Specifies whether the layer uses a bias vector, which is temporarily invalid. Default: False. |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the |
|
|
|
convolution kernel. Default: 'normal'. |
|
|
|
@@ -604,9 +604,10 @@ class Conv2dBnFoldQuantOneConv(Cell): |
|
|
|
var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the |
|
|
|
variance vector. Default: 'ones'. |
|
|
|
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -802,7 +803,7 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
|
|
|
|
This part is a more detailed overview of Conv2d operation. For more details about Quantization, |
|
|
|
please refer to the implementation of a subclass of :class:`_Observer`, for example, |
|
|
|
class:`mindspore.nn.FakeQuantWithMinMaxObserver`. |
|
|
|
:class:`FakeQuantWithMinMaxObserver`. |
|
|
|
|
|
|
|
.. math:: |
|
|
|
y = x\times w+ b |
|
|
|
@@ -823,11 +824,11 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. |
|
|
|
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". |
|
|
|
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. |
|
|
|
eps (float): Parameters for Batch Normalization. Default: 1e-5. |
|
|
|
momentum (float): Parameters for Batch Normalization op. Default: 0.997. |
|
|
|
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. |
|
|
|
group (int): Splits filter into groups, `in_channels` and `out_channels` must be |
|
|
|
divisible by the number of groups. Default: 1. |
|
|
|
eps (float): Parameters for Batch Normalization. Default: 1e-5. |
|
|
|
momentum (float): Parameters for Batch Normalization op. Default: 0.997. |
|
|
|
has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the |
|
|
|
convolution kernel. Default: 'normal'. |
|
|
|
@@ -842,9 +843,10 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the |
|
|
|
variance vector. Default: 'ones'. |
|
|
|
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step. |
|
|
|
Default: 100000. |
|
|
|
@@ -1059,9 +1061,10 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. |
|
|
|
Default: 'normal'. |
|
|
|
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -1202,9 +1205,10 @@ class Conv2dQuant(Cell): |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. |
|
|
|
Default: 'normal'. |
|
|
|
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -1339,9 +1343,10 @@ class DenseQuant(Cell): |
|
|
|
has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. |
|
|
|
activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer, |
|
|
|
e.g. 'relu'. Default: None. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -1464,9 +1469,10 @@ class ActQuant(_QuantActivation): |
|
|
|
ema (bool): The exponential Moving Average algorithm updates min and max. Default: False. |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
fake_before (bool): Whether to add a fake quantized operation before activation. Default: False. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -1550,9 +1556,10 @@ class TensorAddQuant(Cell): |
|
|
|
|
|
|
|
Args: |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -1612,9 +1619,10 @@ class MulQuant(Cell): |
|
|
|
|
|
|
|
Args: |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be |
|
|
|
generated by `compression.quant.create_quant_config` method. |
|
|
|
Default: both set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and |
|
|
|
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization |
|
|
|
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. |
|
|
|
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
|