|
|
|
@@ -212,6 +212,61 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): |
|
|
|
r""" |
|
|
|
Quantization aware operation which provides the fake quantization observer function on data with min and max. |
|
|
|
|
|
|
|
The running min/max :math:`x_\text{min}` and :math:`x_\text{max}` are computed as: |
|
|
|
|
|
|
|
.. math:: |
|
|
|
|
|
|
|
\begin{array}{ll} \\ |
|
|
|
x_\text{min} = |
|
|
|
\begin{cases} |
|
|
|
\min(\min(X), 0) |
|
|
|
& \text{ if } ema = \text{False} \\ |
|
|
|
\min((1 - c) \min(X) + \text{c } x_\text{min}, 0) |
|
|
|
& \text{ if } \text{otherwise} |
|
|
|
\end{cases}\\ |
|
|
|
x_\text{max} = |
|
|
|
\begin{cases} |
|
|
|
\max(\max(X), 0) |
|
|
|
& \text{ if } ema = \text{False} \\ |
|
|
|
\max((1 - c) \max(X) + \text{c } x_\text{max}, 0) |
|
|
|
& \text{ if } \text{otherwise} |
|
|
|
\end{cases} |
|
|
|
\end{array} |
|
|
|
|
|
|
|
where X is the input tensor, and :math:`c` is the `ema_decay`. |
|
|
|
|
|
|
|
The scale s and zero point zp are computed as:
|
|
|
|
|
|
|
.. math:: |
|
|
|
|
|
|
|
\begin{array}{ll} \\ |
|
|
|
s = |
|
|
|
\begin{cases} |
|
|
|
\frac{x_\text{max} - x_\text{min}}{Q_\text{max} - Q_\text{min}} |
|
|
|
& \text{ if } symmetric = \text{False} \\ |
|
|
|
\frac{2\max(x_\text{max}, \left | x_\text{min} \right |) }{Q_\text{max} - Q_\text{min}} |
|
|
|
& \text{ if } \text{otherwise} |
|
|
|
\end{cases}\\ |
|
|
|
zp\_min = Q_\text{min} - \frac{x_\text{min}}{scale} \\ |
|
|
|
zp = \left \lfloor \min(Q_\text{max}, \max(Q_\text{min}, zp\_min)) + 0.5 \right \rfloor |
|
|
|
\end{array} |
|
|
|
|
|
|
|
where :math:`Q_\text{max}` and :math:`Q_\text{min}` is decided by quant_dtype, for example, if quant_dtype=INT8, |
|
|
|
then :math:`Q_\text{max}`=127 and :math:`Q_\text{min}`=-128. |
|
|
|
|
|
|
|
The fake quant output is computed as: |
|
|
|
|
|
|
|
.. math:: |
|
|
|
|
|
|
|
\begin{array}{ll} \\ |
|
|
|
u_\text{min} = (Q_\text{min} - zp) * scale \\ |
|
|
|
u_\text{max} = (Q_\text{max} - zp) * scale \\ |
|
|
|
u_X = \left \lfloor \frac{\min(u_\text{max}, \max(u_\text{min}, X)) - u_\text{min}}{scale} |
|
|
|
+ 0.5 \right \rfloor \\ |
|
|
|
output = u_X * scale + u_\text{min} |
|
|
|
\end{array} |
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
min_init (int, float): The initialized min value. Default: -6. |
|
|
|
max_init (int, float): The initialized max value. Default: 6. |
|
|
|
@@ -337,7 +392,8 @@ class Conv2dBnFoldQuantOneConv(Cell): |
|
|
|
r""" |
|
|
|
2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct. |
|
|
|
|
|
|
|
This part is a more detailed overview of Conv2d operation. |
|
|
|
This part is a more detailed overview of Conv2d operation. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
in_channels (int): The number of input channel :math:`C_{in}`. |
|
|
|
@@ -365,7 +421,7 @@ class Conv2dBnFoldQuantOneConv(Cell): |
|
|
|
var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the |
|
|
|
variance vector. Default: 'ones'. |
|
|
|
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
@@ -537,7 +593,8 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
r""" |
|
|
|
2D convolution with BatchNormal operation folded construct. |
|
|
|
|
|
|
|
This part is a more detailed overview of Conv2d operation. |
|
|
|
This part is a more detailed overview of Conv2d operation. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
in_channels (int): The number of input channel :math:`C_{in}`. |
|
|
|
@@ -565,7 +622,7 @@ class Conv2dBnFoldQuant(Cell): |
|
|
|
var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the |
|
|
|
variance vector. Default: 'ones'. |
|
|
|
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
@@ -725,7 +782,8 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
r""" |
|
|
|
2D convolution and batchnorm without fold with fake quantized construct. |
|
|
|
|
|
|
|
This part is a more detailed overview of Conv2d operation. |
|
|
|
This part is a more detailed overview of Conv2d operation. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
in_channels (int): The number of input channel :math:`C_{in}`. |
|
|
|
@@ -743,7 +801,7 @@ class Conv2dBnWithoutFoldQuant(Cell): |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. |
|
|
|
Default: 'normal'. |
|
|
|
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
@@ -840,7 +898,8 @@ class Conv2dQuant(Cell): |
|
|
|
r""" |
|
|
|
2D convolution with fake quantized operation layer. |
|
|
|
|
|
|
|
This part is a more detailed overview of Conv2d operation. |
|
|
|
This part is a more detailed overview of Conv2d operation. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
in_channels (int): The number of input channel :math:`C_{in}`. |
|
|
|
@@ -856,7 +915,7 @@ class Conv2dQuant(Cell): |
|
|
|
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. |
|
|
|
Default: 'normal'. |
|
|
|
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
@@ -950,7 +1009,8 @@ class DenseQuant(Cell): |
|
|
|
r""" |
|
|
|
The fully connected layer with fake quantized operation. |
|
|
|
|
|
|
|
This part is a more detailed overview of Dense operation. |
|
|
|
This part is a more detailed overview of Dense operation. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
in_channels (int): The dimension of the input space. |
|
|
|
@@ -962,7 +1022,7 @@ class DenseQuant(Cell): |
|
|
|
has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. |
|
|
|
activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer, |
|
|
|
eg. 'relu'. Default: None. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
@@ -1066,20 +1126,21 @@ class ActQuant(_QuantActivation): |
|
|
|
Quantization aware training activation function. |
|
|
|
|
|
|
|
Add the fake quantized operation to the end of activation operation, by which the output of activation operation |
|
|
|
will be truncated. Please check `FakeQuantWithMinMaxObserver` or other observer for more details. |
|
|
|
will be truncated. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
activation (Cell): Activation cell. |
|
|
|
ema (bool): The exponential Moving Average algorithm updates min and max. Default: False. |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
fake_before (bool): Whether add fake quantized operation before activation. Default: False. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **input** (Tensor) - The input of ReLU6Quant. |
|
|
|
- **input** (Tensor) - The input of ActQuant. |
|
|
|
|
|
|
|
Outputs: |
|
|
|
Tensor, with the same type and shape as the `input`. |
|
|
|
@@ -1134,11 +1195,12 @@ class TensorAddQuant(Cell): |
|
|
|
r""" |
|
|
|
Add fake quantized operation after TensorAdd operation. |
|
|
|
|
|
|
|
This part is a more detailed overview of TensorAdd operation. |
|
|
|
This part is a more detailed overview of TensorAdd operation. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
@@ -1186,11 +1248,12 @@ class MulQuant(Cell): |
|
|
|
r""" |
|
|
|
Add fake quantized operation after `Mul` operation. |
|
|
|
|
|
|
|
This part is a more detailed overview of `Mul` operation. |
|
|
|
This part is a more detailed overview of `Mul` operation. For more details about Quantization,



please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
|
|
|
|
|
|
Args: |
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be |
|
|
|
quant_config (QuantConfig): Configures the observer types and quant settings of weight and activation. Can be
|
|
|
generated by compression.quant.create_quant_config method. |
|
|
|
Default: both set to default FakeQuantWithMinMaxObserver. |
|
|
|
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. |
|
|
|
|