|
|
@@ -24,6 +24,7 @@ from mindspore.common.parameter import Parameter |
|
|
from mindspore.common.initializer import initializer
from mindspore.common.tensor import Tensor
from mindspore._checkparam import Validator, Rel, twice
from mindspore.compression.common import QuantDtype
import mindspore.context as context
from .normalization import BatchNorm2d, BatchNorm1d
from .activation import get_activation, ReLU, LeakyReLU
|
|
@@ -277,13 +278,233 @@ class BatchNormFoldCell(Cell): |
|
|
return batch_mean, batch_std, running_mean, running_std |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _partial_init(cls_or_self, **kwargs): |
|
|
|
|
|
""" |
|
|
|
|
|
Wrapper that allows creation of class factories. |
|
|
|
|
|
|
|
|
|
|
|
This can be useful when there is a need to create classes with the same |
|
|
|
|
|
constructor arguments, but different instances. |
|
|
|
|
|
|
|
|
|
|
|
Example:: |
|
|
|
|
|
        >>> class Foo:
        ...     def __init__(self, a=0, b=0, answer=0):
        ...         pass
        >>> Foo.partial_init = classmethod(_partial_init)
|
|
|
|
|
>>> foo_builder = Foo.partial_init(a=3, b=4).partial_init(answer=42) |
|
|
|
|
|
>>> foo_instance1 = foo_builder() |
|
|
|
|
|
>>> foo_instance2 = foo_builder() |
|
|
|
|
|
>>> id(foo_instance1) == id(foo_instance2) |
|
|
|
|
|
False |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
class _PartialWrapper: |
|
|
|
|
|
r""" |
|
|
|
|
|
        Wrapper class that allows creation of class factories.
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, p): |
|
|
|
|
|
self.p = p |
|
|
|
|
|
|
|
|
|
|
|
def __call__(self, *args, **keywords): |
|
|
|
|
|
return self.p(*args, **keywords) |
|
|
|
|
|
|
|
|
|
|
|
def __repr__(self): |
|
|
|
|
|
return self.p.__repr__() |
|
|
|
|
|
|
|
|
|
|
|
partial_init = _partial_init |
|
|
|
|
|
|
|
|
|
|
|
r = _PartialWrapper(partial(cls_or_self, **kwargs)) |
|
|
|
|
|
return r |
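# Note: ``Observer`` below exposes this helper as ``partial_init = classmethod(_partial_init)``,
# so observer classes can act as pre-configured factories. A hypothetical sketch (names are
# illustrative only):
#     act_observer = FakeQuantWithMinMaxObserver.partial_init(ema=True, symmetric=False)
#     fake_quant_cell = act_observer()  # builds a fresh observer with those arguments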
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Observer(Cell): |
|
|
|
|
|
""" |
|
|
|
|
|
    Base class of Observer. An observer is used to calculate the statistics of a specific layer.
|
|
|
|
|
|
|
|
|
|
|
    Note:
|
|
|
|
|
This class is an abstract class. |
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
|
quant_dtype (QuantDtype): The type of FakeQuant data. |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, quant_dtype): |
|
|
|
|
|
super(Observer, self).__init__() |
|
|
|
|
|
self.quant_dtype = quant_dtype |
|
|
|
|
|
|
|
|
|
|
|
def extend_repr(self): |
|
|
|
|
|
s = f"dtype={self.dtype}" |
|
|
|
|
|
return s |
|
|
|
|
|
|
|
|
|
|
|
def construct(self): |
|
|
|
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
partial_init = classmethod(_partial_init) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class UniformQuantObserver(Observer): |
|
|
|
|
|
""" |
|
|
|
|
|
The base class of Uniform Quantization Observer. |
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
|
quant_dtype (QuantDtype): The type of FakeQuant data. Default: QuantDtype.INT8. |
|
|
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
|
|
symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. |
|
|
|
|
|
narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. |
|
|
|
|
|
        num_channels (int): Declares the channel size of min and max. Default: 1.
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
|
Tensor. |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
min_max_map = { |
|
|
|
|
|
QuantDtype.INT2: (-2, 1), |
|
|
|
|
|
QuantDtype.INT3: (-4, 3), |
|
|
|
|
|
QuantDtype.INT4: (-8, 7), |
|
|
|
|
|
QuantDtype.INT5: (-16, 15), |
|
|
|
|
|
QuantDtype.INT6: (-32, 31), |
|
|
|
|
|
QuantDtype.INT7: (-64, 63), |
|
|
|
|
|
QuantDtype.INT8: (-128, 127), |
|
|
|
|
|
|
|
|
|
|
|
QuantDtype.UINT2: (0, 3), |
|
|
|
|
|
QuantDtype.UINT3: (0, 7), |
|
|
|
|
|
QuantDtype.UINT4: (0, 15), |
|
|
|
|
|
QuantDtype.UINT5: (0, 31), |
|
|
|
|
|
QuantDtype.UINT6: (0, 63), |
|
|
|
|
|
QuantDtype.UINT7: (0, 127), |
|
|
|
|
|
QuantDtype.UINT8: (0, 255) |
|
|
|
|
|
} |
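    # These bounds follow the usual two's-complement ranges: an n-bit signed type covers
    # [-2**(n - 1), 2**(n - 1) - 1], and an n-bit unsigned type covers [0, 2**n - 1].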
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, quant_dtype=QuantDtype.INT8, per_channel=False, symmetric=False, narrow_range=False, |
|
|
|
|
|
num_channels=1): |
|
|
|
|
|
super(UniformQuantObserver, self).__init__(quant_dtype) |
|
|
|
|
|
self.per_channel = per_channel |
|
|
|
|
|
self.symmetric = symmetric |
|
|
|
|
|
self.narrow_range = narrow_range |
|
|
|
|
|
self.num_channels = num_channels |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeQuantWithMinMaxObserver(UniformQuantObserver): |
|
|
|
|
|
r""" |
|
|
|
|
|
Quantization aware op. This OP provides the fake quantization observer function on data with min and max. |
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
|
min_init (int, float): The initialized min value. Default: -6. |
|
|
|
|
|
max_init (int, float): The initialized max value. Default: 6. |
|
|
|
|
|
        ema (bool): Whether the exponential moving average (EMA) algorithm is used to update min and max. Default: False.
|
|
|
|
|
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. |
|
|
|
|
|
per_channel (bool): Quantization granularity based on layer or on channel. Default: False. |
|
|
|
|
|
        channel_axis (int): The axis along which per-channel quantization is applied. Default: 1.
|
|
|
|
|
        num_channels (int): Declares the channel size of min and max. Default: 1.
|
|
|
|
|
        quant_dtype (QuantDtype): The data type of quantization, supporting 4 and 8 bits. Default: QuantDtype.INT8.
|
|
|
|
|
symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. |
|
|
|
|
|
narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. |
|
|
|
|
|
        quant_delay (int): Quantization delay, in global steps, before fake quantization takes effect. Default: 0.
|
|
|
|
|
|
|
|
|
|
|
Inputs: |
|
|
|
|
|
- **x** (Tensor) - The input of FakeQuantWithMinMaxObserver. |
|
|
|
|
|
|
|
|
|
|
|
Outputs: |
|
|
|
|
|
        Tensor, with the same type and shape as `x`.
|
|
|
|
|
|
|
|
|
|
|
Examples: |
|
|
|
|
|
        >>> import numpy as np
        >>> import mindspore
        >>> from mindspore import Tensor
        >>> fake_quant = FakeQuantWithMinMaxObserver()
|
|
|
|
|
>>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) |
|
|
|
|
|
>>> result = fake_quant(input_x) |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, |
|
|
|
|
|
min_init=-6, |
|
|
|
|
|
max_init=6, |
|
|
|
|
|
ema=False, |
|
|
|
|
|
ema_decay=0.999, |
|
|
|
|
|
per_channel=False, |
|
|
|
|
|
channel_axis=1, |
|
|
|
|
|
num_channels=1, |
|
|
|
|
|
quant_dtype=QuantDtype.INT8, |
|
|
|
|
|
symmetric=False, |
|
|
|
|
|
narrow_range=False, |
|
|
|
|
|
quant_delay=0): |
|
|
|
|
|
"""Initialize FakeQuantWithMinMax layer""" |
|
|
|
|
|
super(FakeQuantWithMinMaxObserver, self).__init__(quant_dtype=quant_dtype, per_channel=per_channel, |
|
|
|
|
|
symmetric=symmetric, narrow_range=narrow_range, |
|
|
|
|
|
num_channels=num_channels) |
|
|
|
|
|
Validator.check_type("min_init", min_init, [int, float]) |
|
|
|
|
|
Validator.check_type("max_init", max_init, [int, float]) |
|
|
|
|
|
Validator.check("min_init", min_init, "max_init", max_init, rel=Rel.LT) |
|
|
|
|
|
Validator.check_integer('quant_delay', quant_delay, 0, Rel.GE) |
|
|
|
|
|
self.min_init = min_init |
|
|
|
|
|
self.max_init = max_init |
|
|
|
|
|
self.quant_dtype = quant_dtype |
|
|
|
|
|
self.ema = ema |
|
|
|
|
|
self.ema_decay = ema_decay |
|
|
|
|
|
self.per_channel = per_channel |
|
|
|
|
|
self.num_channels = num_channels |
|
|
|
|
|
self.channel_axis = channel_axis |
|
|
|
|
|
self.quant_delay = quant_delay |
|
|
|
|
|
self.symmetric = symmetric |
|
|
|
|
|
self.narrow_range = narrow_range |
|
|
|
|
|
self.is_ascend = context.get_context('device_target') == "Ascend" |
|
|
|
|
|
|
|
|
|
|
|
# init tensor min and max for fake quant op |
|
|
|
|
|
if self.per_channel: |
|
|
|
|
|
min_array = np.array([self.min_init] * self.num_channels).astype(np.float32) |
|
|
|
|
|
max_array = np.array([self.max_init] * self.num_channels).astype(np.float32) |
|
|
|
|
|
else: |
|
|
|
|
|
min_array = np.array([self.min_init]).astype(np.float32) |
|
|
|
|
|
max_array = np.array([self.max_init]).astype(np.float32) |
|
|
|
|
|
self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False) |
|
|
|
|
|
self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False) |
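        # minq/maxq are non-trainable Parameters: the running statistics are saved into
        # checkpoints with the network but are never touched by the optimizer.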
|
|
|
|
|
|
|
|
|
|
|
        # init fake quant related ops
|
|
|
|
|
if self.per_channel: |
|
|
|
|
|
quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis) |
|
|
|
|
|
ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis) |
|
|
|
|
|
else: |
|
|
|
|
|
quant_fun = Q.FakeQuantPerLayer |
|
|
|
|
|
ema_fun = Q.MinMaxUpdatePerLayer |
|
|
|
|
|
|
|
|
|
|
|
self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay) |
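        # On Ascend a single fused FakeQuant op serves both training and inference;
        # on other backends separate training and inference instances are created below.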
|
|
|
|
|
if self.is_ascend: |
|
|
|
|
|
self.fake_quant_train = quant_fun(num_bits=self.quant_dtype.num_bits, |
|
|
|
|
|
symmetric=self.symmetric, |
|
|
|
|
|
narrow_range=self.narrow_range, |
|
|
|
|
|
quant_delay=self.quant_delay) |
|
|
|
|
|
self.fake_quant_infer = self.fake_quant_train |
|
|
|
|
|
else: |
|
|
|
|
|
quant_fun = partial(quant_fun, |
|
|
|
|
|
ema=self.ema, |
|
|
|
|
|
ema_decay=ema_decay, |
|
|
|
|
|
num_bits=self.quant_dtype.num_bits, |
|
|
|
|
|
symmetric=self.symmetric, |
|
|
|
|
|
narrow_range=self.narrow_range, |
|
|
|
|
|
quant_delay=self.quant_delay) |
|
|
|
|
|
self.fake_quant_train = quant_fun(training=True) |
|
|
|
|
|
self.fake_quant_infer = quant_fun(training=False) |
|
|
|
|
|
|
|
|
|
|
|
def extend_repr(self): |
|
|
|
|
|
s = 'quant_dtype={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \ |
|
|
|
|
|
'quant_delay={}, min_init={}, max_init={}'.format(self.quant_dtype, self.symmetric, self.narrow_range, |
|
|
|
|
|
self.ema, self.ema_decay, self.per_channel, |
|
|
|
|
|
self.channel_axis, self.num_channels, self.quant_delay, |
|
|
|
|
|
self.min_init, self.max_init) |
|
|
|
|
|
return s |
|
|
|
|
|
|
|
|
|
|
|
def construct(self, x): |
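        # In training, first refresh the running min/max via the EMA update op, then fake
        # quantize with the refreshed statistics; in inference, reuse the stored min/max.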
|
|
|
|
|
if self.training: |
|
|
|
|
|
min_up, max_up = self.ema_update(x, self.minq, self.maxq) |
|
|
|
|
|
P.Assign()(self.minq, min_up) |
|
|
|
|
|
P.Assign()(self.maxq, max_up) |
|
|
|
|
|
out = self.fake_quant_train(x, self.minq, self.maxq) |
|
|
|
|
|
else: |
|
|
|
|
|
out = self.fake_quant_infer(x, self.minq, self.maxq) |
|
|
|
|
|
return out |
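# A minimal usage sketch (assumption: a convolution weight with 16 output channels,
# quantized per channel along axis 0; the names are illustrative only):
#     weight_observer = FakeQuantWithMinMaxObserver(per_channel=True, channel_axis=0,
#                                                   num_channels=16, symmetric=True)
#     fq_weight = weight_observer(conv_weight)  # same shape and dtype as conv_weight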
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeQuantWithMinMax(Cell):
    r"""
    Quantization aware op. This OP provides the fake quantization observer function on data with min and max.

    Args:
        min_init (int, float): The initialized min value. Default: -6.
        max_init (int, float): The initialized max value. Default: 6.
        ema (bool): Whether the exponential moving average (EMA) algorithm is used to update min and max. Default: False.
        ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
        per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
|
|
|