You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

quant.py 64 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Quantization aware training."""
  16. from functools import partial
  17. from collections import namedtuple
  18. import numpy as np
  19. import mindspore.common.dtype as mstype
  20. from mindspore.ops.primitive import Primitive
  21. from mindspore.ops import operations as P
  22. from mindspore.ops import functional as F
  23. from mindspore.common.parameter import Parameter
  24. from mindspore.common.initializer import initializer
  25. from mindspore.common.tensor import Tensor
  26. from mindspore._checkparam import Validator, Rel, twice
  27. from mindspore.compression.common import QuantDtype
  28. import mindspore.context as context
  29. from .normalization import BatchNorm2d
  30. from .activation import get_activation, ReLU
  31. from ..cell import Cell
  32. from ...ops.operations import _quant_ops as Q
# Public API of this module: fake-quantization observers and the
# quantization-aware layer cells exported via ``from ... import *``.
__all__ = [
    'FakeQuantWithMinMaxObserver',
    'Conv2dBnFoldQuantOneConv',
    'Conv2dBnFoldQuant',
    'Conv2dBnWithoutFoldQuant',
    'Conv2dQuant',
    'DenseQuant',
    'ActQuant',
    'TensorAddQuant',
    'MulQuant',
]
class BatchNormFoldCell(Cell):
    """
    Batch normalization folded.

    Args:
        momentum (float): Momentum value must be [0, 1]. Default: 0.9.
        epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in
            float32 else 1e-3. Default: 1e-5.
        freeze_bn (int): Delay in steps at which computation switches from regular batch
            norm to frozen mean and std. Default: 0.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C, H, W)`.
        - **mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **variance** (Tensor) - Tensor of shape :math:`(C,)`.
        - **global_step** (Tensor) - Tensor to record current global step.

    Outputs:
        Tuple of 4 Tensor, the normalized input and the updated parameters.

        - **batch_mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **batch_std** (Tensor) - Tensor of shape :math:`(C,)`.
        - **running_mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **running_std** (Tensor) - Tensor of shape :math:`(C,)`.
    """

    def __init__(self, momentum=0.9, epsilon=1e-5, freeze_bn=0):
        """Initialize batch norm fold layer"""
        super(BatchNormFoldCell, self).__init__()
        self.epsilon = epsilon
        # GPU provides a fused BatchNormFold kernel with separate train/infer
        # instances; other targets combine BNTrainingReduce with BatchNormFoldD.
        self.is_gpu = context.get_context('device_target') == "GPU"
        if self.is_gpu:
            self.bn_train = Q.BatchNormFold(momentum, epsilon, is_training=True, freeze_bn=freeze_bn)
            self.bn_infer = Q.BatchNormFold(momentum, epsilon, is_training=False, freeze_bn=freeze_bn)
        else:
            self.bn_reduce = P.BNTrainingReduce()
            self.bn_update = Q.BatchNormFoldD(momentum, epsilon, is_training=True, freeze_bn=freeze_bn)

    def construct(self, x, mean, variance, global_step):
        if self.is_gpu:
            if self.training:
                batch_mean, batch_std, running_mean, running_std = self.bn_train(x, mean, variance, global_step)
            else:
                batch_mean, batch_std, running_mean, running_std = self.bn_infer(x, mean, variance, global_step)
        else:
            if self.training:
                x_sum, x_square_sum = self.bn_reduce(x)
                _, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated = \
                    self.bn_update(x, x_sum, x_square_sum, mean, variance)
                # Persist the updated moving statistics back into the
                # caller-provided parameters.
                P.Assign()(mean, mean_updated)
                P.Assign()(variance, variance_updated)
            else:
                # Non-GPU inference: identity batch statistics, running stats
                # derived from the stored mean/variance.
                batch_mean = P.ZerosLike()(variance)
                batch_std = P.OnesLike()(variance)
                running_mean = P.TensorAdd()(mean, 0.)
                running_std = P.Sqrt()(P.TensorAdd()(variance, self.epsilon))
        return batch_mean, batch_std, running_mean, running_std
  95. def _partial_init(cls_or_self, **kwargs):
  96. """
  97. Wrapper that allows creation of class factories.
  98. This can be useful when there is a need to create classes with the same
  99. constructor arguments, but different instances.
  100. Example::
  101. >>> Foo.partial_init = classmethod(_partial_init)
  102. >>> foo_builder = Foo.partial_init(a=3, b=4).partial_init(answer=42)
  103. >>> foo_instance1 = foo_builder()
  104. >>> foo_instance2 = foo_builder()
  105. >>> result = (id(foo_instance1) == id(foo_instance2))
  106. >>> print(result)
  107. False
  108. """
  109. class _PartialWrapper:
  110. r"""
  111. class of wrapper that allows creation of class factories.
  112. """
  113. def __init__(self, p):
  114. self.p = p
  115. def __call__(self, *args, **keywords):
  116. return self.p(*args, **keywords)
  117. def __repr__(self):
  118. return self.p.__repr__()
  119. partial_init = _partial_init
  120. r = _PartialWrapper(partial(cls_or_self, **kwargs))
  121. return r
  122. class _Observer(Cell):
  123. """
  124. Base class of Observer. Observer is used to calculate the statistics of specific layer.
  125. Notes:
  126. This class is an abstract class.
  127. Args:
  128. quant_dtype (QuantDtype): The type of FakeQuant data.
  129. """
  130. def __init__(self, quant_dtype):
  131. super(_Observer, self).__init__()
  132. self.quant_dtype = quant_dtype
  133. def extend_repr(self):
  134. s = f"dtype={self.dtype}"
  135. return s
  136. def construct(self):
  137. pass
  138. partial_init = classmethod(_partial_init)
class UniformQuantObserver(_Observer):
    """
    The base class of Uniform Quantization Observer.

    Args:
        quant_dtype (QuantDtype): The type of FakeQuant data. Default: QuantDtype.INT8.
        per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
        symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
        narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
        num_channels (int): Declares the min and max channel size. Default: 1.

    Returns:
        Tensor.
    """

    # Representable (min, max) integer range for each supported quantized dtype.
    min_max_map = {
        QuantDtype.INT2: (-2, 1),
        QuantDtype.INT3: (-4, 3),
        QuantDtype.INT4: (-8, 7),
        QuantDtype.INT5: (-16, 15),
        QuantDtype.INT6: (-32, 31),
        QuantDtype.INT7: (-64, 63),
        QuantDtype.INT8: (-128, 127),

        QuantDtype.UINT2: (0, 3),
        QuantDtype.UINT3: (0, 7),
        QuantDtype.UINT4: (0, 15),
        QuantDtype.UINT5: (0, 31),
        QuantDtype.UINT6: (0, 63),
        QuantDtype.UINT7: (0, 127),
        QuantDtype.UINT8: (0, 255)
    }

    def __init__(self, quant_dtype=QuantDtype.INT8, per_channel=False, symmetric=False, narrow_range=False,
                 num_channels=1):
        super(UniformQuantObserver, self).__init__(quant_dtype)
        self.per_channel = per_channel
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.num_channels = num_channels
class FakeQuantWithMinMaxObserver(UniformQuantObserver):
    r"""
    Quantization aware op. This OP provides the fake quantization observer function on data with min and max.

    Args:
        min_init (int, float): The initialized min value. Default: -6.
        max_init (int, float): The initialized max value. Default: 6.
        ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
        ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
        per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
        channel_axis (int): Quantization by channel axis. Default: 1.
        num_channels (int): Declares the min and max channel size. Default: 1.
        quant_dtype (QuantDtype): The datatype of quantization, supporting 4 and 8bits. Default: QuantDtype.INT8.
        symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
        narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
        quant_delay (int): Quantization delay parameters according to the global step. Default: 0.

    Inputs:
        - **input** (Tensor) - The input of FakeQuantWithMinMaxObserver.

    Outputs:
        Tensor, with the same type and shape as the `input`.

    Examples:
        >>> fake_quant = nn.FakeQuantWithMinMaxObserver()
        >>> input = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
        >>> output = fake_quant(input)
        >>> print(output)
        [[ 0.9882355 1.9764705 0.9882355]
        [-1.9764705 0. -0.9882355]]
    """

    def __init__(self,
                 min_init=-6,
                 max_init=6,
                 ema=False,
                 ema_decay=0.999,
                 per_channel=False,
                 channel_axis=1,
                 num_channels=1,
                 quant_dtype=QuantDtype.INT8,
                 symmetric=False,
                 narrow_range=False,
                 quant_delay=0):
        """Initialize FakeQuantWithMinMaxObserver"""
        super(FakeQuantWithMinMaxObserver, self).__init__(quant_dtype=quant_dtype, per_channel=per_channel,
                                                          symmetric=symmetric, narrow_range=narrow_range,
                                                          num_channels=num_channels)
        Validator.check_value_type("min_init", min_init, [int, float], type(self).__name__)
        Validator.check_value_type("max_init", max_init, [int, float], type(self).__name__)
        # min_init must be strictly less than max_init.
        Validator.check("min_init", min_init, "max_init", max_init, rel=Rel.LT)
        Validator.check_non_negative_int(quant_delay, 'quant_delay')
        self.min_init = min_init
        self.max_init = max_init
        self.quant_dtype = quant_dtype
        self.ema = ema
        self.ema_decay = ema_decay
        self.per_channel = per_channel
        self.num_channels = num_channels
        self.channel_axis = channel_axis
        self.quant_delay = quant_delay
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.is_ascend = context.get_context('device_target') == "Ascend"

        # init tensor min and max for fake quant op: one (min, max) pair per
        # channel when per_channel, otherwise a single pair for the layer.
        if self.per_channel:
            min_array = np.array([self.min_init] * self.num_channels).astype(np.float32)
            max_array = np.array([self.max_init] * self.num_channels).astype(np.float32)
        else:
            min_array = np.array([self.min_init]).astype(np.float32)
            max_array = np.array([self.max_init]).astype(np.float32)
        self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
        self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)

        # init fake quant relative op: per-channel variants need the channel axis.
        if self.per_channel:
            quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis)
            ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis)
        else:
            quant_fun = Q.FakeQuantPerLayer
            ema_fun = Q.MinMaxUpdatePerLayer
        self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay)
        if self.is_ascend:
            # On Ascend a single op instance serves both training and inference.
            self.fake_quant_train = quant_fun(num_bits=self.quant_dtype.num_bits,
                                             symmetric=self.symmetric,
                                             narrow_range=self.narrow_range,
                                             quant_delay=self.quant_delay)
            self.fake_quant_infer = self.fake_quant_train
        else:
            quant_fun = partial(quant_fun,
                                ema=self.ema,
                                ema_decay=ema_decay,
                                num_bits=self.quant_dtype.num_bits,
                                symmetric=self.symmetric,
                                narrow_range=self.narrow_range,
                                quant_delay=self.quant_delay)
            self.fake_quant_train = quant_fun(training=True)
            self.fake_quant_infer = quant_fun(training=False)

    def extend_repr(self):
        s = 'quant_dtype={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \
            'quant_delay={}, min_init={}, max_init={}'.format(self.quant_dtype, self.symmetric, self.narrow_range,
                                                              self.ema, self.ema_decay, self.per_channel,
                                                              self.channel_axis, self.num_channels, self.quant_delay,
                                                              self.min_init, self.max_init)
        return s

    def construct(self, x):
        if self.training:
            # Refresh the observed min/max (optionally EMA-smoothed) before
            # fake-quantizing with the updated range.
            min_up, max_up = self.ema_update(x, self.minq, self.maxq)
            P.Assign()(self.minq, min_up)
            P.Assign()(self.maxq, max_up)
            out = self.fake_quant_train(x, self.minq, self.maxq)
        else:
            out = self.fake_quant_infer(x, self.minq, self.maxq)
        return out
# Pair of observer constructors used by the quantized cells below:
# ``weight`` observes weights, ``activation`` observes layer outputs.
QuantConfig = namedtuple("QuantConfig", ['weight', 'activation'])

# Default configuration: min/max fake-quant observers for both roles.
quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activation=FakeQuantWithMinMaxObserver)
  284. class Conv2dBnFoldQuantOneConv(Cell):
  285. r"""
  286. 2D convolution with BatchNormal op folded construct.
  287. This part is a more detailed overview of Conv2d op.
  288. Args:
  289. in_channels (int): The number of input channel :math:`C_{in}`.
  290. out_channels (int): The number of output channel :math:`C_{out}`.
  291. kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
  292. stride (int): Specifies stride for all spatial dimensions with the same value.
  293. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  294. padding (int): Implicit paddings on both sides of the input. Default: 0.
  295. eps (float): Parameters for BatchNormal. Default: 1e-5.
  296. momentum (float): Parameters for BatchNormal op. Default: 0.997.
  297. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  298. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  299. convolution kernel. Default: 'normal'.
  300. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  301. bias vector. Default: 'zeros'.
  302. beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  303. beta vector. Default: 'zeros'.
  304. gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  305. gamma vector. Default: 'ones'.
  306. mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  307. mean vector. Default: 'zeros'.
  308. var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  309. variance vector. Default: 'ones'.
  310. fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
  311. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  312. both set to default FakeQuantWithMinMaxObserver.
  313. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  314. Inputs:
  315. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  316. Outputs:
  317. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  318. Examples:
  319. >>> qconfig = compression.quant.create_quant_config()
  320. >>> conv2d_bnfold = nn.Conv2dBnFoldQuant(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
  321. >>> quant_config=qconfig)
  322. >>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mindspore.float32)
  323. >>> result = conv2d_bnfold(input)
  324. >>> output = result.shape
  325. >>> print(output)
  326. (2, 6, 2, 2)
  327. """
  328. def __init__(self,
  329. in_channels,
  330. out_channels,
  331. kernel_size,
  332. stride=1,
  333. pad_mode='same',
  334. padding=0,
  335. dilation=1,
  336. group=1,
  337. eps=1e-5,
  338. momentum=0.997,
  339. has_bias=False,
  340. weight_init='normal',
  341. bias_init='zeros',
  342. beta_init='zeros',
  343. gamma_init='ones',
  344. mean_init='zeros',
  345. var_init='ones',
  346. fake=True,
  347. quant_config=quant_config_default,
  348. quant_dtype=QuantDtype.INT8):
  349. """Initialize Conv2dBnFoldQuant layer"""
  350. super(Conv2dBnFoldQuantOneConv, self).__init__()
  351. self.in_channels = in_channels
  352. self.out_channels = out_channels
  353. self.kernel_size = twice(kernel_size)
  354. self.stride = twice(stride)
  355. self.pad_mode = pad_mode
  356. self.padding = padding
  357. self.dilation = twice(dilation)
  358. self.group = group
  359. self.eps = eps
  360. self.momentum = momentum
  361. self.has_bias = has_bias
  362. self.fake = fake
  363. self.quant_config = quant_config
  364. self.quant_dtype = quant_dtype
  365. data_format = 'NCHW'
  366. self.format = Validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
  367. self.is_gpu = context.get_context('device_target') == "GPU"
  368. self.is_Ascend = context.get_context('device_target') == "Ascend"
  369. self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
  370. if context.get_context("enable_ge"):
  371. self.is_ge_backend = True
  372. else:
  373. self.is_ge_backend = False
  374. self.enable_default_train = self.is_graph_mode and \
  375. (self.is_ge_backend or self.is_ascend)
  376. # initialize convolution op and Parameter
  377. if context.get_context('device_target') == "Ascend" and group > 1:
  378. Validator.check_equal_int(group, in_channels, 'group')
  379. Validator.check_equal_int(group, out_channels, 'group')
  380. self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
  381. kernel_size=self.kernel_size,
  382. pad_mode=pad_mode,
  383. pad=padding,
  384. stride=self.stride,
  385. dilation=self.dilation)
  386. weight_shape = [1, in_channels, *self.kernel_size]
  387. channel_axis = 1
  388. else:
  389. self.conv = P.Conv2D(out_channel=out_channels,
  390. kernel_size=self.kernel_size,
  391. pad_mode=pad_mode,
  392. pad=padding,
  393. stride=self.stride,
  394. dilation=self.dilation,
  395. group=group)
  396. weight_shape = [out_channels, in_channels // group, *self.kernel_size]
  397. channel_axis = 0
  398. self.channel_axis = channel_axis
  399. self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
  400. self.bias_add = P.BiasAdd()
  401. if Validator.check_bool(has_bias):
  402. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  403. else:
  404. self.bias = None
  405. # initialize BatchNorm Parameter
  406. self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma')
  407. self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta')
  408. self.moving_mean = Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False)
  409. self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance',
  410. requires_grad=False)
  411. # initialize fake ops
  412. self.fake_quant_weight = quant_config.weight(min_init=-6,
  413. max_init=6,
  414. ema=False,
  415. channel_axis=channel_axis,
  416. num_channels=out_channels,
  417. quant_dtype=quant_dtype)
  418. if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
  419. self.bn_train = P.BatchNorm(is_training=True,
  420. epsilon=self.eps)
  421. elif self.is_gpu:
  422. self.bn_train = P.FusedBatchNormEx(mode=1,
  423. epsilon=self.eps,
  424. momentum=self.momentum,
  425. data_format=self.format)
  426. else:
  427. self.bn_train = P.FusedBatchNorm(mode=1,
  428. epsilon=self.eps,
  429. momentum=self.momentum)
  430. self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format)
  431. data_parallel_strategy = ((1,), (1,))
  432. data_parallel_strategy_one = ((1,), ())
  433. self.sub_mean = P.Sub().shard(data_parallel_strategy)
  434. self.sub_var = P.Sub().shard(data_parallel_strategy)
  435. self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
  436. self.mul_var = P.Mul().shard(data_parallel_strategy_one)
  437. self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
  438. self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)
  439. self.reshape = P.Reshape()
  440. def extend_repr(self):
  441. s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
  442. 'pad_mode={}, padding={}, dilation={}, group={}, ' \
  443. 'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format(self.in_channels, self.out_channels,
  444. self.kernel_size, self.stride,
  445. self.pad_mode, self.padding, self.dilation,
  446. self.group,
  447. self.fake, self.freeze_bn, self.momentum,
  448. self.fake_quant_weight.quant_delay)
  449. return s
  450. def construct(self, x):
  451. running_std = P.Sqrt()(P.TensorAdd()(self.moving_variance, self.eps))
  452. scale_factor = self.gamma / running_std
  453. if self.channel_axis:
  454. scale_factor = self.reshape(scale_factor, (1, -1, 1, 1))
  455. else:
  456. scale_factor = self.reshape(scale_factor, (-1, 1, 1, 1))
  457. weight = self.weight * scale_factor
  458. if self.fake:
  459. weight = self.fake_quant_weight(weight)
  460. conv = self.conv(x, weight)
  461. scale_factor = self.reshape(scale_factor, (1, -1, 1, 1))
  462. if self.enable_default_train:
  463. scale_factor = P.Reciprocal()(scale_factor)
  464. conv_orig = conv * scale_factor
  465. else:
  466. conv_orig = conv / scale_factor
  467. if self.training:
  468. if self.enable_default_train:
  469. out, batch_mean, batch_var, _, _ = self.bn_train(conv_orig,
  470. self.gamma,
  471. self.beta,
  472. None,
  473. None)
  474. mean_sub = self.sub_mean(self.moving_mean, batch_mean)
  475. temp_mean = self.mul_mean(mean_sub, self.momentum)
  476. mean_sub2 = self.sub_var(self.moving_variance, batch_var)
  477. temp_variance = self.mul_var(mean_sub2, self.momentum)
  478. out = F.depend(out, self.assign_sub_mean(self.moving_mean, temp_mean))
  479. out = F.depend(out, self.assign_sub_var(self.moving_variance, temp_variance))
  480. return out
  481. return self.bn_train(conv_orig,
  482. self.gamma,
  483. self.beta,
  484. self.moving_mean,
  485. self.moving_variance)[0]
  486. return self.bn_infer(conv_orig,
  487. self.gamma,
  488. self.beta,
  489. self.moving_mean,
  490. self.moving_variance)[0]
  491. class Conv2dBnFoldQuant(Cell):
  492. r"""
  493. 2D convolution with BatchNormal op folded construct.
  494. This part is a more detailed overview of Conv2d op.
  495. Args:
  496. in_channels (int): The number of input channel :math:`C_{in}`.
  497. out_channels (int): The number of output channel :math:`C_{out}`.
  498. kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
  499. stride (int): Specifies stride for all spatial dimensions with the same value.
  500. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  501. padding (int): Implicit paddings on both sides of the input. Default: 0.
  502. eps (float): Parameters for BatchNormal. Default: 1e-5.
  503. momentum (float): Parameters for BatchNormal op. Default: 0.997.
  504. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  505. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  506. convolution kernel. Default: 'normal'.
  507. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  508. bias vector. Default: 'zeros'.
  509. beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  510. beta vector. Default: 'zeros'.
  511. gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  512. gamma vector. Default: 'ones'.
  513. mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  514. mean vector. Default: 'zeros'.
  515. var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
  516. variance vector. Default: 'ones'.
  517. fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
  518. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  519. both set to default FakeQuantWithMinMaxObserver.
  520. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  521. freeze_bn (int): The quantization freeze BatchNormal op is according to the global step. Default: 100000.
  522. Inputs:
  523. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  524. Outputs:
  525. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  526. Supported Platforms:
  527. ``Ascend`` ``GPU``
  528. Examples:
  529. >>> qconfig = compression.quant.create_quant_config()
  530. >>> conv2d_bnfold = nn.Conv2dBnFoldQuant(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
  531. ... quant_config=qconfig)
  532. >>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mindspore.float32)
  533. >>> output = conv2d_bnfold(input)
  534. >>> print(output.shape)
  535. (2, 6, 2, 2)
  536. """
  537. def __init__(self,
  538. in_channels,
  539. out_channels,
  540. kernel_size,
  541. stride=1,
  542. pad_mode='same',
  543. padding=0,
  544. dilation=1,
  545. group=1,
  546. eps=1e-5,
  547. momentum=0.997,
  548. has_bias=False,
  549. weight_init='normal',
  550. bias_init='zeros',
  551. beta_init='zeros',
  552. gamma_init='ones',
  553. mean_init='zeros',
  554. var_init='ones',
  555. fake=True,
  556. quant_config=quant_config_default,
  557. quant_dtype=QuantDtype.INT8,
  558. freeze_bn=100000):
  559. """Initialize Conv2dBnFoldQuant layer"""
  560. super(Conv2dBnFoldQuant, self).__init__()
  561. self.in_channels = in_channels
  562. self.out_channels = out_channels
  563. self.kernel_size = twice(kernel_size)
  564. self.stride = twice(stride)
  565. self.pad_mode = pad_mode
  566. self.padding = padding
  567. self.dilation = twice(dilation)
  568. self.group = group
  569. self.eps = eps
  570. self.momentum = momentum
  571. self.has_bias = has_bias
  572. self.freeze_bn = freeze_bn
  573. self.fake = fake
  574. self.quant_config = quant_config
  575. self.quant_dtype = quant_dtype
  576. self.is_gpu = context.get_context('device_target') == "GPU"
  577. # initialize convolution op and Parameter
  578. if context.get_context('device_target') == "Ascend" and group > 1:
  579. Validator.check_equal_int(group, in_channels, 'group')
  580. Validator.check_equal_int(group, out_channels, 'group')
  581. self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
  582. kernel_size=self.kernel_size,
  583. pad_mode=pad_mode,
  584. pad=padding,
  585. stride=self.stride,
  586. dilation=self.dilation)
  587. weight_shape = [1, in_channels, *self.kernel_size]
  588. channel_axis = 1
  589. else:
  590. self.conv = P.Conv2D(out_channel=out_channels,
  591. kernel_size=self.kernel_size,
  592. pad_mode=pad_mode,
  593. pad=padding,
  594. stride=self.stride,
  595. dilation=self.dilation,
  596. group=group)
  597. weight_shape = [out_channels, in_channels // group, *self.kernel_size]
  598. channel_axis = 0
  599. self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
  600. self.bias_add = P.BiasAdd()
  601. if Validator.check_bool(has_bias):
  602. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  603. else:
  604. self.bias = None
  605. # initialize BatchNorm Parameter
  606. self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma')
  607. self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta')
  608. self.moving_mean = Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False)
  609. self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance',
  610. requires_grad=False)
  611. # initialize fake ops
  612. self.fake_quant_weight = quant_config.weight(min_init=-6,
  613. max_init=6,
  614. ema=False,
  615. channel_axis=channel_axis,
  616. num_channels=out_channels,
  617. quant_dtype=quant_dtype)
  618. self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn)
  619. self.correct_mul = Q.CorrectionMul(channel_axis)
  620. if context.get_context('device_target') == "Ascend":
  621. self.batchnorm_fold2_train = Q.BatchNormFold2_D(freeze_bn=freeze_bn)
  622. self.batchnorm_fold2_infer = Q.BatchNormFold2_D(freeze_bn=0)
  623. elif context.get_context('device_target') == "GPU":
  624. self.batchnorm_fold2_train = Q.BatchNormFold2(freeze_bn=freeze_bn)
  625. self.batchnorm_fold2_infer = Q.BatchNormFold2(freeze_bn=0)
  626. else:
  627. raise ValueError("Unsupported platform: {}".format(context.get_context('device_target')))
  628. self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False)
  629. self.one = Tensor(1, mstype.int32)
  630. self.assignadd = P.AssignAdd()
  631. def extend_repr(self):
  632. s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
  633. 'pad_mode={}, padding={}, dilation={}, group={}, ' \
  634. 'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format(self.in_channels, self.out_channels,
  635. self.kernel_size, self.stride,
  636. self.pad_mode, self.padding, self.dilation,
  637. self.group,
  638. self.fake, self.freeze_bn, self.momentum,
  639. self.fake_quant_weight.quant_delay)
  640. return s
    def construct(self, x):
        """Forward pass with batchnorm folding.

        First convolves with the raw weight to collect BatchNorm statistics,
        folds those statistics into the weight (CorrectionMul), optionally
        fake-quantizes the folded weight, then convolves again and applies the
        BatchNormFold2 correction.
        """
        # Statistics pass: convolution with the unfolded weight.
        out_conv = self.conv(x, self.weight)
        if self.has_bias:
            out_conv = self.bias_add(out_conv, self.bias)
        # BN fold1: compute batch/running mean and std from the conv output.
        batch_mean, batch_std, running_mean, running_std = self.batchnorm_fold(out_conv,
                                                                               self.moving_mean,
                                                                               self.moving_variance,
                                                                               self.step)
        # fake weight: fold gamma/running_std into the weight, then fake-quantize.
        weight = self.correct_mul(self.weight, self.gamma, running_std)
        if self.fake:
            weight = self.fake_quant_weight(weight)
        out = self.conv(x, weight)
        if self.has_bias:
            out = self.bias_add(out, self.bias)
        # BN fold2: the GPU and Ascend kernels take different argument lists.
        if self.is_gpu:
            if self.training:
                out = self.batchnorm_fold2_train(out, self.beta, self.gamma,
                                                 batch_std, batch_mean, running_std, running_mean, self.step)
                # Advance the global step only after `out` is produced.
                F.control_depend(out, self.assignadd(self.step, self.one))
            else:
                out = self.batchnorm_fold2_infer(out, self.beta, self.gamma,
                                                 batch_std, batch_mean, running_std, running_mean, self.step)
        else:
            if self.training:
                out = self.batchnorm_fold2_train(out, self.beta, self.gamma, batch_std, batch_mean, running_std)
                F.control_depend(out, self.assignadd(self.step, self.one))
            else:
                # NOTE(review): inference deliberately substitutes running stats for
                # batch stats (running_std appears twice); confirm against the
                # BatchNormFold2_D kernel signature before changing.
                out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, running_std, running_mean, running_std)
        return out
  673. class Conv2dBnWithoutFoldQuant(Cell):
  674. r"""
  675. 2D convolution + batchnorm without fold with fake quant construct.
  676. This part is a more detailed overview of Conv2d op.
  677. Args:
  678. in_channels (int): The number of input channel :math:`C_{in}`.
  679. out_channels (int): The number of output channel :math:`C_{out}`.
  680. kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
  681. stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1.
  682. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  683. padding (int): Implicit paddings on both sides of the input. Default: 0.
  684. dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  685. group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  686. divisible by the number of groups. Default: 1.
  687. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  688. eps (float): Parameters for BatchNormal. Default: 1e-5.
  689. momentum (float): Parameters for BatchNormal op. Default: 0.997.
  690. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  691. Default: 'normal'.
  692. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
  693. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  694. both set to default FakeQuantWithMinMaxObserver.
  695. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  696. Inputs:
  697. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  698. Outputs:
  699. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  700. Supported Platforms:
  701. ``Ascend`` ``GPU``
  702. Examples:
  703. >>> qconfig = compression.quant.create_quant_config()
  704. >>> conv2d_no_bnfold = nn.Conv2dBnWithoutFoldQuant(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid",
  705. ... quant_config=qconfig)
  706. >>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mstype.float32)
  707. >>> output = conv2d_no_bnfold(input)
  708. >>> print(output.shape)
  709. (2, 6, 2, 2)
  710. """
  711. def __init__(self,
  712. in_channels,
  713. out_channels,
  714. kernel_size,
  715. stride=1,
  716. pad_mode='same',
  717. padding=0,
  718. dilation=1,
  719. group=1,
  720. has_bias=False,
  721. eps=1e-5,
  722. momentum=0.997,
  723. weight_init='normal',
  724. bias_init='zeros',
  725. quant_config=quant_config_default,
  726. quant_dtype=QuantDtype.INT8):
  727. super(Conv2dBnWithoutFoldQuant, self).__init__()
  728. self.in_channels = Validator.check_positive_int(in_channels)
  729. self.out_channels = Validator.check_positive_int(out_channels)
  730. self.has_bias = has_bias
  731. self.kernel_size = twice(kernel_size)
  732. self.stride = twice(stride)
  733. self.dilation = twice(dilation)
  734. self.pad_mode = pad_mode
  735. self.padding = padding
  736. self.group = group
  737. self.bias_add = P.BiasAdd()
  738. if Validator.check_bool(has_bias):
  739. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  740. else:
  741. self.bias = None
  742. # initialize convolution op and Parameter
  743. if context.get_context('device_target') == "Ascend" and group > 1:
  744. Validator.check_equal_int(group, in_channels, 'group')
  745. Validator.check_equal_int(group, out_channels, 'group')
  746. self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
  747. kernel_size=self.kernel_size,
  748. pad_mode=pad_mode,
  749. pad=padding,
  750. stride=self.stride,
  751. dilation=self.dilation)
  752. weight_shape = [1, in_channels, *self.kernel_size]
  753. channel_axis = 1
  754. else:
  755. self.conv = P.Conv2D(out_channel=self.out_channels,
  756. kernel_size=self.kernel_size,
  757. mode=1,
  758. pad_mode=self.pad_mode,
  759. pad=self.padding,
  760. stride=self.stride,
  761. dilation=self.dilation,
  762. group=self.group)
  763. weight_shape = [out_channels, in_channels // group, *self.kernel_size]
  764. channel_axis = 0
  765. self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
  766. self.fake_quant_weight = quant_config.weight(min_init=-6,
  767. max_init=6,
  768. ema=False,
  769. channel_axis=channel_axis,
  770. num_channels=out_channels,
  771. quant_dtype=quant_dtype)
  772. self.batchnorm = BatchNorm2d(out_channels, eps=eps, momentum=momentum)
  773. def construct(self, x):
  774. weight = self.fake_quant_weight(self.weight)
  775. out = self.conv(x, weight)
  776. if self.has_bias:
  777. out = self.bias_add(out, self.bias)
  778. out = self.batchnorm(out)
  779. return out
  780. def extend_repr(self):
  781. s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
  782. 'pad_mode={}, padding={}, dilation={}, group={}, ' \
  783. 'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride,
  784. self.pad_mode, self.padding, self.dilation, self.group,
  785. self.has_bias, self.fake_quant_weight.quant_delay)
  786. return s
  787. class Conv2dQuant(Cell):
  788. r"""
  789. 2D convolution with fake quant op layer.
  790. This part is a more detailed overview of Conv2d op.
  791. Args:
  792. in_channels (int): The number of input channel :math:`C_{in}`.
  793. out_channels (int): The number of output channel :math:`C_{out}`.
  794. kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
  795. stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1.
  796. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  797. padding (int): Implicit paddings on both sides of the input. Default: 0.
  798. dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  799. group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  800. divisible by the number of groups. Default: 1.
  801. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  802. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  803. Default: 'normal'.
  804. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
  805. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  806. both set to default FakeQuantWithMinMaxObserver.
  807. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  808. Inputs:
  809. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  810. Outputs:
  811. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  812. Supported Platforms:
  813. ``Ascend`` ``GPU``
  814. Examples:
  815. >>> qconfig = compression.quant.create_quant_config()
  816. >>> conv2d_quant = nn.Conv2dQuant(1, 6, kernel_size= (2, 2), stride=(1, 1), pad_mode="valid",
  817. ... quant_config=qconfig)
  818. >>> input = Tensor(np.random.randint(-2, 2, (2, 1, 3, 3)), mindspore.float32)
  819. >>> output = conv2d_quant(input)
  820. >>> print(output.shape)
  821. (2, 6, 2, 2)
  822. """
  823. def __init__(self,
  824. in_channels,
  825. out_channels,
  826. kernel_size,
  827. stride=1,
  828. pad_mode='same',
  829. padding=0,
  830. dilation=1,
  831. group=1,
  832. has_bias=False,
  833. weight_init='normal',
  834. bias_init='zeros',
  835. quant_config=quant_config_default,
  836. quant_dtype=QuantDtype.INT8):
  837. super(Conv2dQuant, self).__init__()
  838. self.in_channels = Validator.check_positive_int(in_channels)
  839. self.out_channels = Validator.check_positive_int(out_channels)
  840. self.has_bias = has_bias
  841. self.kernel_size = twice(kernel_size)
  842. self.stride = twice(stride)
  843. self.dilation = twice(dilation)
  844. self.pad_mode = pad_mode
  845. self.padding = padding
  846. self.group = group
  847. weight_shape = [out_channels, in_channels // group, *self.kernel_size]
  848. self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
  849. self.bias_add = P.BiasAdd()
  850. if Validator.check_bool(has_bias):
  851. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  852. else:
  853. self.bias = None
  854. self.conv = P.Conv2D(out_channel=self.out_channels,
  855. kernel_size=self.kernel_size,
  856. mode=1,
  857. pad_mode=self.pad_mode,
  858. pad=self.padding,
  859. stride=self.stride,
  860. dilation=self.dilation,
  861. group=self.group)
  862. self.fake_quant_weight = quant_config.weight(min_init=-6,
  863. max_init=6,
  864. ema=False,
  865. channel_axis=0,
  866. num_channels=out_channels,
  867. quant_dtype=quant_dtype)
  868. def construct(self, x):
  869. weight = self.fake_quant_weight(self.weight)
  870. out = self.conv(x, weight)
  871. if self.has_bias:
  872. return self.bias_add(out, self.bias)
  873. return out
  874. def extend_repr(self):
  875. s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
  876. 'pad_mode={}, padding={}, dilation={}, group={}, ' \
  877. 'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride,
  878. self.pad_mode, self.padding, self.dilation, self.group,
  879. self.has_bias, self.fake_quant_weight.quant_delay)
  880. return s
  881. class DenseQuant(Cell):
  882. r"""
  883. The fully connected layer with fake quant op.
  884. This part is a more detailed overview of Dense op.
  885. Args:
  886. in_channels (int): The dimension of the input space.
  887. out_channels (int): The dimension of the output space.
  888. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  889. is same as input. The values of str refer to the function `initializer`. Default: 'normal'.
  890. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  891. same as input. The values of str refer to the function `initializer`. Default: 'zeros'.
  892. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  893. activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer,
  894. eg. 'relu'. Default: None.
  895. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  896. both set to default FakeQuantWithMinMaxObserver.
  897. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  898. Inputs:
  899. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  900. Outputs:
  901. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  902. Supported Platforms:
  903. ``Ascend`` ``GPU``
  904. Examples:
  905. >>> qconfig = compression.quant.create_quant_config()
  906. >>> dense_quant = nn.DenseQuant(3, 6, quant_config=qconfig)
  907. >>> input = Tensor(np.random.randint(-2, 2, (2, 3)), mindspore.float32)
  908. >>> result = dense_quant(input)
  909. >>> output = result.shape
  910. >>> print(output)
  911. (2, 6)
  912. """
  913. def __init__(self,
  914. in_channels,
  915. out_channels,
  916. weight_init='normal',
  917. bias_init='zeros',
  918. has_bias=True,
  919. activation=None,
  920. quant_config=quant_config_default,
  921. quant_dtype=QuantDtype.INT8):
  922. super(DenseQuant, self).__init__()
  923. self.in_channels = Validator.check_positive_int(in_channels)
  924. self.out_channels = Validator.check_positive_int(out_channels)
  925. self.has_bias = Validator.check_bool(has_bias)
  926. if isinstance(weight_init, Tensor):
  927. if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
  928. weight_init.shape[1] != in_channels:
  929. raise ValueError("weight_init shape error")
  930. self.weight = Parameter(initializer(
  931. weight_init, [out_channels, in_channels]), name="weight")
  932. if self.has_bias:
  933. if isinstance(bias_init, Tensor):
  934. if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
  935. raise ValueError("bias_init shape error")
  936. self.bias = Parameter(initializer(
  937. bias_init, [out_channels]), name="bias")
  938. self.matmul = P.MatMul(transpose_b=True)
  939. self.bias_add = P.BiasAdd()
  940. self.activation = get_activation(activation) if isinstance(activation, str) else activation
  941. if activation is not None and not isinstance(self.activation, (Cell, Primitive)):
  942. raise TypeError("The activation must be str or Cell or Primitive,"" but got {}.".format(activation))
  943. self.activation_flag = self.activation is not None
  944. self.fake_quant_weight = quant_config.weight(min_init=-6,
  945. max_init=6,
  946. ema=False,
  947. channel_axis=0,
  948. num_channels=out_channels,
  949. quant_dtype=quant_dtype)
  950. def construct(self, x):
  951. """Use operators to construct the Dense layer."""
  952. output = self.fake_quant_weight(self.weight)
  953. output = self.matmul(x, output)
  954. if self.has_bias:
  955. output = self.bias_add(output, self.bias)
  956. if self.activation_flag:
  957. return self.activation(output)
  958. return output
  959. def extend_repr(self):
  960. """A pretty print for Dense layer."""
  961. s = 'in_channels={}, out_channels={}, weight={}, has_bias={}'.format(
  962. self.in_channels, self.out_channels, self.weight, self.has_bias)
  963. if self.has_bias:
  964. s += ', bias={}'.format(self.bias)
  965. if self.activation_flag:
  966. s += ', activation={}'.format(self.activation)
  967. return s
  968. class _QuantActivation(Cell):
  969. r"""
  970. Base class for quantization aware training activation function. Add Fake Quant OP after activation OP.
  971. """
  972. def get_origin(self):
  973. raise NotImplementedError
  974. class ActQuant(_QuantActivation):
  975. r"""
  976. Quantization aware training activation function.
  977. Add the fake quant op to the end of activation op, by which the output of activation op will be truncated.
  978. Please check `FakeQuantWithMinMaxObserver` or other observer for more details.
  979. Args:
  980. activation (Cell): Activation cell.
  981. ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
  982. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  983. fake_before (bool): Whether add fake quant operation before activation. Default: False.
  984. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  985. both set to default FakeQuantWithMinMaxObserver.
  986. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  987. Inputs:
  988. - **input** (Tensor) - The input of ReLU6Quant.
  989. Outputs:
  990. Tensor, with the same type and shape as the `input`.
  991. Supported Platforms:
  992. ``Ascend`` ``GPU``
  993. Examples:
  994. >>> qconfig = compression.quant.create_quant_config()
  995. >>> act_quant = nn.ActQuant(nn.ReLU(), quant_config=qconfig)
  996. >>> input = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32)
  997. >>> output = act_quant(input)
  998. >>> print(output)
  999. [[0.9882355 1.9764705 0. ]
  1000. [0. 0. 0. ]]
  1001. """
  1002. def __init__(self,
  1003. activation,
  1004. ema=False,
  1005. ema_decay=0.999,
  1006. fake_before=False,
  1007. quant_config=quant_config_default,
  1008. quant_dtype=QuantDtype.INT8):
  1009. super(ActQuant, self).__init__()
  1010. self.act = Validator.check_isinstance("activation", activation, Cell)
  1011. self.fake_before = Validator.check_bool(fake_before, "fake_before")
  1012. if self.fake_before:
  1013. self.fake_quant_act_before = quant_config.activation(min_init=-6,
  1014. max_init=6,
  1015. ema=ema,
  1016. ema_decay=ema_decay,
  1017. quant_dtype=quant_dtype)
  1018. self.fake_quant_act = quant_config.activation(min_init=-6,
  1019. max_init=6,
  1020. ema=ema,
  1021. ema_decay=ema_decay,
  1022. quant_dtype=quant_dtype)
  1023. def construct(self, x):
  1024. if self.fake_before:
  1025. x = self.fake_quant_act_before(x)
  1026. x = self.act(x)
  1027. x = self.fake_quant_act(x)
  1028. return x
  1029. def get_origin(self):
  1030. return self.act
  1031. class TensorAddQuant(Cell):
  1032. r"""
  1033. Add Fake Quant OP after TensorAdd OP.
  1034. This part is a more detailed overview of TensorAdd op.
  1035. Args:
  1036. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  1037. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  1038. both set to default FakeQuantWithMinMaxObserver.
  1039. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  1040. Inputs:
  1041. - **input_x1** (Tensor) - The first tensor of TensorAddQuant.
  1042. - **input_x2** (Tensor) - The second tensor of TensorAddQuant.
  1043. Outputs:
  1044. Tensor, with the same type and shape as the `input_x1`.
  1045. Supported Platforms:
  1046. ``Ascend`` ``GPU``
  1047. Examples:
  1048. >>> qconfig = compression.quant.create_quant_config()
  1049. >>> add_quant = nn.TensorAddQuant(quant_config=qconfig)
  1050. >>> input_x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  1051. >>> input_x2 = Tensor(np.ones((2, 3)), mindspore.float32)
  1052. >>> output = add_quant(input_x1, input_x2)
  1053. >>> print(output)
  1054. [[ 1.9764705 3.011765 1.9764705]
  1055. [-0.9882355 0.9882355 0. ]]
  1056. """
  1057. def __init__(self,
  1058. ema_decay=0.999,
  1059. quant_config=quant_config_default,
  1060. quant_dtype=QuantDtype.INT8):
  1061. super(TensorAddQuant, self).__init__()
  1062. self.fake_quant_act = quant_config.activation(min_init=-6,
  1063. max_init=6,
  1064. ema=True,
  1065. ema_decay=ema_decay,
  1066. quant_dtype=quant_dtype)
  1067. self.add = P.TensorAdd()
  1068. def construct(self, x1, x2):
  1069. x = self.add(x1, x2)
  1070. x = self.fake_quant_act(x)
  1071. return x
  1072. class MulQuant(Cell):
  1073. r"""
  1074. Add Fake Quant OP after Mul OP.
  1075. This part is a more detailed overview of Mul op.
  1076. Args:
  1077. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  1078. quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
  1079. both set to default FakeQuantWithMinMaxObserver.
  1080. quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
  1081. Inputs:
  1082. - **input_x1** (Tensor) - The first tensor of MulQuant.
  1083. - **input_x2** (Tensor) - The second tensor of MulQuant.
  1084. Outputs:
  1085. Tensor, with the same type and shape as the `input_x1`.
  1086. Supported Platforms:
  1087. ``Ascend`` ``GPU``
  1088. Examples:
  1089. >>> qconfig = compression.quant.create_quant_config()
  1090. >>> mul_quant = nn.MulQuant(quant_config=qconfig)
  1091. >>> input_x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  1092. >>> input_x2 = Tensor(np.ones((2, 3)) * 2, mindspore.float32)
  1093. >>> output = mul_quant(input_x1, input_x2)
  1094. >>> print(output)
  1095. [[ 1.9764705 4.0000005 1.9764705]
  1096. [-4. 0. -1.9764705]]
  1097. """
  1098. def __init__(self,
  1099. ema_decay=0.999,
  1100. quant_config=quant_config_default,
  1101. quant_dtype=QuantDtype.INT8):
  1102. super(MulQuant, self).__init__()
  1103. self.fake_quant_act = quant_config.activation(min_init=-6,
  1104. max_init=6,
  1105. ema=True,
  1106. ema_decay=ema_decay,
  1107. quant_dtype=quant_dtype)
  1108. self.mul = P.Mul()
  1109. def construct(self, x1, x2):
  1110. x = self.mul(x1, x2)
  1111. x = self.fake_quant_act(x)
  1112. return x
  1113. class QuantBlock(Cell):
  1114. r"""
  1115. A quant block of Conv/Dense, activation layer for Ascend deploy.
  1116. Calculate Conv or Dense in Int8, with Quant and DeQuant.
  1117. Notes:
  1118. This block is only for deploy, and not trainable.
  1119. Args:
  1120. in_channels (int): The number of channels in the input space.
  1121. out_channels (int): The number of channels in the output space.
  1122. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  1123. is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
  1124. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  1125. same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
  1126. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  1127. activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
  1128. batchnorm (bool): Specifies to used batchnorm or not. Default: None.
  1129. activation (string): Specifies activation type. The optional values are as following:
  1130. 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
  1131. 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
  1132. Inputs:
  1133. - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
  1134. Outputs:
  1135. Tensor of shape :math:`(N, out\_channels)`.
  1136. """
  1137. def __init__(self,
  1138. core_op,
  1139. weight,
  1140. quant_op,
  1141. dequant_op,
  1142. dequant_scale,
  1143. bias=None,
  1144. activation=None):
  1145. super(QuantBlock, self).__init__()
  1146. self.core_op = core_op
  1147. self.weight = weight
  1148. self.quant = quant_op
  1149. self.dequant = dequant_op
  1150. self.dequant_scale = dequant_scale
  1151. self.bias = bias
  1152. self.has_bias = bias is not None
  1153. self.activation = activation
  1154. self.has_act = activation is not None
  1155. self.bias_add = P.BiasAdd()
  1156. def construct(self, x):
  1157. x = self.quant(x)
  1158. if self.has_bias:
  1159. x = self.core_op(x, self.weight)
  1160. x = self.bias_add(x, self.bias)
  1161. else:
  1162. x = self.core_op(x, self.weight)
  1163. x = self.dequant(x, self.dequant_scale)
  1164. x = F.cast(x, mstype.float32)
  1165. if self.has_act:
  1166. x = self.activation(x)
  1167. return x
  1168. def extend_repr(self):
  1169. s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
  1170. if self.has_bias:
  1171. s += f', bias=shape[{self.bias.shape}]'
  1172. if self.has_act:
  1173. s += f', activation={self.activation}'
  1174. s += f', dequant={self.dequant}'
  1175. return s
  1176. class QuantMindirBlock(Cell):
  1177. """A quant binary block of Conv/Dense, activation layer for export MINDIR model.
  1178. Args:
  1179. core_op (Cell): The operation cell.
  1180. weight (Tensor): The weigth of the cell.
  1181. bias (Tensor): The bias of the cell. Default: None.
  1182. activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
  1183. param_dict (dict): The information of the cell.
  1184. """
  1185. def __init__(self,
  1186. core_op,
  1187. weight,
  1188. bias=None,
  1189. activation=None,
  1190. param_dict=None):
  1191. super(QuantMindirBlock, self).__init__()
  1192. self.core_op = core_op
  1193. if activation is not None:
  1194. self.core_op.add_prim_attr("activation_name", activation.__class__.__name__)
  1195. self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"]))
  1196. self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"]))
  1197. if param_dict["output_maxq"] is not None:
  1198. self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"]))
  1199. self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"]))
  1200. self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"]))
  1201. if hasattr(core_op, 'pad_mode'):
  1202. self.core_op.add_prim_attr("pad_mode", core_op.pad_mode)
  1203. self.core_op.add_prim_attr("num_bits", Tensor(8))
  1204. self.core_op.add_prim_attr("narrow_range", Tensor(False))
  1205. if param_dict["input_maxq"] == 'None':
  1206. self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"]))
  1207. self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"]))
  1208. elif param_dict["input_maxq"] is not None:
  1209. self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"]))
  1210. self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"]))
  1211. self.weight = weight
  1212. self.bias = bias
  1213. self.has_bias = bias is not None
  1214. self.activation = activation
  1215. self.has_act = activation is not None
  1216. self.bias_add = P.BiasAdd()
  1217. if isinstance(activation, ReLU):
  1218. self.activation = None
  1219. self.has_act = False
  1220. def construct(self, x):
  1221. if self.has_bias:
  1222. x = self.core_op(x, self.weight)
  1223. x = self.bias_add(x, self.bias)
  1224. else:
  1225. x = self.core_op(x, self.weight)
  1226. return x
  1227. def extend_repr(self):
  1228. s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
  1229. if self.has_bias:
  1230. s += f', bias=shape[{self.bias.shape}]'
  1231. if self.has_act:
  1232. s += f', activation={self.activation}'
  1233. return s