You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

quant.py 54 kB

5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Quantization aware."""
  16. from functools import partial
  17. import numpy as np
  18. import mindspore.common.dtype as mstype
  19. from mindspore.ops import operations as P
  20. from mindspore.ops import functional as F
  21. from mindspore.common.parameter import Parameter
  22. from mindspore.common.initializer import initializer
  23. from mindspore.common.tensor import Tensor
  24. from mindspore._checkparam import check_int_positive, check_bool, twice
  25. from mindspore._checkparam import Validator as validator, Rel
  26. from mindspore.nn.cell import Cell
  27. from mindspore.nn.layer.activation import get_activation
  28. import mindspore.context as context
  29. from .normalization import BatchNorm2d
  30. from .activation import get_activation
  31. from ..cell import Cell
  32. from . import conv, basic
  33. from ..._checkparam import ParamValidator as validator
  34. from ...ops.operations import _quant_ops as Q
# Names exported by `from <this module> import *`.
__all__ = [
    'Conv2dBnAct',
    'DenseBnAct',
    'FakeQuantWithMinMax',
    'Conv2dBatchNormQuant',
    'Conv2dQuant',
    'DenseQuant',
    'ReLUQuant',
    'ReLU6Quant',
    'HSwishQuant',
    'HSigmoidQuant',
    'TensorAddQuant',
    'MulQuant',
]
  49. class Conv2dBnAct(Cell):
  50. r"""
  51. A combination of convolution, Batchnorm, activation layer.
  52. For a more Detailed overview of Conv2d op.
  53. Args:
  54. in_channels (int): The number of input channel :math:`C_{in}`.
  55. out_channels (int): The number of output channel :math:`C_{out}`.
  56. kernel_size (Union[int, tuple]): The data type is int or tuple with 2 integers. Specifies the height
  57. and width of the 2D convolution window. Single int means the value if for both height and width of
  58. the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
  59. width of the kernel.
  60. stride (int): Specifies stride for all spatial dimensions with the same value. Value of stride should be
  61. greater or equal to 1 but bounded by the height and width of the input. Default: 1.
  62. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  63. padding (int): Implicit paddings on both sides of the input. Default: 0.
  64. dilation (int): Specifying the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
  65. there will be :math:`k - 1` pixels skipped for each sampling location. Its value should be greater
  66. or equal to 1 and bounded by the height and width of the input. Default: 1.
  67. group (int): Split filter into groups, `in_ channels` and `out_channels` should be
  68. divisible by the number of groups. Default: 1.
  69. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  70. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  71. It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
  72. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  73. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  74. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  75. Initializer for more details. Default: 'normal'.
  76. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  77. Initializer and string are the same as 'weight_init'. Refer to the values of
  78. Initializer for more details. Default: 'zeros'.
  79. batchnorm (bool): Specifies to used batchnorm or not. Default: None.
  80. activation (string): Specifies activation type. The optional values are as following:
  81. 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
  82. 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
  83. Inputs:
  84. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  85. Outputs:
  86. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  87. Examples:
  88. >>> net = Conv2dBnAct(120, 240, 4, batchnorm=True, activation='ReLU')
  89. >>> input = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32)
  90. >>> net(input).shape
  91. (1, 240, 1024, 640)
  92. """
  93. def __init__(self,
  94. in_channels,
  95. out_channels,
  96. kernel_size,
  97. stride=1,
  98. pad_mode='same',
  99. padding=0,
  100. dilation=1,
  101. group=1,
  102. has_bias=False,
  103. weight_init='normal',
  104. bias_init='zeros',
  105. batchnorm=None,
  106. activation=None):
  107. super(Conv2dBnAct, self).__init__()
  108. self.conv = conv.Conv2d(
  109. in_channels,
  110. out_channels,
  111. kernel_size,
  112. stride,
  113. pad_mode,
  114. padding,
  115. dilation,
  116. group,
  117. has_bias,
  118. weight_init,
  119. bias_init)
  120. self.has_bn = batchnorm is not None
  121. self.has_act = activation is not None
  122. self.batchnorm = batchnorm
  123. if batchnorm is True:
  124. self.batchnorm = BatchNorm2d(out_channels)
  125. elif batchnorm is not None:
  126. validator.check_isinstance('batchnorm', batchnorm, (BatchNorm2d,))
  127. self.activation = get_activation(activation)
  128. def construct(self, x):
  129. x = self.conv(x)
  130. if self.has_bn:
  131. x = self.batchnorm(x)
  132. if self.has_act:
  133. x = self.activation(x)
  134. return x
  135. class DenseBnAct(Cell):
  136. r"""
  137. A combination of Dense, Batchnorm, activation layer.
  138. For a more Detailed overview of Dense op.
  139. Args:
  140. in_channels (int): The number of channels in the input space.
  141. out_channels (int): The number of channels in the output space.
  142. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  143. is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
  144. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  145. same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
  146. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  147. activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None.
  148. batchnorm (bool): Specifies to used batchnorm or not. Default: None.
  149. activation (string): Specifies activation type. The optional values are as following:
  150. 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
  151. 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
  152. Inputs:
  153. - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
  154. Outputs:
  155. Tensor of shape :math:`(N, out\_channels)`.
  156. Examples:
  157. >>> net = nn.DenseBnAct(3, 4)
  158. >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32)
  159. >>> net(input)
  160. """
  161. def __init__(self,
  162. in_channels,
  163. out_channels,
  164. weight_init='normal',
  165. bias_init='zeros',
  166. has_bias=True,
  167. batchnorm=None,
  168. activation=None):
  169. super(DenseBnAct, self).__init__()
  170. self.dense = basic.Dense(
  171. in_channels,
  172. out_channels,
  173. weight_init,
  174. bias_init,
  175. has_bias)
  176. self.has_bn = batchnorm is not None
  177. self.has_act = activation is not None
  178. if batchnorm is True:
  179. self.batchnorm = BatchNorm2d(out_channels)
  180. elif batchnorm is not None:
  181. validator.check_isinstance('batchnorm', batchnorm, (BatchNorm2d,))
  182. self.activation = get_activation(activation)
  183. def construct(self, x):
  184. x = self.dense(x)
  185. if self.has_bn:
  186. x = self.batchnorm(x)
  187. if self.has_act:
  188. x = self.activation(x)
  189. return x
class BatchNormFoldCell(Cell):
    """
    Batch normalization folded.

    Args:
        momentum (float): Momentum value should be [0, 1]. Default: 0.9.
        epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in
            float32 else 1e-3. Default: 1e-5.
        freeze_bn (int): Delay in steps at which computation switches from regular batch
            norm to frozen mean and std. Default: 0.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C, H, W)`.
        - **mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **variance** (Tensor) - Tensor of shape :math:`(C,)`.
        - **global_step** (Tensor) - Tensor to record current global step.

    Outputs:
        Tuple of 4 Tensor, the normalized input and the updated parameters.

        - **batch_mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **batch_std** (Tensor) - Tensor of shape :math:`(C,)`.
        - **running_mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **running_std** (Tensor) - Tensor of shape :math:`(C,)`.
    """

    def __init__(self, momentum=0.9, epsilon=1e-5, freeze_bn=0):
        """init batch norm fold layer"""
        super(BatchNormFoldCell, self).__init__()
        self.epsilon = epsilon
        self.is_gpu = context.get_context('device_target') == "GPU"
        if self.is_gpu:
            # GPU has a fused BatchNormFold kernel; keep two instances that
            # differ only in `is_training` and select one in construct().
            self.bn_train = Q.BatchNormFold(momentum, epsilon, is_training=True, freeze_bn=freeze_bn)
            self.bn_infer = Q.BatchNormFold(momentum, epsilon, is_training=False, freeze_bn=freeze_bn)
        else:
            # Non-GPU targets compose the fold from a reduction pass plus the
            # BatchNormFoldD update op.
            self.bn_reduce = P.BNTrainingReduce()
            self.bn_update = Q.BatchNormFoldD(momentum, epsilon, is_training=True, freeze_bn=freeze_bn)

    def construct(self, x, mean, variance, global_step):
        if self.is_gpu:
            if self.training:
                batch_mean, batch_std, running_mean, running_std = self.bn_train(x, mean, variance, global_step)
            else:
                batch_mean, batch_std, running_mean, running_std = self.bn_infer(x, mean, variance, global_step)
        else:
            if self.training:
                x_sum, x_square_sum = self.bn_reduce(x)
                _, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated = \
                    self.bn_update(x, x_sum, x_square_sum, mean, variance)
                # Write the updated moving statistics back into the input
                # parameters in place.
                P.Assign()(mean, mean_updated)
                P.Assign()(variance, variance_updated)
            else:
                # Inference without the fused op: identity fold (zero mean,
                # unit std for the batch) plus the frozen running statistics.
                batch_mean = P.ZerosLike()(variance)
                batch_std = P.OnesLike()(variance)
                running_mean = P.TensorAdd()(mean, 0.)
                running_std = P.Sqrt()(P.TensorAdd()(variance, self.epsilon))
        return batch_mean, batch_std, running_mean, running_std
  241. class FakeQuantWithMinMax(Cell):
  242. r"""
  243. Quantization aware op. This OP provide Fake quantization observer function on data with min and max.
  244. Args:
  245. min_init (int, float): The dimension of channel or 1(layer). Default: -6.
  246. max_init (int, float): The dimension of channel or 1(layer). Default: 6.
  247. ema (bool): Exponential Moving Average algorithm update min and max. Default: False.
  248. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  249. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  250. channel_axis (int): Quantization by channel axis. Default: 1.
  251. num_channels (int): declarate the min and max channel size, Default: 1.
  252. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  253. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  254. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  255. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  256. Inputs:
  257. - **x** (Tensor) - The input of FakeQuantWithMinMax.
  258. Outputs:
  259. Tensor, with the same type and shape as the `x`.
  260. Examples:
  261. >>> fake_quant = FakeQuantWithMinMax()
  262. >>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  263. >>> result = fake_quant(input_x)
  264. """
  265. def __init__(self,
  266. min_init=-6,
  267. max_init=6,
  268. ema=False,
  269. ema_decay=0.999,
  270. per_channel=False,
  271. channel_axis=1,
  272. num_channels=1,
  273. num_bits=8,
  274. symmetric=False,
  275. narrow_range=False,
  276. quant_delay=0):
  277. """init FakeQuantWithMinMax layer"""
  278. super(FakeQuantWithMinMax, self).__init__()
  279. self.min_init = min_init
  280. self.max_init = max_init
  281. self.num_bits = num_bits
  282. self.ema = ema
  283. self.ema_decay = ema_decay
  284. self.per_channel = per_channel
  285. self.num_channels = num_channels
  286. self.channel_axis = channel_axis
  287. self.quant_delay = quant_delay
  288. self.symmetric = symmetric
  289. self.narrow_range = narrow_range
  290. self.is_ascend = context.get_context('device_target') == "Ascend"
  291. # init tensor min and max for fake quant op
  292. if self.per_channel:
  293. min_array = np.array([self.min_init] * self.num_channels).astype(np.float32)
  294. max_array = np.array([self.max_init] * self.num_channels).astype(np.float32)
  295. else:
  296. min_array = np.array([self.min_init]).astype(np.float32)
  297. max_array = np.array([self.max_init]).astype(np.float32)
  298. self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
  299. self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)
  300. # init fake quant relative op
  301. if per_channel:
  302. quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis)
  303. ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis)
  304. else:
  305. quant_fun = Q.FakeQuantPerLayer
  306. ema_fun = Q.MinMaxUpdatePerLayer
  307. self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay)
  308. if self.is_ascend:
  309. self.fake_quant_train = quant_fun(num_bits=self.num_bits,
  310. symmetric=self.symmetric,
  311. narrow_range=self.narrow_range)
  312. self.fake_quant_infer = self.fake_quant_train
  313. else:
  314. quant_fun = partial(quant_fun,
  315. ema=self.ema,
  316. ema_decay=ema_decay,
  317. num_bits=self.num_bits,
  318. symmetric=self.symmetric,
  319. narrow_range=self.narrow_range,
  320. quant_delay=quant_delay)
  321. self.fake_quant_train = quant_fun(training=True)
  322. self.fake_quant_infer = quant_fun(training=False)
  323. def extend_repr(self):
  324. s = 'num_bits={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \
  325. 'quant_delay={}, min_init={}, max_init={}'.format(
  326. self.num_bits, self.symmetric, self.narrow_range, self.ema, self.ema_decay, self.per_channel,
  327. self.channel_axis, self.num_channels, self.quant_delay, self.min_init, self.max_init)
  328. return s
  329. def construct(self, x):
  330. if self.training:
  331. min_up, max_up = self.ema_update(x, self.minq, self.maxq)
  332. P.Assign()(self.minq, min_up)
  333. P.Assign()(self.maxq, max_up)
  334. out = self.fake_quant_train(x, self.minq, self.maxq)
  335. else:
  336. out = self.fake_quant_infer(x, self.minq, self.maxq)
  337. return out
class Conv2dBatchNormQuant(Cell):
    r"""
    2D convolution with BatchNormal op folded layer.

    For a more detailed overview see the Conv2d op.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
        stride (int): Specifies stride for all spatial dimensions with the same value.
        pad_mode: (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
        padding: (int): Implicit paddings on both sides of the input. Default: 0.
        eps (float): Parameters for BatchNormal. Default: 1e-5.
        momentum (float): Parameters for BatchNormal op. Default: 0.997.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            convolution kernel. Default: 'normal'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            beta vector. Default: 'zeros'.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            gamma vector. Default: 'ones'.
        mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            mean vector. Default: 'zeros'.
        var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            variance vector. Default: 'ones'.
        fake (bool): Conv2dBatchNormQuant Cell add FakeQuantWithMinMax op or not. Default: True.
        per_channel (bool): FakeQuantWithMinMax Parameters. Default: False.
        num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
        symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
        narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
        quant_delay (int): Quantization delay parameters according by global step. Default: 0.
        freeze_bn (int): Quantization freeze BatchNormal op according by global step. Default: 100000.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Examples:
        >>> batchnorm_quant = nn.Conv2dBatchNormQuant(1, 6, kernel_size= (2, 2), stride=(1, 1), pad_mode="valid",
        >>>                                           dilation=(1, 1))
        >>> input_x = Tensor(np.random.randint(-2, 2, (2, 1, 1, 3)), mindspore.float32)
        >>> result = batchnorm_quant(input_x)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 eps=1e-5,
                 momentum=0.997,
                 weight_init='normal',
                 beta_init='zeros',
                 gamma_init='ones',
                 mean_init='zeros',
                 var_init='ones',
                 fake=True,
                 per_channel=False,
                 num_bits=8,
                 symmetric=False,
                 narrow_range=False,
                 quant_delay=0,
                 freeze_bn=100000):
        """init Conv2dBatchNormQuant layer"""
        super(Conv2dBatchNormQuant, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = twice(kernel_size)
        self.stride = twice(stride)
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = twice(dilation)
        self.group = group
        self.eps = eps
        self.momentum = momentum
        self.quant_delay = quant_delay
        self.freeze_bn = freeze_bn
        self.fake = fake
        self.num_bits = num_bits
        self.per_channel = per_channel
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.is_gpu = context.get_context('device_target') == "GPU"
        # initialize convolution op and Parameter
        if context.get_context('device_target') == "Ascend" and group > 1:
            # On Ascend, grouped convolution is only supported as a depthwise
            # convolution where group == in_channels == out_channels.
            validator.check_integer('group', group, in_channels, Rel.EQ)
            validator.check_integer('group', group, out_channels, Rel.EQ)
            self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
                                                kernel_size=self.kernel_size,
                                                pad_mode=pad_mode,
                                                pad=padding,
                                                stride=self.stride,
                                                dilation=self.dilation)
            weight_shape = [1, in_channels, *self.kernel_size]
            # Depthwise weight layout puts channels on axis 1.
            channel_axis = 1
        else:
            self.conv = P.Conv2D(out_channel=out_channels,
                                 kernel_size=self.kernel_size,
                                 pad_mode=pad_mode,
                                 pad=padding,
                                 stride=self.stride,
                                 dilation=self.dilation,
                                 group=group)
            weight_shape = [out_channels, in_channels // group, *self.kernel_size]
            channel_axis = 0
        self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
        # initialize batchnorm Parameter
        self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma')
        self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta')
        self.moving_mean = Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False)
        self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance',
                                         requires_grad=False)
        # initialize fake ops
        self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
                                                     max_init=6,
                                                     ema=False,
                                                     per_channel=per_channel,
                                                     channel_axis=channel_axis,
                                                     num_channels=out_channels,
                                                     num_bits=num_bits,
                                                     symmetric=symmetric,
                                                     narrow_range=narrow_range,
                                                     quant_delay=quant_delay)
        self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn)
        self.correct_mul = Q.CorrectionMul(channel_axis)
        if context.get_context('device_target') == "Ascend":
            self.batchnorm_fold2_train = Q.BatchNormFold2_D(freeze_bn=freeze_bn)
            self.batchnorm_fold2_infer = Q.BatchNormFold2_D(freeze_bn=0)
        elif context.get_context('device_target') == "GPU":
            self.batchnorm_fold2_train = Q.BatchNormFold2(freeze_bn=freeze_bn)
            self.batchnorm_fold2_infer = Q.BatchNormFold2(freeze_bn=0)
        else:
            raise ValueError("Unsupported platform: {}".format(context.get_context('device_target')))
        # Global-step counter used to decide when BN freezes.
        self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False)
        self.one = Tensor(1, mstype.int32)
        self.assignadd = P.AssignAdd()

    def extend_repr(self):
        """Return the configuration summary shown in repr()."""
        s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
            'pad_mode={}, padding={}, dilation={}, group={}, ' \
            'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format(
                self.in_channels, self.out_channels, self.kernel_size, self.stride,
                self.pad_mode, self.padding, self.dilation, self.group,
                self.fake, self.freeze_bn, self.momentum, self.quant_delay)
        return s

    def construct(self, x):
        # First convolution pass only feeds the BN statistics.
        out_conv = self.conv(x, self.weight)
        # BN fold1
        batch_mean, batch_std, running_mean, running_std = self.batchnorm_fold(out_conv,
                                                                               self.moving_mean,
                                                                               self.moving_variance,
                                                                               self.step)
        # fake weight: fold gamma/std into the kernel, then optionally quantize.
        weight = self.correct_mul(self.weight, self.gamma, running_std)
        if self.fake:
            weight = self.fake_quant_weight(weight)
        out = self.conv(x, weight)
        # BN fold2
        if self.is_gpu:
            if self.training:
                out = self.batchnorm_fold2_train(out, self.beta, self.gamma,
                                                 batch_std, batch_mean, running_std, running_mean, self.step)
                # Only advance the step counter after fold2 has consumed it.
                F.control_depend(out, self.assignadd(self.step, self.one))
            else:
                out = self.batchnorm_fold2_infer(out, self.beta, self.gamma,
                                                 batch_std, batch_mean, running_std, running_mean, self.step)
        else:
            if self.training:
                out = self.batchnorm_fold2_train(out, self.beta, self.gamma, batch_std, batch_mean, running_std)
                F.control_depend(out, self.assignadd(self.step, self.one))
            else:
                # Inference substitutes the frozen running statistics for the
                # batch statistics.
                out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, running_std, running_mean, running_std)
        return out
  510. class Conv2dQuant(Cell):
  511. r"""
  512. 2D convolution with fake quant op layer.
  513. For a more Detailed overview of Conv2d op.
  514. Args:
  515. in_channels (int): The number of input channel :math:`C_{in}`.
  516. out_channels (int): The number of output channel :math:`C_{out}`.
  517. kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
  518. stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1.
  519. pad_mode: (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  520. padding: (int): Implicit paddings on both sides of the input. Default: 0.
  521. dilation (int): Specifying the dilation rate to use for dilated convolution. Default: 1.
  522. group (int): Split filter into groups, `in_ channels` and `out_channels` should be
  523. divisible by the number of groups. Default: 1.
  524. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  525. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  526. Default: 'normal'.
  527. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
  528. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False.
  529. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  530. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  531. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  532. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  533. Inputs:
  534. - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  535. Outputs:
  536. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  537. Examples:
  538. >>> conv2d_quant = nn.Conv2dQuant(1, 6, kernel_size= (2, 2), stride=(1, 1), pad_mode="valid",
  539. >>> dilation=(1, 1))
  540. >>> input_x = Tensor(np.random.randint(-2, 2, (2, 1, 1, 3)), mindspore.float32)
  541. >>> result = conv2d_quant(input_x)
  542. """
  543. def __init__(self,
  544. in_channels,
  545. out_channels,
  546. kernel_size,
  547. stride=1,
  548. pad_mode='same',
  549. padding=0,
  550. dilation=1,
  551. group=1,
  552. has_bias=False,
  553. weight_init='normal',
  554. bias_init='zeros',
  555. per_channel=False,
  556. num_bits=8,
  557. symmetric=False,
  558. narrow_range=False,
  559. quant_delay=0):
  560. super(Conv2dQuant, self).__init__()
  561. if isinstance(kernel_size, int):
  562. self.kernel_size = (kernel_size, kernel_size)
  563. else:
  564. self.kernel_size = kernel_size
  565. self.in_channels = check_int_positive(in_channels)
  566. self.out_channels = check_int_positive(out_channels)
  567. self.has_bias = has_bias
  568. self.stride = twice(stride)
  569. self.dilation = twice(dilation)
  570. self.pad_mode = pad_mode
  571. self.padding = padding
  572. self.group = group
  573. self.quant_delay = quant_delay
  574. weight_shape = [out_channels, in_channels // group, *self.kernel_size]
  575. self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
  576. self.bias_add = P.BiasAdd()
  577. if check_bool(has_bias):
  578. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  579. else:
  580. self.bias = None
  581. self.conv = P.Conv2D(out_channel=self.out_channels,
  582. kernel_size=self.kernel_size,
  583. mode=1,
  584. pad_mode=self.pad_mode,
  585. pad=self.padding,
  586. stride=self.stride,
  587. dilation=self.dilation,
  588. group=self.group)
  589. self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
  590. max_init=6,
  591. ema=False,
  592. per_channel=per_channel,
  593. channel_axis=0,
  594. num_channels=out_channels,
  595. num_bits=num_bits,
  596. symmetric=symmetric,
  597. narrow_range=narrow_range,
  598. quant_delay=quant_delay)
  599. def construct(self, x):
  600. weight = self.fake_quant_weight(self.weight)
  601. out = self.conv(x, weight)
  602. if self.has_bias:
  603. return self.bias_add(out, self.bias)
  604. return out
  605. def extend_repr(self):
  606. s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
  607. 'pad_mode={}, padding={}, dilation={}, group={}, ' \
  608. 'has_bias={}, quant_delay={}'.format(
  609. self.in_channels, self.out_channels, self.kernel_size, self.stride,
  610. self.pad_mode, self.padding, self.dilation, self.group,
  611. self.has_bias, self.quant_delay)
  612. return s
  613. class DenseQuant(Cell):
  614. r"""
  615. The fully connected layer with fake quant op.
  616. For a more Detailed overview of Dense op.
  617. Args:
  618. in_channels (int): The dimension of the input space.
  619. out_channels (int): The dimension of the output space.
  620. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  621. is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
  622. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  623. same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
  624. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  625. activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None.
  626. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False.
  627. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  628. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  629. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  630. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  631. Inputs:
  632. - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  633. Outputs:
  634. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  635. Examples:
  636. >>> dense_quant = nn.DenseQuant(3, 6)
  637. >>> input_x = Tensor(np.random.randint(-2, 2, (2, 3)), mindspore.float32)
  638. >>> result = dense_quant(input_x)
  639. """
  640. def __init__(
  641. self,
  642. in_channels,
  643. out_channels,
  644. weight_init='normal',
  645. bias_init='zeros',
  646. has_bias=True,
  647. activation=None,
  648. per_channel=False,
  649. num_bits=8,
  650. symmetric=False,
  651. narrow_range=False,
  652. quant_delay=0):
  653. super(DenseQuant, self).__init__()
  654. self.in_channels = check_int_positive(in_channels)
  655. self.out_channels = check_int_positive(out_channels)
  656. self.has_bias = check_bool(has_bias)
  657. if isinstance(weight_init, Tensor):
  658. if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
  659. weight_init.shape()[1] != in_channels:
  660. raise ValueError("weight_init shape error")
  661. self.weight = Parameter(initializer(
  662. weight_init, [out_channels, in_channels]), name="weight")
  663. if self.has_bias:
  664. if isinstance(bias_init, Tensor):
  665. if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
  666. raise ValueError("bias_init shape error")
  667. self.bias = Parameter(initializer(
  668. bias_init, [out_channels]), name="bias")
  669. self.matmul = P.MatMul(transpose_b=True)
  670. self.bias_add = P.BiasAdd()
  671. self.activation = get_activation(activation)
  672. self.activation_flag = self.activation is not None
  673. self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
  674. max_init=6,
  675. ema=False,
  676. per_channel=per_channel,
  677. channel_axis=0,
  678. num_channels=out_channels,
  679. num_bits=num_bits,
  680. symmetric=symmetric,
  681. narrow_range=narrow_range,
  682. quant_delay=quant_delay)
  683. def construct(self, x):
  684. """Use operators to construct to Dense layer."""
  685. output = self.fake_quant_weight(self.weight)
  686. output = self.matmul(x, output)
  687. if self.has_bias:
  688. output = self.bias_add(output, self.bias)
  689. if self.activation_flag:
  690. return self.activation(output)
  691. return output
  692. def extend_repr(self):
  693. """A pretty print for Dense layer."""
  694. str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}'.format(
  695. self.in_channels, self.out_channels, self.weight, self.has_bias)
  696. if self.has_bias:
  697. str_info = str_info + ', bias={}'.format(self.bias)
  698. if self.activation_flag:
  699. str_info = str_info + ', activation={}'.format(self.activation)
  700. return str_info
class _QuantActivation(Cell):
    r"""
    Base class for Quant activation function. Add Fake Quant OP after activation OP.
    """

    def get_origin(self):
        """Return the wrapped, non-quantized activation operator. Subclasses must override."""
        raise NotImplementedError
  707. class ReLUQuant(_QuantActivation):
  708. r"""
  709. ReLUQuant activation function. Add Fake Quant OP after Relu OP.
  710. For a more Detailed overview of ReLU op.
  711. Args:
  712. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  713. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  714. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  715. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  716. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  717. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  718. Inputs:
  719. - **x** (Tensor) - The input of ReLUQuant.
  720. Outputs:
  721. Tensor, with the same type and shape as the `x`.
  722. Examples:
  723. >>> relu_quant = nn.ReLUQuant()
  724. >>> input_x = Tensor(np.array([[1, 2, 0], [-1, -2, 1]]), mindspore.float32)
  725. >>> result = relu_quant(input_x)
  726. """
  727. def __init__(self,
  728. ema_decay=0.999,
  729. per_channel=False,
  730. num_bits=8,
  731. symmetric=False,
  732. narrow_range=False,
  733. quant_delay=0):
  734. super(ReLUQuant, self).__init__()
  735. self.fake_quant_act = FakeQuantWithMinMax(min_init=0,
  736. max_init=6,
  737. ema=True,
  738. ema_decay=ema_decay,
  739. per_channel=per_channel,
  740. num_bits=num_bits,
  741. symmetric=symmetric,
  742. narrow_range=narrow_range,
  743. quant_delay=quant_delay)
  744. self.relu = P.ReLU()
  745. def construct(self, x):
  746. x = self.relu(x)
  747. x = self.fake_quant_act(x)
  748. return x
  749. def get_origin(self):
  750. return self.relu
  751. class ReLU6Quant(_QuantActivation):
  752. r"""
  753. ReLU6Quant activation function.
  754. Add Fake Quant OP after Relu6. Not Recommand to used these cell for Fake Quant Op
  755. Will climp the max range of the activation and the relu6 do the same operation.
  756. For a more Detailed overview of ReLU6 op.
  757. Args:
  758. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  759. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  760. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  761. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  762. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  763. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  764. Inputs:
  765. - **x** (Tensor) - The input of ReLU6Quant.
  766. Outputs:
  767. Tensor, with the same type and shape as the `x`.
  768. Examples:
  769. >>> relu6_quant = nn.ReLU6Quant(4, 1)
  770. >>> input_x = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32)
  771. >>> result = relu6_quant(input_x)
  772. """
  773. def __init__(self,
  774. ema_decay=0.999,
  775. per_channel=False,
  776. num_bits=8,
  777. symmetric=False,
  778. narrow_range=False,
  779. quant_delay=0):
  780. super(ReLU6Quant, self).__init__()
  781. self.fake_quant_act = FakeQuantWithMinMax(min_init=0,
  782. max_init=6,
  783. ema=True,
  784. ema_decay=ema_decay,
  785. per_channel=per_channel,
  786. num_bits=num_bits,
  787. symmetric=symmetric,
  788. narrow_range=narrow_range,
  789. quant_delay=quant_delay)
  790. self.relu6 = P.ReLU6()
  791. def construct(self, x):
  792. x = self.relu6(x)
  793. x = self.fake_quant_act(x)
  794. return x
  795. def get_origin(self):
  796. return self.relu6
  797. class HSwishQuant(_QuantActivation):
  798. r"""
  799. HSwishQuant activation function. Add Fake Quant OP after HSwish OP.
  800. For a more Detailed overview of HSwish op.
  801. Args:
  802. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  803. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  804. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  805. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  806. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  807. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  808. Inputs:
  809. - **x** (Tensor) - The input of HSwishQuant.
  810. Outputs:
  811. Tensor, with the same type and shape as the `x`.
  812. Examples:
  813. >>> hswish_quant = nn.HSwishQuant(4, 1)
  814. >>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  815. >>> result = hswish_quant(input_x)
  816. """
  817. def __init__(self,
  818. ema_decay=0.999,
  819. per_channel=False,
  820. num_bits=8,
  821. symmetric=False,
  822. narrow_range=False,
  823. quant_delay=0):
  824. super(HSwishQuant, self).__init__()
  825. self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6,
  826. max_init=6,
  827. ema=True,
  828. ema_decay=ema_decay,
  829. per_channel=per_channel,
  830. num_bits=num_bits,
  831. symmetric=symmetric,
  832. narrow_range=narrow_range,
  833. quant_delay=quant_delay)
  834. self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6,
  835. max_init=6,
  836. ema=True,
  837. ema_decay=ema_decay,
  838. per_channel=per_channel,
  839. num_bits=num_bits,
  840. symmetric=symmetric,
  841. narrow_range=narrow_range,
  842. quant_delay=quant_delay)
  843. self.act = P.HSwish()
  844. def construct(self, x):
  845. x = self.fake_quant_act_before(x)
  846. x = self.act(x)
  847. x = self.fake_quant_act_after(x)
  848. return x
  849. def get_origin(self):
  850. return self.act
  851. class HSigmoidQuant(_QuantActivation):
  852. r"""
  853. HSigmoidQuant activation function. Add Fake Quant OP before and after HSigmoid OP.
  854. For a more Detailed overview of HSigmoid op.
  855. Args:
  856. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  857. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  858. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  859. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  860. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  861. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  862. Inputs:
  863. - **x** (Tensor) - The input of HSigmoidQuant.
  864. Outputs:
  865. Tensor, with the same type and shape as the `x`.
  866. Examples:
  867. >>> hsigmoid_quant = nn.HSigmoidQuant(4, 1)
  868. >>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  869. >>> result = hsigmoid_quant(input_x)
  870. """
  871. def __init__(self,
  872. ema_decay=0.999,
  873. per_channel=False,
  874. num_bits=8,
  875. symmetric=False,
  876. narrow_range=False,
  877. quant_delay=0):
  878. super(HSigmoidQuant, self).__init__()
  879. self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6,
  880. max_init=6,
  881. ema=True,
  882. ema_decay=ema_decay,
  883. per_channel=per_channel,
  884. num_bits=num_bits,
  885. symmetric=symmetric,
  886. narrow_range=narrow_range,
  887. quant_delay=quant_delay)
  888. self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6,
  889. max_init=6,
  890. ema=True,
  891. ema_decay=ema_decay,
  892. per_channel=per_channel,
  893. num_bits=num_bits,
  894. symmetric=symmetric,
  895. narrow_range=narrow_range,
  896. quant_delay=quant_delay)
  897. self.act = P.HSigmoid()
  898. def construct(self, x):
  899. x = self.fake_quant_act_before(x)
  900. x = self.act(x)
  901. x = self.fake_quant_act_after(x)
  902. return x
  903. def get_origin(self):
  904. return self.act
  905. class TensorAddQuant(Cell):
  906. r"""
  907. Add Fake Quant OP after TensorAdd OP.
  908. For a more Detailed overview of TensorAdd op.
  909. Args:
  910. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  911. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  912. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  913. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  914. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  915. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  916. Inputs:
  917. - **x** (Tensor) - The input of TensorAddQuant.
  918. Outputs:
  919. Tensor, with the same type and shape as the `x`.
  920. Examples:
  921. >>> add_quant = nn.TensorAddQuant()
  922. >>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  923. >>> input_y = Tensor(np.random.randint(-2, 2, (2, 3)), mindspore.float32)
  924. >>> result = add_quant(input_x, input_y)
  925. """
  926. def __init__(self,
  927. ema_decay=0.999,
  928. per_channel=False,
  929. num_bits=8,
  930. symmetric=False,
  931. narrow_range=False,
  932. quant_delay=0):
  933. super(TensorAddQuant, self).__init__()
  934. self.fake_quant_act = FakeQuantWithMinMax(min_init=-6,
  935. max_init=6,
  936. ema=True,
  937. ema_decay=ema_decay,
  938. per_channel=per_channel,
  939. num_bits=num_bits,
  940. symmetric=symmetric,
  941. narrow_range=narrow_range,
  942. quant_delay=quant_delay)
  943. self.add = P.TensorAdd()
  944. def construct(self, x1, x2):
  945. x = self.add(x1, x2)
  946. x = self.fake_quant_act(x)
  947. return x
  948. class MulQuant(Cell):
  949. r"""
  950. Add Fake Quant OP after Mul OP.
  951. For a more Detailed overview of Mul op.
  952. Args:
  953. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  954. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  955. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  956. symmetric (bool): Quantization algorithm use symmetric or not. Default: False.
  957. narrow_range (bool): Quantization algorithm use narrow range or not. Default: False.
  958. quant_delay (int): Quantization delay parameters according by global step. Default: 0.
  959. Inputs:
  960. - **x** (Tensor) - The input of MulQuant.
  961. Outputs:
  962. Tensor, with the same type and shape as the `x`.
  963. """
  964. def __init__(self,
  965. ema_decay=0.999,
  966. per_channel=False,
  967. num_bits=8,
  968. symmetric=False,
  969. narrow_range=False,
  970. quant_delay=0):
  971. super(MulQuant, self).__init__()
  972. self.fake_quant_act = FakeQuantWithMinMax(min_init=-6,
  973. max_init=6,
  974. ema=True,
  975. ema_decay=ema_decay,
  976. per_channel=per_channel,
  977. num_bits=num_bits,
  978. symmetric=symmetric,
  979. narrow_range=narrow_range,
  980. quant_delay=quant_delay)
  981. self.mul = P.Mul()
  982. def construct(self, x1, x2):
  983. x = self.mul(x1, x2)
  984. x = self.fake_quant_act(x)
  985. return x
  986. class QuantBlock(Cell):
  987. r"""
  988. A quant block of Conv/Dense, activation layer for Ascend deploy.
  989. Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant.
  990. Notes:
  991. This block is only for deploy, and not trainable.
  992. Args:
  993. in_channels (int): The number of channels in the input space.
  994. out_channels (int): The number of channels in the output space.
  995. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  996. is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
  997. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  998. same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
  999. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  1000. activation (str): Regularizer function applied to the output of the layer, eg. 'relu'. Default: None.
  1001. batchnorm (bool): Specifies to used batchnorm or not. Default: None.
  1002. activation (string): Specifies activation type. The optional values are as following:
  1003. 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
  1004. 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
  1005. Inputs:
  1006. - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
  1007. Outputs:
  1008. Tensor of shape :math:`(N, out\_channels)`.
  1009. Examples:
  1010. >>> net = nn.Dense(3, 4)
  1011. >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32)
  1012. >>> net(input)
  1013. """
  1014. def __init__(self,
  1015. core_op,
  1016. weight,
  1017. quant_op,
  1018. dequant_op,
  1019. dequant_scale,
  1020. bias=None,
  1021. activation=None):
  1022. super(QuantBlock, self).__init__()
  1023. self.core_op = core_op
  1024. self.weight = weight
  1025. self.quant = quant_op
  1026. self.dequant = dequant_op
  1027. self.dequant_scale = dequant_scale
  1028. self.bias = bias
  1029. self.has_bias = bias is None
  1030. self.activation = activation
  1031. self.has_act = activation is None
  1032. def construct(self, x):
  1033. x = self.quant(x)
  1034. x = self.core_op(x, self.weight)
  1035. if self.has_bias:
  1036. output = self.bias_add(output, self.bias)
  1037. if self.has_act:
  1038. x = self.activation(x)
  1039. x = self.dequant(x, self.dequant_scale)
  1040. return x
  1041. def extend_repr(self):
  1042. str_info = f'quant={self.quant}, core_op={type(self.core_op)}'
  1043. if self.has_bias:
  1044. str_info = str_info + f', bias={self.bias}'
  1045. if self.has_act:
  1046. str_info = str_info + f', activation={self.activation}'
  1047. str_info = str_info + f', dequant={self.dequant}'
  1048. return str_info