You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

quant.py 68 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Quantization aware training."""
  16. from functools import partial
  17. import numpy as np
  18. from mindspore import nn
  19. import mindspore.common.dtype as mstype
  20. from mindspore.ops import operations as P
  21. from mindspore.ops import functional as F
  22. from mindspore.common.parameter import Parameter
  23. from mindspore.common.initializer import initializer
  24. from mindspore.common.tensor import Tensor
  25. from mindspore._checkparam import Validator, Rel, twice
  26. import mindspore.context as context
  27. from .normalization import BatchNorm2d, BatchNorm1d
  28. from .activation import get_activation, ReLU, LeakyReLU
  29. from ..cell import Cell
  30. from ...ops.operations import _quant_ops as Q
# Public API of this module: fused conv/dense building blocks and the
# quantization-aware ("fake quant") layer variants used during QAT.
__all__ = [
    'Conv2dBnAct',
    'DenseBnAct',
    'FakeQuantWithMinMax',
    'Conv2dBnFoldQuant',
    'Conv2dBnWithoutFoldQuant',
    'Conv2dQuant',
    'DenseQuant',
    'ActQuant',
    'LeakyReLUQuant',
    'HSwishQuant',
    'HSigmoidQuant',
    'TensorAddQuant',
    'MulQuant',
]
  46. class Conv2dBnAct(Cell):
  47. r"""
  48. A combination of convolution, Batchnorm, activation layer.
  49. This part is a more detailed overview of Conv2d op.
  50. Args:
  51. in_channels (int): The number of input channel :math:`C_{in}`.
  52. out_channels (int): The number of output channel :math:`C_{out}`.
  53. kernel_size (Union[int, tuple]): The data type is int or a tuple of 2 integers. Specifies the height
  54. and width of the 2D convolution window. Single int means the value is for both height and width of
  55. the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
  56. width of the kernel.
  57. stride (int): Specifies stride for all spatial dimensions with the same value. The value of stride must be
  58. greater than or equal to 1 and lower than any one of the height and width of the input. Default: 1.
  59. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  60. padding (int): Implicit paddings on both sides of the input. Default: 0.
  61. dilation (int): Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
  62. there will be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater than
  63. or equal to 1 and lower than any one of the height and width of the input. Default: 1.
  64. group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  65. divisible by the number of groups. Default: 1.
  66. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  67. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  68. It can be a Tensor, a string, an Initializer or a number. When a string is specified,
  69. values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
  70. as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
  71. and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
  72. Initializer for more details. Default: 'normal'.
  73. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
  74. Initializer and string are the same as 'weight_init'. Refer to the values of
  75. Initializer for more details. Default: 'zeros'.
  76. has_bn (bool): Specifies to used batchnorm or not. Default: False.
  77. momentum (float): Momentum for moving average.Momentum value must be [0, 1].Default:0.9
  78. eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default:
  79. 1e-5.
  80. activation (Cell): Specifies activation type. The optional values are as following:
  81. 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
  82. 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
  83. alpha (float): Slope of the activation function at x < 0. Default: 0.2.
  84. after_fake(bool): Determin whether there must be a fake quantization operation after Cond2dBnAct.
  85. Inputs:
  86. - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  87. Outputs:
  88. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  89. Examples:
  90. >>> net = Conv2dBnAct(120, 240, 4, has_bn=True, activation='ReLU')
  91. >>> input = Tensor(np.ones([1, 120, 1024, 640]), mindspore.float32)
  92. >>> net(input).shape
  93. (1, 240, 1024, 640)
  94. """
  95. def __init__(self,
  96. in_channels,
  97. out_channels,
  98. kernel_size,
  99. stride=1,
  100. pad_mode='same',
  101. padding=0,
  102. dilation=1,
  103. group=1,
  104. has_bias=False,
  105. weight_init='normal',
  106. bias_init='zeros',
  107. has_bn=False,
  108. momentum=0.9,
  109. eps=1e-5,
  110. activation=None,
  111. alpha=0.2,
  112. after_fake=True):
  113. super(Conv2dBnAct, self).__init__()
  114. self.conv = nn.Conv2d(in_channels,
  115. out_channels,
  116. kernel_size=kernel_size,
  117. stride=stride,
  118. pad_mode=pad_mode,
  119. padding=padding,
  120. dilation=dilation,
  121. group=group,
  122. has_bias=has_bias,
  123. weight_init=weight_init,
  124. bias_init=bias_init)
  125. self.has_bn = Validator.check_bool(has_bn, "has_bn")
  126. self.has_act = activation is not None
  127. self.after_fake = after_fake
  128. if has_bn:
  129. self.batchnorm = BatchNorm2d(out_channels, eps, momentum)
  130. if activation == "leakyrelu":
  131. self.activation = LeakyReLU(alpha)
  132. else:
  133. self.activation = get_activation(activation)
  134. def construct(self, x):
  135. x = self.conv(x)
  136. if self.has_bn:
  137. x = self.batchnorm(x)
  138. if self.has_act:
  139. x = self.activation(x)
  140. return x
  141. class DenseBnAct(Cell):
  142. r"""
  143. A combination of Dense, Batchnorm, and the activation layer.
  144. This part is a more detailed overview of Dense op.
  145. Args:
  146. in_channels (int): The number of channels in the input space.
  147. out_channels (int): The number of channels in the output space.
  148. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  149. is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
  150. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  151. same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
  152. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  153. activation (Cell): The regularization function applied to the output of the layer, eg. 'ReLU'. Default: None.
  154. has_bn (bool): Specifies to use batchnorm or not. Default: False.
  155. activation (string): Specifies activation type. The optional values are as following:
  156. 'Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid',
  157. 'PReLU', 'LeakyReLU', 'h-Swish', and 'h-Sigmoid'. Default: None.
  158. after_fake(bool): Determin whether there must be a fake quantization operation after DenseBnAct.
  159. Inputs:
  160. - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
  161. Outputs:
  162. Tensor of shape :math:`(N, out\_channels)`.
  163. Examples:
  164. >>> net = nn.DenseBnAct(3, 4)
  165. >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32)
  166. >>> net(input)
  167. """
  168. def __init__(self,
  169. in_channels,
  170. out_channels,
  171. weight_init='normal',
  172. bias_init='zeros',
  173. has_bias=True,
  174. has_bn=False,
  175. activation=None,
  176. after_fake=True):
  177. super(DenseBnAct, self).__init__()
  178. self.dense = nn.Dense(
  179. in_channels,
  180. out_channels,
  181. weight_init,
  182. bias_init,
  183. has_bias)
  184. self.has_bn = Validator.check_bool(has_bn, "has_bn")
  185. self.has_act = activation is not None
  186. self.after_fake = after_fake
  187. if has_bn:
  188. self.batchnorm = BatchNorm1d(out_channels)
  189. self.activation = get_activation(activation)
  190. def construct(self, x):
  191. x = self.dense(x)
  192. if self.has_bn:
  193. x = self.batchnorm(x)
  194. if self.has_act:
  195. x = self.activation(x)
  196. return x
class BatchNormFoldCell(Cell):
    """
    Batch normalization folded.

    Args:
        momentum (float): Momentum value must be [0, 1]. Default: 0.9.
        epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in
            float32 else 1e-3. Default: 1e-5.
        freeze_bn (int): Delay in steps at which computation switches from regular batch
            norm to frozen mean and std. Default: 0.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C, H, W)`.
        - **mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **variance** (Tensor) - Tensor of shape :math:`(C,)`.
        - **global_step** (Tensor) - Tensor to record current global step.

    Outputs:
        Tuple of 4 Tensor, the normalized input and the updated parameters.

        - **batch_mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **batch_std** (Tensor) - Tensor of shape :math:`(C,)`.
        - **running_mean** (Tensor) - Tensor of shape :math:`(C,)`.
        - **running_std** (Tensor) - Tensor of shape :math:`(C,)`.
    """

    def __init__(self, momentum=0.9, epsilon=1e-5, freeze_bn=0):
        """Initialize batch norm fold layer"""
        super(BatchNormFoldCell, self).__init__()
        self.epsilon = epsilon
        # GPU has a fused BatchNormFold kernel with separate train/infer modes;
        # other backends emulate it with BNTrainingReduce + BatchNormFoldD below.
        self.is_gpu = context.get_context('device_target') == "GPU"
        if self.is_gpu:
            self.bn_train = Q.BatchNormFold(momentum, epsilon, is_training=True, freeze_bn=freeze_bn)
            self.bn_infer = Q.BatchNormFold(momentum, epsilon, is_training=False, freeze_bn=freeze_bn)
        else:
            self.bn_reduce = P.BNTrainingReduce()
            self.bn_update = Q.BatchNormFoldD(momentum, epsilon, is_training=True, freeze_bn=freeze_bn)

    def construct(self, x, mean, variance, global_step):
        # Returns (batch_mean, batch_std, running_mean, running_std) on every path.
        if self.is_gpu:
            if self.training:
                batch_mean, batch_std, running_mean, running_std = self.bn_train(x, mean, variance, global_step)
            else:
                batch_mean, batch_std, running_mean, running_std = self.bn_infer(x, mean, variance, global_step)
        else:
            if self.training:
                # Two-step emulation: reduce per-channel sums, then fold/update.
                # BatchNormFoldD also yields updated moving stats, written back
                # into the `mean`/`variance` Parameters via Assign side effects.
                x_sum, x_square_sum = self.bn_reduce(x)
                _, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated = \
                    self.bn_update(x, x_sum, x_square_sum, mean, variance)
                P.Assign()(mean, mean_updated)
                P.Assign()(variance, variance_updated)
            else:
                # Inference without the fused kernel: batch stats degenerate to
                # identity (mean 0, std 1); running stats come from the moving
                # mean/variance (std = sqrt(variance + epsilon)).
                batch_mean = P.ZerosLike()(variance)
                batch_std = P.OnesLike()(variance)
                running_mean = P.TensorAdd()(mean, 0.)
                running_std = P.Sqrt()(P.TensorAdd()(variance, self.epsilon))
        return batch_mean, batch_std, running_mean, running_std
  248. class FakeQuantWithMinMax(Cell):
  249. r"""
  250. Quantization aware op. This OP provides the fake quantization observer function on data with min and max.
  251. Args:
  252. min_init (int, float): The dimension of channel or 1(layer). Default: -6.
  253. max_init (int, float): The dimension of channel or 1(layer). Default: 6.
  254. ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
  255. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  256. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  257. channel_axis (int): Quantization by channel axis. Default: 1.
  258. num_channels (int): declarate the min and max channel size, Default: 1.
  259. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  260. symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
  261. narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
  262. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  263. Inputs:
  264. - **x** (Tensor) - The input of FakeQuantWithMinMax.
  265. Outputs:
  266. Tensor, with the same type and shape as the `x`.
  267. Examples:
  268. >>> fake_quant = FakeQuantWithMinMax()
  269. >>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  270. >>> result = fake_quant(input_x)
  271. """
  272. def __init__(self,
  273. min_init=-6,
  274. max_init=6,
  275. ema=False,
  276. ema_decay=0.999,
  277. per_channel=False,
  278. channel_axis=1,
  279. num_channels=1,
  280. num_bits=8,
  281. symmetric=False,
  282. narrow_range=False,
  283. quant_delay=0):
  284. """Initialize FakeQuantWithMinMax layer"""
  285. super(FakeQuantWithMinMax, self).__init__()
  286. Validator.check_type("min_init", min_init, [int, float])
  287. Validator.check_type("max_init", max_init, [int, float])
  288. Validator.check("min_init", min_init, "max_init", max_init, rel=Rel.LT)
  289. Validator.check_integer('quant_delay', quant_delay, 0, Rel.GE)
  290. self.min_init = min_init
  291. self.max_init = max_init
  292. self.num_bits = num_bits
  293. self.ema = ema
  294. self.ema_decay = ema_decay
  295. self.per_channel = per_channel
  296. self.num_channels = num_channels
  297. self.channel_axis = channel_axis
  298. self.quant_delay = quant_delay
  299. self.symmetric = symmetric
  300. self.narrow_range = narrow_range
  301. self.is_ascend = context.get_context('device_target') == "Ascend"
  302. # init tensor min and max for fake quant op
  303. if self.per_channel:
  304. min_array = np.array([self.min_init] * self.num_channels).astype(np.float32)
  305. max_array = np.array([self.max_init] * self.num_channels).astype(np.float32)
  306. else:
  307. min_array = np.array([self.min_init]).astype(np.float32)
  308. max_array = np.array([self.max_init]).astype(np.float32)
  309. self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
  310. self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)
  311. # init fake quant relative op
  312. if self.per_channel:
  313. quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis)
  314. ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis)
  315. else:
  316. quant_fun = Q.FakeQuantPerLayer
  317. ema_fun = Q.MinMaxUpdatePerLayer
  318. self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay)
  319. if self.is_ascend:
  320. self.fake_quant_train = quant_fun(num_bits=self.num_bits,
  321. symmetric=self.symmetric,
  322. narrow_range=self.narrow_range,
  323. quant_delay=self.quant_delay)
  324. self.fake_quant_infer = self.fake_quant_train
  325. else:
  326. quant_fun = partial(quant_fun,
  327. ema=self.ema,
  328. ema_decay=ema_decay,
  329. num_bits=self.num_bits,
  330. symmetric=self.symmetric,
  331. narrow_range=self.narrow_range,
  332. quant_delay=self.quant_delay)
  333. self.fake_quant_train = quant_fun(training=True)
  334. self.fake_quant_infer = quant_fun(training=False)
  335. def extend_repr(self):
  336. s = 'num_bits={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \
  337. 'quant_delay={}, min_init={}, max_init={}'.format(self.num_bits, self.symmetric, self.narrow_range,
  338. self.ema, self.ema_decay, self.per_channel,
  339. self.channel_axis, self.num_channels, self.quant_delay,
  340. self.min_init, self.max_init)
  341. return s
  342. def construct(self, x):
  343. if self.training:
  344. min_up, max_up = self.ema_update(x, self.minq, self.maxq)
  345. P.Assign()(self.minq, min_up)
  346. P.Assign()(self.maxq, max_up)
  347. out = self.fake_quant_train(x, self.minq, self.maxq)
  348. else:
  349. out = self.fake_quant_infer(x, self.minq, self.maxq)
  350. return out
class Conv2dBnFoldQuant(Cell):
    r"""
    2D convolution with BatchNormal op folded construct.

    This part is a more detailed overview of Conv2d op.

    Args:
        in_channels (int): The number of input channel :math:`C_{in}`.
        out_channels (int): The number of output channel :math:`C_{out}`.
        kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
        stride (int): Specifies stride for all spatial dimensions with the same value.
        pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
        padding (int): Implicit paddings on both sides of the input. Default: 0.
        eps (float): Parameters for BatchNormal. Default: 1e-5.
        momentum (float): Parameters for BatchNormal op. Default: 0.997.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            convolution kernel. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            bias vector. Default: 'zeros'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            beta vector. Default: 'zeros'.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            gamma vector. Default: 'ones'.
        mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            mean vector. Default: 'zeros'.
        var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
            variance vector. Default: 'ones'.
        fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMax op. Default: True.
        per_channel (bool): FakeQuantWithMinMax Parameters. Default: False.
        num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
        symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
        narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
        quant_delay (int): The Quantization delay parameters according to the global step. Default: 0.
        freeze_bn (int): The quantization freeze BatchNormal op is according to the global step. Default: 100000.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.

    Examples:
        >>> conv2d_bn = nn.Conv2dBnFoldQuant(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid")
        >>> x = Tensor(np.random.randint(-2, 2, (2, 1, 1, 3)), mindspore.float32)
        >>> y = conv2d_bn(x)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 eps=1e-5,
                 momentum=0.997,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 beta_init='zeros',
                 gamma_init='ones',
                 mean_init='zeros',
                 var_init='ones',
                 fake=True,
                 per_channel=False,
                 num_bits=8,
                 symmetric=False,
                 narrow_range=False,
                 quant_delay=0,
                 freeze_bn=100000):
        """Initialize Conv2dBnFoldQuant layer"""
        super(Conv2dBnFoldQuant, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = twice(kernel_size)
        self.stride = twice(stride)
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = twice(dilation)
        self.group = group
        self.eps = eps
        self.momentum = momentum
        self.has_bias = has_bias
        self.quant_delay = quant_delay
        self.freeze_bn = freeze_bn
        self.fake = fake
        self.num_bits = num_bits
        self.per_channel = per_channel
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.is_gpu = context.get_context('device_target') == "GPU"
        # initialize convolution op and Parameter
        # On Ascend, grouped convolution is only supported as depthwise
        # (group == in_channels == out_channels), with channel_multiplier=1.
        if context.get_context('device_target') == "Ascend" and group > 1:
            Validator.check_integer('group', group, in_channels, Rel.EQ)
            Validator.check_integer('group', group, out_channels, Rel.EQ)
            self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
                                                kernel_size=self.kernel_size,
                                                pad_mode=pad_mode,
                                                pad=padding,
                                                stride=self.stride,
                                                dilation=self.dilation)
            weight_shape = [1, in_channels, *self.kernel_size]
            # Depthwise weights are laid out [1, C, kH, kW], so the channel
            # dimension for per-channel quantization is axis 1.
            channel_axis = 1
        else:
            self.conv = P.Conv2D(out_channel=out_channels,
                                 kernel_size=self.kernel_size,
                                 pad_mode=pad_mode,
                                 pad=padding,
                                 stride=self.stride,
                                 dilation=self.dilation,
                                 group=group)
            weight_shape = [out_channels, in_channels // group, *self.kernel_size]
            channel_axis = 0
        self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
        self.bias_add = P.BiasAdd()
        if Validator.check_bool(has_bias):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
        else:
            self.bias = None
        # initialize BatchNorm Parameter
        self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma')
        self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta')
        self.moving_mean = Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False)
        self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance',
                                         requires_grad=False)
        # initialize fake ops (weight observer only; activation observers are
        # attached elsewhere by the quantization conversion tooling)
        self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
                                                     max_init=6,
                                                     ema=False,
                                                     per_channel=per_channel,
                                                     channel_axis=channel_axis,
                                                     num_channels=out_channels,
                                                     num_bits=num_bits,
                                                     symmetric=symmetric,
                                                     narrow_range=narrow_range,
                                                     quant_delay=quant_delay)
        self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn)
        self.correct_mul = Q.CorrectionMul(channel_axis)
        # The infer-mode fold2 op uses freeze_bn=0 so it always behaves as frozen.
        if context.get_context('device_target') == "Ascend":
            self.batchnorm_fold2_train = Q.BatchNormFold2_D(freeze_bn=freeze_bn)
            self.batchnorm_fold2_infer = Q.BatchNormFold2_D(freeze_bn=0)
        elif context.get_context('device_target') == "GPU":
            self.batchnorm_fold2_train = Q.BatchNormFold2(freeze_bn=freeze_bn)
            self.batchnorm_fold2_infer = Q.BatchNormFold2(freeze_bn=0)
        else:
            raise ValueError("Unsupported platform: {}".format(context.get_context('device_target')))
        # Global-step counter incremented once per training-mode construct call.
        # NOTE(review): 'normal' initializer on an int32 step counter looks odd —
        # presumably overwritten/loaded before use; confirm.
        self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False)
        self.one = Tensor(1, mstype.int32)
        self.assignadd = P.AssignAdd()

    def extend_repr(self):
        s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
            'pad_mode={}, padding={}, dilation={}, group={}, ' \
            'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format(self.in_channels, self.out_channels,
                                                                        self.kernel_size, self.stride,
                                                                        self.pad_mode, self.padding, self.dilation,
                                                                        self.group,
                                                                        self.fake, self.freeze_bn, self.momentum,
                                                                        self.quant_delay)
        return s

    def construct(self, x):
        # First convolution pass: produce pre-BN activations used only to
        # gather batch statistics for the BN fold.
        out_conv = self.conv(x, self.weight)
        if self.has_bias:
            out_conv = self.bias_add(out_conv, self.bias)
        # BN fold1
        batch_mean, batch_std, running_mean, running_std = self.batchnorm_fold(out_conv,
                                                                               self.moving_mean,
                                                                               self.moving_variance,
                                                                               self.step)
        # fake weight: scale the kernel by gamma/running_std (CorrectionMul),
        # optionally fake-quantize it, and convolve a second time with it.
        weight = self.correct_mul(self.weight, self.gamma, running_std)
        if self.fake:
            weight = self.fake_quant_weight(weight)
        out = self.conv(x, weight)
        if self.has_bias:
            out = self.bias_add(out, self.bias)
        # BN fold2: apply beta/gamma correction; the step counter is only
        # advanced during training, ordered after `out` via control_depend.
        if self.is_gpu:
            if self.training:
                out = self.batchnorm_fold2_train(out, self.beta, self.gamma,
                                                 batch_std, batch_mean, running_std, running_mean, self.step)
                F.control_depend(out, self.assignadd(self.step, self.one))
            else:
                out = self.batchnorm_fold2_infer(out, self.beta, self.gamma,
                                                 batch_std, batch_mean, running_std, running_mean, self.step)
        else:
            if self.training:
                out = self.batchnorm_fold2_train(out, self.beta, self.gamma, batch_std, batch_mean, running_std)
                F.control_depend(out, self.assignadd(self.step, self.one))
            else:
                # At inference the running statistics stand in for the batch
                # statistics, i.e. (batch_std, batch_mean, running_std) positions
                # receive (running_std, running_mean, running_std).
                # NOTE(review): confirm against the BatchNormFold2_D signature.
                out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, running_std, running_mean, running_std)
        return out
  539. class Conv2dBnWithoutFoldQuant(Cell):
  540. r"""
  541. 2D convolution + batchnorm without fold with fake quant construct.
  542. This part is a more detailed overview of Conv2d op.
  543. Args:
  544. in_channels (int): The number of input channel :math:`C_{in}`.
  545. out_channels (int): The number of output channel :math:`C_{out}`.
  546. kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
  547. stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1.
  548. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  549. padding (int): Implicit paddings on both sides of the input. Default: 0.
  550. dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  551. group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  552. divisible by the number of groups. Default: 1.
  553. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  554. eps (float): Parameters for BatchNormal. Default: 1e-5.
  555. momentum (float): Parameters for BatchNormal op. Default: 0.997.
  556. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  557. Default: 'normal'.
  558. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
  559. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False.
  560. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  561. symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
  562. narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
  563. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  564. Inputs:
  565. - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  566. Outputs:
  567. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  568. Examples:
  569. >>> conv2d_quant = nn.Conv2dBnWithoutFoldQuant(1, 6, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid")
  570. >>> x = Tensor(np.random.randint(-2, 2, (2, 1, 1, 3)), mstype.float32)
  571. >>> y = conv2d_quant(x)
  572. """
  573. def __init__(self,
  574. in_channels,
  575. out_channels,
  576. kernel_size,
  577. stride=1,
  578. pad_mode='same',
  579. padding=0,
  580. dilation=1,
  581. group=1,
  582. has_bias=False,
  583. eps=1e-5,
  584. momentum=0.997,
  585. weight_init='normal',
  586. bias_init='zeros',
  587. per_channel=False,
  588. num_bits=8,
  589. symmetric=False,
  590. narrow_range=False,
  591. quant_delay=0):
  592. super(Conv2dBnWithoutFoldQuant, self).__init__()
  593. if isinstance(kernel_size, int):
  594. self.kernel_size = (kernel_size, kernel_size)
  595. else:
  596. self.kernel_size = kernel_size
  597. self.in_channels = Validator.check_positive_int(in_channels)
  598. self.out_channels = Validator.check_positive_int(out_channels)
  599. self.has_bias = has_bias
  600. self.stride = twice(stride)
  601. self.dilation = twice(dilation)
  602. self.pad_mode = pad_mode
  603. self.padding = padding
  604. self.group = group
  605. self.quant_delay = quant_delay
  606. self.bias_add = P.BiasAdd()
  607. if Validator.check_bool(has_bias):
  608. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  609. else:
  610. self.bias = None
  611. # initialize convolution op and Parameter
  612. if context.get_context('device_target') == "Ascend" and group > 1:
  613. Validator.check_integer('group', group, in_channels, Rel.EQ)
  614. Validator.check_integer('group', group, out_channels, Rel.EQ)
  615. self.conv = P.DepthwiseConv2dNative(channel_multiplier=1,
  616. kernel_size=self.kernel_size,
  617. pad_mode=pad_mode,
  618. pad=padding,
  619. stride=self.stride,
  620. dilation=self.dilation)
  621. weight_shape = [1, in_channels, *self.kernel_size]
  622. channel_axis = 1
  623. else:
  624. self.conv = P.Conv2D(out_channel=self.out_channels,
  625. kernel_size=self.kernel_size,
  626. mode=1,
  627. pad_mode=self.pad_mode,
  628. pad=self.padding,
  629. stride=self.stride,
  630. dilation=self.dilation,
  631. group=self.group)
  632. weight_shape = [out_channels, in_channels // group, *self.kernel_size]
  633. channel_axis = 0
  634. self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
  635. self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
  636. max_init=6,
  637. ema=False,
  638. per_channel=per_channel,
  639. channel_axis=channel_axis,
  640. num_channels=out_channels,
  641. num_bits=num_bits,
  642. symmetric=symmetric,
  643. narrow_range=narrow_range,
  644. quant_delay=quant_delay)
  645. self.batchnorm = BatchNorm2d(out_channels, eps=eps, momentum=momentum)
  646. def construct(self, x):
  647. weight = self.fake_quant_weight(self.weight)
  648. out = self.conv(x, weight)
  649. if self.has_bias:
  650. out = self.bias_add(out, self.bias)
  651. out = self.batchnorm(out)
  652. return out
  653. def extend_repr(self):
  654. s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
  655. 'pad_mode={}, padding={}, dilation={}, group={}, ' \
  656. 'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride,
  657. self.pad_mode, self.padding, self.dilation, self.group,
  658. self.has_bias, self.quant_delay)
  659. return s
  660. class Conv2dQuant(Cell):
  661. r"""
  662. 2D convolution with fake quant op layer.
  663. This part is a more detailed overview of Conv2d op.
  664. Args:
  665. in_channels (int): The number of input channel :math:`C_{in}`.
  666. out_channels (int): The number of output channel :math:`C_{out}`.
  667. kernel_size (Union[int, tuple]): Specifies the height and width of the 2D convolution window.
  668. stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1.
  669. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
  670. padding (int): Implicit paddings on both sides of the input. Default: 0.
  671. dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
  672. group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
  673. divisible by the number of groups. Default: 1.
  674. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
  675. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
  676. Default: 'normal'.
  677. bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
  678. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False.
  679. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  680. symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
  681. narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
  682. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  683. Inputs:
  684. - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  685. Outputs:
  686. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  687. Examples:
  688. >>> conv2d_quant = nn.Conv2dQuant(1, 6, kernel_size= (2, 2), stride=(1, 1), pad_mode="valid")
  689. >>> x = Tensor(np.random.randint(-2, 2, (2, 1, 1, 3)), mindspore.float32)
  690. >>> y = conv2d_quant(x)
  691. """
  692. def __init__(self,
  693. in_channels,
  694. out_channels,
  695. kernel_size,
  696. stride=1,
  697. pad_mode='same',
  698. padding=0,
  699. dilation=1,
  700. group=1,
  701. has_bias=False,
  702. weight_init='normal',
  703. bias_init='zeros',
  704. per_channel=False,
  705. num_bits=8,
  706. symmetric=False,
  707. narrow_range=False,
  708. quant_delay=0):
  709. super(Conv2dQuant, self).__init__()
  710. if isinstance(kernel_size, int):
  711. self.kernel_size = (kernel_size, kernel_size)
  712. else:
  713. self.kernel_size = kernel_size
  714. self.in_channels = Validator.check_positive_int(in_channels)
  715. self.out_channels = Validator.check_positive_int(out_channels)
  716. self.has_bias = has_bias
  717. self.stride = twice(stride)
  718. self.dilation = twice(dilation)
  719. self.pad_mode = pad_mode
  720. self.padding = padding
  721. self.group = group
  722. self.quant_delay = quant_delay
  723. weight_shape = [out_channels, in_channels // group, *self.kernel_size]
  724. self.weight = Parameter(initializer(weight_init, weight_shape), name='weight')
  725. self.bias_add = P.BiasAdd()
  726. if Validator.check_bool(has_bias):
  727. self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
  728. else:
  729. self.bias = None
  730. self.conv = P.Conv2D(out_channel=self.out_channels,
  731. kernel_size=self.kernel_size,
  732. mode=1,
  733. pad_mode=self.pad_mode,
  734. pad=self.padding,
  735. stride=self.stride,
  736. dilation=self.dilation,
  737. group=self.group)
  738. self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
  739. max_init=6,
  740. ema=False,
  741. per_channel=per_channel,
  742. channel_axis=0,
  743. num_channels=out_channels,
  744. num_bits=num_bits,
  745. symmetric=symmetric,
  746. narrow_range=narrow_range,
  747. quant_delay=quant_delay)
  748. def construct(self, x):
  749. weight = self.fake_quant_weight(self.weight)
  750. out = self.conv(x, weight)
  751. if self.has_bias:
  752. return self.bias_add(out, self.bias)
  753. return out
  754. def extend_repr(self):
  755. s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \
  756. 'pad_mode={}, padding={}, dilation={}, group={}, ' \
  757. 'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride,
  758. self.pad_mode, self.padding, self.dilation, self.group,
  759. self.has_bias, self.quant_delay)
  760. return s
  761. class DenseQuant(Cell):
  762. r"""
  763. The fully connected layer with fake quant op.
  764. This part is a more detailed overview of Dense op.
  765. Args:
  766. in_channels (int): The dimension of the input space.
  767. out_channels (int): The dimension of the output space.
  768. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  769. is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
  770. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  771. same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
  772. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  773. activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
  774. per_channel (bool): FakeQuantWithMinMax Parameters. Default: False.
  775. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  776. symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
  777. narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
  778. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  779. Inputs:
  780. - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
  781. Outputs:
  782. Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
  783. Examples:
  784. >>> dense_quant = nn.DenseQuant(3, 6)
  785. >>> input_x = Tensor(np.random.randint(-2, 2, (2, 3)), mindspore.float32)
  786. >>> result = dense_quant(input_x)
  787. """
  788. def __init__(
  789. self,
  790. in_channels,
  791. out_channels,
  792. weight_init='normal',
  793. bias_init='zeros',
  794. has_bias=True,
  795. activation=None,
  796. per_channel=False,
  797. num_bits=8,
  798. symmetric=False,
  799. narrow_range=False,
  800. quant_delay=0):
  801. super(DenseQuant, self).__init__()
  802. self.in_channels = Validator.check_positive_int(in_channels)
  803. self.out_channels = Validator.check_positive_int(out_channels)
  804. self.has_bias = Validator.check_bool(has_bias)
  805. if isinstance(weight_init, Tensor):
  806. if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
  807. weight_init.shape[1] != in_channels:
  808. raise ValueError("weight_init shape error")
  809. self.weight = Parameter(initializer(
  810. weight_init, [out_channels, in_channels]), name="weight")
  811. if self.has_bias:
  812. if isinstance(bias_init, Tensor):
  813. if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
  814. raise ValueError("bias_init shape error")
  815. self.bias = Parameter(initializer(
  816. bias_init, [out_channels]), name="bias")
  817. self.matmul = P.MatMul(transpose_b=True)
  818. self.bias_add = P.BiasAdd()
  819. self.activation = get_activation(activation)
  820. self.activation_flag = self.activation is not None
  821. self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6,
  822. max_init=6,
  823. ema=False,
  824. per_channel=per_channel,
  825. channel_axis=0,
  826. num_channels=out_channels,
  827. num_bits=num_bits,
  828. symmetric=symmetric,
  829. narrow_range=narrow_range,
  830. quant_delay=quant_delay)
  831. def construct(self, x):
  832. """Use operators to construct the Dense layer."""
  833. output = self.fake_quant_weight(self.weight)
  834. output = self.matmul(x, output)
  835. if self.has_bias:
  836. output = self.bias_add(output, self.bias)
  837. if self.activation_flag:
  838. return self.activation(output)
  839. return output
  840. def extend_repr(self):
  841. """A pretty print for Dense layer."""
  842. str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}'.format(
  843. self.in_channels, self.out_channels, self.weight, self.has_bias)
  844. if self.has_bias:
  845. str_info = str_info + ', bias={}'.format(self.bias)
  846. if self.activation_flag:
  847. str_info = str_info + ', activation={}'.format(self.activation)
  848. return str_info
  849. class _QuantActivation(Cell):
  850. r"""
  851. Base class for quantization aware training activation function. Add Fake Quant OP after activation OP.
  852. """
  853. def get_origin(self):
  854. raise NotImplementedError
  855. class ActQuant(_QuantActivation):
  856. r"""
  857. Quantization aware training activation function.
  858. Add the fake quant op to the end of activation op, by which the output of activation op will be truncated.
  859. Please check `FakeQuantWithMinMax` for more details.
  860. Args:
  861. activation (Cell): Activation cell class.
  862. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  863. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  864. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  865. symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
  866. narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
  867. quant_delay (int): Quantization delay parameters according to the global steps. Default: 0.
  868. Inputs:
  869. - **x** (Tensor) - The input of ReLU6Quant.
  870. Outputs:
  871. Tensor, with the same type and shape as the `x`.
  872. Examples:
  873. >>> act_quant = nn.ActQuant(nn.ReLU())
  874. >>> input_x = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32)
  875. >>> result = act_quant(input_x)
  876. """
  877. def __init__(self,
  878. activation,
  879. ema_decay=0.999,
  880. per_channel=False,
  881. num_bits=8,
  882. symmetric=False,
  883. narrow_range=False,
  884. quant_delay=0):
  885. super(ActQuant, self).__init__()
  886. self.fake_quant_act = FakeQuantWithMinMax(min_init=0,
  887. max_init=6,
  888. ema=True,
  889. ema_decay=ema_decay,
  890. per_channel=per_channel,
  891. num_bits=num_bits,
  892. symmetric=symmetric,
  893. narrow_range=narrow_range,
  894. quant_delay=quant_delay)
  895. self.act = activation
  896. def construct(self, x):
  897. x = self.act(x)
  898. x = self.fake_quant_act(x)
  899. return x
  900. def get_origin(self):
  901. return self.act
  902. class LeakyReLUQuant(_QuantActivation):
  903. r"""
  904. LeakyReLUQuant activation function. Add Fake Quant OP after HSwish OP.
  905. This part is a more detailed overview of HSwish op.
  906. Args:
  907. activation (Cell): Activation cell class.
  908. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  909. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  910. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  911. symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
  912. narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
  913. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  914. Inputs:
  915. - **x** (Tensor) - The input of LeakyReLUQuant.
  916. Outputs:
  917. Tensor, with the same type and shape as the `x`.
  918. Examples:
  919. >>> activation = nn.LeakyReLUQuant(nn.LeakyReLU())
  920. >>> input = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  921. >>> result = activation(input)
  922. """
  923. def __init__(self,
  924. activation,
  925. ema_decay=0.999,
  926. per_channel=False,
  927. num_bits=8,
  928. symmetric=False,
  929. narrow_range=False,
  930. quant_delay=0):
  931. super(LeakyReLUQuant, self).__init__()
  932. self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6,
  933. max_init=6,
  934. ema=True,
  935. ema_decay=ema_decay,
  936. per_channel=per_channel,
  937. num_bits=num_bits,
  938. symmetric=symmetric,
  939. narrow_range=narrow_range,
  940. quant_delay=quant_delay)
  941. self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6,
  942. max_init=6,
  943. ema=True,
  944. ema_decay=ema_decay,
  945. per_channel=per_channel,
  946. num_bits=num_bits,
  947. symmetric=symmetric,
  948. narrow_range=narrow_range,
  949. quant_delay=quant_delay)
  950. if issubclass(activation.__class__, nn.LeakyReLU):
  951. self.act = activation
  952. else:
  953. raise ValueError("Activation should be `nn.LeakyReLU`")
  954. def construct(self, x):
  955. x = self.fake_quant_act_before(x)
  956. x = self.act(x)
  957. x = self.fake_quant_act_after(x)
  958. return x
  959. def get_origin(self):
  960. return self.act
  961. class HSwishQuant(_QuantActivation):
  962. r"""
  963. HSwishQuant activation function. Add Fake Quant OP after HSwish OP.
  964. This part is a more detailed overview of HSwish op.
  965. Args:
  966. activation (Cell): Activation cell class.
  967. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  968. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  969. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  970. symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
  971. narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
  972. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  973. Inputs:
  974. - **x** (Tensor) - The input of HSwishQuant.
  975. Outputs:
  976. Tensor, with the same type and shape as the `x`.
  977. Examples:
  978. >>> activation = nn.HSwishQuant(nn.HSwish())
  979. >>> input = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  980. >>> result = activation(input)
  981. """
  982. def __init__(self,
  983. activation,
  984. ema_decay=0.999,
  985. per_channel=False,
  986. num_bits=8,
  987. symmetric=False,
  988. narrow_range=False,
  989. quant_delay=0):
  990. super(HSwishQuant, self).__init__()
  991. self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6,
  992. max_init=6,
  993. ema=True,
  994. ema_decay=ema_decay,
  995. per_channel=per_channel,
  996. num_bits=num_bits,
  997. symmetric=symmetric,
  998. narrow_range=narrow_range,
  999. quant_delay=quant_delay)
  1000. self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6,
  1001. max_init=6,
  1002. ema=True,
  1003. ema_decay=ema_decay,
  1004. per_channel=per_channel,
  1005. num_bits=num_bits,
  1006. symmetric=symmetric,
  1007. narrow_range=narrow_range,
  1008. quant_delay=quant_delay)
  1009. if issubclass(activation.__class__, nn.HSwish):
  1010. self.act = activation
  1011. else:
  1012. raise ValueError("Activation should be `nn.HSwish`")
  1013. def construct(self, x):
  1014. x = self.fake_quant_act_before(x)
  1015. x = self.act(x)
  1016. x = self.fake_quant_act_after(x)
  1017. return x
  1018. def get_origin(self):
  1019. return self.act
  1020. class HSigmoidQuant(_QuantActivation):
  1021. r"""
  1022. HSigmoidQuant activation function. Add Fake Quant OP before and after HSigmoid OP.
  1023. This part is a more detailed overview of HSigmoid op.
  1024. Args:
  1025. activation (Cell): Activation cell class.
  1026. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  1027. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  1028. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  1029. symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
  1030. narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
  1031. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  1032. Inputs:
  1033. - **x** (Tensor) - The input of HSigmoidQuant.
  1034. Outputs:
  1035. Tensor, with the same type and shape as the `x`.
  1036. Examples:
  1037. >>> activation = nn.HSigmoidQuant(nn.HSigmoid())
  1038. >>> input = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  1039. >>> result = activation(input)
  1040. """
  1041. def __init__(self,
  1042. activation,
  1043. ema_decay=0.999,
  1044. per_channel=False,
  1045. num_bits=8,
  1046. symmetric=False,
  1047. narrow_range=False,
  1048. quant_delay=0):
  1049. super(HSigmoidQuant, self).__init__()
  1050. self.fake_quant_act_before = FakeQuantWithMinMax(min_init=-6,
  1051. max_init=6,
  1052. ema=True,
  1053. ema_decay=ema_decay,
  1054. per_channel=per_channel,
  1055. num_bits=num_bits,
  1056. symmetric=symmetric,
  1057. narrow_range=narrow_range,
  1058. quant_delay=quant_delay)
  1059. self.fake_quant_act_after = FakeQuantWithMinMax(min_init=-6,
  1060. max_init=6,
  1061. ema=True,
  1062. ema_decay=ema_decay,
  1063. per_channel=per_channel,
  1064. num_bits=num_bits,
  1065. symmetric=symmetric,
  1066. narrow_range=narrow_range,
  1067. quant_delay=quant_delay)
  1068. if issubclass(activation.__class__, nn.HSigmoid):
  1069. self.act = activation
  1070. else:
  1071. raise ValueError("Activation should be `nn.HSigmoid`")
  1072. def construct(self, x):
  1073. x = self.fake_quant_act_before(x)
  1074. x = self.act(x)
  1075. x = self.fake_quant_act_after(x)
  1076. return x
  1077. def get_origin(self):
  1078. return self.act
  1079. class TensorAddQuant(Cell):
  1080. r"""
  1081. Add Fake Quant OP after TensorAdd OP.
  1082. This part is a more detailed overview of TensorAdd op.
  1083. Args:
  1084. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  1085. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  1086. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  1087. symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
  1088. narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
  1089. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  1090. Inputs:
  1091. - **x** (Tensor) - The input of TensorAddQuant.
  1092. Outputs:
  1093. Tensor, with the same type and shape as the `x`.
  1094. Examples:
  1095. >>> add_quant = nn.TensorAddQuant()
  1096. >>> input_x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32)
  1097. >>> input_y = Tensor(np.random.randint(-2, 2, (2, 3)), mindspore.float32)
  1098. >>> result = add_quant(input_x, input_y)
  1099. """
  1100. def __init__(self,
  1101. ema_decay=0.999,
  1102. per_channel=False,
  1103. num_bits=8,
  1104. symmetric=False,
  1105. narrow_range=False,
  1106. quant_delay=0):
  1107. super(TensorAddQuant, self).__init__()
  1108. self.fake_quant_act = FakeQuantWithMinMax(min_init=-6,
  1109. max_init=6,
  1110. ema=True,
  1111. ema_decay=ema_decay,
  1112. per_channel=per_channel,
  1113. num_bits=num_bits,
  1114. symmetric=symmetric,
  1115. narrow_range=narrow_range,
  1116. quant_delay=quant_delay)
  1117. self.add = P.TensorAdd()
  1118. def construct(self, x1, x2):
  1119. x = self.add(x1, x2)
  1120. x = self.fake_quant_act(x)
  1121. return x
  1122. class MulQuant(Cell):
  1123. r"""
  1124. Add Fake Quant OP after Mul OP.
  1125. This part is a more detailed overview of Mul op.
  1126. Args:
  1127. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
  1128. per_channel (bool): Quantization granularity based on layer or on channel. Default: False.
  1129. num_bits (int): The bit number of quantization, supporting 4 and 8bits. Default: 8.
  1130. symmetric (bool): The quantization algorithm is symmetric or not. Default: False.
  1131. narrow_range (bool): The quantization algorithm uses narrow range or not. Default: False.
  1132. quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
  1133. Inputs:
  1134. - **x** (Tensor) - The input of MulQuant.
  1135. Outputs:
  1136. Tensor, with the same type and shape as the `x`.
  1137. """
  1138. def __init__(self,
  1139. ema_decay=0.999,
  1140. per_channel=False,
  1141. num_bits=8,
  1142. symmetric=False,
  1143. narrow_range=False,
  1144. quant_delay=0):
  1145. super(MulQuant, self).__init__()
  1146. self.fake_quant_act = FakeQuantWithMinMax(min_init=-6,
  1147. max_init=6,
  1148. ema=True,
  1149. ema_decay=ema_decay,
  1150. per_channel=per_channel,
  1151. num_bits=num_bits,
  1152. symmetric=symmetric,
  1153. narrow_range=narrow_range,
  1154. quant_delay=quant_delay)
  1155. self.mul = P.Mul()
  1156. def construct(self, x1, x2):
  1157. x = self.mul(x1, x2)
  1158. x = self.fake_quant_act(x)
  1159. return x
  1160. class QuantBlock(Cell):
  1161. r"""
  1162. A quant block of Conv/Dense, activation layer for Ascend deploy.
  1163. Calculate Conv or Dense in Int8, with Quant and DeQuant.
  1164. Notes:
  1165. This block is only for deploy, and not trainable.
  1166. Args:
  1167. in_channels (int): The number of channels in the input space.
  1168. out_channels (int): The number of channels in the output space.
  1169. weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
  1170. is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
  1171. bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
  1172. same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
  1173. has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
  1174. activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
  1175. batchnorm (bool): Specifies to used batchnorm or not. Default: None.
  1176. activation (string): Specifies activation type. The optional values are as following:
  1177. 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
  1178. 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
  1179. Inputs:
  1180. - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
  1181. Outputs:
  1182. Tensor of shape :math:`(N, out\_channels)`.
  1183. """
  1184. def __init__(self,
  1185. core_op,
  1186. weight,
  1187. quant_op,
  1188. dequant_op,
  1189. dequant_scale,
  1190. bias=None,
  1191. activation=None):
  1192. super(QuantBlock, self).__init__()
  1193. self.core_op = core_op
  1194. self.weight = weight
  1195. self.quant = quant_op
  1196. self.dequant = dequant_op
  1197. self.dequant_scale = dequant_scale
  1198. self.bias = bias
  1199. self.has_bias = bias is not None
  1200. self.activation = activation
  1201. self.has_act = activation is not None
  1202. self.bias_add = P.BiasAdd()
  1203. def construct(self, x):
  1204. x = self.quant(x)
  1205. if self.has_bias:
  1206. x = self.core_op(x, self.weight)
  1207. x = self.bias_add(x, self.bias)
  1208. else:
  1209. x = self.core_op(x, self.weight)
  1210. x = self.dequant(x, self.dequant_scale)
  1211. x = F.cast(x, mstype.float32)
  1212. if self.has_act:
  1213. x = self.activation(x)
  1214. return x
  1215. def extend_repr(self):
  1216. str_info = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
  1217. if self.has_bias:
  1218. str_info = str_info + f', bias=shape[{self.bias.shape}]'
  1219. if self.has_act:
  1220. str_info = str_info + f', activation={self.activation}'
  1221. str_info = str_info + f', dequant={self.dequant}'
  1222. return str_info
  1223. class QuantMindirBlock(Cell):
  1224. """A quant binary block of Conv/Dense, activation layer for export MINDIR model.
  1225. Args:
  1226. core_op (Cell): The operation cell.
  1227. weight (Tensor): The weigth of the cell.
  1228. bias (Tensor): The bias of the cell. Default: None.
  1229. activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
  1230. param_dict (dict): The information of the cell.
  1231. """
  1232. def __init__(self,
  1233. core_op,
  1234. weight,
  1235. bias=None,
  1236. activation=None,
  1237. param_dict=None):
  1238. super(QuantMindirBlock, self).__init__()
  1239. self.core_op = core_op
  1240. if activation is not None:
  1241. self.core_op.add_prim_attr("activation_name", activation.__class__.__name__)
  1242. self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"]))
  1243. self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"]))
  1244. if param_dict["output_maxq"] is not None:
  1245. self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"]))
  1246. self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"]))
  1247. self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"]))
  1248. if hasattr(core_op, 'pad_mode'):
  1249. self.core_op.add_prim_attr("pad_mode", core_op.pad_mode)
  1250. self.core_op.add_prim_attr("num_bits", Tensor(8))
  1251. self.core_op.add_prim_attr("narrow_range", Tensor(False))
  1252. if param_dict["input_maxq"] == 'None':
  1253. self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"]))
  1254. self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"]))
  1255. elif param_dict["input_maxq"] is not None:
  1256. self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"]))
  1257. self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"]))
  1258. self.weight = weight
  1259. self.bias = bias
  1260. self.has_bias = bias is not None
  1261. self.activation = activation
  1262. self.has_act = activation is not None
  1263. self.bias_add = P.BiasAdd()
  1264. if isinstance(activation, ReLU):
  1265. self.activation = None
  1266. self.has_act = False
  1267. def construct(self, x):
  1268. if self.has_bias:
  1269. x = self.core_op(x, self.weight)
  1270. x = self.bias_add(x, self.bias)
  1271. else:
  1272. x = self.core_op(x, self.weight)
  1273. return x
  1274. def extend_repr(self):
  1275. str_info = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
  1276. if self.has_bias:
  1277. str_info = str_info + f', bias=shape[{self.bias.shape}]'
  1278. if self.has_act:
  1279. str_info = str_info + f', activation={self.activation}'
  1280. return str_info