You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

quant_utils.py 16 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Quantization utils."""
  16. import numpy as np
  17. from mindspore._checkparam import Validator
  18. from ... import nn
  19. __all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"]
  20. def cal_quantization_params(input_min,
  21. input_max,
  22. data_type,
  23. num_bits=8,
  24. symmetric=False,
  25. narrow_range=False,
  26. neg_trunc=False):
  27. r"""
  28. Calculate quantization params for scale and zero point.
  29. Args:
  30. input_min (numpy.ndarray): The dimension of channel or 1.
  31. input_max (numpy.ndarray): The dimension of channel or 1.
  32. data_type (numpy type) : Can be numpy int8, numpy uint8.
  33. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  34. symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
  35. narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
  36. neg_trunc (bool): Whether the quantization algorithm uses negative truncation or not. Default: False.
  37. Returns:
  38. scale (numpy.ndarray): quantization param.
  39. zero point (numpy.ndarray): quantization param.
  40. """
  41. input_max = np.maximum(0.0, input_max)
  42. input_min = np.minimum(0.0, input_min)
  43. if input_min.shape != input_max.shape:
  44. raise ValueError("input min shape should equal to input max.")
  45. if len(input_min.shape) > 1:
  46. raise ValueError("input min and max shape should be one dim.")
  47. if (input_min > input_max).all():
  48. raise ValueError("input_min min should less than input max.")
  49. if (input_max == input_min).all():
  50. return np.ones(input_min.shape), np.zeros(input_min.shape)
  51. if data_type == np.int8:
  52. quant_min = 0 - 2 ** (num_bits - 1)
  53. quant_max = 2 ** (num_bits - 1) - 1
  54. elif data_type == np.uint8:
  55. quant_min = 0
  56. quant_max = 2 ** num_bits - 1
  57. else:
  58. raise ValueError("Unsupported datatype({})".format(data_type))
  59. if narrow_range:
  60. quant_min = quant_min + 1
  61. # calculate scale
  62. if symmetric and not neg_trunc:
  63. input_max = np.maximum(-input_min, input_max)
  64. input_min = -input_max
  65. scale = (input_max - input_min) / (quant_max - quant_min)
  66. # calculate zero point
  67. zp_double = quant_min - input_min / scale
  68. zp = np.floor(zp_double + 0.5)
  69. return scale, zp
  70. def weight2int(data, scale, zero_point, data_type, num_bits=8, narrow_range=False):
  71. r"""
  72. Calculate int8/uint8 weight from fp32. the formula is defined as:
  73. .. math::
  74. int8/uint8 = round(float/scale) + offset
  75. Args:
  76. data (numpy.ndarray): The dimension of channel or 1. Should be NCHW.
  77. scale (numpy.ndarray): The dimension of channel or 1.
  78. zero_point (numpy.ndarray): The dimension of channel or 1.
  79. data_type (numpy type) : Can be numpy int8, numpy uint8.
  80. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8.
  81. narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
  82. Returns:
  83. weight (numpy.ndarray): The dimension of channel or 1.
  84. """
  85. if scale.shape != zero_point.shape:
  86. raise ValueError("`scale` and `zero_point` should have the same shape.")
  87. if scale.shape[0] < 0:
  88. raise ValueError("`scale` and `zero_point` shape should greater than zero.")
  89. if len(scale.shape) >= 1 and scale.shape[0] > 1:
  90. # for perchannel
  91. if scale.shape[0] == data.shape[0]:
  92. # `Conv2d` or `Dense` op weight
  93. shape_list = [-1] + [1] * len(data.shape[1:])
  94. scale = scale.reshape(shape_list)
  95. zero_point = zero_point.reshape(shape_list)
  96. elif scale.shape[0] == data.shape[1]:
  97. # `DepthwiseConv2d` op weight
  98. shape_list = [1, -1] + [1] * len(data.shape[2:])
  99. scale = scale.reshape(shape_list)
  100. zero_point = zero_point.reshape(shape_list)
  101. else:
  102. raise ValueError("Unsupported weight shape({})".format(data.shape))
  103. if data_type == np.int8:
  104. quant_min = 0 - 2 ** (num_bits - 1)
  105. quant_max = 2 ** (num_bits - 1) - 1
  106. elif data_type == np.uint8:
  107. quant_min = 0
  108. quant_max = 2 ** num_bits - 1
  109. else:
  110. raise ValueError("Unsupported weight datatype({})".format(data_type))
  111. if narrow_range:
  112. quant_min = quant_min + 1
  113. weight_int = np.round((data / scale) + zero_point)
  114. weight_int[weight_int > quant_max] = quant_max
  115. weight_int[weight_int < quant_min] = quant_min
  116. return weight_int
  117. def scale_zp_max_min_from_fake_quant_cell(cell, data_type):
  118. """Get calculate quantization params for scale, zero point, max and min from `FakeQuantWithMinMaxObserver`."""
  119. minq = cell.minq.data.asnumpy()
  120. maxq = cell.maxq.data.asnumpy()
  121. scale, zp = cal_quantization_params(
  122. minq, maxq, data_type,
  123. num_bits=cell.num_bits,
  124. symmetric=cell.symmetric,
  125. narrow_range=cell.narrow_range,
  126. neg_trunc=cell.neg_trunc)
  127. return scale, zp, maxq, minq
  128. def scale_zp_from_data(op, minq, maxq, data_type):
  129. r"""
  130. Get calculate quantization params for scale and zero point.
  131. Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
  132. Args:
  133. op (Primitive): Fake quant primitive `mindspore.ops.operation.FakeQuantPerLayer` or
  134. `mindspore.ops.operation.FakeQuantPerChannel`
  135. minq (Parameter): Parameter `minq` of `mindspore.nn.layer.FakeQuantWithMinMax`
  136. maxq (Parameter): Parameter `maxq` of `mindspore.nn.layer.FakeQuantWithMinMax`
  137. data_type (numpy type): Can be `numpy.int8` or `numpy.uint8`.
  138. Returns:
  139. scale (numpy.ndarray): quantization param.
  140. zero point (numpy.ndarray): quantization param.
  141. """
  142. minq = minq.data.asnumpy()
  143. maxq = maxq.data.asnumpy()
  144. scale, zp = cal_quantization_params(
  145. minq, maxq, data_type,
  146. num_bits=op.num_bits,
  147. symmetric=op.symmetric,
  148. narrow_range=op.narrow_range)
  149. return scale, zp
  150. def scale_zp_max_min_from_data(op, minq, maxq, data_type):
  151. """Get calculate quantization params for scale, zero point, max and min."""
  152. minq = minq.data.asnumpy()
  153. maxq = maxq.data.asnumpy()
  154. scale, zp = cal_quantization_params(
  155. minq, maxq, data_type,
  156. num_bits=op.num_bits,
  157. symmetric=op.symmetric,
  158. narrow_range=op.narrow_range)
  159. return scale, zp, maxq, minq
  160. def fold_batchnorm(weight, cell_quant):
  161. r"""
  162. Fold the batchnorm in `Conv2dBnFoldQuant` to weight.
  163. Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
  164. Args:
  165. weight (numpy.ndarray): Weight of `cell_quant`.
  166. cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnFoldQuant`.
  167. Returns:
  168. weight (numpy.ndarray): Folded weight.
  169. bias (numpy.ndarray): Folded bias.
  170. """
  171. variance = cell_quant.moving_variance.data.asnumpy()
  172. mean = cell_quant.moving_mean.data.asnumpy()
  173. gamma = cell_quant.gamma.data.asnumpy()
  174. beta = cell_quant.beta.data.asnumpy()
  175. epsilon = cell_quant.eps
  176. sigma = np.sqrt(variance + epsilon)
  177. if gamma.shape[0] == weight.shape[0]:
  178. # `Conv2d` or `Dense` op weight
  179. shape_list = [-1] + [1] * len(weight.shape[1:])
  180. _gamma = gamma.reshape(shape_list)
  181. _sigma = sigma.reshape(shape_list)
  182. elif gamma.shape[0] == weight.shape[1]:
  183. # `DepthwiseConv2d` op weight
  184. shape_list = [1, -1] + [1] * len(weight.shape[2:])
  185. _gamma = gamma.reshape(shape_list)
  186. _sigma = sigma.reshape(shape_list)
  187. else:
  188. raise ValueError("Unsupported weight shape({})".format(weight.shape))
  189. weight = weight * _gamma / _sigma
  190. bias = beta - gamma * mean / sigma
  191. return weight, bias
  192. def without_fold_batchnorm(weight, cell_quant):
  193. r"""
  194. Fold the batchnorm in `Conv2dBnWithoutFoldQuant` to weight.
  195. Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
  196. Args:
  197. weight (numpy.ndarray): Weight of `cell_quant`.
  198. cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnWithoutFoldQuant`.
  199. Returns:
  200. weight (numpy.ndarray): whihout folded weight.
  201. bias (numpy.ndarray): without folded bias.
  202. """
  203. variance = cell_quant.batchnorm.moving_variance.data.asnumpy()
  204. mean = cell_quant.batchnorm.moving_mean.data.asnumpy()
  205. gamma = cell_quant.batchnorm.gamma.data.asnumpy()
  206. beta = cell_quant.batchnorm.beta.data.asnumpy()
  207. epsilon = cell_quant.batchnorm.eps
  208. sigma = np.sqrt(variance + epsilon)
  209. if gamma.shape[0] == weight.shape[0]:
  210. # `Conv2d` or `Dense` op weight
  211. shape_list = [-1] + [1] * len(weight.shape[1:])
  212. _gamma = gamma.reshape(shape_list)
  213. _sigma = sigma.reshape(shape_list)
  214. elif gamma.shape[0] == weight.shape[1]:
  215. # `DepthwiseConv2d` op weight
  216. shape_list = [1, -1] + [1] * len(weight.shape[2:])
  217. _gamma = gamma.reshape(shape_list)
  218. _sigma = sigma.reshape(shape_list)
  219. else:
  220. raise ValueError("Unsupported weight shape({})".format(weight.shape))
  221. weight = weight * _gamma / _sigma
  222. bias = beta - gamma * mean / sigma
  223. return weight, bias
  224. def compute_KL_threshold(data, bitwidth):
  225. r"""
  226. Using KL-J Distance to calculate the clip threshold.
  227. Args:
  228. - **data** (NumpyArray) - Data observed to calculate the threshold for quantization,
  229. - **bitwidth** (QuantDtype) - The datatype of quantization.
  230. Outputs:
  231. Tensor with Shape 1. Threshold to calculate the data.
  232. """
  233. bitwidth = bitwidth.num_bits
  234. data_min = 0
  235. data_max = np.abs(data).max()
  236. if data_max < 1e-5:
  237. return 1e-5
  238. hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(data_min, data_max), density=True)
  239. hist = hist / np.sum(hist)
  240. cumsum = np.cumsum(hist)
  241. bit_pow_range = pow(2, int(bitwidth) - 1)
  242. threshold = []
  243. scaling_factor = []
  244. kl = []
  245. if bit_pow_range + 1 > len(bin_edges) - 1:
  246. th_layer_out = bin_edges[-1]
  247. return float(th_layer_out)
  248. for i in range(bit_pow_range + 1, len(bin_edges), 1):
  249. threshold_tmp = (i + 0.5) * (bin_edges[1] - bin_edges[0])
  250. threshold = np.concatenate((threshold, [threshold_tmp]))
  251. scaling_factor_tmp = threshold_tmp / (bit_pow_range - 1)
  252. scaling_factor = np.concatenate((scaling_factor, [scaling_factor_tmp]))
  253. # forward interpolation
  254. cumsum_tmp = np.copy(cumsum)
  255. cumsum_tmp[(i - 1):] = 1
  256. fwd_x = np.linspace(0.0, 1.0, bit_pow_range)
  257. fwd_xp = np.linspace(0.0, 1.0, i)
  258. fwd_fp = cumsum_tmp[:i]
  259. forward_interp = np.interp(fwd_x, fwd_xp, fwd_fp)
  260. # backward interpolation
  261. bwd_x = np.linspace(0.0, 1.0, i)
  262. bwd_xp = np.linspace(0.0, 1.0, bit_pow_range)
  263. bwd_fp = forward_interp
  264. backward_interp = np.interp(bwd_x, bwd_xp, bwd_fp)
  265. cumsum_tmp[:i] = backward_interp
  266. kl_tmp = np.sum((cumsum - cumsum_tmp) * np.log2(cumsum / cumsum_tmp)) # Kullback-Leibler-J
  267. kl = np.concatenate((kl, [kl_tmp]))
  268. th_layer_out = threshold[np.argmin(kl)]
  269. threshold = float(th_layer_out)
  270. if threshold < 1e-5:
  271. threshold = 1e-5
  272. return threshold
  273. def query_quant_layers(network):
  274. r"""
  275. Query the network's quantization strategy of each quantized layer and print it to the screen, note that all the
  276. quantization layers are queried before graph compile optimization in the graph mode, thus may be appear some
  277. redundant quantized layers, which are not exist in practical execution.
  278. Input:
  279. network (Cell): input network
  280. Returns:
  281. None
  282. """
  283. network = Validator.check_isinstance("network", network, nn.Cell)
  284. tplt = "{0:60}\t{1:10}"
  285. for cell_and_name in network.cells_and_names():
  286. cell_name = cell_and_name[0]
  287. cell = cell_and_name[1]
  288. if isinstance(cell, nn.FakeQuantWithMinMaxObserver):
  289. print(tplt.format(cell_name, cell.quant_dtype))
  290. def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None):
  291. r"""
  292. Load fp32 model parameters into quantization model.
  293. Args:
  294. quant_model(Cell): quantization model.
  295. params_dict(dict): parameter dict that stores fp32 parameters.
  296. quant_new_params(list): parameters that exist in quantitative network but not in unquantitative network.
  297. Returns:
  298. None
  299. """
  300. iterable_dict = {
  301. 'weight': iter(list(filter(lambda item: item[0].endswith('weight'), params_dict.items()))),
  302. 'bias': iter(list(filter(lambda item: item[0].endswith('bias'), params_dict.items()))),
  303. 'gamma': iter(list(filter(lambda item: item[0].endswith('gamma'), params_dict.items()))),
  304. 'beta': iter(list(filter(lambda item: item[0].endswith('beta'), params_dict.items()))),
  305. 'moving_mean': iter(list(filter(lambda item: item[0].endswith('moving_mean'), params_dict.items()))),
  306. 'moving_variance': iter(list(filter(lambda item: item[0].endswith('moving_variance'), params_dict.items()))),
  307. 'minq': iter(list(filter(lambda item: item[0].endswith('minq'), params_dict.items()))),
  308. 'maxq': iter(list(filter(lambda item: item[0].endswith('maxq'), params_dict.items()))),
  309. 'quant_max': iter(list(filter(lambda item: item[0].endswith('quant_max'), params_dict.items())))
  310. }
  311. for name, param in quant_model.parameters_and_names():
  312. key_name = name.split(".")[-1]
  313. if key_name not in iterable_dict.keys():
  314. if key_name not in quant_new_params:
  315. raise ValueError(f"Can't find match parameter in ckpt,param name = {name}")
  316. continue
  317. value_param = next(iterable_dict[key_name], None)
  318. if value_param:
  319. param.set_data(value_param[1].data)
  320. print(f'init model param {name} with checkpoint param {value_param[0]}')
  321. # Perform KL_init when learned scale quantization is executed.
  322. for cell_and_name in quant_model.cells_and_names():
  323. cell = cell_and_name[1]
  324. if isinstance(cell, (nn.Conv2dBnFoldQuantOneConv, nn.Conv2dBnFoldQuant, nn.Conv2dBnWithoutFoldQuant,
  325. nn.Conv2dQuant, nn.DenseQuant)) and cell.fake_quant_weight.mode == "LEARNED_SCALE":
  326. subcell_weight_para = cell.weight.data.asnumpy()
  327. if hasattr(cell, 'gamma'):
  328. scale_factor = (cell.gamma.data.asnumpy() /
  329. np.sqrt(cell.moving_variance.data.asnumpy() + 1e-5))
  330. subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
  331. if cell.fake_quant_weight.per_channel:
  332. max_init = [compute_KL_threshold(weight_para_each, cell.fake_quant_weight.quant_dtype)
  333. for weight_para_each in subcell_weight_para]
  334. min_init = [-x for x in max_init]
  335. else:
  336. max_init = [compute_KL_threshold(subcell_weight_para, cell.fake_quant_weight.quant_dtype)]
  337. min_init = [-x for x in max_init]
  338. cell.fake_quant_weight.reset(quant_dtype=cell.fake_quant_weight.quant_dtype,
  339. min_init=min_init, max_init=max_init)