You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

quant_export.py 13 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Export for quantization."""
  16. import copy
  17. import numpy as np
  18. from ... import nn, ops
  19. from ..._checkparam import Validator
  20. from ...common import Tensor
  21. from ...common import dtype as mstype
  22. from ...common.api import _executor
  23. from ...nn.layer import quant
  24. from ...ops import operations as P
  25. from ...ops.operations import _inner_ops as inner
  26. from ..quant import quant_utils
  27. from ..quant.qat import QuantizationAwareTraining, _AddFakeQuantInput, _AddFakeQuantAfterSubCell
  28. __all__ = ["ExportToQuantInferNetwork", "ExportManualQuantNetwork"]
class ExportToQuantInferNetwork:
    """
    Convert quantization aware network to infer network.

    Args:
        network (Cell): MindSpore network API `convert_quant_network`.
        inputs (Tensor): Input tensors of the `quantization aware training network`.
        mean (int): Input data mean. Default: 127.5.
        std_dev (int, float): Input data variance. Default: 127.5.
        is_mindir (bool): Whether is MINDIR format. Default: False.

    Returns:
        Cell, Infer network.
    """
    # Primitive names whose fake-quant wrappers are unwrapped during conversion.
    __quant_op_name__ = ["Add", "Sub", "Mul", "RealDiv"]

    def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
        network = Validator.check_isinstance('network', network, (nn.Cell,))
        # Input quantization parameters derived from the dataset statistics.
        self.input_scale = 1 / std_dev
        self.input_zero_point = round(mean)
        self.data_type = mstype.int8
        # Work on a deep copy so the caller's network is left untouched.
        self.network = copy.deepcopy(network)
        self.all_parameters = {p.name: p for p in self.network.get_parameters()}
        self.get_inputs_table(inputs)
        self.mean = mean
        self.std_dev = std_dev
        self.is_mindir = is_mindir

    def get_inputs_table(self, inputs):
        """Get the support info for quant export."""
        phase_name = 'export_quant'
        # Compile without conversion so the executor can report, for each
        # fake-quant min parameter, which op consumes it (used to locate the
        # input-side quantization range of every layer).
        graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
        self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)

    def run(self):
        """Start to convert."""
        self.network.update_cell_prefix()
        network = self.network
        # Drop the input fake-quant wrapper, if present, before conversion.
        if isinstance(network, _AddFakeQuantInput):
            network = network.network
        network = self._convert_quant2deploy(network)
        return network

    def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
        """Convert network's quant subcell to deploy subcell."""
        # Calculate the scale and zero point
        w_minq_name = cell_core.fake_quant_weight.minq.name
        np_type = mstype.dtype_to_nptype(self.data_type)
        param_dict = dict()
        param_dict["filter_maxq"] = None
        param_dict["filter_minq"] = None
        param_dict["output_maxq"] = None
        param_dict["output_minq"] = None
        param_dict["input_maxq"] = None
        param_dict["input_minq"] = None
        param_dict["mean"] = self.mean
        param_dict["std_dev"] = self.std_dev
        param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric
        # Weight quantization parameters come from the weight fake-quant cell.
        scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type)
        if fake_quant_a_out is not None:
            _, _, param_dict["output_maxq"], param_dict["output_minq"] = \
                quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)

        info = self.quant_info_table.get(w_minq_name, None)
        if info:
            fake_quant_a_in_op, minq_name = info
            if minq_name == 'input':
                # Layer reads the graph input directly: use the dataset-level
                # scale/zero-point computed in __init__. (String 'None' markers
                # are stored here rather than None — presumably consumed as-is
                # downstream; TODO confirm against QuantMindirBlock.)
                scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                    self.input_scale, self.input_zero_point, 'None', 'None'
            else:
                # minq_name ends with "minq"; the paired max parameter is found
                # by swapping the suffix.
                maxq = self.all_parameters[minq_name[:-4] + "maxq"]
                minq = self.all_parameters[minq_name]
                scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                    quant_utils.scale_zp_max_min_from_data(fake_quant_a_in_op, minq, maxq, np_type)
        else:
            # skip quant layer
            scale_a_in, zp_a_in = 1.0, 0.0

        # Build the `Quant` `Dequant` op.
        # Quant only support perlayer version. Need check here.
        quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
        scale_deq = scale_a_in * scale_w
        dequant_op = inner.Dequant()

        # Unwrap the activation to its original cell.
        if isinstance(activation, _AddFakeQuantAfterSubCell):
            activation = activation.subcell
        elif hasattr(activation, "get_origin"):
            activation = activation.get_origin()

        # get the `weight` and `bias`
        weight = cell_core.weight.data.asnumpy()
        bias = None
        if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
            if cell_core.has_bias:
                bias = cell_core.bias.data.asnumpy()
        elif isinstance(cell_core, quant.Conv2dBnFoldQuant):
            # Fold the batchnorm statistics into the conv weight/bias.
            weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
        elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
            weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
        # Keep float copies for the MINDIR export path.
        weight_b = weight
        bias_b = bias

        # apply the quant
        fake_quant_weight_op = cell_core.fake_quant_weight.fake_quant_infer
        weight = quant_utils.weight2int(weight, scale_w, zp_w, np_type, fake_quant_weight_op.num_bits,
                                        fake_quant_weight_op.narrow_range)
        if bias is not None:
            # Rescale the float bias by the input and weight scales, store int32.
            bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)

        # fuse parameter
        # |--------|47:40|--------|39:32|--------|31:0|
        #  offset_w [8]   shift_N [8]   deq_scale [32]
        # Reinterpret the float32 per-channel dequant scale bits as uint32 and
        # pack them into the low 32 bits of a uint64 word per channel.
        float32_deq_scale = scale_deq.astype(np.float32)
        uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
        scale_length = scale_deq.size  # channel
        dequant_param = np.zeros(scale_length, dtype=np.uint64)
        for index in range(scale_length):
            dequant_param[index] += uint32_deq_scale[index]
        scale_deq = Tensor(dequant_param, mstype.uint64)

        # get op
        if isinstance(cell_core, quant.DenseQuant):
            op_core = P.MatMul()
            # MatMul takes the weight transposed relative to Dense storage.
            weight = np.transpose(weight)
            weight_b = np.transpose(weight_b)
        else:
            op_core = cell_core.conv
        weight = Tensor(weight, self.data_type)
        weight_b = Tensor(weight_b)
        if bias_b is not None:
            bias_b = Tensor(bias_b, mstype.float32)
        if self.is_mindir:
            block = quant.QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
        else:
            block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
        return block

    def _convert_quant2deploy(self, network):
        """Convert network's all quant subcell to deploy subcell."""
        cells = network.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            if subcell == network:
                continue
            cell_core = None
            fake_quant_act = None
            activation = None
            if isinstance(subcell, nn.Conv2dBnAct):
                cell_core = subcell.conv
                activation = subcell.activation
                fake_quant_act = activation.fake_quant_act if hasattr(activation, "fake_quant_act") else None
            elif isinstance(subcell, nn.DenseBnAct):
                cell_core = subcell.dense
                activation = subcell.activation
                fake_quant_act = activation.fake_quant_act if hasattr(activation, "fake_quant_act") else None
            if cell_core is not None:
                new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
                if new_subcell:
                    prefix = subcell.param_prefix
                    new_subcell.update_parameters_name(prefix + '.')
                    network.insert_child_to_cell(name, new_subcell)
                    change = True
            elif isinstance(subcell, _AddFakeQuantAfterSubCell):
                # Replace the fake-quant wrapper with its bare primitive; for
                # MINDIR, the learned output range is preserved as op attrs.
                op = subcell.subcell
                if op.name in QuantizationAwareTraining.__quant_op_name__ and isinstance(op, ops.Primitive):
                    if self.is_mindir:
                        op.add_prim_attr('output_maxq', Tensor(subcell.fake_quant_act.maxq.data.asnumpy()))
                        op.add_prim_attr('output_minq', Tensor(subcell.fake_quant_act.minq.data.asnumpy()))
                    network.__delattr__(name)
                    network.__setattr__(name, op)
                    change = True
            else:
                # Recurse into ordinary container cells.
                self._convert_quant2deploy(subcell)
        if isinstance(network, nn.SequentialCell) and change:
            # NOTE(review): SequentialCell appears to cache its children in
            # cell_list; rebuild it after replacements — confirm against nn API.
            network.cell_list = list(network.cells())
        return network
  193. class ExportManualQuantNetwork(ExportToQuantInferNetwork):
  194. """
  195. Convert manual quantization aware network to infer network.
  196. Args:
  197. network (Cell): MindSpore network API `convert_quant_network`.
  198. inputs (Tensor): Input tensors of the `quantization aware training network`.
  199. mean (int): Input data mean. Default: 127.5.
  200. std_dev (int, float): Input data variance. Default: 127.5.
  201. is_mindir (bool): Whether is MINDIR format. Default: False.
  202. Returns:
  203. Cell, Infer network.
  204. """
  205. __quant_op_name__ = ["Add", "Sub", "Mul", "RealDiv"]
  206. def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
  207. super(ExportManualQuantNetwork, self).__init__(network, mean, std_dev, *inputs, is_mindir)
  208. self.upcell = None
  209. self.upname = None
  210. def _convert_quant2deploy(self, network):
  211. """Convert network's all quant subcell to deploy subcell."""
  212. cells = network.name_cells()
  213. change = False
  214. for name in cells:
  215. subcell = cells[name]
  216. if subcell == network:
  217. continue
  218. if isinstance(subcell, nn.Conv2dBnAct):
  219. network, change = self._convert_subcell(network, change, name, subcell)
  220. elif isinstance(subcell, nn.DenseBnAct):
  221. network, change = self._convert_subcell(network, change, name, subcell, conv=False)
  222. elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnWithoutFoldQuant,
  223. quant.Conv2dQuant, quant.DenseQuant)):
  224. network, change = self._convert_subcell(network, change, name, subcell, core=False)
  225. elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver) and self.upcell:
  226. np_type = mstype.dtype_to_nptype(self.data_type)
  227. _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(subcell, np_type)
  228. self.upcell.core_op.add_prim_attr('output_maxq', Tensor(maxq))
  229. self.upcell.core_op.add_prim_attr('output_minq', Tensor(minq))
  230. network.insert_child_to_cell(self.upname, self.upcell)
  231. elif isinstance(subcell, _AddFakeQuantAfterSubCell):
  232. op = subcell.subcell
  233. if op.name in QuantizationAwareTraining.__quant_op_name__ and isinstance(op, ops.Primitive):
  234. if self.is_mindir:
  235. op.add_prim_attr('output_maxq', Tensor(subcell.fake_quant_act.maxq.data.asnumpy()))
  236. op.add_prim_attr('output_minq', Tensor(subcell.fake_quant_act.minq.data.asnumpy()))
  237. network.__delattr__(name)
  238. network.__setattr__(name, op)
  239. change = True
  240. else:
  241. self.upcell, self.upname = None, None
  242. self._convert_quant2deploy(subcell)
  243. if isinstance(network, nn.SequentialCell) and change:
  244. network.cell_list = list(network.cells())
  245. return network
  246. def _convert_subcell(self, network, change, name, subcell, core=True, conv=True):
  247. """Convert subcell to ant subcell."""
  248. if core:
  249. cell_core = subcell.conv if conv else subcell.dense
  250. activation = subcell.activation
  251. fake_quant_act = activation.fake_quant_act
  252. else:
  253. cell_core = subcell
  254. activation = None
  255. fake_quant_act = None
  256. new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
  257. if new_subcell:
  258. prefix = subcell.param_prefix
  259. new_subcell.update_parameters_name(prefix + '.')
  260. self.upcell = None if core else new_subcell
  261. self.upname = None if core else name
  262. network.insert_child_to_cell(name, new_subcell)
  263. change = True
  264. return network, change