| @@ -17,9 +17,7 @@ | |||
| import copy | |||
| import numpy as np | |||
| import mindspore.context as context | |||
| from ... import log as logger | |||
| from ... import nn, ops | |||
| from ..._checkparam import Validator | |||
| from ...common import Tensor | |||
| @@ -28,12 +26,11 @@ from ...common.api import _executor | |||
| from ...nn.layer import quant | |||
| from ...ops import operations as P | |||
| from ...ops.operations import _inner_ops as inner | |||
| from ...train import serialization | |||
| from ..quant import quant_utils | |||
| from ..quant.qat import QuantizationAwareTraining, _AddFakeQuantInput, _AddFakeQuantAfterSubCell | |||
| __all__ = ["export", "manual_export"] | |||
| __all__ = ["ExportToQuantInferNetwork", "ExportManualQuantNetwork"] | |||
| class ExportToQuantInferNetwork: | |||
| """ | |||
| @@ -109,14 +106,11 @@ class ExportToQuantInferNetwork: | |||
| else: | |||
| maxq = self.all_parameters[minq_name[:-4] + "maxq"] | |||
| minq = self.all_parameters[minq_name] | |||
| if self.is_mindir: | |||
| scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ | |||
| quant_utils.scale_zp_max_min_from_data(fake_quant_a_in_op, minq, maxq, np_type) | |||
| else: | |||
| scale_a_in, zp_a_in = quant_utils.scale_zp_from_data(fake_quant_a_in_op, minq, maxq, np_type) | |||
| scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ | |||
| quant_utils.scale_zp_max_min_from_data(fake_quant_a_in_op, minq, maxq, np_type) | |||
| else: | |||
| logger.warning(f"Can not find `fake_quant` from input with `fake_quant.minq` {w_minq_name}") | |||
| return None | |||
| # skip quant layer | |||
| scale_a_in, zp_a_in = 1.0, 0.0 | |||
| # Build the `Quant` `Dequant` op. | |||
| # Quant only support perlayer version. Need check here. | |||
| @@ -217,50 +211,9 @@ class ExportToQuantInferNetwork: | |||
| network.cell_list = list(network.cells()) | |||
| return network | |||
def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='AIR'):
    """
    Exports a MindSpore quantization predict model to deploy with AIR or MINDIR.

    Args:
        network (Cell): MindSpore network produced by `convert_quant_network`.
        inputs (Tensor): Inputs of the `quantization aware training network`.
        file_name (str): File name of model to export.
        mean (int, float): Input data mean. Default: 127.5.
        std_dev (int, float): Input data variance. Default: 127.5.
        file_format (str): MindSpore currently supports 'AIR' and 'MINDIR' format for exported
            quantization aware model. Default: 'AIR'.

            - AIR: Graph Engine Intermediate Representation. An intermediate representation format of
              Ascend model.
            - MINDIR: MindSpore Native Intermediate Representation for Anf. An intermediate representation
              format for MindSpore models. Recommended suffix for output file is '.mindir'.

    Raises:
        KeyError: If the current device target is not 'Ascend' or 'GPU'.
        ValueError: If `file_format` is not 'AIR' or 'MINDIR'.
    """
    mean = Validator.check_type("mean", mean, (int, float))
    std_dev = Validator.check_type("std_dev", std_dev, (int, float))
    if context.get_context('device_target') not in ("Ascend", "GPU"):
        raise KeyError("Unsupported {} device target.".format(context.get_context('device_target')))
    if file_format not in ('AIR', 'MINDIR'):
        raise ValueError('Illegal file format {}.'.format(file_format))
    network.set_train(False)
    # MINDIR export needs the extra max/min statistics; AIR uses the default path.
    exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs,
                                         is_mindir=(file_format == "MINDIR"))
    deploy_net = exporter.run()
    serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)
| class ExportManualQuantNetwork: | |||
| class ExportManualQuantNetwork(ExportToQuantInferNetwork): | |||
| """ | |||
| Convert anual quantization aware network to infer network. | |||
| Convert manual quantization aware network to infer network. | |||
| Args: | |||
| network (Cell): MindSpore network API `convert_quant_network`. | |||
| @@ -275,35 +228,11 @@ class ExportManualQuantNetwork: | |||
| __quant_op_name__ = ["TensorAdd", "Sub", "Mul", "RealDiv"] | |||
| def __init__(self, network, mean, std_dev, *inputs, is_mindir=False): | |||
| network = Validator.check_isinstance('network', network, (nn.Cell,)) | |||
| self.input_scale = 1 / std_dev | |||
| self.input_zero_point = round(mean) | |||
| self.data_type = mstype.int8 | |||
| self.network = copy.deepcopy(network) | |||
| self.all_parameters = {p.name: p for p in self.network.get_parameters()} | |||
| self.get_inputs_table(inputs) | |||
| self.mean = mean | |||
| self.std_dev = std_dev | |||
| self.is_mindir = is_mindir | |||
| super(ExportManualQuantNetwork, self).__init__(network, mean, std_dev, *inputs, is_mindir) | |||
| self.upcell = None | |||
| self.upname = None | |||
| def get_inputs_table(self, inputs): | |||
| """Get the support info for quant export.""" | |||
| phase_name = 'export_quant' | |||
| graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False) | |||
| self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id) | |||
| def run(self): | |||
| """Start to convert.""" | |||
| self.network.update_cell_prefix() | |||
| network = self.network | |||
| if isinstance(network, _AddFakeQuantInput): | |||
| network = network.network | |||
| network = self._convert_manual_network(network) | |||
| return network | |||
| def _convert_manual_network(self, network): | |||
| def _convert_quant2deploy(self, network): | |||
| """Convert network's all quant subcell to deploy subcell.""" | |||
| cells = network.name_cells() | |||
| change = False | |||
| @@ -335,7 +264,7 @@ class ExportManualQuantNetwork: | |||
| change = True | |||
| else: | |||
| self.upcell, self.upname = None, None | |||
| self._convert_manual_network(subcell) | |||
| self._convert_quant2deploy(subcell) | |||
| if isinstance(network, nn.SequentialCell) and change: | |||
| network.cell_list = list(network.cells()) | |||
| return network | |||
| @@ -359,134 +288,3 @@ class ExportManualQuantNetwork: | |||
| network.insert_child_to_cell(name, new_subcell) | |||
| change = True | |||
| return network, change | |||
| def _get_quant_block(self, cell_core, activation, fake_quant_a_out): | |||
| """convet network's quant subcell to deploy subcell""" | |||
| w_minq_name = cell_core.fake_quant_weight.minq.name | |||
| np_type = mstype.dtype_to_nptype(self.data_type) | |||
| param_dict = dict() | |||
| param_dict["filter_maxq"] = None | |||
| param_dict["filter_minq"] = None | |||
| param_dict["output_maxq"] = None | |||
| param_dict["output_minq"] = None | |||
| param_dict["input_maxq"] = None | |||
| param_dict["input_minq"] = None | |||
| param_dict["mean"] = self.mean | |||
| param_dict["std_dev"] = self.std_dev | |||
| param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric | |||
| scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \ | |||
| quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type) | |||
| if fake_quant_a_out is not None: | |||
| _, _, param_dict["output_maxq"], param_dict["output_minq"] = \ | |||
| quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type) | |||
| info = self.quant_info_table.get(w_minq_name, None) | |||
| if info: | |||
| fack_quant_a_in_op, minq_name = info | |||
| if minq_name == 'input': | |||
| scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ | |||
| self.input_scale, self.input_zero_point, 'None', 'None' | |||
| else: | |||
| maxq = self.all_parameters[minq_name[:-4] + "maxq"] | |||
| minq = self.all_parameters[minq_name] | |||
| scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ | |||
| quant_utils.scale_zp_max_min_from_data(fack_quant_a_in_op, minq, maxq, np_type) | |||
| else: | |||
| # skip quant layer | |||
| scale_a_in, zp_a_in = 1, 0 | |||
| # Build the `Quant` `Dequant` op. | |||
| # Quant only support perlayer version. Need check here. | |||
| quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in)) | |||
| scale_deq = scale_a_in * scale_w | |||
| dequant_op = inner.Dequant() | |||
| if isinstance(activation, _AddFakeQuantAfterSubCell): | |||
| activation = activation.subcell | |||
| elif hasattr(activation, "get_origin"): | |||
| activation = activation.get_origin() | |||
| # get the `weight` and `bias` | |||
| weight = cell_core.weight.data.asnumpy() | |||
| bias = None | |||
| if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)): | |||
| if cell_core.has_bias: | |||
| bias = cell_core.bias.data.asnumpy() | |||
| elif isinstance(cell_core, quant.Conv2dBnFoldQuant): | |||
| weight, bias = quant_utils.fold_batchnorm(weight, cell_core) | |||
| elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant): | |||
| weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core) | |||
| weight_b = weight | |||
| bias_b = bias | |||
| # apply the quant | |||
| fake_quant_weight_op = cell_core.fake_quant_weight.fake_quant_infer | |||
| weight = quant_utils.weight2int(weight, scale_w, zp_w, np_type, fake_quant_weight_op.num_bits, | |||
| fake_quant_weight_op.narrow_range) | |||
| if bias is not None: | |||
| bias = Tensor(bias / scale_a_in / scale_w, mstype.int32) | |||
| float32_deq_scale = scale_deq.astype(np.float32) | |||
| uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32) | |||
| scale_length = scale_deq.size # channel | |||
| dequant_param = np.zeros(scale_length, dtype=np.uint64) | |||
| for index in range(scale_length): | |||
| dequant_param[index] += uint32_deq_scale[index] | |||
| scale_deq = Tensor(dequant_param, mstype.uint64) | |||
| # get op | |||
| if isinstance(cell_core, quant.DenseQuant): | |||
| op_core = P.MatMul() | |||
| weight = np.transpose(weight) | |||
| weight_b = np.transpose(weight_b) | |||
| else: | |||
| op_core = cell_core.conv | |||
| weight = Tensor(weight, self.data_type) | |||
| weight_b = Tensor(weight_b) | |||
| if bias_b is not None: | |||
| bias_b = Tensor(bias_b, mstype.float32) | |||
| if self.is_mindir: | |||
| block = quant.QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict) | |||
| else: | |||
| block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) | |||
| return block | |||
def manual_export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='MINDIR'):
    """
    Exports a manually quantized MindSpore predict model to deploy with AIR or MINDIR.

    Args:
        network (Cell): MindSpore network produced by `convert_quant_network`.
        inputs (Tensor): Inputs of the `quantization aware training network`.
        file_name (str): File name of model to export.
        mean (int, float): Input data mean. Default: 127.5.
        std_dev (int, float): Input data variance. Default: 127.5.
        file_format (str): MindSpore currently supports 'AIR' and 'MINDIR' format for exported
            quantization aware model. Default: 'MINDIR'.

            - AIR: Graph Engine Intermediate Representation. An intermediate representation format of
              Ascend model.
            - MINDIR: MindSpore Native Intermediate Representation for Anf. An intermediate representation
              format for MindSpore models. Recommended suffix for output file is '.mindir'.

    Raises:
        KeyError: If the current device target is not 'Ascend' or 'GPU'.
        ValueError: If `file_format` is not 'AIR' or 'MINDIR'.
    """
    supported_device = ["Ascend", "GPU"]
    supported_formats = ['AIR', 'MINDIR']
    mean = Validator.check_type("mean", mean, (int, float))
    std_dev = Validator.check_type("std_dev", std_dev, (int, float))
    if context.get_context('device_target') not in supported_device:
        raise KeyError("Unsupported {} device target.".format(context.get_context('device_target')))
    if file_format not in supported_formats:
        raise ValueError('Illegal file format {}.'.format(file_format))
    network.set_train(False)
    # MINDIR export needs the extra max/min statistics; AIR uses the default path.
    exporter = ExportManualQuantNetwork(network, mean, std_dev, *inputs,
                                        is_mindir=(file_format == "MINDIR"))
    deploy_net = exporter.run()
    serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)