Browse Source

integrate_quant_export_v1

tags/v1.1.0
bai-yangfan 5 years ago
parent
commit
a34c5f7630
1 changed files with 10 additions and 212 deletions
  1. +10
    -212
      mindspore/compression/export/quant_export.py

+ 10
- 212
mindspore/compression/export/quant_export.py View File

@@ -17,9 +17,7 @@
import copy

import numpy as np
import mindspore.context as context

from ... import log as logger
from ... import nn, ops
from ..._checkparam import Validator
from ...common import Tensor
@@ -28,12 +26,11 @@ from ...common.api import _executor
from ...nn.layer import quant
from ...ops import operations as P
from ...ops.operations import _inner_ops as inner
from ...train import serialization
from ..quant import quant_utils
from ..quant.qat import QuantizationAwareTraining, _AddFakeQuantInput, _AddFakeQuantAfterSubCell


__all__ = ["export", "manual_export"]
__all__ = ["ExportToQuantInferNetwork", "ExportManualQuantNetwork"]

class ExportToQuantInferNetwork:
"""
@@ -109,14 +106,11 @@ class ExportToQuantInferNetwork:
else:
maxq = self.all_parameters[minq_name[:-4] + "maxq"]
minq = self.all_parameters[minq_name]
if self.is_mindir:
scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
quant_utils.scale_zp_max_min_from_data(fake_quant_a_in_op, minq, maxq, np_type)
else:
scale_a_in, zp_a_in = quant_utils.scale_zp_from_data(fake_quant_a_in_op, minq, maxq, np_type)
scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
quant_utils.scale_zp_max_min_from_data(fake_quant_a_in_op, minq, maxq, np_type)
else:
logger.warning(f"Can not find `fake_quant` from input with `fake_quant.minq` {w_minq_name}")
return None
# skip quant layer
scale_a_in, zp_a_in = 1.0, 0.0

# Build the `Quant` `Dequant` op.
# Quant only support perlayer version. Need check here.
@@ -217,50 +211,9 @@ class ExportToQuantInferNetwork:
network.cell_list = list(network.cells())
return network


def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='AIR'):
    """
    Exports MindSpore quantization predict model to deploy with AIR or MINDIR.

    Args:
        network (Cell): MindSpore network produced by `convert_quant_network`.
        inputs (Tensor): Inputs of the `quantization aware training network`.
        file_name (str): File name of model to export.
        mean (int, float): Input data mean. Default: 127.5.
        std_dev (int, float): Input data variance. Default: 127.5.
        file_format (str): MindSpore currently supports 'AIR' and 'MINDIR' format for exported
            quantization aware model. Default: 'AIR'.

            - AIR: Graph Engine Intermediate Representation. An intermediate representation format of
              Ascend model.
            - MINDIR: MindSpore Native Intermediate Representation for Anf. An intermediate
              representation format for MindSpore models.
              Recommended suffix for output file is '.mindir'.

    Raises:
        KeyError: If the current device target is neither 'Ascend' nor 'GPU'.
        ValueError: If `file_format` is not 'AIR' or 'MINDIR'.
    """
    supported_device = ["Ascend", "GPU"]
    supported_formats = ['AIR', 'MINDIR']

    mean = Validator.check_type("mean", mean, (int, float))
    std_dev = Validator.check_type("std_dev", std_dev, (int, float))

    if context.get_context('device_target') not in supported_device:
        raise KeyError("Unsupported {} device target.".format(context.get_context('device_target')))

    if file_format not in supported_formats:
        raise ValueError('Illegal file format {}.'.format(file_format))

    network.set_train(False)
    # MINDIR export additionally records per-layer max/min quantization info,
    # so the exporter is flagged accordingly instead of duplicating the call.
    exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs,
                                         is_mindir=(file_format == "MINDIR"))
    deploy_net = exporter.run()
    serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)


class ExportManualQuantNetwork:
class ExportManualQuantNetwork(ExportToQuantInferNetwork):
"""
Convert anual quantization aware network to infer network.
Convert manual quantization aware network to infer network.

Args:
network (Cell): MindSpore network API `convert_quant_network`.
@@ -275,35 +228,11 @@ class ExportManualQuantNetwork:
__quant_op_name__ = ["TensorAdd", "Sub", "Mul", "RealDiv"]

def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
network = Validator.check_isinstance('network', network, (nn.Cell,))
self.input_scale = 1 / std_dev
self.input_zero_point = round(mean)
self.data_type = mstype.int8
self.network = copy.deepcopy(network)
self.all_parameters = {p.name: p for p in self.network.get_parameters()}
self.get_inputs_table(inputs)
self.mean = mean
self.std_dev = std_dev
self.is_mindir = is_mindir
super(ExportManualQuantNetwork, self).__init__(network, mean, std_dev, *inputs, is_mindir)
self.upcell = None
self.upname = None

def get_inputs_table(self, inputs):
    """Compile the network once and cache the quant-export support table."""
    # Compile without conversion so the executor can report quant info per graph.
    graph_id, _ = _executor.compile(self.network, *inputs, phase='export_quant', do_convert=False)
    self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)

def run(self):
    """Start to convert."""
    self.network.update_cell_prefix()
    net = self.network
    # Strip the input fake-quant wrapper, if present, before converting.
    if isinstance(net, _AddFakeQuantInput):
        net = net.network
    return self._convert_manual_network(net)

def _convert_manual_network(self, network):
def _convert_quant2deploy(self, network):
"""Convert network's all quant subcell to deploy subcell."""
cells = network.name_cells()
change = False
@@ -335,7 +264,7 @@ class ExportManualQuantNetwork:
change = True
else:
self.upcell, self.upname = None, None
self._convert_manual_network(subcell)
self._convert_quant2deploy(subcell)
if isinstance(network, nn.SequentialCell) and change:
network.cell_list = list(network.cells())
return network
@@ -359,134 +288,3 @@ class ExportManualQuantNetwork:
network.insert_child_to_cell(name, new_subcell)
change = True
return network, change

def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
    """Convert a quant subcell of the network into a deploy (inference) subcell.

    Args:
        cell_core (Cell): The quantized core cell (Dense/Conv quant variants).
        activation (Cell): Activation cell following `cell_core`, possibly wrapped.
        fake_quant_a_out (Cell): Fake-quant cell on the output activation, or None.

    Returns:
        Cell, a `QuantMindirBlock` when exporting MINDIR, otherwise a `QuantBlock`.
    """
    w_minq_name = cell_core.fake_quant_weight.minq.name
    np_type = mstype.dtype_to_nptype(self.data_type)
    param_dict = dict()
    param_dict["filter_maxq"] = None
    param_dict["filter_minq"] = None
    param_dict["output_maxq"] = None
    param_dict["output_minq"] = None
    param_dict["input_maxq"] = None
    param_dict["input_minq"] = None
    param_dict["mean"] = self.mean
    param_dict["std_dev"] = self.std_dev
    param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric

    scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
        quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type)
    if fake_quant_a_out is not None:
        _, _, param_dict["output_maxq"], param_dict["output_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)

    info = self.quant_info_table.get(w_minq_name, None)
    if info:
        # Renamed from misspelled `fack_quant_a_in_op`.
        fake_quant_a_in_op, minq_name = info
        if minq_name == 'input':
            # NOTE(review): the 'None' strings (not None) look intentional for the
            # network-input case -- confirm downstream consumers expect strings.
            scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                self.input_scale, self.input_zero_point, 'None', 'None'
        else:
            maxq = self.all_parameters[minq_name[:-4] + "maxq"]
            minq = self.all_parameters[minq_name]
            scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                quant_utils.scale_zp_max_min_from_data(fake_quant_a_in_op, minq, maxq, np_type)
    else:
        # No fake-quant found for the input: skip quantizing this layer.
        scale_a_in, zp_a_in = 1.0, 0.0

    # Build the `Quant` `Dequant` op.
    # Quant only support perlayer version. Need check here.
    quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
    scale_deq = scale_a_in * scale_w
    dequant_op = inner.Dequant()

    if isinstance(activation, _AddFakeQuantAfterSubCell):
        activation = activation.subcell
    elif hasattr(activation, "get_origin"):
        activation = activation.get_origin()

    # Get the `weight` and `bias`, folding batchnorm into them where applicable.
    weight = cell_core.weight.data.asnumpy()
    bias = None
    if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
        if cell_core.has_bias:
            bias = cell_core.bias.data.asnumpy()
    elif isinstance(cell_core, quant.Conv2dBnFoldQuant):
        weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
    elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
        weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
    # Keep float copies for the MINDIR block before int quantization.
    weight_b = weight
    bias_b = bias
    # Apply the quant to weight and bias.
    fake_quant_weight_op = cell_core.fake_quant_weight.fake_quant_infer
    weight = quant_utils.weight2int(weight, scale_w, zp_w, np_type, fake_quant_weight_op.num_bits,
                                    fake_quant_weight_op.narrow_range)
    if bias is not None:
        bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)

    # Reinterpret the per-channel float32 dequant scales bitwise as uint32,
    # then widen to the uint64 layout the Dequant op consumes.
    float32_deq_scale = scale_deq.astype(np.float32)
    uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
    dequant_param = uint32_deq_scale.astype(np.uint64)
    scale_deq = Tensor(dequant_param, mstype.uint64)

    # Get the core op.
    if isinstance(cell_core, quant.DenseQuant):
        op_core = P.MatMul()
        # MatMul expects the transposed weight layout.
        weight = np.transpose(weight)
        weight_b = np.transpose(weight_b)
    else:
        op_core = cell_core.conv
    weight = Tensor(weight, self.data_type)
    weight_b = Tensor(weight_b)
    if bias_b is not None:
        bias_b = Tensor(bias_b, mstype.float32)
    if self.is_mindir:
        block = quant.QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
    else:
        block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
    return block


def manual_export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='MINDIR'):
    """
    Exports a manually quantized MindSpore predict model to deploy with AIR or MINDIR.

    Args:
        network (Cell): MindSpore network produced by `convert_quant_network`.
        inputs (Tensor): Inputs of the `quantization aware training network`.
        file_name (str): File name of model to export.
        mean (int, float): Input data mean. Default: 127.5.
        std_dev (int, float): Input data variance. Default: 127.5.
        file_format (str): MindSpore currently supports 'AIR' and 'MINDIR' format for exported
            quantization aware model. Default: 'MINDIR'.

            - AIR: Graph Engine Intermediate Representation. An intermediate representation format of
              Ascend model.
            - MINDIR: MindSpore Native Intermediate Representation for Anf. An intermediate
              representation format for MindSpore models.
              Recommended suffix for output file is '.mindir'.

    Raises:
        KeyError: If the current device target is neither 'Ascend' nor 'GPU'.
        ValueError: If `file_format` is not 'AIR' or 'MINDIR'.
    """
    supported_device = ["Ascend", "GPU"]
    supported_formats = ['AIR', 'MINDIR']

    mean = Validator.check_type("mean", mean, (int, float))
    std_dev = Validator.check_type("std_dev", std_dev, (int, float))

    if context.get_context('device_target') not in supported_device:
        raise KeyError("Unsupported {} device target.".format(context.get_context('device_target')))

    if file_format not in supported_formats:
        raise ValueError('Illegal file format {}.'.format(file_format))

    network.set_train(False)
    # MINDIR export additionally records per-layer max/min quantization info,
    # so the exporter is flagged accordingly instead of duplicating the call.
    exporter = ExportManualQuantNetwork(network, mean, std_dev, *inputs,
                                        is_mindir=(file_format == "MINDIR"))
    deploy_net = exporter.run()
    serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)

Loading…
Cancel
Save