From feba63025ed15e64674d22e249ee248ff89809c8 Mon Sep 17 00:00:00 2001
From: xutianchun
Date: Wed, 11 Nov 2020 14:15:29 +0800
Subject: [PATCH] fix post training quantization bug for FullConnection with bias

---
 .../quantizer/post_training_quantizer.cc      | 75 ++++++++++++-------
 .../converter/quantizer/quantize_util.cc      |  1 +
 .../graph/weight_format_transform_pass.cc     |  1 +
 3 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
index a512014d70..563abedee3 100644
--- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
@@ -685,25 +685,19 @@ STATUS PostTrainingQuantizer::DoBiasQuant(const AnfNodePtr &bias, const std::sha
     quant_params.emplace_back(quant_param);
   }
 
   // quant bias data
-  auto *quant_datas = new (std::nothrow) int32_t[shape_size];
-  if (quant_datas == nullptr) {
-    MS_LOG(ERROR) << "null pointer dereferencing.";
-    return RET_NULL_PTR;
-  }
+  std::vector<int32_t> quant_datas(shape_size);
+  auto *raw_datas = static_cast<float *>(bias_param->tensor_addr());
   double bias_scale_tmp;
   const constexpr int32_t quanted_bias_abs_limit = 0.5 * INT32_MAX;
-  for (size_t i = 0; i < shape_size; i++) {
-    if (bias_scales.size() == 1) {
-      bias_scale_tmp = bias_scales[0];
-    } else {
+
+  if (bias_scales.size() == shape_size) {
+    for (size_t i = 0; i < shape_size; i++) {
       bias_scale_tmp = bias_scales[i];
-    }
-    if (std::abs(raw_datas[i] / bias_scale_tmp) >= quanted_bias_abs_limit) {
-      MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][i].scale
-                    << " is too small, need to update";
-      // update filter scale and zp
-      if (input_scales.size() == 1 && active_weight_quant_params[1].size() == shape_size) {
+      if (std::abs(raw_datas[i] / bias_scale_tmp) >= quanted_bias_abs_limit) {
+        MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][i].scale
+                      << " is too small, need to update";
+        // update filter scale and zp
         double activate_scale = input_scales[0];
         double filter_scale = std::abs(raw_datas[i]) / (activate_scale * quanted_bias_abs_limit);
         active_weight_quant_params[1][i].scale = filter_scale;
@@ -712,22 +706,48 @@ STATUS PostTrainingQuantizer::DoBiasQuant(const AnfNodePtr &bias, const std::sha
         bias_scale_tmp = std::abs(raw_datas[i]) / quanted_bias_abs_limit;
         quant_params[i].scale = bias_scale_tmp;
         MS_LOG(DEBUG) << "new filter scale: " << filter_scale;
-      } else {
-        MS_LOG(WARNING) << "unexpected input_scales size: " << input_scales.size()
-                        << " weight_scales size: " << active_weight_quant_params[1].size();
       }
+      auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
+      quant_datas[i] = quant_data;
     }
-    auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
-    quant_datas[i] = quant_data;
+  } else if (bias_scales.size() == 1) {
+    // for fc, per tensor quant
+    bias_scale_tmp = quant_params[0].scale;
+    float max_raw_data = 0.0f;
+    for (size_t i = 0; i < shape_size; i++) {
+      if (std::abs(raw_datas[i]) > max_raw_data) {
+        max_raw_data = std::abs(raw_datas[i]);
+      }
+    }
+    if (std::abs(max_raw_data / bias_scale_tmp) >= quanted_bias_abs_limit) {
+      MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][0].scale
+                    << " is too small, need to update";
+      double activate_scale = input_scales[0];
+      double filter_scale = std::abs(max_raw_data) / (activate_scale * quanted_bias_abs_limit);
+      active_weight_quant_params[1][0].scale = filter_scale;
+      active_weight_quant_params[1][0].zeroPoint = 0;
+      primitive_c->SetInputQuantParams(active_weight_quant_params);
+      bias_scale_tmp = max_raw_data / quanted_bias_abs_limit;
+      quant_params[0].scale = bias_scale_tmp;
+      MS_LOG(DEBUG) << "new filter scale: " << filter_scale;
+    }
+    for (size_t i = 0; i < shape_size; i++) {
+      auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
+      quant_datas[i] = quant_data;
+    }
+  } else {
+    MS_LOG(ERROR) << "unexpected input_scales size: " << input_scales.size()
+                  << " weight_scales size: " << active_weight_quant_params[1].size();
+    return RET_ERROR;
   }
+
   primitive_c->AddInputQuantParam(quant_params);
-  auto ret = memcpy_s(bias_param->tensor_addr(), bias_param->tensor_size(), quant_datas, shape_size * sizeof(int32_t));
+  auto ret =
+    memcpy_s(bias_param->tensor_addr(), bias_param->tensor_size(), quant_datas.data(), shape_size * sizeof(int32_t));
   if (ret != EOK) {
     MS_LOG(ERROR) << "memcpy_s failed.";
-    delete[] quant_datas;
     return RET_ERROR;
   }
-  delete[] quant_datas;
   // set dtype
   auto abstractBase = bias_parameter_ptr->abstract();
   if (abstractBase == nullptr) {
@@ -795,7 +815,7 @@ STATUS PostTrainingQuantizer::QuantNode() {
       continue;
     } else if (op_type != PrimitiveType_Conv2D && op_type != PrimitiveType_DepthwiseConv2D &&
                op_type != PrimitiveType_DeConv2D && op_type != PrimitiveType_DeDepthwiseConv2D &&
-               op_type != PrimitiveType_FullConnection) {
+               op_type != PrimitiveType_FullConnection && op_type != PrimitiveType_LayerNorm) {
       for (size_t i = 1; i < cnode->inputs().size(); i++) {
         auto input_node = cnode->input(i);
         bool is_graph_input = false;
@@ -865,10 +885,9 @@ STATUS PostTrainingQuantizer::QuantNode() {
     DoQuantInput(input_scale, input_zp, &input_min_max, primitive_c);
     // do weight quant
     auto weight = cnode->input(2);
-    bool perchannel = per_channel_;
-    if (op_type == PrimitiveType_FullConnection || op_type == PrimitiveType_DeConv2D ||
-        op_type == PrimitiveType_DeDepthwiseConv2D) {
-      perchannel = false;
+    bool perchannel = false;
+    if (op_type == PrimitiveType_Conv2D || op_type == PrimitiveType_DepthwiseConv2D) {
+      perchannel = true;
     }
     DoWeightQuant(weight, primitive_c, perchannel);
     // do bias quant
diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
index a02251eba4..1e610cccf5 100644
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
@@ -110,6 +110,7 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const {
     schema::PrimitiveType_Activation,
     schema::PrimitiveType_Transpose,
     schema::PrimitiveType_Eltwise,
+    schema::PrimitiveType_LayerNorm,
   };
   bool contain = IsContain(int8OpList, type);
   if (!contain) {
diff --git a/mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc b/mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc
index 7454e7ec6e..f63501901c 100644
--- a/mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc
@@ -16,6 +16,7 @@
 #include "tools/optimizer/graph/weight_format_transform_pass.h"
 #include
 #include
+#include
 #include "tools/optimizer/common/gllo_utils.h"
 
 using mindspore::lite::converter::FmkType_CAFFE;
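
Note: the core of this fix is the new per-tensor branch in DoBiasQuant(): for FullConnection the bias scale is input_scale * weight_scale, and when the weight scale is very small the quantized bias would exceed half the int32 range, so the patch enlarges the weight scale until the largest |bias| just fits. Below is a minimal standalone sketch of that adjustment, for illustration only; QuantParam, QuantizeBiasPerTensor, and all values are invented for the example and are not the converter's actual API.

// bias_quant_sketch.cc -- standalone illustration only; QuantParam and
// QuantizeBiasPerTensor are hypothetical names, not MindSpore Lite APIs.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

struct QuantParam {
  double scale = 1.0;
  int zero_point = 0;
};

// Per-tensor bias quantization: bias_scale = input_scale * weight_scale.
// If the largest |bias| would quantize past 0.5 * INT32_MAX (the headroom
// limit used in the patch), grow the weight scale so it fits exactly.
std::vector<int32_t> QuantizeBiasPerTensor(const std::vector<float> &bias,
                                           double input_scale, QuantParam *weight_qp) {
  const double limit = 0.5 * static_cast<double>(INT32_MAX);
  double bias_scale = input_scale * weight_qp->scale;

  float max_abs = 0.0f;
  for (float b : bias) {
    max_abs = std::max(max_abs, std::fabs(b));
  }
  if (max_abs / bias_scale >= limit) {
    // Same adjustment as the patch: update the weight quant param, zero the
    // zero point, and re-derive the bias scale from the overflow limit.
    weight_qp->scale = max_abs / (input_scale * limit);
    weight_qp->zero_point = 0;
    bias_scale = max_abs / limit;
  }

  std::vector<int32_t> quantized(bias.size());
  for (size_t i = 0; i < bias.size(); ++i) {
    quantized[i] = static_cast<int32_t>(std::round(bias[i] / bias_scale));
  }
  return quantized;
}

int main() {
  QuantParam weight_qp{1e-9, 0};  // deliberately tiny weight scale
  std::vector<float> bias = {0.5f, -1.25f, 3.0f};
  std::vector<int32_t> q = QuantizeBiasPerTensor(bias, 0.02, &weight_qp);
  for (size_t i = 0; i < q.size(); ++i) {
    std::printf("bias[%zu]=% .2f -> %d\n", i, bias[i], q[i]);
  }
  std::printf("adjusted weight scale: %g\n", weight_qp.scale);
  return 0;
}

Running the sketch (g++ -std=c++14) shows the largest bias landing exactly at the 0.5 * INT32_MAX limit after the weight scale is bumped, which is the overflow behavior the patch guards against.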