
!8451 fix post training quant bug for FC with bias

From: @xutianchun
Reviewed-by: @HilbertDavid, @zhanghaibo5
Signed-off-by: @HilbertDavid
tags/v1.1.0
mindspore-ci-bot committed 5 years ago
parent commit b8df9e6f0a
3 changed files with 49 additions and 28 deletions:
  1. mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc (+47, -28)
  2. mindspore/lite/tools/converter/quantizer/quantize_util.cc (+1, -0)
  3. mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc (+1, -0)

mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc (+47, -28)

@@ -685,25 +685,19 @@ STATUS PostTrainingQuantizer::DoBiasQuant(const AnfNodePtr &bias, const std::sha
     quant_params.emplace_back(quant_param);
   }
   // quant bias data
-  auto *quant_datas = new (std::nothrow) int32_t[shape_size];
-  if (quant_datas == nullptr) {
-    MS_LOG(ERROR) << "null pointer dereferencing.";
-    return RET_NULL_PTR;
-  }
+  std::vector<int32_t> quant_datas(shape_size);
+
   auto *raw_datas = static_cast<float *>(bias_param->tensor_addr());
   double bias_scale_tmp;
   const constexpr int32_t quanted_bias_abs_limit = 0.5 * INT32_MAX;
-  for (size_t i = 0; i < shape_size; i++) {
-    if (bias_scales.size() == 1) {
-      bias_scale_tmp = bias_scales[0];
-    } else {
+
+  if (bias_scales.size() == shape_size) {
+    for (size_t i = 0; i < shape_size; i++) {
       bias_scale_tmp = bias_scales[i];
-    }
-    if (std::abs(raw_datas[i] / bias_scale_tmp) >= quanted_bias_abs_limit) {
-      MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][i].scale
-                    << " is too small, need to update";
-      // update filter scale and zp
-      if (input_scales.size() == 1 && active_weight_quant_params[1].size() == shape_size) {
+      if (std::abs(raw_datas[i] / bias_scale_tmp) >= quanted_bias_abs_limit) {
+        MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][i].scale
+                      << " is too small, need to update";
+        // update filter scale and zp
         double activate_scale = input_scales[0];
         double filter_scale = std::abs(raw_datas[i]) / (activate_scale * quanted_bias_abs_limit);
         active_weight_quant_params[1][i].scale = filter_scale;
@@ -712,22 +706,48 @@ STATUS PostTrainingQuantizer::DoBiasQuant(const AnfNodePtr &bias, const std::sha
         bias_scale_tmp = std::abs(raw_datas[i]) / quanted_bias_abs_limit;
         quant_params[i].scale = bias_scale_tmp;
         MS_LOG(DEBUG) << "new filter scale: " << filter_scale;
-      } else {
-        MS_LOG(WARNING) << "unexpected input_scales size: " << input_scales.size()
-                        << " weight_scales size: " << active_weight_quant_params[1].size();
       }
+      auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
+      quant_datas[i] = quant_data;
     }
-    auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
-    quant_datas[i] = quant_data;
-  }
+  } else if (bias_scales.size() == 1) {
+    // for fc, per tensor quant
+    bias_scale_tmp = quant_params[0].scale;
+    float max_raw_data = 0.0f;
+    for (size_t i = 0; i < shape_size; i++) {
+      if (std::abs(raw_datas[i]) > max_raw_data) {
+        max_raw_data = std::abs(raw_datas[i]);
+      }
+    }
+    if (std::abs(max_raw_data / bias_scale_tmp) >= quanted_bias_abs_limit) {
+      MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][0].scale
+                    << " is too small, need to update";
+      double activate_scale = input_scales[0];
+      double filter_scale = std::abs(max_raw_data) / (activate_scale * quanted_bias_abs_limit);
+      active_weight_quant_params[1][0].scale = filter_scale;
+      active_weight_quant_params[1][0].zeroPoint = 0;
+      primitive_c->SetInputQuantParams(active_weight_quant_params);
+      bias_scale_tmp = max_raw_data / quanted_bias_abs_limit;
+      quant_params[0].scale = bias_scale_tmp;
+      MS_LOG(DEBUG) << "new filter scale: " << filter_scale;
+    }
+    for (size_t i = 0; i < shape_size; i++) {
+      auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
+      quant_datas[i] = quant_data;
+    }
+  } else {
+    MS_LOG(ERROR) << "unexpected input_scales size: " << input_scales.size()
+                  << " weight_scales size: " << active_weight_quant_params[1].size();
+    return RET_ERROR;
+  }
+
   primitive_c->AddInputQuantParam(quant_params);
-  auto ret = memcpy_s(bias_param->tensor_addr(), bias_param->tensor_size(), quant_datas, shape_size * sizeof(int32_t));
+  auto ret =
+    memcpy_s(bias_param->tensor_addr(), bias_param->tensor_size(), quant_datas.data(), shape_size * sizeof(int32_t));
   if (ret != EOK) {
     MS_LOG(ERROR) << "memcpy_s failed.";
-    delete[] quant_datas;
     return RET_ERROR;
   }
-  delete[] quant_datas;
   // set dtype
   auto abstractBase = bias_parameter_ptr->abstract();
   if (abstractBase == nullptr) {
@@ -795,7 +815,7 @@ STATUS PostTrainingQuantizer::QuantNode() {
       continue;
     } else if (op_type != PrimitiveType_Conv2D && op_type != PrimitiveType_DepthwiseConv2D &&
                op_type != PrimitiveType_DeConv2D && op_type != PrimitiveType_DeDepthwiseConv2D &&
-               op_type != PrimitiveType_FullConnection) {
+               op_type != PrimitiveType_FullConnection && op_type != PrimitiveType_LayerNorm) {
       for (size_t i = 1; i < cnode->inputs().size(); i++) {
         auto input_node = cnode->input(i);
         bool is_graph_input = false;
@@ -865,10 +885,9 @@ STATUS PostTrainingQuantizer::QuantNode() {
       DoQuantInput(input_scale, input_zp, &input_min_max, primitive_c);
       // do weight quant
       auto weight = cnode->input(2);
-      bool perchannel = per_channel_;
-      if (op_type == PrimitiveType_FullConnection || op_type == PrimitiveType_DeConv2D ||
-          op_type == PrimitiveType_DeDepthwiseConv2D) {
-        perchannel = false;
+      bool perchannel = false;
+      if (op_type == PrimitiveType_Conv2D || op_type == PrimitiveType_DepthwiseConv2D) {
+        perchannel = true;
       }
       DoWeightQuant(weight, primitive_c, perchannel);
       // do bias quant
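
Taken together, the post_training_quantizer.cc changes split bias quantization by scale count: per-channel when bias_scales.size() == shape_size (Conv2D/DepthwiseConv2D, which QuantNode still weight-quantizes per channel), per-tensor when it is 1 (FullConnection and the deconv ops). The old code looped per element and, on overflow, indexed the weight quant params per channel, which is invalid when FC carries a single per-tensor scale. Below is a minimal standalone sketch of the new per-tensor path, with hypothetical scales and bias values (illustrative only, not MindSpore code):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical values; in the converter these come from calibration.
  const double input_scale = 0.02;
  double weight_scale = 1e-9;  // deliberately tiny to trigger the overflow path
  const std::vector<float> bias = {0.5f, -1.25f, 3.0f};

  const int32_t limit = 0.5 * INT32_MAX;  // quanted_bias_abs_limit in the patch
  double bias_scale = input_scale * weight_scale;

  // Per-tensor check: compare the largest |bias| against the limit,
  // as the new FullConnection branch does.
  float max_raw = 0.0f;
  for (float b : bias) max_raw = std::max(max_raw, std::abs(b));

  if (std::abs(max_raw / bias_scale) >= limit) {
    // Widen the shared weight scale so every quantized bias fits in int32.
    weight_scale = max_raw / (input_scale * limit);
    bias_scale = max_raw / limit;
    std::cout << "adjusted weight scale: " << weight_scale << "\n";
  }

  for (float b : bias) {
    auto q = static_cast<int32_t>(std::round(b / bias_scale));
    std::cout << b << " -> " << q << "\n";
  }
  return 0;
}

With weight_scale at 1e-9 the raw quotient for the largest bias is about 1.5e11, far beyond INT32_MAX, so the guard fires; after the adjustment the largest bias maps exactly to the limit and everything fits in int32.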


mindspore/lite/tools/converter/quantizer/quantize_util.cc (+1, -0)

@@ -110,6 +110,7 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const {
       schema::PrimitiveType_Activation,
       schema::PrimitiveType_Transpose,
       schema::PrimitiveType_Eltwise,
+      schema::PrimitiveType_LayerNorm,
   };
   bool contain = IsContain(int8OpList, type);
   if (!contain) {
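
For context, CanOpPostQuantized gates post-training quantization on an allow-list lookup, so adding PrimitiveType_LayerNorm here is what lets the QuantNode change above take effect. A hedged sketch of that pattern, with simplified stand-in types (the real schema enum and IsContain signature may differ):

#include <algorithm>
#include <iostream>
#include <vector>

// Stand-in for schema::PrimitiveType; only the names mirror the diff.
enum class PrimitiveType { Activation, Transpose, Eltwise, LayerNorm, Pad };

// A plausible shape for the IsContain helper the diff calls.
template <typename T>
bool IsContain(const std::vector<T> &list, const T &value) {
  return std::find(list.begin(), list.end(), value) != list.end();
}

int main() {
  const std::vector<PrimitiveType> int8OpList = {
      PrimitiveType::Activation,
      PrimitiveType::Transpose,
      PrimitiveType::Eltwise,
      PrimitiveType::LayerNorm,  // newly allowed by this patch
  };
  std::cout << std::boolalpha
            << IsContain(int8OpList, PrimitiveType::LayerNorm) << "\n"  // true
            << IsContain(int8OpList, PrimitiveType::Pad) << "\n";       // false
  return 0;
}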


mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc (+1, -0)

@@ -16,6 +16,7 @@
 #include "tools/optimizer/graph/weight_format_transform_pass.h"
 #include <memory>
 #include <algorithm>
+#include <vector>
 #include "tools/optimizer/common/gllo_utils.h"
 
 using mindspore::lite::converter::FmkType_CAFFE;

