!8451 fix post training quant bug when FC with bias

From: @xutianchun Reviewed-by: @HilbertDavid,@zhanghaibo5 Signed-off-by: @HilbertDavid
5 years ago · b8df9e6f0a
--- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
@@ -685,25 +685,19 @@ STATUS PostTrainingQuantizer::DoBiasQuant(const AnfNodePtr &bias, const std::sha
    quant_params.emplace_back(quant_param);
  }
  // quant bias data
  auto *quant_datas = new (std::nothrow) int32_t[shape_size];
  if (quant_datas == nullptr) {
    MS_LOG(ERROR) << "null pointer dereferencing.";
    return RET_NULL_PTR;
  }
  std::vector<int32_t> quant_datas(shape_size);

  auto *raw_datas = static_cast<float *>(bias_param->tensor_addr());
  double bias_scale_tmp;
  const constexpr int32_t quanted_bias_abs_limit = 0.5 * INT32_MAX;
  for (size_t i = 0; i < shape_size; i++) {
    if (bias_scales.size() == 1) {
      bias_scale_tmp = bias_scales[0];
    } else {

  if (bias_scales.size() == shape_size) {
    for (size_t i = 0; i < shape_size; i++) {
      bias_scale_tmp = bias_scales[i];
    }
    if (std::abs(raw_datas[i] / bias_scale_tmp) >= quanted_bias_abs_limit) {
      MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][i].scale
                    << " is too small, need to update";
      // update filter scale and zp
      if (input_scales.size() == 1 && active_weight_quant_params[1].size() == shape_size) {
      if (std::abs(raw_datas[i] / bias_scale_tmp) >= quanted_bias_abs_limit) {
        MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][i].scale
                      << " is too small, need to update";
        // update filter scale and zp
        double activate_scale = input_scales[0];
        double filter_scale = std::abs(raw_datas[i]) / (activate_scale * quanted_bias_abs_limit);
        active_weight_quant_params[1][i].scale = filter_scale;
@@ -712,22 +706,48 @@ STATUS PostTrainingQuantizer::DoBiasQuant(const AnfNodePtr &bias, const std::sha
        bias_scale_tmp = std::abs(raw_datas[i]) / quanted_bias_abs_limit;
        quant_params[i].scale = bias_scale_tmp;
        MS_LOG(DEBUG) << "new filter scale: " << filter_scale;
      } else {
        MS_LOG(WARNING) << "unexpected input_scales size: " << input_scales.size()
                        << " weight_scales size: " << active_weight_quant_params[1].size();
      }
      auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
      quant_datas[i] = quant_data;
    }
    auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
    quant_datas[i] = quant_data;
  } else if (bias_scales.size() == 1) {
    // for fc, per tensor quant
    bias_scale_tmp = quant_params[0].scale;
    float max_raw_data = 0.0f;
    for (size_t i = 0; i < shape_size; i++) {
      if (std::abs(raw_datas[i]) > max_raw_data) {
        max_raw_data = std::abs(raw_datas[i]);
      }
    }
    if (std::abs(max_raw_data / bias_scale_tmp) >= quanted_bias_abs_limit) {
      MS_LOG(DEBUG) << "quanted bias over flow, maybe the scale of weight: " << active_weight_quant_params[1][0].scale
                    << " is too small, need to update";
      double activate_scale = input_scales[0];
      double filter_scale = std::abs(max_raw_data) / (activate_scale * quanted_bias_abs_limit);
      active_weight_quant_params[1][0].scale = filter_scale;
      active_weight_quant_params[1][0].zeroPoint = 0;
      primitive_c->SetInputQuantParams(active_weight_quant_params);
      bias_scale_tmp = max_raw_data / quanted_bias_abs_limit;
      quant_params[0].scale = bias_scale_tmp;
      MS_LOG(DEBUG) << "new filter scale: " << filter_scale;
    }
    for (size_t i = 0; i < shape_size; i++) {
      auto quant_data = (int32_t)std::round(raw_datas[i] / bias_scale_tmp);
      quant_datas[i] = quant_data;
    }
  } else {
    MS_LOG(ERROR) << "unexpected input_scales size: " << input_scales.size()
                  << " weight_scales size: " << active_weight_quant_params[1].size();
    return RET_ERROR;
  }

  primitive_c->AddInputQuantParam(quant_params);
  auto ret = memcpy_s(bias_param->tensor_addr(), bias_param->tensor_size(), quant_datas, shape_size * sizeof(int32_t));
  auto ret =
    memcpy_s(bias_param->tensor_addr(), bias_param->tensor_size(), quant_datas.data(), shape_size * sizeof(int32_t));
  if (ret != EOK) {
    MS_LOG(ERROR) << "memcpy_s failed.";
    delete[] quant_datas;
    return RET_ERROR;
  }
  delete[] quant_datas;
  // set dtype
  auto abstractBase = bias_parameter_ptr->abstract();
  if (abstractBase == nullptr) {
@@ -795,7 +815,7 @@ STATUS PostTrainingQuantizer::QuantNode() {
      continue;
    } else if (op_type != PrimitiveType_Conv2D && op_type != PrimitiveType_DepthwiseConv2D &&
               op_type != PrimitiveType_DeConv2D && op_type != PrimitiveType_DeDepthwiseConv2D &&
               op_type != PrimitiveType_FullConnection) {
               op_type != PrimitiveType_FullConnection && op_type != PrimitiveType_LayerNorm) {
      for (size_t i = 1; i < cnode->inputs().size(); i++) {
        auto input_node = cnode->input(i);
        bool is_graph_input = false;
@@ -865,10 +885,9 @@ STATUS PostTrainingQuantizer::QuantNode() {
      DoQuantInput(input_scale, input_zp, &input_min_max, primitive_c);
      // do weight quant
      auto weight = cnode->input(2);
      bool perchannel = per_channel_;
      if (op_type == PrimitiveType_FullConnection || op_type == PrimitiveType_DeConv2D ||
          op_type == PrimitiveType_DeDepthwiseConv2D) {
        perchannel = false;
      bool perchannel = false;
      if (op_type == PrimitiveType_Conv2D || op_type == PrimitiveType_DepthwiseConv2D) {
        perchannel = true;
      }
      DoWeightQuant(weight, primitive_c, perchannel);
      // do bias quant
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
@@ -110,6 +110,7 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const {
    schema::PrimitiveType_Activation,
    schema::PrimitiveType_Transpose,
    schema::PrimitiveType_Eltwise,
    schema::PrimitiveType_LayerNorm,
  };
  bool contain = IsContain(int8OpList, type);
  if (!contain) {
--- a/mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/weight_format_transform_pass.cc
@@ -16,6 +16,7 @@
 #include "tools/optimizer/graph/weight_format_transform_pass.h"
 #include <memory>
 #include <algorithm>
 #include <vector>
 #include "tools/optimizer/common/gllo_utils.h"

 using mindspore::lite::converter::FmkType_CAFFE;