!15024 [MS][LITE] Support perchannel

From: @cjh9368 Reviewed-by: @hangangqiang,@zhanghaibo5 Signed-off-by: @hangangqiang
4 years ago · 2396c76fe0
--- a/mindspore/lite/test/models_mindspore.cfg
+++ b/mindspore/lite/test/models_mindspore.cfg
@@ -2,3 +2,4 @@ deeplabv3.r1.1.mindir 1.5
 mobilenetv2.r1.1.mindir 0.5
 ssd.r1.1.mindir 0.5
 ssd_ghostnet.r1.1.mindir 2.0
 lenet_quant.mindir 0.5
--- a/mindspore/lite/tools/common/node_util.cc
+++ b/mindspore/lite/tools/common/node_util.cc
@@ -382,6 +382,16 @@ STATUS NodeInferShpae(const schema::CNodeT &node, const std::vector<Tensor *> &i
  return ret;
 }

 // Finds where `tensor_index` sits in `cnode.inputIndex`.
 // When the tensor is listed more than once, the highest matching position is returned.
 // When it is not listed at all, the result is static_cast<size_t>(-1).
 size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode) {
  const auto &inputs = cnode.inputIndex;
  // Walk from the back so that the first hit is the last occurrence.
  for (size_t remaining = inputs.size(); remaining > 0; --remaining) {
    const size_t pos = remaining - 1;
    if (inputs.at(pos) == tensor_index) {
      return pos;
    }
  }
  return static_cast<size_t>(-1);
 }

 STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) {
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "tensor is null";
--- a/mindspore/lite/tools/common/node_util.h
+++ b/mindspore/lite/tools/common/node_util.h
@@ -71,11 +71,12 @@ std::unordered_map<schema::PrimitiveType, std::vector<int>> GetExtNhwcIndexes();
 std::vector<schema::PrimitiveType> Getfp32FullOpList();

 std::vector<schema::PrimitiveType> GetUint8NhwcOpList();

 std::vector<schema::PrimitiveType> GetInt8OpList();

 const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb);

 // Returns the position of tensor_index within cnode.inputIndex (the last one if it occurs more than once),
 // or static_cast<size_t>(-1) when the tensor is not an input of cnode.
 size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode);

 class NodeUtils {
 public:
  static STATUS ConvertDims(schema::Format src_format, const std::vector<int32_t> &src_dims, schema::Format dst_format,
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/tensor_quant_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/tensor_quant_pass.cc
@@ -20,6 +20,8 @@
 #include "tools/converter/converter_context.h"
 #include "tools/converter/quantizer/quantize_util.h"
 #include "tools/common/tensor_util.h"
 #include "tools/common/graph_util.h"
 #include "tools/common/node_util.h"

 namespace mindspore::lite {
 namespace {
@@ -112,6 +114,62 @@ STATUS ComputeDataToInt32(const std::unique_ptr<TensorT> &tensor) {
  }
  return RET_OK;
 }

 // Quantizes the float payload of `tensor` in place, using one quant param per channel.
 //
 // tensor:       tensor whose `data` holds float values and whose `quantParams` holds the per-channel params.
 // tensor_index: index of `tensor` in the graph; used to find the single node that consumes it.
 // graph:        graph that owns the consuming node.
 //
 // The output type is int32 when the first quant param requests kNumberTypeInt32, otherwise int8.
 // On success `tensor->data` and `tensor->dataType` are replaced by the quantized values and RET_OK is returned.
 // RET_ERROR is returned when the tensor has no quant params, is not consumed by exactly one node, the consumer
 // has no primitive, the channel count is zero, there are fewer quant params than channels, or the copy fails.
 STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, const schema::MetaGraphT &graph) {
  if (tensor == nullptr || tensor->quantParams.empty()) {
    MS_LOG(ERROR) << "tensor is null or has no quant params";
    return RET_ERROR;
  }
  bool channel_at_first = true;
  int channel_cnt = -1;
  auto used_nodes_idx = GetLinkedPostIdx(graph, tensor_index);
  if (used_nodes_idx.size() != 1) {
    MS_LOG(ERROR) << "Tensor is used by nodes more than one";
    return RET_ERROR;
  }
  auto &used_node = graph.nodes.at(used_nodes_idx.front());
  if (used_node == nullptr || used_node->primitive == nullptr) {
    MS_LOG(ERROR) << "node or primitive is null";
    return RET_ERROR;
  }
  auto &primitive = used_node->primitive;
  int input_index = GetTensorInputIndexInCNode(tensor_index, *used_node);
  quant::CalQuantAssitInfo(*primitive, tensor->dims, input_index, &channel_at_first, &channel_cnt);

  auto *raw_datas = reinterpret_cast<float *>(tensor->data.data());
  ShapeVector dims;
  std::transform(tensor->dims.begin(), tensor->dims.end(), std::back_inserter(dims),
                 [](int32_t dim) { return static_cast<int64_t>(dim); });
  auto channels = quant::CalChannels(dims, channel_cnt, &channel_at_first);
  if (channels == 0) {
    MS_LOG(ERROR) << "channels is zero";
    return RET_ERROR;
  }
  // Every channel reads its own quant param below; reject a short list instead of letting at() throw.
  if (channels > 0 && tensor->quantParams.size() < static_cast<size_t>(channels)) {
    MS_LOG(ERROR) << "quant params size " << tensor->quantParams.size() << " is less than channels " << channels;
    return RET_ERROR;
  }
  int32_t dst_dtype = tensor->quantParams.front()->dstDtype == kNumberTypeInt32 ? kNumberTypeInt32 : kNumberTypeInt8;
  const bool to_int32 = dst_dtype == kNumberTypeInt32;
  size_t elem_count = tensor->data.size() / sizeof(float);
  size_t data_size = to_int32 ? elem_count * sizeof(int32_t) : elem_count * sizeof(int8_t);
  std::vector<int8_t> dst_data(data_size);
  auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data());
  size_t one_filter_size = elem_count / channels;
  for (int i = 0; i < channels; i++) {
    const auto &quant_param = tensor->quantParams.at(i);
    // do quantization
    for (uint32_t j = 0; j < one_filter_size; j++) {
      // Channel-first data keeps a channel contiguous; otherwise channels are interleaved.
      auto index = j + i * one_filter_size;
      if (!channel_at_first) {
        index = j * channels + i;
      }
      MS_ASSERT(index < elem_count);
      float raw_data = raw_datas[index];
      // The write width must follow the dtype the buffer was sized for, not a per-channel dtype.
      if (to_int32) {
        // Quantize the element at `index`, not the element whose position equals the channel number.
        dst_data_int32[index] = static_cast<int32_t>(std::round(raw_data / quant_param->scale));
      } else {
        dst_data[index] = quant::QuantizeData<int8_t>(raw_data, quant_param.get());
      }
    }
  }
  tensor->data.clear();
  tensor->data.resize(data_size);
  tensor->dataType = dst_dtype;
  if (memcpy_s(tensor->data.data(), data_size, dst_data.data(), data_size) != EOK) {
    MS_LOG(ERROR) << "memcpy_s failed";
    return RET_ERROR;
  }
  return RET_OK;
 }
 }  // namespace

 STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {
@@ -133,8 +191,13 @@ STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {
      continue;
    }
    if (tensor->quantParams.size() != 1) {  // perchannel
      MS_LOG(ERROR) << "perchannel do quant is not supported yet";
      return RET_ERROR;
      status = ComputeQuantTensorPerChannel(tensor.get(), index, *graph);
      if (status != RET_OK) {
        MS_LOG(ERROR) << "compute tensor to int8 prechannel failed.";
        return RET_ERROR;
      }
      index++;
      continue;
    }
    // perlayer
    auto &quantParam = tensor->quantParams.front();
--- a/mindspore/lite/tools/converter/quantizer/quant_helper/concat_quant_param_propogator.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_helper/concat_quant_param_propogator.cc
@@ -52,6 +52,12 @@ STATUS ConcatQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaG
        MS_ASSERT(narrow_range == quantParam->narrowRange);
        MS_ASSERT(num_bits == quantParam->numBits);
      }

      if (in_quant_param->max < in_quant_param->min) {
        MS_LOG(DEBUG) << "Input quant param is invalid for propogator";
        return RET_ERROR;
      }

      if (min_min > in_quant_param->min) {
        min_min = in_quant_param->min;
      }
--- a/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc
@@ -35,23 +35,22 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra
        return RET_OK;
      }
      auto &input_quant_param = input_tensor->quantParams.at(0);
      auto &weight_quant_param = weight_tensor->quantParams.at(0);

      if (bias_tensor->quantParams.empty()) {
        auto tmp_quant_param = std::make_unique<schema::QuantParamT>();
        bias_tensor->quantParams.emplace_back(std::move(tmp_quant_param));
      }
      auto &bias_quant_param = bias_tensor->quantParams.front();
      bias_quant_param->min = 0.0;
      bias_quant_param->max = 0.0;
      bias_quant_param->dstDtype = kNumberTypeInt32;
      bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
      bias_quant_param->zeroPoint = 0;
      if (bias_quant_param->inited) {
        bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
      std::vector<std::unique_ptr<schema::QuantParamT>> bias_quant_params;
      for (auto &weight_quant_param : weight_tensor->quantParams) {
        auto bias_quant_param = std::make_unique<schema::QuantParamT>();
        bias_quant_param->min = 0.0;
        bias_quant_param->max = 0.0;
        bias_quant_param->dstDtype = kNumberTypeInt32;
        bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
        bias_quant_param->zeroPoint = 0;
        if (bias_quant_param->inited) {
          bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
        }
        bias_quant_param->roundType = 1;
        bias_quant_param->multiplier = 1;
        bias_quant_params.emplace_back(std::move(bias_quant_param));
      }
      bias_quant_param->roundType = 1;
      bias_quant_param->multiplier = 1;
      bias_tensor->quantParams = std::move(bias_quant_params);
    }
    for (auto &quantParam : bias_tensor->quantParams) {
      quantParam->dstDtype = TypeId::kNumberTypeInt32;
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
@@ -1042,4 +1042,32 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes,
  }
 }

 // Fills in channel layout hints for input number `index` of a schema primitive with dims `shapes`.
 //  - MatMul with a 2-D tensor: *channel_at_first is true unless this is input 1 of a MatMul without transpose_b.
 //  - LSTM inputs 1 and 2 with a 3-D tensor: *channel_cnt is the product of the first two dims.
 //  - LSTM input 3 with a 2-D tensor whose element count is divisible by 4: *channel_cnt is 4.
 // Unexpected LSTM shapes or indexes only log a warning; every other case leaves both outputs untouched.
 void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
                        bool *channel_at_first, int *channel_cnt) {
  const auto rank = shapes.size();
  const auto op_type = primitive.value.type;

  if (op_type == schema::PrimitiveType_MatMul && static_cast<int>(rank) == 2) {
    auto matmul_prim = primitive.value.AsMatMul();
    MS_ASSERT(matmul_prim != nullptr);
    const bool is_second_input = (index == 1);
    *channel_at_first = !is_second_input || matmul_prim->transpose_b;
    return;
  }
  if (op_type != schema::PrimitiveType_LSTM) {
    return;
  }

  switch (index) {
    case 1:
    case 2:
      if (rank == 3) {
        *channel_cnt = shapes[0] * shapes[1];
      } else {
        MS_LOG(WARNING) << "unexpected lstm shape size: " << rank;
      }
      break;
    case 3: {
      if (rank != 2) {
        MS_LOG(WARNING) << "unexpected lstm shape size: " << rank;
        break;
      }
      const auto tensor_elem_cnt = shapes[0] * shapes[1];
      if (tensor_elem_cnt % 4 == 0) {
        *channel_cnt = 4;
      }
      break;
    }
    default:
      MS_LOG(WARNING) << "unexpected index of lstm: " << index;
      break;
  }
 }

 }  // namespace mindspore::lite::quant
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.h
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h
@@ -120,6 +120,9 @@ int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first
 void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
                       int *channel_cnt);

 // Overload that takes a schema::PrimitiveT and int dims. For input number `index` of `primitive` it may update
 // *channel_at_first (2-D MatMul inputs) or *channel_cnt (LSTM inputs 1-3); otherwise both are left untouched.
 void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
                        bool *channel_at_first, int *channel_cnt);

 template <typename T>
 T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
  MS_ASSERT(quantParam != nullptr);
--- a/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.cc
@@ -69,17 +69,19 @@ int ConvertInputQuantParam(const PrimitivePtr &prim, bool narrow_range, int32_t
    quant_param.min = FLT_MAX;
    quant_param.max = FLT_MIN;
    for (int i = 0; i < filterMinPtr->ElementsNum(); ++i) {
      quant_param.min = (*(minBuf) < quant_param.min) ? (*minBuf) : quant_param.min;
      quant_param.max = (*(maxBuf) > quant_param.max) ? (*maxBuf) : quant_param.max;
      schema::QuantParamT tmp_quant_param;
      tmp_quant_param.min = *minBuf;
      tmp_quant_param.max = *maxBuf;
      auto ret =
        lite::quant::CalQuantizationParams(&tmp_quant_param, tmp_quant_param.min, tmp_quant_param.max, true, numbits);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "Can't calculate quant parameters";
        return ret;
      }
      quants.emplace_back(tmp_quant_param);
      minBuf++;
      maxBuf++;
    }
    auto ret = lite::quant::CalQuantizationParams(&quant_param, quant_param.min, quant_param.max, true, numbits);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Can't calculate quant parameters";
      return ret;
    }
    quants.emplace_back(quant_param);
    quant_param_holder->set_input_quant_param(1, quants);
  }
  return lite::RET_OK;