From: @cjh9368 Reviewed-by: @hangangqiang,@zhanghaibo5 Signed-off-by: @hangangqiang pull/15024/MERGE
| @@ -2,3 +2,4 @@ deeplabv3.r1.1.mindir 1.5 | |||
| mobilenetv2.r1.1.mindir 0.5 | |||
| ssd.r1.1.mindir 0.5 | |||
| ssd_ghostnet.r1.1.mindir 2.0 | |||
| lenet_quant.mindir 0.5 | |||
| @@ -382,6 +382,16 @@ STATUS NodeInferShpae(const schema::CNodeT &node, const std::vector<Tensor *> &i | |||
| return ret; | |||
| } | |||
| size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode) { | |||
| size_t ret = -1; | |||
| for (size_t i = 0; i < cnode.inputIndex.size(); i++) { | |||
| if (cnode.inputIndex.at(i) == tensor_index) { | |||
| ret = i; | |||
| } | |||
| } | |||
| return ret; | |||
| } | |||
| STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) { | |||
| if (tensor == nullptr) { | |||
| MS_LOG(ERROR) << "tensor is null"; | |||
| @@ -71,11 +71,12 @@ std::unordered_map<schema::PrimitiveType, std::vector<int>> GetExtNhwcIndexes(); | |||
| std::vector<schema::PrimitiveType> Getfp32FullOpList(); | |||
| std::vector<schema::PrimitiveType> GetUint8NhwcOpList(); | |||
| std::vector<schema::PrimitiveType> GetInt8OpList(); | |||
| const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb); | |||
| size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode); | |||
| class NodeUtils { | |||
| public: | |||
| static STATUS ConvertDims(schema::Format src_format, const std::vector<int32_t> &src_dims, schema::Format dst_format, | |||
| @@ -20,6 +20,8 @@ | |||
| #include "tools/converter/converter_context.h" | |||
| #include "tools/converter/quantizer/quantize_util.h" | |||
| #include "tools/common/tensor_util.h" | |||
| #include "tools/common/graph_util.h" | |||
| #include "tools/common/node_util.h" | |||
| namespace mindspore::lite { | |||
| namespace { | |||
| @@ -112,6 +114,62 @@ STATUS ComputeDataToInt32(const std::unique_ptr<TensorT> &tensor) { | |||
| } | |||
| return RET_OK; | |||
| } | |||
| STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, const schema::MetaGraphT &graph) { | |||
| bool channel_at_first = true; | |||
| int channel_cnt = -1; | |||
| auto used_nodes_idx = GetLinkedPostIdx(graph, tensor_index); | |||
| if (used_nodes_idx.size() != 1) { | |||
| MS_LOG(ERROR) << "Tensor is used by nodes more than one"; | |||
| return RET_ERROR; | |||
| } | |||
| auto &used_node = graph.nodes.at(used_nodes_idx.front()); | |||
| auto &primitive = used_node->primitive; | |||
| int input_index = GetTensorInputIndexInCNode(tensor_index, *used_node); | |||
| quant::CalQuantAssitInfo(*primitive, tensor->dims, input_index, &channel_at_first, &channel_cnt); | |||
| auto *raw_datas = reinterpret_cast<float *>(tensor->data.data()); | |||
| ShapeVector dims; | |||
| std::transform(tensor->dims.begin(), tensor->dims.end(), std::back_inserter(dims), | |||
| [&](int32_t dim) { return (int64_t)dim; }); | |||
| auto channels = quant::CalChannels(dims, channel_cnt, &channel_at_first); | |||
| if (channels == 0) { | |||
| MS_LOG(ERROR) << "channels is zero"; | |||
| return RET_ERROR; | |||
| } | |||
| int32_t dst_dtype = tensor->quantParams.front()->dstDtype == kNumberTypeInt32 ? kNumberTypeInt32 : kNumberTypeInt8; | |||
| size_t elem_count = tensor->data.size() / sizeof(float); | |||
| size_t data_size = dst_dtype == kNumberTypeInt32 ? elem_count * sizeof(int32_t) : elem_count * sizeof(int8_t); | |||
| std::vector<int8_t> dst_data(data_size); | |||
| size_t one_filter_size = elem_count / channels; | |||
| for (int i = 0; i < channels; i++) { | |||
| // do quantization | |||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||
| auto index = j + i * one_filter_size; | |||
| if (!channel_at_first) { | |||
| index = j * channels + i; | |||
| } | |||
| MS_ASSERT(index < elem_count); | |||
| float raw_data = raw_datas[index]; | |||
| if (tensor->quantParams.at(i)->dstDtype == kNumberTypeInt32) { | |||
| auto quant_data = (int32_t)std::round(raw_datas[i] / tensor->quantParams.at(i)->scale); | |||
| auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data()); | |||
| dst_data_int32[index] = quant_data; | |||
| } else { | |||
| auto quant_data = quant::QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get()); | |||
| dst_data[index] = quant_data; | |||
| } | |||
| } | |||
| } | |||
| tensor->data.clear(); | |||
| tensor->data.resize(data_size); | |||
| tensor->dataType = dst_dtype; | |||
| if (memcpy_s(tensor->data.data(), data_size, dst_data.data(), data_size) != EOK) { | |||
| MS_LOG(ERROR) << "memcpy_s failed"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace | |||
| STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) { | |||
| @@ -133,8 +191,13 @@ STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) { | |||
| continue; | |||
| } | |||
| if (tensor->quantParams.size() != 1) { // perchannel | |||
| MS_LOG(ERROR) << "perchannel do quant is not supported yet"; | |||
| return RET_ERROR; | |||
| status = ComputeQuantTensorPerChannel(tensor.get(), index, *graph); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "compute tensor to int8 prechannel failed."; | |||
| return RET_ERROR; | |||
| } | |||
| index++; | |||
| continue; | |||
| } | |||
| // perlayer | |||
| auto &quantParam = tensor->quantParams.front(); | |||
| @@ -52,6 +52,12 @@ STATUS ConcatQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaG | |||
| MS_ASSERT(narrow_range == quantParam->narrowRange); | |||
| MS_ASSERT(num_bits == quantParam->numBits); | |||
| } | |||
| if (in_quant_param->max < in_quant_param->min) { | |||
| MS_LOG(DEBUG) << "Input quant param is invalid for propogator"; | |||
| return RET_ERROR; | |||
| } | |||
| if (min_min > in_quant_param->min) { | |||
| min_min = in_quant_param->min; | |||
| } | |||
| @@ -35,23 +35,22 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra | |||
| return RET_OK; | |||
| } | |||
| auto &input_quant_param = input_tensor->quantParams.at(0); | |||
| auto &weight_quant_param = weight_tensor->quantParams.at(0); | |||
| if (bias_tensor->quantParams.empty()) { | |||
| auto tmp_quant_param = std::make_unique<schema::QuantParamT>(); | |||
| bias_tensor->quantParams.emplace_back(std::move(tmp_quant_param)); | |||
| } | |||
| auto &bias_quant_param = bias_tensor->quantParams.front(); | |||
| bias_quant_param->min = 0.0; | |||
| bias_quant_param->max = 0.0; | |||
| bias_quant_param->dstDtype = kNumberTypeInt32; | |||
| bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited; | |||
| bias_quant_param->zeroPoint = 0; | |||
| if (bias_quant_param->inited) { | |||
| bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale; | |||
| std::vector<std::unique_ptr<schema::QuantParamT>> bias_quant_params; | |||
| for (auto &weight_quant_param : weight_tensor->quantParams) { | |||
| auto bias_quant_param = std::make_unique<schema::QuantParamT>(); | |||
| bias_quant_param->min = 0.0; | |||
| bias_quant_param->max = 0.0; | |||
| bias_quant_param->dstDtype = kNumberTypeInt32; | |||
| bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited; | |||
| bias_quant_param->zeroPoint = 0; | |||
| if (bias_quant_param->inited) { | |||
| bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale; | |||
| } | |||
| bias_quant_param->roundType = 1; | |||
| bias_quant_param->multiplier = 1; | |||
| bias_quant_params.emplace_back(std::move(bias_quant_param)); | |||
| } | |||
| bias_quant_param->roundType = 1; | |||
| bias_quant_param->multiplier = 1; | |||
| bias_tensor->quantParams = std::move(bias_quant_params); | |||
| } | |||
| for (auto &quantParam : bias_tensor->quantParams) { | |||
| quantParam->dstDtype = TypeId::kNumberTypeInt32; | |||
| @@ -1042,4 +1042,32 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, | |||
| } | |||
| } | |||
| void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index, | |||
| bool *channel_at_first, int *channel_cnt) { | |||
| if (primitive.value.type == schema::PrimitiveType_MatMul && static_cast<int>(shapes.size()) == 2) { | |||
| auto matmul_prim = primitive.value.AsMatMul(); | |||
| MS_ASSERT(matmul_prim != nullptr); | |||
| *channel_at_first = index != 1 || matmul_prim->transpose_b; | |||
| } else if (primitive.value.type == schema::PrimitiveType_LSTM) { | |||
| if (index == 1 || index == 2) { | |||
| if (shapes.size() != 3) { | |||
| MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size(); | |||
| } else { | |||
| *channel_cnt = shapes[0] * shapes[1]; | |||
| } | |||
| } else if (index == 3) { | |||
| if (shapes.size() != 2) { | |||
| MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size(); | |||
| } else { | |||
| auto tensor_elem_cnt = shapes[0] * shapes[1]; | |||
| if (tensor_elem_cnt / 4 * 4 == tensor_elem_cnt) { | |||
| *channel_cnt = 4; | |||
| } | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "unexpected index of lstm: " << index; | |||
| } | |||
| } | |||
| } | |||
| } // namespace mindspore::lite::quant | |||
| @@ -120,6 +120,9 @@ int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first | |||
| void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first, | |||
| int *channel_cnt); | |||
| void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index, | |||
| bool *channel_at_first, int *channel_cnt); | |||
| template <typename T> | |||
| T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| @@ -69,17 +69,19 @@ int ConvertInputQuantParam(const PrimitivePtr &prim, bool narrow_range, int32_t | |||
| quant_param.min = FLT_MAX; | |||
| quant_param.max = FLT_MIN; | |||
| for (int i = 0; i < filterMinPtr->ElementsNum(); ++i) { | |||
| quant_param.min = (*(minBuf) < quant_param.min) ? (*minBuf) : quant_param.min; | |||
| quant_param.max = (*(maxBuf) > quant_param.max) ? (*maxBuf) : quant_param.max; | |||
| schema::QuantParamT tmp_quant_param; | |||
| tmp_quant_param.min = *minBuf; | |||
| tmp_quant_param.max = *maxBuf; | |||
| auto ret = | |||
| lite::quant::CalQuantizationParams(&tmp_quant_param, tmp_quant_param.min, tmp_quant_param.max, true, numbits); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Can't calculate quant parameters"; | |||
| return ret; | |||
| } | |||
| quants.emplace_back(tmp_quant_param); | |||
| minBuf++; | |||
| maxBuf++; | |||
| } | |||
| auto ret = lite::quant::CalQuantizationParams(&quant_param, quant_param.min, quant_param.max, true, numbits); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Can't calculate quant parameters"; | |||
| return ret; | |||
| } | |||
| quants.emplace_back(quant_param); | |||
| quant_param_holder->set_input_quant_param(1, quants); | |||
| } | |||
| return lite::RET_OK; | |||