From: @cjh9368 Reviewed-by: @hangangqiang, @zhanghaibo5 Signed-off-by: @hangangqiang (pull/15024/MERGE)
| @@ -2,3 +2,4 @@ deeplabv3.r1.1.mindir 1.5 | |||||
| mobilenetv2.r1.1.mindir 0.5 | mobilenetv2.r1.1.mindir 0.5 | ||||
| ssd.r1.1.mindir 0.5 | ssd.r1.1.mindir 0.5 | ||||
| ssd_ghostnet.r1.1.mindir 2.0 | ssd_ghostnet.r1.1.mindir 2.0 | ||||
| lenet_quant.mindir 0.5 | |||||
| @@ -382,6 +382,16 @@ STATUS NodeInferShpae(const schema::CNodeT &node, const std::vector<Tensor *> &i | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode) { | |||||
| size_t ret = -1; | |||||
| for (size_t i = 0; i < cnode.inputIndex.size(); i++) { | |||||
| if (cnode.inputIndex.at(i) == tensor_index) { | |||||
| ret = i; | |||||
| } | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) { | STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) { | ||||
| if (tensor == nullptr) { | if (tensor == nullptr) { | ||||
| MS_LOG(ERROR) << "tensor is null"; | MS_LOG(ERROR) << "tensor is null"; | ||||
| @@ -71,11 +71,12 @@ std::unordered_map<schema::PrimitiveType, std::vector<int>> GetExtNhwcIndexes(); | |||||
| std::vector<schema::PrimitiveType> Getfp32FullOpList(); | std::vector<schema::PrimitiveType> Getfp32FullOpList(); | ||||
| std::vector<schema::PrimitiveType> GetUint8NhwcOpList(); | std::vector<schema::PrimitiveType> GetUint8NhwcOpList(); | ||||
| std::vector<schema::PrimitiveType> GetInt8OpList(); | std::vector<schema::PrimitiveType> GetInt8OpList(); | ||||
| const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb); | const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb); | ||||
| size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode); | |||||
| class NodeUtils { | class NodeUtils { | ||||
| public: | public: | ||||
| static STATUS ConvertDims(schema::Format src_format, const std::vector<int32_t> &src_dims, schema::Format dst_format, | static STATUS ConvertDims(schema::Format src_format, const std::vector<int32_t> &src_dims, schema::Format dst_format, | ||||
| @@ -20,6 +20,8 @@ | |||||
| #include "tools/converter/converter_context.h" | #include "tools/converter/converter_context.h" | ||||
| #include "tools/converter/quantizer/quantize_util.h" | #include "tools/converter/quantizer/quantize_util.h" | ||||
| #include "tools/common/tensor_util.h" | #include "tools/common/tensor_util.h" | ||||
| #include "tools/common/graph_util.h" | |||||
| #include "tools/common/node_util.h" | |||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| namespace { | namespace { | ||||
| @@ -112,6 +114,62 @@ STATUS ComputeDataToInt32(const std::unique_ptr<TensorT> &tensor) { | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, const schema::MetaGraphT &graph) { | |||||
| bool channel_at_first = true; | |||||
| int channel_cnt = -1; | |||||
| auto used_nodes_idx = GetLinkedPostIdx(graph, tensor_index); | |||||
| if (used_nodes_idx.size() != 1) { | |||||
| MS_LOG(ERROR) << "Tensor is used by nodes more than one"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto &used_node = graph.nodes.at(used_nodes_idx.front()); | |||||
| auto &primitive = used_node->primitive; | |||||
| int input_index = GetTensorInputIndexInCNode(tensor_index, *used_node); | |||||
| quant::CalQuantAssitInfo(*primitive, tensor->dims, input_index, &channel_at_first, &channel_cnt); | |||||
| auto *raw_datas = reinterpret_cast<float *>(tensor->data.data()); | |||||
| ShapeVector dims; | |||||
| std::transform(tensor->dims.begin(), tensor->dims.end(), std::back_inserter(dims), | |||||
| [&](int32_t dim) { return (int64_t)dim; }); | |||||
| auto channels = quant::CalChannels(dims, channel_cnt, &channel_at_first); | |||||
| if (channels == 0) { | |||||
| MS_LOG(ERROR) << "channels is zero"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| int32_t dst_dtype = tensor->quantParams.front()->dstDtype == kNumberTypeInt32 ? kNumberTypeInt32 : kNumberTypeInt8; | |||||
| size_t elem_count = tensor->data.size() / sizeof(float); | |||||
| size_t data_size = dst_dtype == kNumberTypeInt32 ? elem_count * sizeof(int32_t) : elem_count * sizeof(int8_t); | |||||
| std::vector<int8_t> dst_data(data_size); | |||||
| size_t one_filter_size = elem_count / channels; | |||||
| for (int i = 0; i < channels; i++) { | |||||
| // do quantization | |||||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||||
| auto index = j + i * one_filter_size; | |||||
| if (!channel_at_first) { | |||||
| index = j * channels + i; | |||||
| } | |||||
| MS_ASSERT(index < elem_count); | |||||
| float raw_data = raw_datas[index]; | |||||
| if (tensor->quantParams.at(i)->dstDtype == kNumberTypeInt32) { | |||||
| auto quant_data = (int32_t)std::round(raw_datas[i] / tensor->quantParams.at(i)->scale); | |||||
| auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data()); | |||||
| dst_data_int32[index] = quant_data; | |||||
| } else { | |||||
| auto quant_data = quant::QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get()); | |||||
| dst_data[index] = quant_data; | |||||
| } | |||||
| } | |||||
| } | |||||
| tensor->data.clear(); | |||||
| tensor->data.resize(data_size); | |||||
| tensor->dataType = dst_dtype; | |||||
| if (memcpy_s(tensor->data.data(), data_size, dst_data.data(), data_size) != EOK) { | |||||
| MS_LOG(ERROR) << "memcpy_s failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) { | STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) { | ||||
| @@ -133,8 +191,13 @@ STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| if (tensor->quantParams.size() != 1) { // perchannel | if (tensor->quantParams.size() != 1) { // perchannel | ||||
| MS_LOG(ERROR) << "perchannel do quant is not supported yet"; | |||||
| return RET_ERROR; | |||||
| status = ComputeQuantTensorPerChannel(tensor.get(), index, *graph); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "compute tensor to int8 prechannel failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| index++; | |||||
| continue; | |||||
| } | } | ||||
| // perlayer | // perlayer | ||||
| auto &quantParam = tensor->quantParams.front(); | auto &quantParam = tensor->quantParams.front(); | ||||
| @@ -52,6 +52,12 @@ STATUS ConcatQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaG | |||||
| MS_ASSERT(narrow_range == quantParam->narrowRange); | MS_ASSERT(narrow_range == quantParam->narrowRange); | ||||
| MS_ASSERT(num_bits == quantParam->numBits); | MS_ASSERT(num_bits == quantParam->numBits); | ||||
| } | } | ||||
| if (in_quant_param->max < in_quant_param->min) { | |||||
| MS_LOG(DEBUG) << "Input quant param is invalid for propogator"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (min_min > in_quant_param->min) { | if (min_min > in_quant_param->min) { | ||||
| min_min = in_quant_param->min; | min_min = in_quant_param->min; | ||||
| } | } | ||||
| @@ -35,23 +35,22 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| auto &input_quant_param = input_tensor->quantParams.at(0); | auto &input_quant_param = input_tensor->quantParams.at(0); | ||||
| auto &weight_quant_param = weight_tensor->quantParams.at(0); | |||||
| if (bias_tensor->quantParams.empty()) { | |||||
| auto tmp_quant_param = std::make_unique<schema::QuantParamT>(); | |||||
| bias_tensor->quantParams.emplace_back(std::move(tmp_quant_param)); | |||||
| } | |||||
| auto &bias_quant_param = bias_tensor->quantParams.front(); | |||||
| bias_quant_param->min = 0.0; | |||||
| bias_quant_param->max = 0.0; | |||||
| bias_quant_param->dstDtype = kNumberTypeInt32; | |||||
| bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited; | |||||
| bias_quant_param->zeroPoint = 0; | |||||
| if (bias_quant_param->inited) { | |||||
| bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale; | |||||
| std::vector<std::unique_ptr<schema::QuantParamT>> bias_quant_params; | |||||
| for (auto &weight_quant_param : weight_tensor->quantParams) { | |||||
| auto bias_quant_param = std::make_unique<schema::QuantParamT>(); | |||||
| bias_quant_param->min = 0.0; | |||||
| bias_quant_param->max = 0.0; | |||||
| bias_quant_param->dstDtype = kNumberTypeInt32; | |||||
| bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited; | |||||
| bias_quant_param->zeroPoint = 0; | |||||
| if (bias_quant_param->inited) { | |||||
| bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale; | |||||
| } | |||||
| bias_quant_param->roundType = 1; | |||||
| bias_quant_param->multiplier = 1; | |||||
| bias_quant_params.emplace_back(std::move(bias_quant_param)); | |||||
| } | } | ||||
| bias_quant_param->roundType = 1; | |||||
| bias_quant_param->multiplier = 1; | |||||
| bias_tensor->quantParams = std::move(bias_quant_params); | |||||
| } | } | ||||
| for (auto &quantParam : bias_tensor->quantParams) { | for (auto &quantParam : bias_tensor->quantParams) { | ||||
| quantParam->dstDtype = TypeId::kNumberTypeInt32; | quantParam->dstDtype = TypeId::kNumberTypeInt32; | ||||
| @@ -1042,4 +1042,32 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, | |||||
| } | } | ||||
| } | } | ||||
| void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index, | |||||
| bool *channel_at_first, int *channel_cnt) { | |||||
| if (primitive.value.type == schema::PrimitiveType_MatMul && static_cast<int>(shapes.size()) == 2) { | |||||
| auto matmul_prim = primitive.value.AsMatMul(); | |||||
| MS_ASSERT(matmul_prim != nullptr); | |||||
| *channel_at_first = index != 1 || matmul_prim->transpose_b; | |||||
| } else if (primitive.value.type == schema::PrimitiveType_LSTM) { | |||||
| if (index == 1 || index == 2) { | |||||
| if (shapes.size() != 3) { | |||||
| MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size(); | |||||
| } else { | |||||
| *channel_cnt = shapes[0] * shapes[1]; | |||||
| } | |||||
| } else if (index == 3) { | |||||
| if (shapes.size() != 2) { | |||||
| MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size(); | |||||
| } else { | |||||
| auto tensor_elem_cnt = shapes[0] * shapes[1]; | |||||
| if (tensor_elem_cnt / 4 * 4 == tensor_elem_cnt) { | |||||
| *channel_cnt = 4; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| MS_LOG(WARNING) << "unexpected index of lstm: " << index; | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace mindspore::lite::quant | } // namespace mindspore::lite::quant | ||||
| @@ -120,6 +120,9 @@ int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first | |||||
| void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first, | void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first, | ||||
| int *channel_cnt); | int *channel_cnt); | ||||
| void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index, | |||||
| bool *channel_at_first, int *channel_cnt); | |||||
| template <typename T> | template <typename T> | ||||
| T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | ||||
| MS_ASSERT(quantParam != nullptr); | MS_ASSERT(quantParam != nullptr); | ||||
| @@ -69,17 +69,19 @@ int ConvertInputQuantParam(const PrimitivePtr &prim, bool narrow_range, int32_t | |||||
| quant_param.min = FLT_MAX; | quant_param.min = FLT_MAX; | ||||
| quant_param.max = FLT_MIN; | quant_param.max = FLT_MIN; | ||||
| for (int i = 0; i < filterMinPtr->ElementsNum(); ++i) { | for (int i = 0; i < filterMinPtr->ElementsNum(); ++i) { | ||||
| quant_param.min = (*(minBuf) < quant_param.min) ? (*minBuf) : quant_param.min; | |||||
| quant_param.max = (*(maxBuf) > quant_param.max) ? (*maxBuf) : quant_param.max; | |||||
| schema::QuantParamT tmp_quant_param; | |||||
| tmp_quant_param.min = *minBuf; | |||||
| tmp_quant_param.max = *maxBuf; | |||||
| auto ret = | |||||
| lite::quant::CalQuantizationParams(&tmp_quant_param, tmp_quant_param.min, tmp_quant_param.max, true, numbits); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Can't calculate quant parameters"; | |||||
| return ret; | |||||
| } | |||||
| quants.emplace_back(tmp_quant_param); | |||||
| minBuf++; | minBuf++; | ||||
| maxBuf++; | maxBuf++; | ||||
| } | } | ||||
| auto ret = lite::quant::CalQuantizationParams(&quant_param, quant_param.min, quant_param.max, true, numbits); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Can't calculate quant parameters"; | |||||
| return ret; | |||||
| } | |||||
| quants.emplace_back(quant_param); | |||||
| quant_param_holder->set_input_quant_param(1, quants); | quant_param_holder->set_input_quant_param(1, quants); | ||||
| } | } | ||||
| return lite::RET_OK; | return lite::RET_OK; | ||||