Merge pull request !5637 from yankai10/merge_0901
@@ -253,6 +253,16 @@ std::string Tensor::ToString() const {
         }
       }
     } break;
+    case kNumberTypeInt8: {
+      auto data = static_cast<int8_t *>(this->data_);
+      if (data == nullptr) {
+        return "Data of tensor is nullptr";
+      } else {
+        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
+          oss << " " << static_cast<int32_t>(data[i]);
+        }
+      }
+    } break;
     default:
       oss << "Unsupported data type to print";
      break;
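The cast in the new branch is load-bearing: streaming an int8_t through an ostream picks the character overload, so raw values would print as (often unprintable) characters instead of numbers. A minimal standalone sketch of the difference, with an arbitrary value chosen purely for illustration:

    #include <cstdint>
    #include <iostream>
    #include <sstream>

    int main() {
      int8_t v = 65;
      std::ostringstream oss;
      oss << v;                               // char overload: appends "A"
      oss << " " << static_cast<int32_t>(v);  // widened first: appends "65"
      std::cout << oss.str() << std::endl;    // prints "A 65"
      return 0;
    }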
@@ -15,11 +15,16 @@
  */
 #include "src/ops/conv2d.h"
-#include <string>
+#include <map>
 #include <memory>
+#include <string>
 #include "include/errorcode.h"
 #include "utils/log_adapter.h"
 #ifdef PRIMITIVE_WRITEABLE
+#include <float.h>
 #include "tools/converter/quantizer/quantize_util.h"
 #endif
@@ -156,6 +161,13 @@ void Conv2D::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT
     attr->padMode = schema::PadMode_NOTSET;
   }
+  if (prim.GetAttr("activation_name") != nullptr) {
+    std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
+    attr->activationType = kActivationTypeMap[activate_name];
+  } else {
+    attr->activationType = schema::ActivationType_NO_ACTIVATION;
+  }
+
   int channel_mutiplier = 1;
   if (prim.GetAttr("channel_mutiplier") != nullptr) {
     channel_mutiplier = GetValue<int>(prim.GetAttr("channel_multiplier"));
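Two things worth flagging in this hunk. First, the unchanged context lines check prim.GetAttr("channel_mutiplier") (note the missing "l") but then read "channel_multiplier"; if the attribute is registered under the correctly spelled key, the guard never fires and the default of 1 is silently kept. Second, kActivationTypeMap[activate_name] uses std::map::operator[], which default-inserts a zero-valued entry for any unmapped name and therefore requires a mutable map. A hedged sketch of a lookup via find() that avoids both the insertion and the mutability requirement (the enum values below are stand-ins, not taken from the generated schema header):

    #include <map>
    #include <string>

    // Illustrative stand-ins; the real values come from schema/inner/model_generated.h.
    enum ActivationType {
      ActivationType_NO_ACTIVATION = 0,
      ActivationType_RELU = 1,
      ActivationType_RELU6 = 2,
      ActivationType_SIGMOID = 3,
    };

    static const std::map<std::string, ActivationType> kActivationTypeMap = {
        {"ReLU", ActivationType_RELU}, {"ReLU6", ActivationType_RELU6}, {"Sigmoid", ActivationType_SIGMOID}};

    // find() neither inserts nor requires a non-const map.
    ActivationType LookupActivation(const std::string &name) {
      auto it = kActivationTypeMap.find(name);
      return it == kActivationTypeMap.end() ? ActivationType_NO_ACTIVATION : it->second;
    }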
@@ -213,98 +225,18 @@ void Conv2D::PopulaterConv2DSingleGroup(const Primitive &prim, schema::Primitive
   } else {
     attr->padMode = schema::PadMode_NOTSET;
   }
-  primitive->value.type = schema::PrimitiveType_Conv2D;
-  primitive->value.value = attr.release();
-}
-
-void Conv2D::CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax) {
-  const float qmin = 0;
-  const float qmax = 255;
-  *mMin = static_cast<float>((qmin - mean) / stdDev);
-  *mMax = static_cast<float>((qmax - mean) / stdDev);
-}
-
-void Conv2D::PopulaterQuantParam(const Primitive &prim,
-                                 std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
-                                 std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam) {
-  auto narrow_range = prim.GetAttr("narrow_range");
-  bool narrowRangeQuantParam = GetValue<bool>(narrow_range);
-  auto num_bits = prim.GetAttr("num_bits");
-  int32_t numbitsRangeQuantParam = GetValue<int32_t>(num_bits);
-
-  std::vector<schema::QuantParamT> quants;
-  schema::QuantParamT quantParam;
-  auto mean = prim.GetAttr("mean");
-  auto std_dev = prim.GetAttr("std_dev");
-  if (mean != nullptr && std_dev != nullptr) {
-    auto meanQuantOaram = GetValue<double>(mean);
-    double stddevQuantOaram = GetValue<double>(std_dev);
-    float mMin = 0.0;
-    float mMax = 0.0;
-    CalQuantParam(meanQuantOaram, stddevQuantOaram, &mMin, &mMax);
-    quantParam.min = mMin;
-    quantParam.max = mMax;
-  } else {
-    auto inputMin = prim.GetAttr("input_minq");
-    auto inputMax = prim.GetAttr("input_maxq");
-    auto inputMinPtr = inputMin->cast<lite::tensor::TensorPtr>();
-    auto inputMaxPtr = inputMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(inputMinPtr->Data());
-    float *maxBuf = static_cast<float *>(inputMaxPtr->Data());
-    quantParam.min = *minBuf;
-    quantParam.max = *maxBuf;
-  }
-  quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                               numbitsRangeQuantParam);
-  quants.emplace_back(quantParam);
-  vecInputQuantParam->emplace_back(quants);
-
-  quants.clear();
-  int biasQuantSize = 0;
-  auto filterMin = prim.GetAttr("filter_minq");
-  auto filterMax = prim.GetAttr("filter_maxq");
-  if (filterMin != nullptr && filterMax != nullptr) {
-    auto filterMinPtr = filterMin->cast<lite::tensor::TensorPtr>();
-    auto filterMaxPtr = filterMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(filterMinPtr->Data());
-    float *maxBuf = static_cast<float *>(filterMaxPtr->Data());
-    biasQuantSize = filterMinPtr->DataSize();
-    for (int i = 0; i < biasQuantSize; ++i) {
-      quantParam.min = *(minBuf++);
-      quantParam.max = *(maxBuf++);
-      quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                                   numbitsRangeQuantParam);
-      quants.emplace_back(quantParam);
-    }
-    vecInputQuantParam->emplace_back(quants);
-  }
-
-  quants.clear();
-  for (int i = 0; i < biasQuantSize; ++i) {
-    quantParam.min = 0.0;
-    quantParam.max = 0.0;
-    quantParam.zeroPoint = 0;
-    quantParam.scale = vecInputQuantParam->at(0).at(0).scale * vecInputQuantParam->at(1).at(i).scale;
-    quants.emplace_back(quantParam);
-  }
-  vecInputQuantParam->emplace_back(quants);
-
-  quants.clear();
-  auto outputMin = prim.GetAttr("output_minq");
-  auto outputMax = prim.GetAttr("output_maxq");
-  if (outputMin != nullptr && outputMax != nullptr) {
-    auto outputMinPtr = outputMin->cast<lite::tensor::TensorPtr>();
-    auto outputMaxPtr = outputMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(outputMinPtr->Data());
-    float *maxBuf = static_cast<float *>(outputMaxPtr->Data());
-    quantParam.min = *minBuf;
-    quantParam.max = *maxBuf;
-    quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                                 numbitsRangeQuantParam);
-    quants.emplace_back(quantParam);
-    vecOutputQuantParam->emplace_back(quants);
-  }
+  if (prim.GetAttr("activation_name") != nullptr) {
+    std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
+    attr->activationType = kActivationTypeMap[activate_name];
+  } else {
+    attr->activationType = schema::ActivationType_NO_ACTIVATION;
+  }
+  // attr->padMode = schema::PadMode_SAME;
+  // attr->activationType = schema::ActivationType_RELU;
+  primitive->value.type = schema::PrimitiveType_Conv2D;
+  primitive->value.value = attr.release();
 }

 int Conv2D::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
@@ -57,9 +57,6 @@ class Conv2D : public PrimitiveC {
   void PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group,
                                  const std::vector<AnfNodePtr> &inputs);
   void PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group);
-  void PopulaterQuantParam(const Primitive &prim, std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
-                           std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam);
-  void CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax);
 #else
 public:
@@ -15,6 +15,7 @@
  */
 #include "src/ops/depthwise_conv2d.h"
 #include <memory>
+#include <string>
 #ifdef PRIMITIVE_WRITEABLE
@@ -69,96 +70,6 @@ void DepthwiseConv2D::SetActivationType(int activation_type) {
   this->primitive_->value.AsDepthwiseConv2D()->activationType = (schema::ActivationType)activation_type;
 }

-void DepthwiseConv2D::CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax) {
-  const float qmin = 0;
-  const float qmax = 255;
-  *mMin = static_cast<float>((qmin - mean) / stdDev);
-  *mMax = static_cast<float>((qmax - mean) / stdDev);
-}
-
-void DepthwiseConv2D::PopulaterQuantParam(const Primitive &prim,
-                                          std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
-                                          std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam) {
-  auto narrow_range = prim.GetAttr("narrow_range");
-  bool narrowRangeQuantParam = GetValue<bool>(narrow_range);
-  auto num_bits = prim.GetAttr("num_bits");
-  int32_t numbitsRangeQuantParam = GetValue<int32_t>(num_bits);
-
-  std::vector<schema::QuantParamT> quants;
-  schema::QuantParamT quantParam;
-  auto mean = prim.GetAttr("mean");
-  auto std_dev = prim.GetAttr("std_dev");
-  if (mean != nullptr && std_dev != nullptr) {
-    auto meanQuantOaram = GetValue<double>(mean);
-    double stddevQuantOaram = GetValue<double>(std_dev);
-    float mMin = 0.0;
-    float mMax = 0.0;
-    CalQuantParam(meanQuantOaram, stddevQuantOaram, &mMin, &mMax);
-    quantParam.min = mMin;
-    quantParam.max = mMax;
-  } else {
-    auto inputMin = prim.GetAttr("input_minq");
-    auto inputMax = prim.GetAttr("input_maxq");
-    auto inputMinPtr = inputMin->cast<lite::tensor::TensorPtr>();
-    auto inputMaxPtr = inputMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(inputMinPtr->Data());
-    float *maxBuf = static_cast<float *>(inputMaxPtr->Data());
-    quantParam.min = *minBuf;
-    quantParam.max = *maxBuf;
-  }
-  quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                               numbitsRangeQuantParam);
-  quants.emplace_back(quantParam);
-  vecInputQuantParam->emplace_back(quants);
-
-  quants.clear();
-  int biasQuantSize = 0;
-  auto filterMin = prim.GetAttr("filter_minq");
-  auto filterMax = prim.GetAttr("filter_maxq");
-  if (filterMin != nullptr && filterMax != nullptr) {
-    auto filterMinPtr = filterMin->cast<lite::tensor::TensorPtr>();
-    auto filterMaxPtr = filterMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(filterMinPtr->Data());
-    float *maxBuf = static_cast<float *>(filterMaxPtr->Data());
-    biasQuantSize = filterMinPtr->DataSize();
-    for (int i = 0; i < biasQuantSize; ++i) {
-      quantParam.min = *(minBuf++);
-      quantParam.max = *(maxBuf++);
-      quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                                   numbitsRangeQuantParam);
-      quants.emplace_back(quantParam);
-    }
-    vecInputQuantParam->emplace_back(quants);
-  }
-
-  quants.clear();
-  for (int i = 0; i < biasQuantSize; ++i) {
-    quantParam.min = 0.0;
-    quantParam.max = 0.0;
-    quantParam.zeroPoint = 0;
-    quantParam.scale = vecInputQuantParam->at(0).at(0).scale * vecInputQuantParam->at(1).at(i).scale;
-    quants.emplace_back(quantParam);
-  }
-  vecInputQuantParam->emplace_back(quants);
-
-  quants.clear();
-  auto outputMin = prim.GetAttr("output_minq");
-  auto outputMax = prim.GetAttr("output_maxq");
-  if (outputMin != nullptr && outputMax != nullptr) {
-    auto outputMinPtr = outputMin->cast<lite::tensor::TensorPtr>();
-    auto outputMaxPtr = outputMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(outputMinPtr->Data());
-    float *maxBuf = static_cast<float *>(outputMaxPtr->Data());
-    quantParam.min = *minBuf;
-    quantParam.max = *maxBuf;
-    quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                                 numbitsRangeQuantParam);
-    quants.emplace_back(quantParam);
-    vecOutputQuantParam->emplace_back(quants);
-  }
-}

 int DepthwiseConv2D::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
   this->primitive_ = new (schema::PrimitiveT);
   auto attr = std::make_unique<schema::DepthwiseConv2DT>();
@@ -197,7 +108,14 @@ int DepthwiseConv2D::UnPackAttr(const Primitive &prim, const std::vector<AnfNode
   } else {
     attr->padMode = schema::PadMode_NOTSET;
   }
+  if (prim.GetAttr("activation_name") != nullptr) {
+    std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
+    attr->activationType = kActivationTypeMap[activate_name];
+  } else {
+    attr->activationType = schema::ActivationType_NO_ACTIVATION;
+  }
+  // attr->padMode = schema::PadMode_SAME;
+  // attr->activationType = schema::ActivationType_RELU;

   auto channel_multiplier = GetValue<int>(prim.GetAttr("channel_multiplier"));
   attr->channelMultiplier = channel_multiplier;
@@ -51,10 +51,6 @@ class DepthwiseConv2D : public PrimitiveC {
   void SetHasBias(bool has_bias);
   void SetActivationType(int activation_type);
-
- private:
-  void PopulaterQuantParam(const Primitive &prim, std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
-                           std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam);
-  void CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax);
 #else
  public:
@@ -30,83 +30,6 @@ bool MatMul::GetTransposeB() const { return this->primitive_->value.AsMatMul()->
 void MatMul::SetTransposeA(bool transpose_a) { this->primitive_->value.AsMatMul()->transposeA = transpose_a; }
 void MatMul::SetTransposeB(bool transpose_b) { this->primitive_->value.AsMatMul()->transposeB = transpose_b; }

-void MatMul::CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax) {
-  const float qmin = 0;
-  const float qmax = 255;
-  *mMin = static_cast<float>((qmin - mean) / stdDev);
-  *mMax = static_cast<float>((qmax - mean) / stdDev);
-}
-
-void MatMul::PopulaterQuantParam(const Primitive &prim,
-                                 std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
-                                 std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam) {
-  auto narrow_range = prim.GetAttr("narrow_range");
-  bool narrowRangeQuantParam = GetValue<bool>(narrow_range);
-  auto num_bits = prim.GetAttr("num_bits");
-  int32_t numbitsRangeQuantParam = GetValue<int32_t>(num_bits);
-
-  std::vector<schema::QuantParamT> quants;
-  schema::QuantParamT quantParam;
-  auto mean = prim.GetAttr("mean");
-  auto std_dev = prim.GetAttr("std_dev");
-  if (mean != nullptr && std_dev != nullptr) {
-    auto meanQuantOaram = GetValue<double>(mean);
-    double stddevQuantOaram = GetValue<double>(std_dev);
-    float mMin = 0.0;
-    float mMax = 0.0;
-    CalQuantParam(meanQuantOaram, stddevQuantOaram, &mMin, &mMax);
-    quantParam.min = mMin;
-    quantParam.max = mMax;
-  } else {
-    auto inputMin = prim.GetAttr("input_minq");
-    auto inputMax = prim.GetAttr("input_maxq");
-    auto inputMinPtr = inputMin->cast<lite::tensor::TensorPtr>();
-    auto inputMaxPtr = inputMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(inputMinPtr->Data());
-    float *maxBuf = static_cast<float *>(inputMaxPtr->Data());
-    quantParam.min = *minBuf;
-    quantParam.max = *maxBuf;
-  }
-  quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                               numbitsRangeQuantParam);
-  quants.emplace_back(quantParam);
-  vecInputQuantParam->emplace_back(quants);
-
-  quants.clear();
-  auto filterMin = prim.GetAttr("filter_minq");
-  auto filterMax = prim.GetAttr("filter_maxq");
-  if (filterMin != nullptr && filterMax != nullptr) {
-    auto filterMinPtr = filterMin->cast<lite::tensor::TensorPtr>();
-    auto filterMaxPtr = filterMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(filterMinPtr->Data());
-    float *maxBuf = static_cast<float *>(filterMaxPtr->Data());
-    for (int i = 0; i < filterMinPtr->DataSize(); ++i) {
-      quantParam.min = *(minBuf++);
-      quantParam.max = *(maxBuf++);
-      quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                                   numbitsRangeQuantParam);
-      quants.emplace_back(quantParam);
-    }
-    vecInputQuantParam->emplace_back(quants);
-  }
-
-  quants.clear();
-  auto outputMin = prim.GetAttr("output_minq");
-  auto outputMax = prim.GetAttr("output_maxq");
-  if (outputMin != nullptr && outputMax != nullptr) {
-    auto outputMinPtr = outputMin->cast<lite::tensor::TensorPtr>();
-    auto outputMaxPtr = outputMax->cast<lite::tensor::TensorPtr>();
-    float *minBuf = static_cast<float *>(outputMinPtr->Data());
-    float *maxBuf = static_cast<float *>(outputMaxPtr->Data());
-    quantParam.min = *minBuf;
-    quantParam.max = *maxBuf;
-    quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
-                                 numbitsRangeQuantParam);
-    quants.emplace_back(quantParam);
-    vecOutputQuantParam->emplace_back(quants);
-  }
-}

 int MatMul::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
   if (this->primitive_ == nullptr) {
     this->primitive_ = new (std::nothrow) schema::PrimitiveT;
@@ -36,10 +36,6 @@ class MatMul : public PrimitiveC {
   void SetTransposeA(bool transpose_a);
   void SetTransposeB(bool transpose_b);
-
- private:
-  void PopulaterQuantParam(const Primitive &prim, std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
-                           std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam);
-  void CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax);
 #else
  public:
@@ -16,6 +16,7 @@
 #include "src/ops/primitive_c.h"
 #include <memory>
+#include <map>
 #include "src/ops/space_to_batch.h"
 #include "src/ops/space_to_batch_nd.h"
 #include "src/ops/conv2d.h"
@@ -121,10 +122,99 @@
 #include "src/ops/l2_norm.h"
 #include "src/ops/sparse_to_dense.h"
 #include "src/ops/detection_post_process.h"
+#ifdef PRIMITIVE_WRITEABLE
+#include "tools/converter/quantizer/quantize_util.h"
+#endif

 namespace mindspore {
 namespace lite {

 #ifdef PRIMITIVE_WRITEABLE
+void PrimitiveC::CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax) {
+  const float qmin = 0;
+  const float qmax = 255;
+  *mMin = static_cast<float>((qmin - mean) / stdDev);
+  *mMax = static_cast<float>((qmax - mean) / stdDev);
+}
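For intuition, CalQuantParam maps the uint8 endpoints 0 and 255 back through the converter's input normalization x_norm = (x - mean) / stdDev, yielding the float range the quantized input can represent. A quick check with illustrative values (mean = 127.5, stdDev = 127.5, not taken from any real model):

    #include <cstdio>

    // Same formula as the hunk above: map the uint8 endpoints through (q - mean) / stdDev.
    void CalQuantParam(double mean, double stdDev, float *mMin, float *mMax) {
      *mMin = static_cast<float>((0 - mean) / stdDev);
      *mMax = static_cast<float>((255 - mean) / stdDev);
    }

    int main() {
      float mMin = 0.0f, mMax = 0.0f;
      CalQuantParam(127.5, 127.5, &mMin, &mMax);
      printf("min=%f max=%f\n", mMin, mMax);  // min=-1.000000 max=1.000000
      return 0;
    }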
+
+void PrimitiveC::PopulaterQuantParam(const Primitive &prim,
+                                     std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
+                                     std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam) {
+  auto narrow_range = prim.GetAttr("narrow_range");
+  bool narrowRangeQuantParam = GetValue<bool>(narrow_range);
+  auto num_bits = prim.GetAttr("num_bits");
+  int32_t numbitsRangeQuantParam = GetValue<int32_t>(num_bits);
+
+  std::vector<schema::QuantParamT> quants;
+  schema::QuantParamT quantParam;
+  auto mean = prim.GetAttr("mean");
+  auto std_dev = prim.GetAttr("std_dev");
+  if (mean != nullptr && std_dev != nullptr) {
+    auto meanQuantOaram = GetValue<double>(mean);
+    double stddevQuantOaram = GetValue<double>(std_dev);
+    float mMin = 0.0;
+    float mMax = 0.0;
+    CalQuantParam(meanQuantOaram, stddevQuantOaram, &mMin, &mMax);
+    quantParam.min = mMin;
+    quantParam.max = mMax;
+  } else {
+    auto inputMin = prim.GetAttr("input_minq");
+    auto inputMax = prim.GetAttr("input_maxq");
+    auto inputMinPtr = inputMin->cast<lite::tensor::TensorPtr>();
+    auto inputMaxPtr = inputMax->cast<lite::tensor::TensorPtr>();
+    float *minBuf = static_cast<float *>(inputMinPtr->Data());
+    float *maxBuf = static_cast<float *>(inputMaxPtr->Data());
+    quantParam.min = *minBuf;
+    quantParam.max = *maxBuf;
+  }
+  quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
+                               numbitsRangeQuantParam);
+  quants.emplace_back(quantParam);
+  vecInputQuantParam->emplace_back(quants);
+
+  quants.clear();
+  auto filterMin = prim.GetAttr("filter_minq");
+  auto filterMax = prim.GetAttr("filter_maxq");
+  if (filterMin != nullptr && filterMax != nullptr) {
+    auto filterMinPtr = filterMin->cast<lite::tensor::TensorPtr>();
+    auto filterMaxPtr = filterMax->cast<lite::tensor::TensorPtr>();
+    float *minBuf = static_cast<float *>(filterMinPtr->Data());
+    float *maxBuf = static_cast<float *>(filterMaxPtr->Data());
+    quantParam.min = FLT_MAX;
+    quantParam.max = FLT_MIN;
+    for (int i = 0; i < filterMinPtr->DataSize(); ++i) {
+      quantParam.min = (*(minBuf) < quantParam.min) ? (*minBuf) : quantParam.min;
+      quantParam.max = (*(maxBuf) > quantParam.max) ? (*maxBuf) : quantParam.max;
+      minBuf++;
+      maxBuf++;
+    }
+    quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, true, numbitsRangeQuantParam);
+    quants.emplace_back(quantParam);
+    vecInputQuantParam->emplace_back(quants);
+  }
+
+  quants.clear();
+  quantParam.min = 0.0;
+  quantParam.max = 0.0;
+  quantParam.zeroPoint = 0;
+  quantParam.scale = vecInputQuantParam->at(0).at(0).scale * vecInputQuantParam->at(1).at(0).scale;
+  quants.emplace_back(quantParam);
+  vecInputQuantParam->emplace_back(quants);
+
+  quants.clear();
+  auto outputMin = prim.GetAttr("output_minq");
+  auto outputMax = prim.GetAttr("output_maxq");
+  if (outputMin != nullptr && outputMax != nullptr) {
+    auto outputMinPtr = outputMin->cast<lite::tensor::TensorPtr>();
+    auto outputMaxPtr = outputMax->cast<lite::tensor::TensorPtr>();
+    float *minBuf = static_cast<float *>(outputMinPtr->Data());
+    float *maxBuf = static_cast<float *>(outputMaxPtr->Data());
+    quantParam.min = *minBuf;
+    quantParam.max = *maxBuf;
+    quant::CalQuantizationParams(&quantParam, quantParam.min, quantParam.max, narrowRangeQuantParam,
+                                 numbitsRangeQuantParam);
+    quants.emplace_back(quantParam);
+    vecOutputQuantParam->emplace_back(quants);
+  }
+}
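One detail reviewers may want to double-check in the filter branch: the scan seeds quantParam.max with FLT_MIN, which is the smallest positive normal float rather than the most negative one, so a (hypothetical) all-negative max buffer would leave the seed in place. The conventional seeding uses std::numeric_limits<float>::lowest(). A minimal sketch of the per-tensor reduction with those seeds (names are illustrative, not from the patch):

    #include <limits>

    // Reduce per-channel (min, max) buffers to a single per-tensor range.
    void ReduceRange(const float *minBuf, const float *maxBuf, int n, float *outMin, float *outMax) {
      *outMin = std::numeric_limits<float>::max();     // any candidate will lower this
      *outMax = std::numeric_limits<float>::lowest();  // i.e. -FLT_MAX, safe for negative data
      for (int i = 0; i < n; ++i) {
        if (minBuf[i] < *outMin) *outMin = minBuf[i];
        if (maxBuf[i] > *outMax) *outMax = maxBuf[i];
      }
    }

Also note the bias entry pushed after the filter block: its scale is the product input_scale * weight_scale, the standard choice that lets an int32 bias be added directly to the int32 accumulator of the quantized convolution.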
 schema::PrimitiveT *PrimitiveC::GetPrimitiveT() const { return this->primitive_; }

 void PrimitiveC::ClearPrimitiveT() { this->primitive_ = nullptr; }

@@ -152,7 +242,7 @@ void PrimitiveC::AddOutputQuantParam(std::vector<schema::QuantParamT> quant_para
 }

 std::vector<std::vector<schema::QuantParamT>> PrimitiveC::GetOutputQuantParams() const { return output_quant_param_; }

-void PrimitiveC::SetQuantType(schema::QuantType quant_type) { this->quant_type_ = quant_type; }
+void PrimitiveC::SetQuantType(const schema::QuantType &quant_type) { this->quant_type_ = quant_type; }

 schema::QuantType PrimitiveC::GetQuantType() const { return quant_type_; }
@@ -205,12 +295,14 @@ std::shared_ptr<PrimitiveC> GetTupleGetItemPrim() {
 }

 template <typename T, typename = std::enable_if<std::is_base_of<PrimitiveC, T>::value>>
-std::shared_ptr<PrimitiveC> NewPrimitiveC(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
+std::shared_ptr<PrimitiveC> NewPrimitiveC(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
+                                          const schema::QuantType &quantType) {
   auto primc = std::make_shared<T>();
   if (primc == nullptr) {
     MS_LOG(ERROR) << "make_shared PrimitiveC failed";
     return nullptr;
   }
+  primc->SetQuantType(quantType);
   auto ret = primc->UnPackAttr(prim, inputs);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "UnPackAttr failed";
@@ -220,46 +312,47 @@ std::shared_ptr<PrimitiveC> NewPrimitiveC(const Primitive &prim, const std::vect
 }

 std::shared_ptr<PrimitiveC> PrimitiveC::UnPackFromPrimitive(const Primitive &prim,
-                                                            const std::vector<AnfNodePtr> &inputs) {
+                                                            const std::vector<AnfNodePtr> &inputs,
+                                                            const schema::QuantType &quantType) {
   const auto &op_type = prim.name();
   if (op_type == "ReLU" || op_type == "ReLU6" || op_type == "Sigmoid") {
-    return NewPrimitiveC<Activation>(prim, inputs);
+    return NewPrimitiveC<Activation>(prim, inputs, quantType);
   } else if (op_type == "BatchNorm") {
-    return NewPrimitiveC<BatchNorm>(prim, inputs);
+    return NewPrimitiveC<BatchNorm>(prim, inputs, quantType);
   } else if (op_type == "BiasAdd") {
-    return NewPrimitiveC<BiasAdd>(prim, inputs);
+    return NewPrimitiveC<BiasAdd>(prim, inputs, quantType);
   } else if (op_type == "Concat") {
-    return NewPrimitiveC<Concat>(prim, inputs);
+    return NewPrimitiveC<Concat>(prim, inputs, quantType);
   } else if (op_type == "Conv2D") {
-    return NewPrimitiveC<Conv2D>(prim, inputs);
+    return NewPrimitiveC<Conv2D>(prim, inputs, quantType);
   } else if (op_type == "DepthwiseConv2dNative" || op_type == "DepthwiseConv2D") {
-    return NewPrimitiveC<DepthwiseConv2D>(prim, inputs);
+    return NewPrimitiveC<DepthwiseConv2D>(prim, inputs, quantType);
   } else if (op_type == "Dequant") {
-    return NewPrimitiveC<Dequant>(prim, inputs);
+    return NewPrimitiveC<Dequant>(prim, inputs, quantType);
   } else if (op_type == "Flatten") {
-    return NewPrimitiveC<Flatten>(prim, inputs);
+    return NewPrimitiveC<Flatten>(prim, inputs, quantType);
   } else if (op_type == "make_tuple") {
-    return NewPrimitiveC<MakeTuple>(prim, inputs);
+    return NewPrimitiveC<MakeTuple>(prim, inputs, quantType);
   } else if (op_type == "MatMul") {
-    return NewPrimitiveC<MatMul>(prim, inputs);
+    return NewPrimitiveC<MatMul>(prim, inputs, quantType);
   } else if (op_type == "Mul") {
-    return NewPrimitiveC<Mul>(prim, inputs);
+    return NewPrimitiveC<Mul>(prim, inputs, quantType);
   } else if (op_type == "MaxPool") {
-    return NewPrimitiveC<Pooling>(prim, inputs);
+    return NewPrimitiveC<Pooling>(prim, inputs, quantType);
   } else if (op_type == "Quant") {
-    return NewPrimitiveC<Quant>(prim, inputs);
+    return NewPrimitiveC<Quant>(prim, inputs, quantType);
   } else if (op_type == "ReduceMean") {
-    return NewPrimitiveC<Reduce>(prim, inputs);
+    return NewPrimitiveC<Reduce>(prim, inputs, quantType);
   } else if (op_type == "Reshape") {
-    return NewPrimitiveC<Reshape>(prim, inputs);
+    return NewPrimitiveC<Reshape>(prim, inputs, quantType);
   } else if (op_type == "TensorAdd") {
-    return NewPrimitiveC<Add>(prim, inputs);
+    return NewPrimitiveC<Add>(prim, inputs, quantType);
   } else if (op_type == "Transpose") {
-    return NewPrimitiveC<Transpose>(prim, inputs);
+    return NewPrimitiveC<Transpose>(prim, inputs, quantType);
   } else if (op_type == "tuple_getitem") {
-    return NewPrimitiveC<TupleGetItem>(prim, inputs);
+    return NewPrimitiveC<TupleGetItem>(prim, inputs, quantType);
   } else if (op_type == "Softmax") {
-    return NewPrimitiveC<SoftMax>(prim, inputs);
+    return NewPrimitiveC<SoftMax>(prim, inputs, quantType);
   } else {
     MS_LOG(ERROR) << "Unsupported primitive type in UnPackFromPrimitive : " << op_type;
     return nullptr;
@@ -20,6 +20,7 @@
 #include <set>
 #include <vector>
 #include <memory>
+#include <map>
 #ifdef PRIMITIVE_WRITEABLE
 #include "ir/primitive.h"
 #include "schema/inner/model_generated.h"
@@ -44,6 +45,9 @@ const std::set<int> kSupportDataType = {kNumberTypeUInt8, kNumberTypeInt32, kNum
 constexpr int kAnfPopulaterOne = 1;
 constexpr int kAnfPopulaterTwo = 2;
 constexpr int kAnfPopulaterThree = 3;
+static std::map<std::string, schema::ActivationType> kActivationTypeMap{{"ReLU", schema::ActivationType_RELU},
+                                                                        {"ReLU6", schema::ActivationType_RELU6},
+                                                                        {"Sigmoid", schema::ActivationType_SIGMOID}};
 class PrimitiveC : public mindspore::Primitive {
  public:
   // Argument primitive is delivered into PrimitiveC and will be deleted in ~PrimitiveC().
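A side note on the new table: a static std::map at namespace scope in a header gives every translation unit that includes primitive_c.h its own mutable copy, and operator[] can insert into it. If the toolchain permits C++17, an inline const map queried with find() is a tighter alternative; a hedged sketch of that variant (not what the patch does; it assumes the same schema header is in scope):

    #include <map>
    #include <string>
    // schema::ActivationType comes from schema/inner/model_generated.h, as in the patch.

    // C++17: one shared, immutable table across all translation units.
    inline const std::map<std::string, schema::ActivationType> kActivationTypeMap = {
        {"ReLU", schema::ActivationType_RELU},
        {"ReLU6", schema::ActivationType_RELU6},
        {"Sigmoid", schema::ActivationType_SIGMOID}};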
@@ -94,7 +98,7 @@ class PrimitiveC : public mindspore::Primitive {
   std::vector<std::vector<schema::QuantParamT>> GetOutputQuantParams() const;

-  void SetQuantType(schema::QuantType quant_type);
+  void SetQuantType(const schema::QuantType &quant_type);

   schema::QuantType GetQuantType() const;

@@ -110,7 +114,11 @@ class PrimitiveC : public mindspore::Primitive {
   static PrimitiveC *UnPackFromSchemaPrimitiveT(mindspore::schema::PrimitiveT *primitive);

-  static std::shared_ptr<PrimitiveC> UnPackFromPrimitive(const Primitive &prim, const std::vector<AnfNodePtr> &inputs);
+  static std::shared_ptr<PrimitiveC> UnPackFromPrimitive(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
+                                                         const schema::QuantType &quantType);
+
+  void PopulaterQuantParam(const Primitive &prim, std::vector<std::vector<schema::QuantParamT>> *vecInputQuantParam,
+                           std::vector<std::vector<schema::QuantParamT>> *vecOutputQuantParam);
+  void CalQuantParam(const double &mean, const double &stdDev, float *mMin, float *mMax);

  protected:
   virtual int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { return RET_ERROR; }
@@ -71,8 +71,8 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &me
   auto input_quant_params = primitive->GetInputQuantParams();
   auto node_type = (schema::PrimitiveType)primitive->Type();
   if (input_quant_params.empty()) {
-    MS_LOG(ERROR) << "node: " << dst_node->name << " input quant params is empty";
-    return RET_ERROR;
+    MS_LOG(WARNING) << "node: " << dst_node->name << " input quant params is empty";
+    return RET_OK;
   }
   for (size_t i = 0; i < input_quant_params.size(); i++) {
     if (i >= dst_node->inputIndex.size()) {
@@ -473,7 +473,7 @@ CNodePtr AnfImporterFromProtobuf::BuildCNodeForFuncGraph(const FuncGraphPtr &out
     }
     inputs.push_back(anfnode_build_map_[input_name]);
   }
-  auto primitivec_ptr = PrimitiveC::UnPackFromPrimitive(*prim, inputs);
+  auto primitivec_ptr = PrimitiveC::UnPackFromPrimitive(*prim, inputs, quantType);
   if (primitivec_ptr == nullptr) {
     MS_LOG(ERROR) << "Create PrimitiveC return nullptr, " << prim->name();
     return nullptr;
@@ -50,7 +50,8 @@ static const std::vector<schema::PrimitiveType> int8OpList = {
     schema::PrimitiveType_Mul, schema::PrimitiveType_Slice,
     schema::PrimitiveType_SoftMax, schema::PrimitiveType_Split,
     schema::PrimitiveType_Squeeze, schema::PrimitiveType_Sub,
-    schema::PrimitiveType_TopK, schema::PrimitiveType_Unsqueeze};
+    schema::PrimitiveType_TopK, schema::PrimitiveType_Unsqueeze,
+    schema::PrimitiveType_MatMul};

 static const std::vector<schema::PrimitiveType> needInsertOpList = {
     schema::PrimitiveType_Eltwise, schema::PrimitiveType_Activation,
@@ -155,11 +155,11 @@ STATUS WeightFormatHardCodePass::HardCodeMS(const std::unique_ptr<CNodeT> &node,
   switch (this->quantType) {
     case QuantType_AwareTraining: {
       if (opType == schema::PrimitiveType_Conv2D) {
-        weightTensor->format = schema::Format_HWCK;
+        weightTensor->format = schema::Format_KCHW;
       } else if (opType == PrimitiveType_DepthwiseConv2D) {
         weightTensor->format = Format_CKHW;
       } else {
-        weightTensor->format = schema::Format_HWKC;
+        weightTensor->format = schema::Format_KCHW;
       }
     } break;
     case QuantType_QUANT_NONE: {
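For readers keeping track of the format tags: under the usual reading, K is the output-channel count, C the input-channel count, and H/W the kernel's spatial dims, so KCHW stores one complete output filter contiguously while HWCK scatters it across the spatial positions. A small sketch of the flattened offsets under each convention (assuming that reading; the helper names are illustrative):

    #include <cstddef>

    // KCHW: weights[k][c][h][w]
    inline size_t OffsetKCHW(int k, int c, int h, int w, int C, int H, int W) {
      return ((static_cast<size_t>(k) * C + c) * H + h) * W + w;
    }

    // HWCK: weights[h][w][c][k]
    inline size_t OffsetHWCK(int k, int c, int h, int w, int W, int C, int K) {
      return ((static_cast<size_t>(h) * W + w) * C + c) * K + k;
    }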
@@ -36,15 +36,13 @@ using std::vector;
 namespace mindspore::lite::quant {

 const std::array<schema::PrimitiveType, 7> AwareQuantizer::propagatedOps = {
-    {schema::PrimitiveType_Concat, schema::PrimitiveType_Resize,
-     schema::PrimitiveType_Reshape, schema::PrimitiveType_Squeeze,
-     schema::PrimitiveType_RealDiv, schema::PrimitiveType_Activation,
-     schema::PrimitiveType_DetectionPostProcess}};
+    {schema::PrimitiveType_Concat, schema::PrimitiveType_Resize, schema::PrimitiveType_Reshape,
+     schema::PrimitiveType_Squeeze, schema::PrimitiveType_RealDiv, schema::PrimitiveType_Activation,
+     schema::PrimitiveType_DetectionPostProcess}};

 STATUS InputArray::InitQuantParam() {
   this->quantParam = std::make_unique<schema::QuantParamT>();
-  auto status = CalQuantizationParams(quantParam.get(), mMin, mMax,
-                                      narrowRange, numBits);
+  auto status = CalQuantizationParams(quantParam.get(), mMin, mMax, narrowRange, numBits);
   if (status != RET_OK) {
     return status;
   }

@@ -58,8 +56,7 @@ STATUS InputArray::SetInputArrayQP(schema::MetaGraphT *graph, size_t inputTensor
   if (!tensor->quantParams.empty()) {
     auto param = GetTensorQuantParam(tensor);
     if (param != nullptr && param->inited) {
-      MS_LOG(DEBUG) << "tensor " << inputTensorIdx
-                    << " already has quantParam";
+      MS_LOG(DEBUG) << "tensor " << inputTensorIdx << " already has quantParam";
       return RET_OK;
     }
     tensor->quantParams.clear();

@@ -74,9 +71,7 @@ STATUS InputArray::SetInputArrayQP(schema::MetaGraphT *graph, size_t inputTensor
   return RET_OK;
 }

-AwareQuantizer::AwareQuantizer(schema::MetaGraphT *graph,
-                               const string &inputInferType,
-                               const string &stdValues,
+AwareQuantizer::AwareQuantizer(schema::MetaGraphT *graph, const string &inputInferType, const string &stdValues,
                                const string &meanValues)
     : FbQuantizer(graph) {
   MS_ASSERT(graph != nullptr);
@@ -94,12 +89,9 @@ AwareQuantizer::AwareQuantizer(schema::MetaGraphT *graph,
   mInputArray->InitQuantParam();
 }

-STATUS AwareQuantizer::RemoveFakeQuant() {
-  return RET_OK;
-}
+STATUS AwareQuantizer::RemoveFakeQuant() { return RET_OK; }

-STATUS AwareQuantizer::GenerateDefaultQuantParam(
-    const schema::MetaGraphT *subGraph) {
+STATUS AwareQuantizer::GenerateDefaultQuantParam(const schema::MetaGraphT *subGraph) {
   MS_ASSERT(subGraph != nullptr);
   for (const auto &tensor : subGraph->allTensors) {
     if (!tensor->quantParams.empty()) {

@@ -111,8 +103,7 @@ STATUS AwareQuantizer::GenerateDefaultQuantParam(
   return RET_OK;
 }

-STATUS AwareQuantizer::SetAttrToConvolution(const schema::MetaGraphT *subGraph,
-                                            schema::CNodeT *node) {
+STATUS AwareQuantizer::SetAttrToConvolution(const schema::MetaGraphT *subGraph, schema::CNodeT *node) {
   // MS_ASSERT(subGraph != nullptr);
   // MS_ASSERT(node != nullptr);
   // auto inputIndexes = node->inputIndex;
@@ -193,19 +184,15 @@ STATUS AwareQuantizer::GenerateQuantParam() {
         GetCNodeTType(*node) == schema::PrimitiveType_FakeQuantWithMinMaxVars) {
       MS_ASSERT(false);
     }
-    auto *quantParamCalcer =
-        quantParamRegister->GetQuantParamCalcer(GetCNodeTType(*node));
+    auto *quantParamCalcer = quantParamRegister->GetQuantParamCalcer(GetCNodeTType(*node));
     if (quantParamCalcer == nullptr) {
-      MS_LOG(ERROR) << "Can not find QuantParamCalcer for "
-                    << node->name.c_str()
-                    << ", type: " << GetCNodeTTypeName(*node).c_str()
-                    << " set node to QuantNone and skip";
+      MS_LOG(ERROR) << "Can not find QuantParamCalcer for " << node->name.c_str()
+                    << ", type: " << GetCNodeTTypeName(*node).c_str() << " set node to QuantNone and skip";
       node->quantType = static_cast<schema::QuantType>(QuantType_QUANT_NONE);
     } else {
       status = quantParamCalcer->Calc(graph, *node);
       if (status != RET_OK) {
-        MS_LOG(ERROR) << "quantParamCalcer failed: " << status
-                      << " node: " << node->name.c_str();
+        MS_LOG(ERROR) << "quantParamCalcer failed: " << status << " node: " << node->name.c_str();
         node->quantType = schema::QuantType_QUANT_NONE;
       } else {
         node->quantType = schema::QuantType_AwareTraining;
@@ -227,11 +214,11 @@ STATUS AwareQuantizer::DoQuantize() {
     STATUS status;
     if (GetCNodeTType(*node) == schema::PrimitiveType_Conv2D ||
         GetCNodeTType(*node) == schema::PrimitiveType_DepthwiseConv2D ||
-        GetCNodeTType(*node) == schema::PrimitiveType_FullConnection) {
+        GetCNodeTType(*node) == schema::PrimitiveType_FullConnection ||
+        GetCNodeTType(*node) == schema::PrimitiveType_MatMul) {
       auto inputIndexes = node->inputIndex;
       if (inputIndexes.size() < 2) {
-        MS_LOG(ERROR) << node->name.c_str()
-                      << " node input has invalid inputs tensor count";
+        MS_LOG(ERROR) << node->name.c_str() << " node input has invalid inputs tensor count";
         return RET_ERROR;
       }
       // quant weight

@@ -248,8 +235,7 @@ STATUS AwareQuantizer::DoQuantize() {
         return RET_ERROR;
       }
     }
-    } else if (GetCNodeTType(*node) ==
-               schema::PrimitiveType_DetectionPostProcess) {
+    } else if (GetCNodeTType(*node) == schema::PrimitiveType_DetectionPostProcess) {
       status = QuantDetectionPostProcessConstTensor(graph, node.get());
      if (status != RET_OK) {
        MS_LOG(ERROR) << "QuantDetectionPostProcessConstTensor failed!";

@@ -275,8 +261,7 @@ STATUS AwareQuantizer::DoQuantize() {
   return RET_OK;
 }
-STATUS AwareQuantizer::QuantAddConstTensor(const schema::MetaGraphT *graph,
-                                           schema::CNodeT *node) {
+STATUS AwareQuantizer::QuantAddConstTensor(const schema::MetaGraphT *graph, schema::CNodeT *node) {
   MS_ASSERT(graph != nullptr);
   MS_ASSERT(node != nullptr);
   for (size_t i = 0; i < node->inputIndex.size(); i++) {

@@ -295,8 +280,7 @@ STATUS AwareQuantizer::QuantAddConstTensor(const schema::MetaGraphT *graph,
       void *inData = inTensor->data.data();
       auto *castedInData = static_cast<float *>(inData);
       for (size_t j = 0; j < constTensorShapeSize; j++) {
-        qDatas[j] =
-            QuantizeData<uint8_t>(castedInData[j], quantParam.get());
+        qDatas[j] = QuantizeData<uint8_t>(castedInData[j], quantParam.get());
       }
       inTensor->data = std::move(qDatas);
       inTensor->dataType = kNumberTypeUInt8;

@@ -312,17 +296,14 @@ STATUS AwareQuantizer::QuantAddConstTensor(const schema::MetaGraphT *graph,
   return RET_OK;
 }
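The QuantizeData<uint8_t> call above performs what is, in the standard formulation, the affine quantization q = clamp(round(x / scale) + zero_point, 0, 255). A self-contained sketch under that assumption (the real template lives in quantize_util.h and may differ in rounding details):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Affine-quantize a float to uint8 given scale and zero point.
    uint8_t QuantizeToUint8(float value, double scale, int zero_point) {
      int q = static_cast<int>(std::round(value / scale)) + zero_point;
      return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }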
-STATUS AwareQuantizer::QuantDetectionPostProcessConstTensor(
-    const schema::MetaGraphT *subGraph, schema::CNodeT *node) {
+STATUS AwareQuantizer::QuantDetectionPostProcessConstTensor(const schema::MetaGraphT *subGraph, schema::CNodeT *node) {
   MS_ASSERT(subGraph != nullptr);
   MS_ASSERT(node != nullptr);
   auto &constTensor = subGraph->allTensors.at(node->inputIndex[2]);
   MS_ASSERT(constTensor != nullptr);
-  const auto *constData =
-      reinterpret_cast<const float *>(constTensor->data.data());
+  const auto *constData = reinterpret_cast<const float *>(constTensor->data.data());

-  if (constTensor->refCount == 999 &&
-      constTensor->dataType == TypeId::kNumberTypeFloat) {
+  if (constTensor->nodeType == schema::NodeType_ValueNode && constTensor->dataType == TypeId::kNumberTypeFloat) {
     size_t constTensorShapeSize = GetShapeSize(*constTensor);
     std::unique_ptr<QuantParamT> quantParam = GetTensorQuantParam(constTensor);
     if (quantParam == nullptr) {

@@ -340,8 +321,7 @@ STATUS AwareQuantizer::QuantDetectionPostProcessConstTensor(
   return RET_OK;
 }
-STATUS AwareQuantizer::QuantConvBias(const mindspore::schema::MetaGraphT *graph,
-                                     mindspore::schema::CNodeT *node) {
+STATUS AwareQuantizer::QuantConvBias(const mindspore::schema::MetaGraphT *graph, mindspore::schema::CNodeT *node) {
   MS_ASSERT(graph != nullptr);
   MS_ASSERT(node != nullptr);
   auto inputIndexes = node->inputIndex;

@@ -351,15 +331,10 @@ STATUS AwareQuantizer::QuantConvBias(const mindspore::schema::MetaGraphT *graph,
   MS_ASSERT(graph->allTensors.size() > inputIndexes.at(2));
   auto &biasTensor = graph->allTensors.at(inputIndexes.at(2));
   MS_ASSERT(biasTensor != nullptr);
-  if (biasTensor->dataType != TypeId::kNumberTypeFloat) {
-    // MS_LOGD("conv %s's bias data is not float", node->name.c_str());
-    return RET_OK;
-  }
   if (biasTensor->dataType == TypeId::kNumberTypeInt32) {
     return RET_OK;
   }
-  if (biasTensor->dataType != TypeId::kNumberTypeFloat) {
+  if (biasTensor->dataType != TypeId::kNumberTypeFloat && biasTensor->dataType != TypeId::kNumberTypeFloat32) {
     // MS_LOGE("conv %s's bias data is not float", node->name.c_str());
     return RET_ERROR;
   }
@@ -400,8 +375,8 @@ STATUS AwareQuantizer::QuantConvBias(const mindspore::schema::MetaGraphT *graph,
   biasTensor->dataType = TypeId::kNumberTypeInt32;
   biasTensor->data.clear();
   biasTensor->data.resize(bShapeSize * sizeof(int32_t));
-  auto ret = memcpy_s(biasTensor->data.data(), bShapeSize * sizeof(int32_t),
-                      qDatas.get(), bShapeSize * sizeof(int32_t));
+  auto ret =
+      memcpy_s(biasTensor->data.data(), bShapeSize * sizeof(int32_t), qDatas.get(), bShapeSize * sizeof(int32_t));
   if (ret != EOK) {
     MS_LOG(ERROR) << "memcpy_s failed: " << ret;
     return RET_ERROR;

@@ -409,12 +384,10 @@ STATUS AwareQuantizer::QuantConvBias(const mindspore::schema::MetaGraphT *graph,
   return RET_OK;
 }
-STATUS AwareQuantizer::QuantConvWeight(const schema::MetaGraphT *subGraph,
-                                       schema::CNodeT *node) {
+STATUS AwareQuantizer::QuantConvWeight(const schema::MetaGraphT *subGraph, schema::CNodeT *node) {
   MS_ASSERT(subGraph != nullptr);
   MS_ASSERT(node != nullptr);
-  MS_ASSERT(node->quantParam.size() ==
-            node->inputIndex.size() + node->outputIndex.size());
+  MS_ASSERT(node->quantParam.size() == node->inputIndex.size() + node->outputIndex.size());
   auto inputIndexes = node->inputIndex;
   MS_ASSERT(inputIndexes.size() >= 2);
   MS_ASSERT(subGraph->allTensors.size() > inputIndexes.at(1));

@@ -422,11 +395,9 @@ STATUS AwareQuantizer::QuantConvWeight(const schema::MetaGraphT *subGraph,
   if (weightTensor->dataType == TypeId::kNumberTypeInt8) {
     return RET_OK;
   }
-  if (weightTensor->dataType != TypeId::kNumberTypeFloat32 &&
-      weightTensor->dataType != TypeId::kNumberTypeFloat &&
+  if (weightTensor->dataType != TypeId::kNumberTypeFloat32 && weightTensor->dataType != TypeId::kNumberTypeFloat &&
       weightTensor->dataType != TypeId::kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "conv " << node->name.c_str()
-                  << "'s weight data is not float or uint8";
+    MS_LOG(ERROR) << "conv " << node->name.c_str() << "'s weight data is not float or uint8";
     return RET_ERROR;
   }
   size_t wShapeSize = GetShapeSize(*(weightTensor.get()));
@@ -434,8 +405,8 @@ STATUS AwareQuantizer::QuantConvWeight(const schema::MetaGraphT *subGraph,
   MS_ASSERT(node->quantParam.at(1)->param.front() != nullptr);
   vector<int8_t> qDatas(wShapeSize);
   auto weightQauntParam = GetTensorQuantParam(weightTensor);
-  if (weightTensor->dataType ==
-      TypeId::kNumberTypeFloat) {  // normal awareing quant
+  if (weightTensor->dataType == TypeId::kNumberTypeFloat ||
+      weightTensor->dataType == TypeId::kNumberTypeFloat32) {  // normal awareing quant
     auto *weightData = static_cast<float *>(oriWeightData);
     for (size_t j = 0; j < wShapeSize; j++) {
       qDatas[j] = QuantizeData<int8_t>(weightData[j], weightQauntParam.get());
@@ -463,8 +434,7 @@ STATUS AwareQuantizer::DetermineNodeQuantType() {
       MS_ASSERT(graph->allTensors.size() > inTensorIdx);
       auto &inTensor = graph->allTensors.at(inTensorIdx);
       MS_ASSERT(inTensor != nullptr);
-      if (inTensor->quantParams.empty() ||
-          inTensor->quantParams.front() == nullptr ||
+      if (inTensor->quantParams.empty() || inTensor->quantParams.front() == nullptr ||
           !inTensor->quantParams.front()->inited) {
         canQuant = false;
         break;

@@ -476,8 +446,7 @@ STATUS AwareQuantizer::DetermineNodeQuantType() {
       MS_ASSERT(graph->allTensors.size() > outTensorIdx);
       auto &outTensor = graph->allTensors.at(outTensorIdx);
       MS_ASSERT(outTensor != nullptr);
-      if (outTensor->quantParams.empty() ||
-          outTensor->quantParams.front() == nullptr ||
+      if (outTensor->quantParams.empty() || outTensor->quantParams.front() == nullptr ||
          !outTensor->quantParams.front()->inited) {
        canQuant = false;
        break;
@@ -13,13 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include "mindspore/lite/tools/converter/quantizer/quantize_util.h"
 #include <cmath>
 #include <string>
 #include <algorithm>
 #include <memory>
 #include <vector>
 #include "src/ops/primitive_c.h"
-#include "mindspore/lite/tools/converter/quantizer/quantize_util.h"
 #include "mindspore/lite/tools/converter/quantizer/general_bitpacking.h"
 #include "src/common/utils.h"
 #include "abstract/abstract_value.h"
@@ -32,7 +33,7 @@ namespace mindspore {
 namespace lite {
 namespace quant {
 const std::array<std::string, 4> QuantStrategy::mConvTypes = {
-  {"Conv2D", "DeConv2D", "DepthwiseConv2D", "DeDepthwiseConv2D"}};
+    {"Conv2D", "DeConv2D", "DepthwiseConv2D", "DeDepthwiseConv2D"}};
 const std::array<std::string, 4> QuantStrategy::mMulTypes = {{"Mul", "MatMul", "BatchMatMul", "FullConnection"}};
 QuantStrategy::QuantStrategy(size_t weightSize, size_t convWeightQuantChannelThreshold)
@@ -99,10 +100,9 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const {
     schema::PrimitiveType_Nchw2Nhwc, schema::PrimitiveType_Nhwc2Nchw,
     schema::PrimitiveType_Conv2D, schema::PrimitiveType_DepthwiseConv2D,
     schema::PrimitiveType_Add, schema::PrimitiveType_Pooling,
-    schema::PrimitiveType_Concat, /*schema::PrimitiveType_SoftMax,*/
+    schema::PrimitiveType_Concat, /*schema::PrimitiveType_SoftMax,*/
     schema::PrimitiveType_Reshape, schema::PrimitiveType_FullConnection,
-    schema::PrimitiveType_MatMul,
-    schema::PrimitiveType_Activation};
+    schema::PrimitiveType_MatMul, schema::PrimitiveType_Activation};
   return IsContain(uint8OpList, type);
 }
@@ -164,8 +164,8 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const {
   return true;
 }
-STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange,
-                             int quant_max, int quant_min, int num_bits) {
+STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
+                             int quant_min, int num_bits) {
   MS_ASSERT(quantParam != nullptr);
   if (mMin > 0.0f) {
     MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision";
@@ -216,8 +216,7 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl
   return RET_OK;
 }
-STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax,
-                             bool narrowRange, int numBits) {
+STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int numBits) {
   MS_ASSERT(quantParam != nullptr);
   if (mMin > 0.0f) {
     MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision";
@@ -246,8 +245,8 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl
     return RET_OK;
   }
-  int quantMin = narrowRange ? 1 : 0 - 128;
-  int quantMax = (1 << (unsigned int) numBits) - 1 - 128;
+  const int8_t quantMin = std::numeric_limits<int8_t>::min() + (narrowRange ? 1 : 0);
+  const int8_t quantMax = std::numeric_limits<int8_t>::max();
   auto quantMinFloat = static_cast<double>(quantMin);
   auto quantMaxFloat = static_cast<double>(quantMax);
   double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat);
@@ -264,6 +263,9 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl
   } else {
     zeroPoint = static_cast<int32_t>(std::round(zpDouble));
   }
+  if (std::abs(mMin) == std::abs(mMax)) {
+    zeroPoint = 0;
+  }
   // The zero point should always be in the range of quantized value,
   // [qmin, qmax].
   MS_ASSERT(zeroPoint >= quantMin);
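The net effect of this hunk: the quantized domain is now the signed int8 range rather than the old [0, 255] shifted by 128, and an exactly symmetric float range pins the zero point to 0. A minimal sketch of the resulting parameter math, assuming 8-bit quantization (CalcInt8Params and the simplified QuantParams struct are illustrative, not the schema types):

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

struct QuantParams {
  double scale;
  int32_t zeroPoint;
};

QuantParams CalcInt8Params(double mMin, double mMax, bool narrowRange) {
  const int quantMin = std::numeric_limits<int8_t>::min() + (narrowRange ? 1 : 0);  // -128 or -127
  const int quantMax = std::numeric_limits<int8_t>::max();                          // 127
  const double scale = (mMax - mMin) / (quantMax - quantMin);
  int32_t zeroPoint = static_cast<int32_t>(std::round(quantMin - mMin / scale));
  if (std::abs(mMin) == std::abs(mMax)) {
    zeroPoint = 0;  // symmetric float range => symmetric quantization
  }
  return {scale, zeroPoint};
}
```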
@@ -23,6 +23,7 @@
 #include <array>
 #include <vector>
 #include <algorithm>
+#include <limits>
 #include "tools/converter/quantizer/quantizer.h"
 #include "src/ops/primitive_c.h"
 #include "include/errorcode.h"
@@ -75,13 +76,15 @@ T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
   const auto zeroPoint = quantParam->zeroPoint;
   const auto numBit = quantParam->numBits;
   const auto narrowRange = quantParam->narrowRange;
-  const double maxLimit = static_cast<float>((1 << (unsigned int)numBit) - 1 - zeroPoint) * scale;
+  double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1);
+  const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<int8_t>::min()) * scale;
   double minLimit;
   if (narrowRange) {
-    minLimit = static_cast<float>(1 - zeroPoint) * scale;
+    minLimit = static_cast<float>(std::numeric_limits<int8_t>::min() + 1 - zeroPoint) * scale;
   } else {
-    minLimit = static_cast<float>(0 - zeroPoint) * scale;
+    minLimit = static_cast<float>(std::numeric_limits<int8_t>::min() - zeroPoint) * scale;
   }
   return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
     double tmp = 0.0f;
     if (originData > maxLimit) {
@@ -91,10 +94,7 @@ T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
     } else {
       tmp = originData;
     }
-    auto quantData = static_cast<T>(std::round(tmp / scale + zeroPoint));
-    if (quantData == 0 && narrowRange) {
-      quantData++;
-    }
+    auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale));
     return quantData;
   }();
 }
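Taken together with the CalQuantizationParams change, the clamp limits in QuantizeData<T> now describe the signed int8 codomain, and the old bump of a zero result under narrowRange is gone. A minimal sketch under those assumptions (QuantizeOne is illustrative; the real template reads scale, zeroPoint, numBit, and narrowRange from schema::QuantParamT):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

int8_t QuantizeOne(float originData, double scale, int32_t zeroPoint, unsigned numBit, bool narrowRange) {
  const int kInt8Min = std::numeric_limits<int8_t>::min();  // -128
  // Largest/smallest float values representable once the range is shifted into int8.
  const double maxLimit = ((1 << numBit) - 1 - zeroPoint + kInt8Min) * scale;
  const double minLimit = (kInt8Min + (narrowRange ? 1 : 0) - zeroPoint) * scale;
  const double tmp = std::min(std::max(static_cast<double>(originData), minLimit), maxLimit);
  // Note: no special-casing of a zero result anymore; clamping alone enforces the range.
  return static_cast<int8_t>(std::round(zeroPoint + tmp / scale));
}
```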