Merge pull request !7404 from ghzl/percert-methodtags/v1.1.0
| @@ -54,6 +54,21 @@ STATUS DivergInfo::RecordMaxValue(const std::vector<float> &datas) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| STATUS DivergInfo::RecordMaxValueArray(const std::vector<float> &datas) { | |||||
| if (datas.size() == 0) { | |||||
| return RET_ERROR; | |||||
| } | |||||
| float max_num = datas.at(0); | |||||
| float min_num = datas.at(0); | |||||
| for (float data : datas) { | |||||
| max_num = std::max(data, max_num); | |||||
| min_num = std::min(data, min_num); | |||||
| } | |||||
| this->max_datas.emplace_back(max_num); | |||||
| this->min_datas.emplace_back(min_num); | |||||
| return RET_OK; | |||||
| } | |||||
| void DivergInfo::UpdateInterval() { | void DivergInfo::UpdateInterval() { | ||||
| auto max_value = std::max(fabs(this->max), fabs(this->min)); | auto max_value = std::max(fabs(this->max), fabs(this->min)); | ||||
| this->interval = max_value / static_cast<float>(bin_num); | this->interval = max_value / static_cast<float>(bin_num); | ||||
| @@ -85,6 +100,12 @@ STATUS DivergInfo::ComputeThreshold() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| if (method_x == kMethodOutlier) { | |||||
| this->percent_result = PercentMethod(min_datas, max_datas); | |||||
| this->best_T = std::max(std::fabs(percent_result.first), std::fabs(percent_result.second)); | |||||
| return RET_OK; | |||||
| } | |||||
| constexpr int quant_bint_nums = 128; | constexpr int quant_bint_nums = 128; | ||||
| int threshold = quant_bint_nums; | int threshold = quant_bint_nums; | ||||
| float min_kl = FLT_MAX; | float min_kl = FLT_MAX; | ||||
| @@ -195,8 +216,14 @@ std::pair<CNodePtr, float> DivergInfo::GetScale() { | |||||
| float max_value = this->best_T; | float max_value = this->best_T; | ||||
| float min_value = -max_value; | float min_value = -max_value; | ||||
| if (this->method_x == kMethodOutlier) { | |||||
| min_value = percent_result.first; | |||||
| max_value = percent_result.second; | |||||
| } | |||||
| MS_ASSERT(quant_max - quant_min != 0); | MS_ASSERT(quant_max - quant_min != 0); | ||||
| float scale = (max_value - min_value) / (quant_max - quant_min); | float scale = (max_value - min_value) / (quant_max - quant_min); | ||||
| this->scale_tmp = scale; | |||||
| MS_ASSERT(scale != 0); | MS_ASSERT(scale != 0); | ||||
| return std::make_pair(this->cnode, scale); | return std::make_pair(this->cnode, scale); | ||||
| } | } | ||||
| @@ -210,6 +237,10 @@ std::pair<CNodePtr, int32_t> DivergInfo::GetZeropoint() { | |||||
| } else { | } else { | ||||
| MS_LOG(WARNING) << "unexpectd quant range, quant_min: " << quant_min << " quant_max: " << quant_max; | MS_LOG(WARNING) << "unexpectd quant range, quant_min: " << quant_min << " quant_max: " << quant_max; | ||||
| } | } | ||||
| if (this->method_x == kMethodOutlier) { | |||||
| zero_point = std::round(quant_max - percent_result.second / scale_tmp); | |||||
| } | |||||
| return std::make_pair(this->cnode, zero_point); | return std::make_pair(this->cnode, zero_point); | ||||
| } | } | ||||
| @@ -267,6 +298,7 @@ STATUS Calibrator::RecordMaxValue(const std::string &op_name, const vector<float | |||||
| auto got = (*diverg_info).find(op_name); | auto got = (*diverg_info).find(op_name); | ||||
| if (got != (*diverg_info).end()) { | if (got != (*diverg_info).end()) { | ||||
| ((*got).second)->RecordMaxValue(data); | ((*got).second)->RecordMaxValue(data); | ||||
| ((*got).second)->RecordMaxValueArray(data); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -445,7 +477,7 @@ STATUS Calibrator::ReadConfig() { | |||||
| } else if (key == "thread_num") { | } else if (key == "thread_num") { | ||||
| config_param_.thread_num = std::stoul(value); | config_param_.thread_num = std::stoul(value); | ||||
| } else if (key == "method_x") { | } else if (key == "method_x") { | ||||
| if (value != kMethodKL && value != kMethodMaxMin) { | |||||
| if (value != kMethodKL && value != kMethodMaxMin && value != kMethodOutlier) { | |||||
| MS_LOG(WARNING) << "unsupported method_x: " << value << ". Use default value."; | MS_LOG(WARNING) << "unsupported method_x: " << value << ". Use default value."; | ||||
| } else { | } else { | ||||
| config_param_.method_x = value; | config_param_.method_x = value; | ||||
| @@ -42,6 +42,7 @@ struct MaxMin { | |||||
| const char kMethodMaxMin[] = "MAX_MIN"; | const char kMethodMaxMin[] = "MAX_MIN"; | ||||
| const char kMethodKL[] = "KL"; | const char kMethodKL[] = "KL"; | ||||
| const char kMethodOutlier[] = "RemovalOutlier"; | |||||
| constexpr int kDefaultBinNumber = 2048; | constexpr int kDefaultBinNumber = 2048; | ||||
| struct ConfigParam { | struct ConfigParam { | ||||
| @@ -127,6 +128,10 @@ struct DivergInfo { | |||||
| int quant_max = 255; | int quant_max = 255; | ||||
| int quant_min = 0; | int quant_min = 0; | ||||
| std::string method_x = kMethodKL; | std::string method_x = kMethodKL; | ||||
| std::vector<float> min_datas; | |||||
| std::vector<float> max_datas; | |||||
| std::pair<float, float> percent_result{0.0, 0.0}; | |||||
| float scale_tmp = 0; | |||||
| DivergInfo(CNodePtr cnode, int bins, size_t bits, int quant_max, int quant_min, const std::string &method_x) { | DivergInfo(CNodePtr cnode, int bins, size_t bits, int quant_max, int quant_min, const std::string &method_x) { | ||||
| this->method_x = method_x; | this->method_x = method_x; | ||||
| @@ -143,6 +148,8 @@ struct DivergInfo { | |||||
| STATUS RecordMaxValue(const std::vector<float> &datas); | STATUS RecordMaxValue(const std::vector<float> &datas); | ||||
| STATUS RecordMaxValueArray(const std::vector<float> &datas); | |||||
| void UpdateInterval(); | void UpdateInterval(); | ||||
| STATUS UpdateHistogram(const std::vector<float> &data); | STATUS UpdateHistogram(const std::vector<float> &data); | ||||
| @@ -304,6 +304,74 @@ STATUS PostBitPack(float *weight, size_t shapeSize, size_t bitNum) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| bool SearchLowerBound(const std::vector<float> &data, const size_t &index, const float &max_tmp, float *min_tmp, | |||||
| size_t *min_idx) { | |||||
| size_t length = data.size(); | |||||
| if (max_tmp - data.at(index) < delta) { | |||||
| return false; | |||||
| } | |||||
| float range_ratio = (data.at(index) - *min_tmp) / (max_tmp - *min_tmp); | |||||
| float index_ratio = static_cast<float>(index - *min_idx) / (length - *min_idx); | |||||
| if (index_ratio > 0 && range_ratio / index_ratio > ratio) { | |||||
| *min_idx = index; | |||||
| *min_tmp = data.at(index); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool SearchUpperBound(const std::vector<float> &data, const size_t &index, float *max_tmp, const float &min_tmp, | |||||
| size_t *max_idx) { | |||||
| size_t length = data.size(); | |||||
| if (data.at(index) - min_tmp < delta) { | |||||
| return false; | |||||
| } | |||||
| float range_ratio = (*max_tmp - data.at(index)) / (*max_tmp - min_tmp); | |||||
| float index_ratio = static_cast<float>(index - *max_idx) / (length - *max_idx); | |||||
| if (index_ratio > 0 && range_ratio / index_ratio > ratio) { | |||||
| *max_idx = index; | |||||
| *max_tmp = data.at(index); | |||||
| } | |||||
| return true; | |||||
| } | |||||
// Returns the `outlier_percent`-th percentile of `datas`.
//
// The lookup is purely positional, so `datas` is expected to be sorted in
// ascending order (PercentMethod sorts before calling). With
// rank = outlier_percent / 100 * size:
//   - a fractional rank yields the element at ceil(rank) - 1;
//   - an integral rank yields the mean of the elements at rank - 1 and rank.
//
// Edge cases that previously indexed out of range (and threw from at()):
//   - empty `datas` returns 0;
//   - outlier_percent <= 0 returns the first element;
//   - outlier_percent >= 100 returns the last element.
float CalPercentile(const std::vector<float> &datas, const int &outlier_percent) {
  if (datas.empty()) {
    return 0.0f;
  }
  if (outlier_percent <= 0) {
    return datas.front();
  }
  if (outlier_percent >= 100) {
    return datas.back();
  }
  const int size = static_cast<int>(datas.size());
  const float val = outlier_percent / 100.0 * size;
  const int index = static_cast<int>(std::ceil(val));
  // Clamp both positions: float rounding of the rank on very large inputs must
  // not be able to push an index outside [0, size - 1].
  const int last = size - 1;
  const int lower = std::min(std::max(index - 1, 0), last);
  if (index - val > 0) {
    return datas[lower];
  }
  const int upper = std::min(index, last);
  return (datas[lower] + datas[upper]) / 2;
}
| std::pair<float, float> PercentMethod(std::vector<float> min_datas, std::vector<float> max_datas) { | |||||
| std::sort(max_datas.begin(), max_datas.end()); | |||||
| std::sort(min_datas.begin(), min_datas.end()); | |||||
| float min_val = CalPercentile(min_datas, percent); | |||||
| float max_val = CalPercentile(max_datas, 100 - percent); | |||||
| std::reverse(max_datas.begin(), max_datas.end()); | |||||
| MS_ASSERT(min_val < max_val); | |||||
| MS_ASSERT(min_datas.size() == max_datas.size()); | |||||
| float min_tmp = min_val; | |||||
| float max_tmp = max_val; | |||||
| size_t min_idx = 0; | |||||
| size_t max_idx = 0; | |||||
| size_t length = min_datas.size(); | |||||
| for (size_t i = 0; i < length; i++) { | |||||
| if (!SearchLowerBound(min_datas, i, max_tmp, &min_tmp, &min_idx)) { | |||||
| break; | |||||
| } | |||||
| if (!SearchUpperBound(min_datas, i, &max_tmp, min_tmp, &max_idx)) { | |||||
| break; | |||||
| } | |||||
| } | |||||
| std::pair<float, float> result{min_tmp, max_tmp}; | |||||
| return result; | |||||
| } | |||||
| } // namespace quant | } // namespace quant | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <limits> | #include <limits> | ||||
| #include <utility> | |||||
| #include "tools/converter/quantizer/quantizer.h" | #include "tools/converter/quantizer/quantizer.h" | ||||
| #include "src/ops/primitive_c.h" | #include "src/ops/primitive_c.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| @@ -61,12 +62,26 @@ class QuantStrategy { | |||||
| static const std::vector<schema::PrimitiveType> mul_types; | static const std::vector<schema::PrimitiveType> mul_types; | ||||
| }; | }; | ||||
| constexpr float delta = 0.1; | |||||
| constexpr float ratio = 10.0; | |||||
| constexpr int percent = 10; | |||||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | ||||
| int quant_min, int num_bits); | int quant_min, int num_bits); | ||||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false, | STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false, | ||||
| int numBits = UINT8_QUANTIZATION); | int numBits = UINT8_QUANTIZATION); | ||||
| bool SearchLowerBound(const std::vector<float> &data, const size_t &index, const float &max_tmp, float *min_tmp, | |||||
| size_t *min_idx); | |||||
| bool SearchUpperBound(const std::vector<float> &data, const size_t &index, float *max_tmp, const float &min_tmp, | |||||
| size_t *max_idx); | |||||
| float CalPercentile(const std::vector<float> &datas, const int &percent); | |||||
| std::pair<float, float> PercentMethod(std::vector<float> min_datas, std::vector<float> max_datas); | |||||
| template <typename T> | template <typename T> | ||||
| T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | ||||
| MS_ASSERT(quantParam != nullptr); | MS_ASSERT(quantParam != nullptr); | ||||