| @@ -34,6 +34,7 @@ table QuantParam { | |||||
| inited: bool = false; | inited: bool = false; | ||||
| var_corr: double = 1; | var_corr: double = 1; | ||||
| mean_corr: double = 0; | mean_corr: double = 0; | ||||
| clusters: [float]; | |||||
| } | } | ||||
| table Tensor { | table Tensor { | ||||
| @@ -105,6 +105,12 @@ int LiteSession::ConvertTensors(const lite::Model *model) { | |||||
| quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint(); | quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint(); | ||||
| quant_arg.var_corr = quant_params->Get(j)->var_corr(); | quant_arg.var_corr = quant_params->Get(j)->var_corr(); | ||||
| quant_arg.mean_corr = quant_params->Get(j)->mean_corr(); | quant_arg.mean_corr = quant_params->Get(j)->mean_corr(); | ||||
| auto quant_clusters = quant_params->Get(j)->clusters(); | |||||
| if (quant_clusters != nullptr) { | |||||
| for (size_t k = 0; k < quant_clusters->size(); k++) { | |||||
| quant_arg.clusters.emplace_back(quant_clusters->Get(k)); | |||||
| } | |||||
| } | |||||
| dstTensor->AddQuantParam(quant_arg); | dstTensor->AddQuantParam(quant_arg); | ||||
| } | } | ||||
| } | } | ||||
| @@ -69,7 +69,17 @@ class DequantUtil { | |||||
| auto scale = param.scale; | auto scale = param.scale; | ||||
| auto zero_point = param.zeroPoint; | auto zero_point = param.zeroPoint; | ||||
| for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { | for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { | ||||
| dequant_datas[j] = static_cast<float>((quant_datas[j] - zero_point) * scale); | |||||
| if (param.clusters.size() != 0) { | |||||
| int index = quant_datas[j] - INT8_MIN; |||||
| if (index < 0 || index >= static_cast<int>(param.clusters.size())) { |||||
| MS_LOG(ERROR) << "KMeans cluster index is out of range."; |||||
| free(dequant_datas); |||||
| return nullptr; |||||
| } |||||
| dequant_datas[j] = static_cast<float>(param.clusters[index]); |||||
| } else { | |||||
| dequant_datas[j] = static_cast<float>((quant_datas[j] - zero_point) * scale); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| return dequant_datas; | return dequant_datas; | ||||
| @@ -35,6 +35,7 @@ struct QuantArg { | |||||
| int32_t zeroPoint; | int32_t zeroPoint; | ||||
| double var_corr{1}; | double var_corr{1}; | ||||
| double mean_corr{0}; | double mean_corr{0}; | ||||
| std::vector<float> clusters{}; | |||||
| }; | }; | ||||
| class Tensor : public mindspore::tensor::MSTensor { | class Tensor : public mindspore::tensor::MSTensor { | ||||
| @@ -101,7 +101,7 @@ STATUS DivergInfo::ComputeThreshold() { | |||||
| } | } | ||||
| if (method_x == kMethodOutlier) { | if (method_x == kMethodOutlier) { | ||||
| this->percent_result = PercentMethod(min_datas, max_datas); | |||||
| this->percent_result = OutlierMethod(min_datas, max_datas); | |||||
| this->best_T = std::max(std::fabs(percent_result.first), std::fabs(percent_result.second)); | this->best_T = std::max(std::fabs(percent_result.first), std::fabs(percent_result.second)); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <memory> | #include <memory> | ||||
| #include <vector> | #include <vector> | ||||
| #include <set> | |||||
| #include "src/ops/primitive_c.h" | #include "src/ops/primitive_c.h" | ||||
| #include "mindspore/lite/tools/converter/quantizer/general_bitpacking.h" | #include "mindspore/lite/tools/converter/quantizer/general_bitpacking.h" | ||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| @@ -305,8 +306,8 @@ STATUS PostBitPack(float *weight, size_t shapeSize, size_t bitNum) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| bool SearchLowerBound(const std::vector<float> &data, const size_t &index, const float &max_tmp, float *min_tmp, | |||||
| size_t *min_idx) { | |||||
| static bool SearchLowerBound(const std::vector<float> &data, const size_t &index, const float &max_tmp, float *min_tmp, | |||||
| size_t *min_idx) { | |||||
| size_t length = data.size(); | size_t length = data.size(); | ||||
| if (max_tmp - data.at(index) < delta) { | if (max_tmp - data.at(index) < delta) { | ||||
| return false; | return false; | ||||
| @@ -320,8 +321,8 @@ bool SearchLowerBound(const std::vector<float> &data, const size_t &index, const | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool SearchUpperBound(const std::vector<float> &data, const size_t &index, float *max_tmp, const float &min_tmp, | |||||
| size_t *max_idx) { | |||||
| static bool SearchUpperBound(const std::vector<float> &data, const size_t &index, float *max_tmp, const float &min_tmp, | |||||
| size_t *max_idx) { | |||||
| size_t length = data.size(); | size_t length = data.size(); | ||||
| if (data.at(index) - min_tmp < delta) { | if (data.at(index) - min_tmp < delta) { | ||||
| return false; | return false; | ||||
| @@ -335,7 +336,7 @@ bool SearchUpperBound(const std::vector<float> &data, const size_t &index, float | |||||
| return true; | return true; | ||||
| } | } | ||||
| float CalPercentile(const std::vector<float> &datas, const int &outlier_percent) { | |||||
| static float CalPercentile(const std::vector<float> &datas, const int &outlier_percent) { | |||||
| const int size = datas.size(); | const int size = datas.size(); | ||||
| float val = outlier_percent / 100.0 * size; | float val = outlier_percent / 100.0 * size; | ||||
| int index = std::ceil(val); | int index = std::ceil(val); | ||||
| @@ -348,7 +349,7 @@ float CalPercentile(const std::vector<float> &datas, const int &outlier_percent) | |||||
| return result; | return result; | ||||
| } | } | ||||
| std::pair<float, float> PercentMethod(std::vector<float> min_datas, std::vector<float> max_datas) { | |||||
| std::pair<float, float> OutlierMethod(std::vector<float> min_datas, std::vector<float> max_datas) { | |||||
| std::sort(max_datas.begin(), max_datas.end()); | std::sort(max_datas.begin(), max_datas.end()); | ||||
| std::sort(min_datas.begin(), min_datas.end()); | std::sort(min_datas.begin(), min_datas.end()); | ||||
| float min_val = CalPercentile(min_datas, percent); | float min_val = CalPercentile(min_datas, percent); | ||||
| @@ -372,6 +373,64 @@ std::pair<float, float> PercentMethod(std::vector<float> min_datas, std::vector< | |||||
| std::pair<float, float> result{min_tmp, max_tmp}; | std::pair<float, float> result{min_tmp, max_tmp}; | ||||
| return result; | return result; | ||||
| } | } | ||||
| static std::vector<float> InitClusters(float *data, size_t elem_count, size_t k) { | |||||
| std::set<float> set_unique{}; | |||||
| for (size_t i = 0; i < elem_count; i++) { | |||||
| set_unique.emplace(data[i]); | |||||
| } | |||||
| std::vector<float> data_unique; | |||||
| data_unique.assign(set_unique.begin(), set_unique.end()); | |||||
| std::vector<float> clusters{}; | |||||
| if (set_unique.size() < k || k < 2) { |||||
| return clusters; | |||||
| } | |||||
| // init cluster | |||||
| float ratio = static_cast<float>(data_unique.size() - 1) / (k - 1); |||||
| std::sort(data_unique.begin(), data_unique.end()); | |||||
| for (size_t i = 0; i < k; i++) { | |||||
| size_t index = std::floor(i * ratio); | |||||
| if (i * ratio - index > 0) { | |||||
| clusters.emplace_back((data_unique[index] + data_unique[index + 1]) / 2); | |||||
| } else { | |||||
| clusters.emplace_back(data_unique[index]); | |||||
| } | |||||
| } | |||||
| return clusters; | |||||
| } | |||||
| std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam) { | |||||
| std::vector<float> clusters = InitClusters(data, elem_count, k); | |||||
| std::vector<int8_t> clusters_index{}; | |||||
| if (clusters.size() < k) { | |||||
| MS_LOG(WARNING) << "The number of unique data values is less than k, so KMeans is not executed."; |||||
| return clusters_index; | |||||
| } | |||||
| for (size_t epoch = 0; epoch < epochs; epoch++) { | |||||
| clusters_index.clear(); | |||||
| std::vector<std::vector<float>> clusters_data(clusters.size()); | |||||
| for (size_t i = 0; i < elem_count; i++) { | |||||
| size_t index = 0; | |||||
| float min_distance = pow(data[i] - clusters[0], 2); | |||||
| for (size_t j = 1; j < clusters.size(); j++) { | |||||
| if (pow(data[i] - clusters[j], 2) < min_distance) { | |||||
| min_distance = pow(data[i] - clusters[j], 2); | |||||
| index = j; | |||||
| } | |||||
| } | |||||
| clusters_index.emplace_back(index + INT8_MIN); | |||||
| clusters_data[index].emplace_back(data[i]); | |||||
| } | |||||
| for (size_t j = 0; j < clusters.size(); j++) { | |||||
| if (clusters_data[j].size() > 0) { | |||||
| clusters[j] = std::accumulate(clusters_data[j].begin(), clusters_data[j].end(), 0.0) / clusters_data[j].size(); | |||||
| } | |||||
| } | |||||
| } | |||||
| // update data | |||||
| quantParam->clusters = clusters; | |||||
| return clusters_index; | |||||
| } | |||||
| } // namespace quant | } // namespace quant | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -72,15 +72,9 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl | |||||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false, | STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false, | ||||
| int numBits = UINT8_QUANTIZATION); | int numBits = UINT8_QUANTIZATION); | ||||
| bool SearchLowerBound(const std::vector<float> &data, const size_t &index, const float &max_tmp, float *min_tmp, | |||||
| size_t *min_idx); | |||||
| std::pair<float, float> OutlierMethod(std::vector<float> min_datas, std::vector<float> max_datas); | |||||
| bool SearchUpperBound(const std::vector<float> &data, const size_t &index, float *max_tmp, const float &min_tmp, | |||||
| size_t *max_idx); | |||||
| float CalPercentile(const std::vector<float> &datas, const int &percent); | |||||
| std::pair<float, float> PercentMethod(std::vector<float> min_datas, std::vector<float> max_datas); | |||||
| std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam); | |||||
| template <typename T> | template <typename T> | ||||
| T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | ||||
| @@ -213,7 +207,7 @@ STATUS QuantFilter(ParamValueLitePtr weight, std::shared_ptr<PrimitiveC> primiti | |||||
| average_raw += raw_data; | average_raw += raw_data; | ||||
| } | } | ||||
| } | } | ||||
| if (quantType == QuantType_WeightQuant) { | |||||
| if (quantType == QuantType_WeightQuant && quant_param.clusters.size() == 0) { | |||||
| // mean | // mean | ||||
| average_dequant = average_dequant / one_filter_size; | average_dequant = average_dequant / one_filter_size; | ||||
| average_raw = average_raw / one_filter_size; | average_raw = average_raw / one_filter_size; | ||||
| @@ -261,17 +255,21 @@ STATUS QuantFilter(ParamValueLitePtr weight, std::shared_ptr<PrimitiveC> primiti | |||||
| } | } | ||||
| schema::QuantParamT quant_param; | schema::QuantParamT quant_param; | ||||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||||
| return status; | |||||
| if (quant_param.clusters.size() == 0) { | |||||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||||
| return status; | |||||
| } | |||||
| } | } | ||||
| quant_params.emplace_back(quant_param); | quant_params.emplace_back(quant_param); | ||||
| // update data and datatype | // update data and datatype | ||||
| for (uint32_t i = 0; i < elem_count; i++) { | for (uint32_t i = 0; i < elem_count; i++) { | ||||
| float raw_data = raw_datas[i]; | float raw_data = raw_datas[i]; | ||||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||||
| quant_datas[i] = quant_data; | |||||
| if (quant_param.clusters.size() == 0) { | |||||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||||
| quant_datas[i] = quant_data; | |||||
| } | |||||
| } | } | ||||
| auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T)); | auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T)); | ||||
| if (ret != EOK) { | if (ret != EOK) { | ||||