| @@ -38,6 +38,12 @@ table QuantParam { | |||
| multiplier: int = 1; // calculate fixed point multiplier method | |||
| } | |||
| enum WeightQunatCompressType: int { | |||
| NONE, | |||
| INDEXING, | |||
| SPARSE | |||
| } | |||
| table Tensor { | |||
| nodeType: int; | |||
| // data type | |||
| @@ -52,6 +58,7 @@ table Tensor { | |||
| quantClusters: [float]; | |||
| name: string; | |||
| enableHuffmanCode: bool = false; | |||
| weightQunatCompressType: WeightQunatCompressType = NONE; | |||
| } | |||
| enum QuantType: int { | |||
| @@ -45,6 +45,12 @@ namespace lite { | |||
| namespace { | |||
| int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { | |||
| MS_ASSERT(dst_tensor != nullptr); | |||
| if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_INDEXING) { | |||
| return IndexingDecompress(src_tensor, dst_tensor); | |||
| } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_SPARSE) { | |||
| return SparseDecompress(src_tensor, dst_tensor); | |||
| } | |||
| bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && | |||
| src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); | |||
| if (need_bit_unpack) { | |||
| @@ -20,6 +20,167 @@ | |||
| #include "src/huffman_decode.h" | |||
| namespace mindspore::lite { | |||
// Expands each byte of `str` into eight booleans, most-significant bit first.
// The result always has exactly str.size() * 8 entries.
std::vector<bool> StringToBitVector(const std::string &str) {
  std::vector<bool> bits;
  bits.reserve(str.size() * 8);
  for (unsigned char byte : str) {
    for (int bit_pos = 7; bit_pos >= 0; bit_pos--) {
      bits.push_back(((byte >> bit_pos) & 0x1) != 0);
    }
  }
  return bits;
}
| STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { | |||
| MS_LOG(ERROR) << "un-index weight"; | |||
| auto bit_num = src_tensor.quantParams()->Get(0)->numBits(); | |||
| std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size()); | |||
| auto bit_vec = StringToBitVector(str); | |||
| size_t index = 0; | |||
| // parse unique_value_cnt | |||
| size_t unique_value_cnt = 0; | |||
| for (int i = 0; i < bit_num; i++) { | |||
| bool bit = bit_vec[index++]; | |||
| unique_value_cnt |= bit << (bit_num - i - 1); | |||
| } | |||
| if (unique_value_cnt == 0) { | |||
| unique_value_cnt = 1 << bit_num; | |||
| } | |||
| // parse unique_value_set; | |||
| std::vector<int> unique_values; | |||
| for (size_t i = 0; i < unique_value_cnt; i++) { | |||
| int unique_value = 0; | |||
| for (int j = 0; j < bit_num; j++) { | |||
| bool bit = bit_vec[index++]; | |||
| unique_value |= bit << (bit_num - j - 1); | |||
| } | |||
| // unsigned to signed | |||
| unique_values.push_back(unique_value - (1 << (bit_num - 1))); | |||
| } | |||
| // parse index | |||
| std::vector<size_t> unique_value_index_vec; | |||
| auto elem_cnt = dst_tensor->ElementsNum(); | |||
| size_t unique_value_bit = ceil(log2(unique_value_cnt)); | |||
| for (int i = 0; i < elem_cnt; i++) { | |||
| size_t unique_value_index = 0; | |||
| for (size_t j = 0; j < unique_value_bit; j++) { | |||
| bool bit = bit_vec[index++]; | |||
| unique_value_index |= bit << (unique_value_bit - j - 1); | |||
| } | |||
| unique_value_index_vec.push_back(unique_value_index); | |||
| } | |||
| if (dst_tensor->data_c() != nullptr) { | |||
| MS_LOG(ERROR) << "data_c not null"; | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = dst_tensor->MallocData(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Malloc tensor data failed"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| auto dst_data = dst_tensor->data_c(); | |||
| if (bit_num <= 8) { | |||
| ret = UnIndexTensorData<int8_t>(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); | |||
| } else { | |||
| ret = UnIndexTensorData<int16_t>(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "UnIndexTensorData error"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { | |||
| MS_LOG(ERROR) << "un-sparse weight"; | |||
| size_t bit_num = src_tensor.quantParams()->Get(0)->numBits(); | |||
| std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size()); | |||
| auto bit_vec = StringToBitVector(str); | |||
| size_t index = 0; | |||
| // parse coor_best_bit | |||
| size_t coor_best_bit = 0; | |||
| for (size_t i = 0; i < 8; i++) { | |||
| bool bit = bit_vec[index++]; | |||
| coor_best_bit |= bit << (8 - i - 1); | |||
| } | |||
| // parse nz_cnt | |||
| size_t nz_cnt = 0; | |||
| for (size_t i = 0; i < 32; i++) { | |||
| bool bit = bit_vec[index++]; | |||
| nz_cnt |= bit << (32 - i - 1); | |||
| } | |||
| // parse unique_value cnt | |||
| size_t unique_value_cnt = 0; | |||
| for (size_t i = 0; i < bit_num; i++) { | |||
| bool bit = bit_vec[index++]; | |||
| unique_value_cnt |= bit << (bit_num - i - 1); | |||
| } | |||
| if (unique_value_cnt == 0) { | |||
| unique_value_cnt = 1 << bit_num; | |||
| } | |||
| // parse unique_values | |||
| std::vector<int> unique_values; | |||
| for (size_t i = 0; i < unique_value_cnt; i++) { | |||
| int unique_value = 0; | |||
| for (size_t j = 0; j < bit_num; j++) { | |||
| bool bit = bit_vec[index++]; | |||
| unique_value |= bit << (bit_num - j - 1); | |||
| } | |||
| // unsigned to signed | |||
| unique_values.push_back(unique_value - (1 << (bit_num - 1))); | |||
| } | |||
| // parse index | |||
| std::vector<size_t> unique_value_index_vec; | |||
| auto elem_cnt = dst_tensor->ElementsNum(); | |||
| size_t unique_value_bit = ceil(log2(unique_value_cnt)); | |||
| for (size_t i = 0; i < nz_cnt; i++) { | |||
| size_t unique_value_index = 0; | |||
| for (size_t j = 0; j < unique_value_bit; j++) { | |||
| bool bit = bit_vec[index++]; | |||
| unique_value_index |= bit << (unique_value_bit - j - 1); | |||
| } | |||
| unique_value_index_vec.push_back(unique_value_index); | |||
| } | |||
| // parse coors | |||
| std::vector<size_t> coor_vec; | |||
| for (size_t i = 0; i < nz_cnt; i++) { | |||
| size_t coor = 0; | |||
| for (size_t j = 0; j < coor_best_bit; j++) { | |||
| bool bit = bit_vec[index++]; | |||
| coor |= bit << (coor_best_bit - j - 1); | |||
| } | |||
| coor_vec.push_back(coor); | |||
| } | |||
| if (dst_tensor->data_c() != nullptr) { | |||
| MS_LOG(ERROR) << "data_c not null"; | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = dst_tensor->MallocData(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Malloc tensor data failed"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| auto dst_data = dst_tensor->data_c(); | |||
| if (bit_num <= 8) { | |||
| ret = UnSparseTensorData<int8_t>(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), | |||
| elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); | |||
| } else { | |||
| ret = UnSparseTensorData<int16_t>(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), | |||
| elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "UnSparseTensorData error"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type) { | |||
| MS_ASSERT(input_tensor != nullptr); | |||
| if (input_tensor->data_type() != kNumberTypeInt8 && input_tensor->data_type() != kNumberTypeInt16) { | |||
| @@ -21,6 +21,8 @@ | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <queue> | |||
| #include <limits> | |||
| #include <string> | |||
| #include <cmath> | |||
| #include "nnacl/matmul_parameter.h" | |||
| #include "src/lite_kernel.h" | |||
| @@ -30,6 +32,94 @@ | |||
| static constexpr int kPerTensor = 1; | |||
| namespace mindspore::lite { | |||
| template <typename T> | |||
| STATUS UnIndexTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices, void *dst_data, | |||
| size_t dst_data_size) { | |||
| std::vector<T> un_indexed_data; | |||
| for (auto index : indices) { | |||
| if (index >= unique_values.size()) { | |||
| MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size(); | |||
| return RET_ERROR; | |||
| } | |||
| if (unique_values[index] > std::numeric_limits<T>::max() || unique_values[index] < std::numeric_limits<T>::min()) { | |||
| MS_LOG(ERROR) << "data: " << unique_values[index] << " max: " << std::numeric_limits<T>::max() | |||
| << " min: " << std::numeric_limits<T>::min(); | |||
| return RET_ERROR; | |||
| } | |||
| un_indexed_data.push_back(static_cast<T>(unique_values[index])); | |||
| } | |||
| if (un_indexed_data.size() * sizeof(T) != dst_data_size) { | |||
| MS_LOG(ERROR) << "un idnexed data size: " << un_indexed_data.size() * sizeof(T) | |||
| << " expected by tensor: " << dst_data_size; | |||
| return false; | |||
| } | |||
| memcpy(dst_data, un_indexed_data.data(), un_indexed_data.size() * sizeof(T)); | |||
| return RET_OK; | |||
| } | |||
| template <typename T> | |||
| STATUS UnSparseTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices, | |||
| const std::vector<size_t> &coors, | |||
| const flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>> *quant_params, | |||
| size_t elem_cnt, size_t coor_best_bit, void *dst_data, size_t dst_data_size) { | |||
| std::vector<T> un_sparsed_data; | |||
| size_t data_index = 0; | |||
| auto nz_cnt = indices.size(); | |||
| MS_ASSERT(nz_cnt == coors.size()); | |||
| auto channel_cnt = quant_params->size(); | |||
| auto elem_perchannel = elem_cnt / channel_cnt; | |||
| for (size_t i = 0; i < nz_cnt; i++) { | |||
| auto index = indices[i]; | |||
| if (index >= unique_values.size()) { | |||
| MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size(); | |||
| return RET_ERROR; | |||
| } | |||
| auto nz = unique_values[index]; | |||
| if (nz > std::numeric_limits<T>::max() || nz < std::numeric_limits<T>::min()) { | |||
| MS_LOG(ERROR) << "data: " << nz << " max: " << std::numeric_limits<T>::max() | |||
| << " min: " << std::numeric_limits<T>::min(); | |||
| return RET_ERROR; | |||
| } | |||
| auto coor = coors[i]; | |||
| auto cur_channel = data_index / elem_perchannel; | |||
| auto zp = quant_params->Get(cur_channel)->zeroPoint(); | |||
| for (size_t j = 0; j < coor; j++) { | |||
| un_sparsed_data.push_back(zp); | |||
| data_index++; | |||
| } | |||
| un_sparsed_data.push_back(static_cast<T>(unique_values[index])); | |||
| data_index++; | |||
| } | |||
| if (un_sparsed_data.size() * sizeof(T) > dst_data_size) { | |||
| MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T) | |||
| << " tensor size: " << dst_data_size; | |||
| return false; | |||
| } else if (un_sparsed_data.size() * sizeof(T) < dst_data_size && | |||
| (un_sparsed_data.size() + (1 << coor_best_bit) - 1) * sizeof(T) < dst_data_size) { | |||
| MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T) << " tensor size: " << dst_data_size | |||
| << " coor_best_bit: " << coor_best_bit; | |||
| return false; | |||
| } | |||
| for (; data_index < dst_data_size / sizeof(T); data_index++) { | |||
| auto cur_channel = data_index / elem_perchannel; | |||
| auto zp = quant_params->Get(cur_channel)->zeroPoint(); | |||
| un_sparsed_data.push_back(static_cast<T>(zp)); | |||
| } | |||
| memcpy(dst_data, un_sparsed_data.data(), un_sparsed_data.size() * sizeof(T)); | |||
| return RET_OK; | |||
| } | |||
| std::vector<bool> StringToBitVector(const std::string &str); | |||
| STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor); | |||
| STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor); | |||
| class WeightDecoder { | |||
| public: | |||
| static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); | |||
| @@ -0,0 +1 @@ | |||
| ml_segmentation_matting 130 | |||
| @@ -182,6 +182,23 @@ function Run_Converter() { | |||
| fi | |||
| done < ${models_tflite_weightquant_config} | |||
| # Convert caffe weightquant models: | |||
| while read line; do | |||
| weight_quant_line_info=${line} | |||
| if [[ ${weight_quant_line_info} == \#* ]]; then | |||
| continue | |||
| fi | |||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||
| echo ${model_name} >> "${run_converter_log_file}" | |||
| echo './converter_lite --fmk=CAFFE --modelFile='${models_path}'/'${model_name}'.prototxt --weightFile='${models_path}'/'${model_name}'.caffemodel --outputFile='${ms_models_path}'/'${model_name}_weightquant' --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}" | |||
| ./converter_lite --fmk=CAFFE --modelFile=${models_path}/${model_name}.prototxt --weightFile=${models_path}/${model_name}.caffemodel --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0 | |||
| if [ $? = 0 ]; then | |||
| converter_result='converter caffe_weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file} | |||
| else | |||
| converter_result='converter caffe_weight_quant '${model_name}' failed';echo ${converter_result} >> ${run_converter_result_file};return 1 | |||
| fi | |||
| done < ${models_caffe_weightquant_config} | |||
| # Convert mindir weightquant models: | |||
| while read line; do | |||
| weight_quant_line_info=${line} | |||
| @@ -595,6 +612,24 @@ function Run_x86() { | |||
| fi | |||
| done < ${models_tflite_weightquant_config} | |||
| # Run caffe weightquant converted models: | |||
| while read line; do | |||
| weight_quant_line_info=${line} | |||
| if [[ ${weight_quant_line_info} == \#* ]]; then | |||
| continue | |||
| fi | |||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||
| accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'` | |||
| echo ${model_name} >> "${run_x86_log_file}" | |||
| echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile='${models_path}'/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile='${models_path}'/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}" | |||
| ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=${models_path}/input_output/input/${model_name}.ms.bin --benchmarkDataFile=${models_path}/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit}>> "${run_x86_log_file}" | |||
| if [ $? = 0 ]; then | |||
| run_result='x86: '${model_name}_weightquant' pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| else | |||
| run_result='x86: '${model_name}_weightquant' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| fi | |||
| done < ${models_caffe_weightquant_config} | |||
| # Run tf weightquant converted models: | |||
| while read line; do | |||
| weight_quant_line_info=${line} | |||
| @@ -2423,6 +2458,7 @@ version=${file_name_array[2]} | |||
| models_tflite_config=${basepath}/models_tflite.cfg | |||
| models_tf_config=${basepath}/models_tf.cfg | |||
| models_caffe_config=${basepath}/models_caffe.cfg | |||
| models_caffe_weightquant_config=${basepath}/models_caffe_weightquant.cfg | |||
| models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg | |||
| models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg | |||
| models_caffe_posttraining_config=${basepath}/models_caffe_posttraining.cfg | |||
| @@ -105,6 +105,31 @@ int AnfExporter::SetPostTrainOutputTensorType(const std::unique_ptr<schema::Meta | |||
| return RET_OK; | |||
| } | |||
| static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_ptr<schema::CNodeT> &dst_node) { | |||
| if (!tensor_input->quantParams.empty() && tensor_input->quantParams.front()->inited) { | |||
| int bit_num = tensor_input->quantParams.at(0)->numBits; | |||
| // Pack Repetition | |||
| auto repetition_packed = false; | |||
| MS_LOG(ERROR) << dst_node->name; | |||
| if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) { | |||
| if (bit_num <= 8) { | |||
| repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input); | |||
| } else { | |||
| repetition_packed = PackRepetition<int16_t>(bit_num, tensor_input); | |||
| } | |||
| } | |||
| if (bit_num != 8 && bit_num != 16 && !repetition_packed) { | |||
| auto status = DoBitPack(bit_num, tensor_input); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "do bit pack failed. " << status; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &meta_graph, | |||
| const std::shared_ptr<mindspore::Primitive> &primitive, | |||
| const std::unique_ptr<schema::CNodeT> &dst_node) { | |||
| @@ -146,16 +171,9 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &me | |||
| tensor_input->quantParams.emplace_back(std::move(input_quant_param_ptr)); | |||
| } | |||
| } | |||
| if (!tensor_input->quantParams.empty()) { | |||
| int bit_num = tensor_input->quantParams.at(0)->numBits; | |||
| if (bit_num != 8 && bit_num != 16) { | |||
| auto status = DoBitPack(bit_num, tensor_input); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "do bit pack failed. " << status; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| if (CompressTensor(tensor_input, dst_node) != RET_OK) { | |||
| MS_LOG(ERROR) << "CompressTensor error"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| @@ -694,5 +694,20 @@ std::vector<int> GetTransposePerm(MetaGraphT *graph, const std::unique_ptr<CNode | |||
| } | |||
| return perm; | |||
| } | |||
// Packs a bit vector into a byte string, most-significant bit first; the last
// byte is zero-padded on the right when the bit count is not a multiple of 8.
std::string BoolVectorToString(const std::vector<bool> &bool_vec) {
  size_t byte_cnt = (bool_vec.size() + 7) / 8;
  std::string packed(byte_cnt, '\0');
  for (size_t i = 0; i < bool_vec.size(); i++) {
    if (bool_vec[i]) {
      packed[i / 8] |= static_cast<char>(1 << (7 - (i % 8)));
    }
  }
  return packed;
}
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -23,6 +23,12 @@ | |||
| #include <string> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <map> | |||
| #include <set> | |||
| #include <algorithm> | |||
| #include <numeric> | |||
| #include <limits> | |||
| #include <functional> | |||
| #include "include/errorcode.h" | |||
| #include "schema/inner/model_generated.h" | |||
| #include "src/common/graph_util.h" | |||
| @@ -95,6 +101,210 @@ STATUS SetSubgraphTensorIndices(schema::MetaGraphT *meta_graphT); | |||
| std::string GetModelName(const std::string &modelFile); | |||
| std::vector<int> GetTransposePerm(schema::MetaGraphT *graph, const std::unique_ptr<schema::CNodeT> &cnode); | |||
| std::string BoolVectorToString(const std::vector<bool> &bool_vec); | |||
| template <typename T> | |||
| bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map, | |||
| size_t unique_value_bit, size_t unique_value_cnt, size_t pack_repetition_size_in_byte, | |||
| size_t bit_num, schema::TensorT *tensor) { | |||
| auto quant_data_array = reinterpret_cast<T *>(tensor->data.data()); | |||
| std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T)); | |||
| std::vector<bool> bits(pack_repetition_size_in_byte * 8); | |||
| size_t index = 0; | |||
| // write unique_value_cnt: bit_num bit for unsigned | |||
| for (size_t i = 0; i < bit_num; i++) { | |||
| bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & (0x1); | |||
| } | |||
| // write the unique value set: each value has bit_num bit signed | |||
| for (auto unique_value : quant_data_set) { | |||
| for (size_t i = 0; i < bit_num; i++) { | |||
| bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1); | |||
| } | |||
| } | |||
| // write the index: each index has unique_value_bit unsigned | |||
| for (auto quant_value : quant_data) { | |||
| for (size_t i = 0; i < unique_value_bit; i++) { | |||
| bits[index++] = (unique_value_index_map.at(quant_value) >> (unique_value_bit - i - 1)) & (0x1); | |||
| } | |||
| } | |||
| if (index > pack_repetition_size_in_byte * 8) { | |||
| MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_repetition_size_in_byte * 8; | |||
| return false; | |||
| } | |||
| // update tensor data | |||
| auto new_data_str = BoolVectorToString(bits); | |||
| auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size()); | |||
| if (ret != EOK) { | |||
| MS_LOG(ERROR) << "memcpy error"; | |||
| return false; | |||
| } | |||
| tensor->data.resize(new_data_str.size()); | |||
| tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING; | |||
| MS_LOG(ERROR) << "set WeightQunatCompressType_INDEXING"; | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| bool SparsityCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map, | |||
| size_t unique_value_bit, size_t unique_value_cnt, size_t pack_sparsity_size_in_byte, | |||
| size_t nz_cnt, size_t coor_best_bit, size_t bit_num, schema::TensorT *tensor) { | |||
| auto quant_data_array = reinterpret_cast<T *>(tensor->data.data()); | |||
| std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T)); | |||
| auto &quant_params = tensor->quantParams; | |||
| auto elem_cnt = quant_data.size(); | |||
| auto channel_cnt = quant_params.size(); | |||
| auto elem_perchannel = elem_cnt / channel_cnt; | |||
| std::vector<bool> bits(pack_sparsity_size_in_byte * 8); | |||
| int index = 0; | |||
| // coor_best_bit | |||
| for (size_t i = 0; i < 8; i++) { | |||
| bits[index++] = (coor_best_bit >> (8 - i - 1)) & 0x1; | |||
| } | |||
| // nz_cnt | |||
| for (size_t i = 0; i < 32; i++) { | |||
| bits[index++] = (nz_cnt >> (32 - i - 1)) & 0x1; | |||
| } | |||
| // unique_value cnt | |||
| for (size_t i = 0; i < bit_num; i++) { | |||
| bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & 0x1; | |||
| } | |||
| // unique_values | |||
| for (auto unique_value : quant_data_set) { | |||
| for (size_t i = 0; i < bit_num; i++) { | |||
| bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1); | |||
| } | |||
| } | |||
| // nz values indexing && get coor | |||
| std::vector<size_t> coors(nz_cnt); | |||
| int coors_index = 0; | |||
| int prev_index = -1; | |||
| for (int di = 0; (unsigned int)di < elem_cnt; di++) { | |||
| auto cur_channel = di / elem_perchannel; | |||
| auto zp = quant_params[cur_channel]->zeroPoint; | |||
| auto nz_value = quant_data[di]; | |||
| if (nz_value != zp || (di - prev_index) >= (1 << coor_best_bit)) { | |||
| MS_ASSERT(coors_index < nz_cnt); | |||
| coors[coors_index++] = di - prev_index - 1; | |||
| prev_index = di; | |||
| for (size_t i = 0; i < unique_value_bit; i++) { | |||
| bits[index++] = (unique_value_index_map.at(nz_value) >> (unique_value_bit - i - 1)) & (0x1); | |||
| } | |||
| } | |||
| } | |||
| // write coor | |||
| for (auto coor : coors) { | |||
| for (size_t i = 0; i < coor_best_bit; i++) { | |||
| bits[index++] = (coor >> (coor_best_bit - i - 1)) & 0x1; | |||
| } | |||
| } | |||
| if ((unsigned int)index > pack_sparsity_size_in_byte * 8) { | |||
| MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_sparsity_size_in_byte * 8; | |||
| return false; | |||
| } | |||
| auto new_data_str = BoolVectorToString(bits); | |||
| auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size()); | |||
| if (ret != EOK) { | |||
| MS_LOG(ERROR) << "memcpy error"; | |||
| return false; | |||
| } | |||
| tensor->data.resize(new_data_str.size()); | |||
| tensor->weightQunatCompressType = schema::WeightQunatCompressType_SPARSE; | |||
| MS_LOG(ERROR) << "set WeightQunatCompressType_SPARSITY"; | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| size_t CalCoorBestBit(const std::vector<T> &quant_data, size_t elem_cnt, | |||
| const std::vector<std::unique_ptr<schema::QuantParamT>> &quant_params, int unique_value_bit, | |||
| size_t *coor_best_bit) { | |||
| size_t best_nn_cnt = 0; | |||
| size_t min_len_in_bit = std::numeric_limits<size_t>::max(); | |||
| for (int bit = 2; bit <= 10; bit++) { | |||
| // search | |||
| size_t nn_cnt = 0; | |||
| int prev_index = -1; | |||
| auto channel_cnt = quant_params.size(); | |||
| auto elem_perchannel = elem_cnt / channel_cnt; | |||
| for (int i = 0; (unsigned int)i < elem_cnt; i++) { | |||
| auto cur_channel = i / elem_perchannel; | |||
| auto zp = quant_params[cur_channel]->zeroPoint; | |||
| if (quant_data[i] != zp || (i - prev_index) >= (1 << bit)) { | |||
| nn_cnt++; | |||
| prev_index = i; | |||
| } | |||
| } | |||
| size_t len_in_bit = nn_cnt * bit + nn_cnt * unique_value_bit; | |||
| if (len_in_bit < min_len_in_bit) { | |||
| min_len_in_bit = len_in_bit; | |||
| *coor_best_bit = bit; | |||
| best_nn_cnt = nn_cnt; | |||
| } | |||
| } | |||
| return best_nn_cnt; | |||
| } | |||
| template <typename T> | |||
| bool PackRepetition(size_t bit_num, schema::TensorT *tensor) { | |||
| auto quant_data_array = reinterpret_cast<T *>(tensor->data.data()); | |||
| std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T)); | |||
| auto elem_cnt = quant_data.size(); | |||
| auto dims = tensor->dims; | |||
| size_t elem_cnt_by_dims = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<>()); | |||
| if (elem_cnt != elem_cnt_by_dims) { | |||
| MS_LOG(ERROR) << "elem_cnt: " << elem_cnt << " not equal: " << elem_cnt_by_dims; | |||
| return false; | |||
| } | |||
| auto &quant_params = tensor->quantParams; | |||
| std::set<T> quant_data_set; | |||
| for (auto quant_value : quant_data) { | |||
| quant_data_set.insert(quant_value); | |||
| } | |||
| std::map<T, size_t> unique_value_index_map; | |||
| auto index = 0; | |||
| for (auto value : quant_data_set) { | |||
| unique_value_index_map[value] = index++; | |||
| } | |||
| auto unique_value_cnt = quant_data_set.size(); | |||
| size_t unique_value_bit = ceil(log2(unique_value_cnt)); | |||
| auto pack_repetition_size_in_bit = bit_num + bit_num * unique_value_cnt + unique_value_bit * elem_cnt; | |||
| size_t pack_repetition_size_in_byte = ceil(pack_repetition_size_in_bit / 8.0); | |||
| size_t origin_size_in_byte = ceil(bit_num * elem_cnt / 8.0); | |||
| size_t coor_best_bit = 0; | |||
| auto nz_cnt = CalCoorBestBit<T>(quant_data, elem_cnt, quant_params, unique_value_bit, &coor_best_bit); | |||
| // 1. coor_best_bit 2. nz_cnt 3. quant_data_set size 4. unique_values 5. unique_value indexing 6. nz values coord | |||
| auto pack_sparsity_size_in_bit = | |||
| 1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit; | |||
| size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0); | |||
| MS_LOG(ERROR) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte | |||
| << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte; | |||
| auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte}); | |||
| if (min_byte_need == origin_size_in_byte) { | |||
| return false; | |||
| } else if (min_byte_need == pack_repetition_size_in_byte) { | |||
| MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte; | |||
| return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, | |||
| pack_repetition_size_in_byte, bit_num, tensor); | |||
| } else if (min_byte_need == pack_sparsity_size_in_byte) { | |||
| MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte; | |||
| return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, | |||
| pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor); | |||
| } else { | |||
| MS_LOG(ERROR) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " " | |||
| << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}"; | |||
| } | |||
| return false; | |||
| } | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -311,7 +311,9 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl | |||
| } | |||
| const double zeroPointFromMin = quantMinFloat - mMin / scale; | |||
| int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin)); | |||
| if (scale < SCALE_THREASHOLD) { | |||
| zeroPoint = 0; | |||
| } | |||
| // The zero point should always be in the range of quantized value, | |||
| // [qmin, qmax]. | |||
| MS_ASSERT(zeroPoint >= quantMin); | |||
| @@ -47,7 +47,7 @@ | |||
| namespace mindspore::lite::quant { | |||
| static constexpr size_t UINT8_QUANTIZATION = 8; | |||
| static constexpr size_t WEIGHT_INDEX = 1; | |||
| static constexpr double SCALE_THREASHOLD = 1e-38; | |||
| const char kMethodMaxMin[] = "MAX_MIN"; | |||
| const char kMethodKL[] = "KL"; | |||
| const char kMethodOutlier[] = "RemovalOutlier"; | |||
| @@ -163,7 +163,9 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan | |||
| const auto narrowRange = quantParam.narrowRange; | |||
| const int maxLimit = quant_max; | |||
| const int minLimit = quant_min; | |||
| if (scale <= SCALE_THREASHOLD) { | |||
| return 0; | |||
| } | |||
| return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { | |||
| auto quant_data = std::round(originData / scale + zeroPoint); | |||
| if (quant_data > maxLimit) { | |||