| @@ -38,6 +38,12 @@ table QuantParam { | |||||
| multiplier: int = 1; // calculate fixed point multiplier method | multiplier: int = 1; // calculate fixed point multiplier method | ||||
| } | } | ||||
// Compression scheme applied to a weight-quantized tensor's raw data buffer.
// NOTE(review): "Qunat" is a typo for "Quant", but the name is referenced
// consistently by the generated code and the C++ sources; renaming it would
// break schema/binary compatibility, so it is kept as-is.
enum WeightQunatCompressType: int {
  NONE,      // data stored uncompressed (plain packed quant values)
  INDEXING,  // data replaced by a unique-value table plus per-element indices
  SPARSE     // data replaced by non-zero values plus relative coordinates
}
| table Tensor { | table Tensor { | ||||
| nodeType: int; | nodeType: int; | ||||
| // data type | // data type | ||||
| @@ -52,6 +58,7 @@ table Tensor { | |||||
| quantClusters: [float]; | quantClusters: [float]; | ||||
| name: string; | name: string; | ||||
| enableHuffmanCode: bool = false; | enableHuffmanCode: bool = false; | ||||
| weightQunatCompressType: WeightQunatCompressType = NONE; | |||||
| } | } | ||||
| enum QuantType: int { | enum QuantType: int { | ||||
| @@ -45,6 +45,12 @@ namespace lite { | |||||
| namespace { | namespace { | ||||
| int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { | int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { | ||||
| MS_ASSERT(dst_tensor != nullptr); | MS_ASSERT(dst_tensor != nullptr); | ||||
| if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_INDEXING) { | |||||
| return IndexingDecompress(src_tensor, dst_tensor); | |||||
| } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_SPARSE) { | |||||
| return SparseDecompress(src_tensor, dst_tensor); | |||||
| } | |||||
| bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && | bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && | ||||
| src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); | src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); | ||||
| if (need_bit_unpack) { | if (need_bit_unpack) { | ||||
| @@ -20,6 +20,167 @@ | |||||
| #include "src/huffman_decode.h" | #include "src/huffman_decode.h" | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
// Explodes every byte of `str` into 8 booleans, MSB first, so bit-level
// readers can consume the buffer with a simple running cursor.
std::vector<bool> StringToBitVector(const std::string &str) {
  std::vector<bool> bits;
  bits.reserve(str.size() * 8);
  for (unsigned char byte : str) {
    for (int pos = 7; pos >= 0; pos--) {
      bits.push_back((byte >> pos) & 0x1);
    }
  }
  return bits;
}
// Rebuilds the full quantized weight buffer of dst_tensor from an
// INDEXING-compressed payload produced by IndexingCompress.
// Stream layout (all fields MSB-first):
//   [unique_value_cnt : bit_num bits]
//   [unique values    : unique_value_cnt * bit_num bits, biased by 2^(bit_num-1)]
//   [element indices  : ElementsNum() * ceil(log2(unique_value_cnt)) bits]
// Returns RET_OK on success, RET_ERROR/RET_NULL_PTR on failure.
// NOTE(review): assumes quantParams() is non-null/non-empty and data() is
// non-null — TODO confirm callers guarantee this; DecompressTensor dispatches
// here *before* its own quantParams null checks.
STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
  // NOTE(review): normal-path trace logged at ERROR level — looks like a
  // debugging leftover; consider demoting to DEBUG.
  MS_LOG(ERROR) << "un-index weight";
  auto bit_num = src_tensor.quantParams()->Get(0)->numBits();
  // Copy the raw payload and explode it into one bool per bit.
  std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size());
  auto bit_vec = StringToBitVector(str);
  size_t index = 0;  // read cursor into bit_vec, in bits
  // parse unique_value_cnt
  size_t unique_value_cnt = 0;
  for (int i = 0; i < bit_num; i++) {
    bool bit = bit_vec[index++];
    unique_value_cnt |= bit << (bit_num - i - 1);
  }
  // A stored count of 0 encodes the full range 2^bit_num (that value itself
  // does not fit in bit_num bits on the compress side).
  if (unique_value_cnt == 0) {
    unique_value_cnt = 1 << bit_num;
  }
  // parse unique_value_set; each value is stored unsigned, biased by 2^(bit_num-1)
  std::vector<int> unique_values;
  for (size_t i = 0; i < unique_value_cnt; i++) {
    int unique_value = 0;
    for (int j = 0; j < bit_num; j++) {
      bool bit = bit_vec[index++];
      unique_value |= bit << (bit_num - j - 1);
    }
    // unsigned to signed
    unique_values.push_back(unique_value - (1 << (bit_num - 1)));
  }
  // parse index: one ceil(log2(unique_value_cnt))-bit index per tensor element
  // NOTE(review): no bounds check on bit_vec here — a truncated/corrupt payload
  // would read past the vector; consider validating `index` against bit_vec.size().
  std::vector<size_t> unique_value_index_vec;
  auto elem_cnt = dst_tensor->ElementsNum();
  size_t unique_value_bit = ceil(log2(unique_value_cnt));
  for (int i = 0; i < elem_cnt; i++) {
    size_t unique_value_index = 0;
    for (size_t j = 0; j < unique_value_bit; j++) {
      bool bit = bit_vec[index++];
      unique_value_index |= bit << (unique_value_bit - j - 1);
    }
    unique_value_index_vec.push_back(unique_value_index);
  }
  // The destination tensor must not own data yet: decompression allocates it.
  if (dst_tensor->data_c() != nullptr) {
    MS_LOG(ERROR) << "data_c not null";
    return RET_ERROR;
  }
  auto ret = dst_tensor->MallocData();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Malloc tensor data failed";
    return RET_NULL_PTR;
  }
  auto dst_data = dst_tensor->data_c();
  // <= 8 bits were packed as int8, otherwise int16 (mirrors the compress side).
  if (bit_num <= 8) {
    ret = UnIndexTensorData<int8_t>(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size());
  } else {
    ret = UnIndexTensorData<int16_t>(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size());
  }
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "UnIndexTensorData error";
    return RET_ERROR;
  }
  return RET_OK;
}
| STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { | |||||
| MS_LOG(ERROR) << "un-sparse weight"; | |||||
| size_t bit_num = src_tensor.quantParams()->Get(0)->numBits(); | |||||
| std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size()); | |||||
| auto bit_vec = StringToBitVector(str); | |||||
| size_t index = 0; | |||||
| // parse coor_best_bit | |||||
| size_t coor_best_bit = 0; | |||||
| for (size_t i = 0; i < 8; i++) { | |||||
| bool bit = bit_vec[index++]; | |||||
| coor_best_bit |= bit << (8 - i - 1); | |||||
| } | |||||
| // parse nz_cnt | |||||
| size_t nz_cnt = 0; | |||||
| for (size_t i = 0; i < 32; i++) { | |||||
| bool bit = bit_vec[index++]; | |||||
| nz_cnt |= bit << (32 - i - 1); | |||||
| } | |||||
| // parse unique_value cnt | |||||
| size_t unique_value_cnt = 0; | |||||
| for (size_t i = 0; i < bit_num; i++) { | |||||
| bool bit = bit_vec[index++]; | |||||
| unique_value_cnt |= bit << (bit_num - i - 1); | |||||
| } | |||||
| if (unique_value_cnt == 0) { | |||||
| unique_value_cnt = 1 << bit_num; | |||||
| } | |||||
| // parse unique_values | |||||
| std::vector<int> unique_values; | |||||
| for (size_t i = 0; i < unique_value_cnt; i++) { | |||||
| int unique_value = 0; | |||||
| for (size_t j = 0; j < bit_num; j++) { | |||||
| bool bit = bit_vec[index++]; | |||||
| unique_value |= bit << (bit_num - j - 1); | |||||
| } | |||||
| // unsigned to signed | |||||
| unique_values.push_back(unique_value - (1 << (bit_num - 1))); | |||||
| } | |||||
| // parse index | |||||
| std::vector<size_t> unique_value_index_vec; | |||||
| auto elem_cnt = dst_tensor->ElementsNum(); | |||||
| size_t unique_value_bit = ceil(log2(unique_value_cnt)); | |||||
| for (size_t i = 0; i < nz_cnt; i++) { | |||||
| size_t unique_value_index = 0; | |||||
| for (size_t j = 0; j < unique_value_bit; j++) { | |||||
| bool bit = bit_vec[index++]; | |||||
| unique_value_index |= bit << (unique_value_bit - j - 1); | |||||
| } | |||||
| unique_value_index_vec.push_back(unique_value_index); | |||||
| } | |||||
| // parse coors | |||||
| std::vector<size_t> coor_vec; | |||||
| for (size_t i = 0; i < nz_cnt; i++) { | |||||
| size_t coor = 0; | |||||
| for (size_t j = 0; j < coor_best_bit; j++) { | |||||
| bool bit = bit_vec[index++]; | |||||
| coor |= bit << (coor_best_bit - j - 1); | |||||
| } | |||||
| coor_vec.push_back(coor); | |||||
| } | |||||
| if (dst_tensor->data_c() != nullptr) { | |||||
| MS_LOG(ERROR) << "data_c not null"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto ret = dst_tensor->MallocData(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Malloc tensor data failed"; | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| auto dst_data = dst_tensor->data_c(); | |||||
| if (bit_num <= 8) { | |||||
| ret = UnSparseTensorData<int8_t>(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), | |||||
| elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); | |||||
| } else { | |||||
| ret = UnSparseTensorData<int16_t>(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), | |||||
| elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); | |||||
| } | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "UnSparseTensorData error"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type) { | int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type) { | ||||
| MS_ASSERT(input_tensor != nullptr); | MS_ASSERT(input_tensor != nullptr); | ||||
| if (input_tensor->data_type() != kNumberTypeInt8 && input_tensor->data_type() != kNumberTypeInt16) { | if (input_tensor->data_type() != kNumberTypeInt8 && input_tensor->data_type() != kNumberTypeInt16) { | ||||
| @@ -21,6 +21,8 @@ | |||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #include <queue> | #include <queue> | ||||
| #include <limits> | |||||
| #include <string> | |||||
| #include <cmath> | #include <cmath> | ||||
| #include "nnacl/matmul_parameter.h" | #include "nnacl/matmul_parameter.h" | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| @@ -30,6 +32,94 @@ | |||||
| static constexpr int kPerTensor = 1; | static constexpr int kPerTensor = 1; | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| template <typename T> | |||||
| STATUS UnIndexTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices, void *dst_data, | |||||
| size_t dst_data_size) { | |||||
| std::vector<T> un_indexed_data; | |||||
| for (auto index : indices) { | |||||
| if (index >= unique_values.size()) { | |||||
| MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (unique_values[index] > std::numeric_limits<T>::max() || unique_values[index] < std::numeric_limits<T>::min()) { | |||||
| MS_LOG(ERROR) << "data: " << unique_values[index] << " max: " << std::numeric_limits<T>::max() | |||||
| << " min: " << std::numeric_limits<T>::min(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| un_indexed_data.push_back(static_cast<T>(unique_values[index])); | |||||
| } | |||||
| if (un_indexed_data.size() * sizeof(T) != dst_data_size) { | |||||
| MS_LOG(ERROR) << "un idnexed data size: " << un_indexed_data.size() * sizeof(T) | |||||
| << " expected by tensor: " << dst_data_size; | |||||
| return false; | |||||
| } | |||||
| memcpy(dst_data, un_indexed_data.data(), un_indexed_data.size() * sizeof(T)); | |||||
| return RET_OK; | |||||
| } | |||||
| template <typename T> | |||||
| STATUS UnSparseTensorData(const std::vector<int> &unique_values, const std::vector<size_t> &indices, | |||||
| const std::vector<size_t> &coors, | |||||
| const flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>> *quant_params, | |||||
| size_t elem_cnt, size_t coor_best_bit, void *dst_data, size_t dst_data_size) { | |||||
| std::vector<T> un_sparsed_data; | |||||
| size_t data_index = 0; | |||||
| auto nz_cnt = indices.size(); | |||||
| MS_ASSERT(nz_cnt == coors.size()); | |||||
| auto channel_cnt = quant_params->size(); | |||||
| auto elem_perchannel = elem_cnt / channel_cnt; | |||||
| for (size_t i = 0; i < nz_cnt; i++) { | |||||
| auto index = indices[i]; | |||||
| if (index >= unique_values.size()) { | |||||
| MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto nz = unique_values[index]; | |||||
| if (nz > std::numeric_limits<T>::max() || nz < std::numeric_limits<T>::min()) { | |||||
| MS_LOG(ERROR) << "data: " << nz << " max: " << std::numeric_limits<T>::max() | |||||
| << " min: " << std::numeric_limits<T>::min(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto coor = coors[i]; | |||||
| auto cur_channel = data_index / elem_perchannel; | |||||
| auto zp = quant_params->Get(cur_channel)->zeroPoint(); | |||||
| for (size_t j = 0; j < coor; j++) { | |||||
| un_sparsed_data.push_back(zp); | |||||
| data_index++; | |||||
| } | |||||
| un_sparsed_data.push_back(static_cast<T>(unique_values[index])); | |||||
| data_index++; | |||||
| } | |||||
| if (un_sparsed_data.size() * sizeof(T) > dst_data_size) { | |||||
| MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T) | |||||
| << " tensor size: " << dst_data_size; | |||||
| return false; | |||||
| } else if (un_sparsed_data.size() * sizeof(T) < dst_data_size && | |||||
| (un_sparsed_data.size() + (1 << coor_best_bit) - 1) * sizeof(T) < dst_data_size) { | |||||
| MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T) << " tensor size: " << dst_data_size | |||||
| << " coor_best_bit: " << coor_best_bit; | |||||
| return false; | |||||
| } | |||||
| for (; data_index < dst_data_size / sizeof(T); data_index++) { | |||||
| auto cur_channel = data_index / elem_perchannel; | |||||
| auto zp = quant_params->Get(cur_channel)->zeroPoint(); | |||||
| un_sparsed_data.push_back(static_cast<T>(zp)); | |||||
| } | |||||
| memcpy(dst_data, un_sparsed_data.data(), un_sparsed_data.size() * sizeof(T)); | |||||
| return RET_OK; | |||||
| } | |||||
| std::vector<bool> StringToBitVector(const std::string &str); | |||||
| STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor); | |||||
| STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor); | |||||
| class WeightDecoder { | class WeightDecoder { | ||||
| public: | public: | ||||
| static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); | static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); | ||||
| @@ -0,0 +1 @@ | |||||
| ml_segmentation_matting 130 | |||||
| @@ -182,6 +182,23 @@ function Run_Converter() { | |||||
| fi | fi | ||||
| done < ${models_tflite_weightquant_config} | done < ${models_tflite_weightquant_config} | ||||
    # Convert caffe weightquant models:
    # Each line of ${models_caffe_weightquant_config} names a caffe model in its
    # first whitespace-separated field; lines starting with '#' are comments.
    # Converts <model>.prototxt/.caffemodel into <model>_weightquant.ms with
    # 8-bit weight quantization, logging the command and result; returns 1 from
    # the enclosing function on the first conversion failure.
    while read line; do
        weight_quant_line_info=${line}
        if [[ ${weight_quant_line_info} == \#* ]]; then
            continue
        fi
        model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
        echo ${model_name} >> "${run_converter_log_file}"
        # Log the converter invocation before running it.
        echo './converter_lite --fmk=CAFFE --modelFile='${models_path}'/'${model_name}'.prototxt --weightFile='${models_path}'/'${model_name}'.caffemodel --outputFile='${ms_models_path}'/'${model_name}_weightquant' --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}"
        ./converter_lite --fmk=CAFFE --modelFile=${models_path}/${model_name}.prototxt --weightFile=${models_path}/${model_name}.caffemodel --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0
        if [ $? = 0 ]; then
            converter_result='converter caffe_weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
        else
            converter_result='converter caffe_weight_quant '${model_name}' failed';echo ${converter_result} >> ${run_converter_result_file};return 1
        fi
    done < ${models_caffe_weightquant_config}
| # Convert mindir weightquant models: | # Convert mindir weightquant models: | ||||
| while read line; do | while read line; do | ||||
| weight_quant_line_info=${line} | weight_quant_line_info=${line} | ||||
| @@ -595,6 +612,24 @@ function Run_x86() { | |||||
| fi | fi | ||||
| done < ${models_tflite_weightquant_config} | done < ${models_tflite_weightquant_config} | ||||
| # Run caffe weightquant converted models: | |||||
| while read line; do | |||||
| weight_quant_line_info=${line} | |||||
| if [[ ${weight_quant_line_info} == \#* ]]; then | |||||
| continue | |||||
| fi | |||||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||||
| accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'` | |||||
| echo ${model_name} >> "${run_x86_log_file}" | |||||
| echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile='${models_path}'/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile='${models_path}'/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}" | |||||
| ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=${models_path}/input_output/input/${model_name}.ms.bin --benchmarkDataFile=${models_path}/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit}>> "${run_x86_log_file}" | |||||
| if [ $? = 0 ]; then | |||||
| run_result='x86: '${model_name}_weightquant' pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||||
| else | |||||
| run_result='x86: '${model_name}_weightquant' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||||
| fi | |||||
| done < ${models_caffe_weightquant_config} | |||||
| # Run tf weightquant converted models: | # Run tf weightquant converted models: | ||||
| while read line; do | while read line; do | ||||
| weight_quant_line_info=${line} | weight_quant_line_info=${line} | ||||
| @@ -2423,6 +2458,7 @@ version=${file_name_array[2]} | |||||
| models_tflite_config=${basepath}/models_tflite.cfg | models_tflite_config=${basepath}/models_tflite.cfg | ||||
| models_tf_config=${basepath}/models_tf.cfg | models_tf_config=${basepath}/models_tf.cfg | ||||
| models_caffe_config=${basepath}/models_caffe.cfg | models_caffe_config=${basepath}/models_caffe.cfg | ||||
| models_caffe_weightquant_config=${basepath}/models_caffe_weightquant.cfg | |||||
| models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg | models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg | ||||
| models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg | models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg | ||||
| models_caffe_posttraining_config=${basepath}/models_caffe_posttraining.cfg | models_caffe_posttraining_config=${basepath}/models_caffe_posttraining.cfg | ||||
| @@ -105,6 +105,31 @@ int AnfExporter::SetPostTrainOutputTensorType(const std::unique_ptr<schema::Meta | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_ptr<schema::CNodeT> &dst_node) { | |||||
| if (!tensor_input->quantParams.empty() && tensor_input->quantParams.front()->inited) { | |||||
| int bit_num = tensor_input->quantParams.at(0)->numBits; | |||||
| // Pack Repetition | |||||
| auto repetition_packed = false; | |||||
| MS_LOG(ERROR) << dst_node->name; | |||||
| if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) { | |||||
| if (bit_num <= 8) { | |||||
| repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input); | |||||
| } else { | |||||
| repetition_packed = PackRepetition<int16_t>(bit_num, tensor_input); | |||||
| } | |||||
| } | |||||
| if (bit_num != 8 && bit_num != 16 && !repetition_packed) { | |||||
| auto status = DoBitPack(bit_num, tensor_input); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "do bit pack failed. " << status; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &meta_graph, | int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &meta_graph, | ||||
| const std::shared_ptr<mindspore::Primitive> &primitive, | const std::shared_ptr<mindspore::Primitive> &primitive, | ||||
| const std::unique_ptr<schema::CNodeT> &dst_node) { | const std::unique_ptr<schema::CNodeT> &dst_node) { | ||||
| @@ -146,16 +171,9 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &me | |||||
| tensor_input->quantParams.emplace_back(std::move(input_quant_param_ptr)); | tensor_input->quantParams.emplace_back(std::move(input_quant_param_ptr)); | ||||
| } | } | ||||
| } | } | ||||
| if (!tensor_input->quantParams.empty()) { | |||||
| int bit_num = tensor_input->quantParams.at(0)->numBits; | |||||
| if (bit_num != 8 && bit_num != 16) { | |||||
| auto status = DoBitPack(bit_num, tensor_input); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "do bit pack failed. " << status; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| if (CompressTensor(tensor_input, dst_node) != RET_OK) { | |||||
| MS_LOG(ERROR) << "CompressTensor error"; | |||||
| return RET_ERROR; | |||||
| } | } | ||||
| } | } | ||||
| @@ -694,5 +694,20 @@ std::vector<int> GetTransposePerm(MetaGraphT *graph, const std::unique_ptr<CNode | |||||
| } | } | ||||
| return perm; | return perm; | ||||
| } | } | ||||
// Packs a bit vector into a byte string, MSB first within each byte; the
// final byte is zero-padded when the bit count is not a multiple of 8.
// Inverse of StringToBitVector (modulo that padding).
std::string BoolVectorToString(const std::vector<bool> &bool_vec) {
  size_t byte_cnt = (bool_vec.size() + 7) / 8;
  std::string packed(byte_cnt, '\0');
  for (size_t i = 0; i < bool_vec.size(); i++) {
    if (bool_vec[i]) {
      packed[i / 8] |= static_cast<char>(1 << (7 - i % 8));
    }
  }
  return packed;
}
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -23,6 +23,12 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| #include <vector> | #include <vector> | ||||
| #include <map> | |||||
| #include <set> | |||||
| #include <algorithm> | |||||
| #include <numeric> | |||||
| #include <limits> | |||||
| #include <functional> | |||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "schema/inner/model_generated.h" | #include "schema/inner/model_generated.h" | ||||
| #include "src/common/graph_util.h" | #include "src/common/graph_util.h" | ||||
| @@ -95,6 +101,210 @@ STATUS SetSubgraphTensorIndices(schema::MetaGraphT *meta_graphT); | |||||
| std::string GetModelName(const std::string &modelFile); | std::string GetModelName(const std::string &modelFile); | ||||
| std::vector<int> GetTransposePerm(schema::MetaGraphT *graph, const std::unique_ptr<schema::CNodeT> &cnode); | std::vector<int> GetTransposePerm(schema::MetaGraphT *graph, const std::unique_ptr<schema::CNodeT> &cnode); | ||||
| std::string BoolVectorToString(const std::vector<bool> &bool_vec); | |||||
// Rewrites tensor->data in place as an index-compressed bit stream:
//   [unique_value_cnt : bit_num bits]  (2^bit_num wraps to 0; decompressor maps back)
//   [each unique value : bit_num bits, biased by 2^(bit_num-1)]
//   [each element : unique_value_bit-bit index into the unique-value set]
// pack_repetition_size_in_byte must be the precomputed size of that layout
// (see PackRepetition). On success the tensor is resized and tagged INDEXING.
// Returns false on overflow or copy failure (tensor left partially intact).
template <typename T>
bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map,
                      size_t unique_value_bit, size_t unique_value_cnt, size_t pack_repetition_size_in_byte,
                      size_t bit_num, schema::TensorT *tensor) {
  // Reinterpret the raw byte buffer as the quantized element type.
  auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  std::vector<bool> bits(pack_repetition_size_in_byte * 8);
  size_t index = 0;  // write cursor, in bits
  // write unique_value_cnt: bit_num bit for unsigned
  for (size_t i = 0; i < bit_num; i++) {
    bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & (0x1);
  }
  // write the unique value set: each value has bit_num bit signed
  // (stored unsigned by adding the 2^(bit_num-1) bias)
  for (auto unique_value : quant_data_set) {
    for (size_t i = 0; i < bit_num; i++) {
      bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
    }
  }
  // write the index: each index has unique_value_bit unsigned
  for (auto quant_value : quant_data) {
    for (size_t i = 0; i < unique_value_bit; i++) {
      bits[index++] = (unique_value_index_map.at(quant_value) >> (unique_value_bit - i - 1)) & (0x1);
    }
  }
  // Sanity check: the writer must not have overrun the precomputed size.
  if (index > pack_repetition_size_in_byte * 8) {
    MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_repetition_size_in_byte * 8;
    return false;
  }
  // update tensor data: pack bits into bytes and shrink the buffer.
  auto new_data_str = BoolVectorToString(bits);
  auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size());
  if (ret != EOK) {
    MS_LOG(ERROR) << "memcpy error";
    return false;
  }
  tensor->data.resize(new_data_str.size());
  tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING;
  // NOTE(review): success trace logged at ERROR level — debug leftover;
  // consider demoting.
  MS_LOG(ERROR) << "set WeightQunatCompressType_INDEXING";
  return true;
}
// Rewrites tensor->data in place as a sparse-compressed bit stream:
//   [coor_best_bit : 8 bits] [nz_cnt : 32 bits] [unique_value_cnt : bit_num bits]
//   [unique values : bit_num bits each, biased by 2^(bit_num-1)]
//   [nz value indices : unique_value_bit bits each]
//   [nz coordinates : coor_best_bit bits each, gap to the previous nz]
// An element counts as "non-zero" if it differs from its channel's zero point
// OR the gap since the previous nz would overflow coor_best_bit bits.
// On success the tensor is resized and tagged SPARSE; returns false on
// overflow or copy failure.
template <typename T>
bool SparsityCompress(const std::set<T> &quant_data_set, const std::map<T, size_t> &unique_value_index_map,
                      size_t unique_value_bit, size_t unique_value_cnt, size_t pack_sparsity_size_in_byte,
                      size_t nz_cnt, size_t coor_best_bit, size_t bit_num, schema::TensorT *tensor) {
  auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  auto &quant_params = tensor->quantParams;
  auto elem_cnt = quant_data.size();
  auto channel_cnt = quant_params.size();
  // NOTE(review): divides by channel_cnt with no zero check — caller
  // (PackRepetition) is assumed to guarantee non-empty quant params; confirm.
  auto elem_perchannel = elem_cnt / channel_cnt;
  std::vector<bool> bits(pack_sparsity_size_in_byte * 8);
  // NOTE(review): `index` is int while sibling IndexingCompress uses size_t,
  // forcing the unsigned casts below; consider size_t for consistency.
  int index = 0;  // write cursor, in bits
  // coor_best_bit
  for (size_t i = 0; i < 8; i++) {
    bits[index++] = (coor_best_bit >> (8 - i - 1)) & 0x1;
  }
  // nz_cnt
  for (size_t i = 0; i < 32; i++) {
    bits[index++] = (nz_cnt >> (32 - i - 1)) & 0x1;
  }
  // unique_value cnt
  for (size_t i = 0; i < bit_num; i++) {
    bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & 0x1;
  }
  // unique_values (stored unsigned by adding the 2^(bit_num-1) bias)
  for (auto unique_value : quant_data_set) {
    for (size_t i = 0; i < bit_num; i++) {
      bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1);
    }
  }
  // nz values indexing && get coor
  std::vector<size_t> coors(nz_cnt);
  int coors_index = 0;
  int prev_index = -1;  // position of the previous nz; -1 so first gap is di
  for (int di = 0; (unsigned int)di < elem_cnt; di++) {
    auto cur_channel = di / elem_perchannel;
    auto zp = quant_params[cur_channel]->zeroPoint;
    auto nz_value = quant_data[di];
    // Emit an nz either for a real non-zero-point value or to keep the gap
    // representable in coor_best_bit bits.
    if (nz_value != zp || (di - prev_index) >= (1 << coor_best_bit)) {
      // NOTE(review): signed/unsigned comparison (int vs size_t) in this assert.
      MS_ASSERT(coors_index < nz_cnt);
      coors[coors_index++] = di - prev_index - 1;
      prev_index = di;
      for (size_t i = 0; i < unique_value_bit; i++) {
        bits[index++] = (unique_value_index_map.at(nz_value) >> (unique_value_bit - i - 1)) & (0x1);
      }
    }
  }
  // write coor
  for (auto coor : coors) {
    for (size_t i = 0; i < coor_best_bit; i++) {
      bits[index++] = (coor >> (coor_best_bit - i - 1)) & 0x1;
    }
  }
  // Sanity check: the writer must not have overrun the precomputed size.
  if ((unsigned int)index > pack_sparsity_size_in_byte * 8) {
    MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_sparsity_size_in_byte * 8;
    return false;
  }
  // Pack bits into bytes and shrink the buffer in place.
  auto new_data_str = BoolVectorToString(bits);
  auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size());
  if (ret != EOK) {
    MS_LOG(ERROR) << "memcpy error";
    return false;
  }
  tensor->data.resize(new_data_str.size());
  tensor->weightQunatCompressType = schema::WeightQunatCompressType_SPARSE;
  // NOTE(review): success trace logged at ERROR level — debug leftover;
  // consider demoting. Message says "SPARSITY" while the enum value is SPARSE.
  MS_LOG(ERROR) << "set WeightQunatCompressType_SPARSITY";
  return true;
}
| template <typename T> | |||||
| size_t CalCoorBestBit(const std::vector<T> &quant_data, size_t elem_cnt, | |||||
| const std::vector<std::unique_ptr<schema::QuantParamT>> &quant_params, int unique_value_bit, | |||||
| size_t *coor_best_bit) { | |||||
| size_t best_nn_cnt = 0; | |||||
| size_t min_len_in_bit = std::numeric_limits<size_t>::max(); | |||||
| for (int bit = 2; bit <= 10; bit++) { | |||||
| // search | |||||
| size_t nn_cnt = 0; | |||||
| int prev_index = -1; | |||||
| auto channel_cnt = quant_params.size(); | |||||
| auto elem_perchannel = elem_cnt / channel_cnt; | |||||
| for (int i = 0; (unsigned int)i < elem_cnt; i++) { | |||||
| auto cur_channel = i / elem_perchannel; | |||||
| auto zp = quant_params[cur_channel]->zeroPoint; | |||||
| if (quant_data[i] != zp || (i - prev_index) >= (1 << bit)) { | |||||
| nn_cnt++; | |||||
| prev_index = i; | |||||
| } | |||||
| } | |||||
| size_t len_in_bit = nn_cnt * bit + nn_cnt * unique_value_bit; | |||||
| if (len_in_bit < min_len_in_bit) { | |||||
| min_len_in_bit = len_in_bit; | |||||
| *coor_best_bit = bit; | |||||
| best_nn_cnt = nn_cnt; | |||||
| } | |||||
| } | |||||
| return best_nn_cnt; | |||||
| } | |||||
// Chooses and applies the cheapest representation for a quantized weight
// tensor: original packed data, index compression (IndexingCompress), or
// sparse compression (SparsityCompress), by comparing their exact byte sizes.
// Returns true iff the tensor data was rewritten (compressed).
template <typename T>
bool PackRepetition(size_t bit_num, schema::TensorT *tensor) {
  // Reinterpret the raw byte buffer as the quantized element type.
  auto quant_data_array = reinterpret_cast<T *>(tensor->data.data());
  std::vector<T> quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T));
  auto elem_cnt = quant_data.size();
  auto dims = tensor->dims;
  // NOTE(review): accumulate seeds with int 1, so the product is computed in
  // int and can overflow for very large tensors before widening to size_t.
  size_t elem_cnt_by_dims = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<>());
  if (elem_cnt != elem_cnt_by_dims) {
    MS_LOG(ERROR) << "elem_cnt: " << elem_cnt << " not equal: " << elem_cnt_by_dims;
    return false;
  }
  auto &quant_params = tensor->quantParams;
  // Collect the distinct quant values and assign each a dense index.
  std::set<T> quant_data_set;
  for (auto quant_value : quant_data) {
    quant_data_set.insert(quant_value);
  }
  std::map<T, size_t> unique_value_index_map;
  auto index = 0;
  for (auto value : quant_data_set) {
    unique_value_index_map[value] = index++;
  }
  auto unique_value_cnt = quant_data_set.size();
  size_t unique_value_bit = ceil(log2(unique_value_cnt));
  // Indexing layout: count field + value table + one index per element.
  auto pack_repetition_size_in_bit = bit_num + bit_num * unique_value_cnt + unique_value_bit * elem_cnt;
  size_t pack_repetition_size_in_byte = ceil(pack_repetition_size_in_bit / 8.0);
  size_t origin_size_in_byte = ceil(bit_num * elem_cnt / 8.0);
  size_t coor_best_bit = 0;
  auto nz_cnt = CalCoorBestBit<T>(quant_data, elem_cnt, quant_params, unique_value_bit, &coor_best_bit);
  // 1. coor_best_bit 2. nz_cnt 3. quant_data_set size 4. unique_values 5. unique_value indexing 6. nz values coord
  auto pack_sparsity_size_in_bit =
    1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
  size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0);
  // NOTE(review): these size traces are logged at ERROR level on the normal
  // path — debug leftovers; consider demoting (applies to the three below too).
  MS_LOG(ERROR) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
                << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte;
  // Pick whichever representation is smallest; ties favor the original.
  auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte});
  if (min_byte_need == origin_size_in_byte) {
    return false;
  } else if (min_byte_need == pack_repetition_size_in_byte) {
    MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
    return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
                               pack_repetition_size_in_byte, bit_num, tensor);
  } else if (min_byte_need == pack_sparsity_size_in_byte) {
    MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
    return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
                               pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor);
  } else {
    MS_LOG(ERROR) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
                  << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}";
  }
  return false;
}
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -311,7 +311,9 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl | |||||
| } | } | ||||
| const double zeroPointFromMin = quantMinFloat - mMin / scale; | const double zeroPointFromMin = quantMinFloat - mMin / scale; | ||||
| int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin)); | int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin)); | ||||
| if (scale < SCALE_THREASHOLD) { | |||||
| zeroPoint = 0; | |||||
| } | |||||
| // The zero point should always be in the range of quantized value, | // The zero point should always be in the range of quantized value, | ||||
| // [qmin, qmax]. | // [qmin, qmax]. | ||||
| MS_ASSERT(zeroPoint >= quantMin); | MS_ASSERT(zeroPoint >= quantMin); | ||||
| @@ -47,7 +47,7 @@ | |||||
| namespace mindspore::lite::quant { | namespace mindspore::lite::quant { | ||||
| static constexpr size_t UINT8_QUANTIZATION = 8; | static constexpr size_t UINT8_QUANTIZATION = 8; | ||||
| static constexpr size_t WEIGHT_INDEX = 1; | static constexpr size_t WEIGHT_INDEX = 1; | ||||
| static constexpr double SCALE_THREASHOLD = 1e-38; | |||||
| const char kMethodMaxMin[] = "MAX_MIN"; | const char kMethodMaxMin[] = "MAX_MIN"; | ||||
| const char kMethodKL[] = "KL"; | const char kMethodKL[] = "KL"; | ||||
| const char kMethodOutlier[] = "RemovalOutlier"; | const char kMethodOutlier[] = "RemovalOutlier"; | ||||
| @@ -163,7 +163,9 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan | |||||
| const auto narrowRange = quantParam.narrowRange; | const auto narrowRange = quantParam.narrowRange; | ||||
| const int maxLimit = quant_max; | const int maxLimit = quant_max; | ||||
| const int minLimit = quant_min; | const int minLimit = quant_min; | ||||
| if (scale <= SCALE_THREASHOLD) { | |||||
| return 0; | |||||
| } | |||||
| return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { | return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { | ||||
| auto quant_data = std::round(originData / scale + zeroPoint); | auto quant_data = std::round(originData / scale + zeroPoint); | ||||
| if (quant_data > maxLimit) { | if (quant_data > maxLimit) { | ||||