From 2e7b25f54d1c78856381d322157f53ac4ca73cb0 Mon Sep 17 00:00:00 2001 From: xutianchun Date: Wed, 21 Apr 2021 17:37:44 +0800 Subject: [PATCH] weight quant add index pack storage --- mindspore/lite/schema/model.fbs | 7 + mindspore/lite/src/lite_session.cc | 6 + mindspore/lite/src/weight_decoder.cc | 161 ++++++++++++++ mindspore/lite/src/weight_decoder.h | 90 ++++++++ .../lite/test/models_caffe_weightquant.cfg | 1 + mindspore/lite/test/run_benchmark_nets.sh | 36 +++ .../lite/tools/anf_exporter/anf_exporter.cc | 38 +++- mindspore/lite/tools/common/graph_util.cc | 15 ++ mindspore/lite/tools/common/graph_util.h | 210 ++++++++++++++++++ .../converter/quantizer/quantize_util.cc | 4 +- .../tools/converter/quantizer/quantize_util.h | 6 +- 11 files changed, 561 insertions(+), 13 deletions(-) create mode 100644 mindspore/lite/test/models_caffe_weightquant.cfg diff --git a/mindspore/lite/schema/model.fbs b/mindspore/lite/schema/model.fbs index 22769fcc72..bcdfa8ece8 100644 --- a/mindspore/lite/schema/model.fbs +++ b/mindspore/lite/schema/model.fbs @@ -38,6 +38,12 @@ table QuantParam { multiplier: int = 1; // calculate fixed point multiplier method } +enum WeightQunatCompressType: int { + NONE, + INDEXING, + SPARSE +} + table Tensor { nodeType: int; // data type @@ -52,6 +58,7 @@ table Tensor { quantClusters: [float]; name: string; enableHuffmanCode: bool = false; + weightQunatCompressType: WeightQunatCompressType = NONE; } enum QuantType: int { diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index bb0883791e..c696f55266 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -45,6 +45,12 @@ namespace lite { namespace { int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { MS_ASSERT(dst_tensor != nullptr); + if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_INDEXING) { + return IndexingDecompress(src_tensor, dst_tensor); + } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_SPARSE) { + return SparseDecompress(src_tensor, dst_tensor); + } + bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); if (need_bit_unpack) { diff --git a/mindspore/lite/src/weight_decoder.cc b/mindspore/lite/src/weight_decoder.cc index f97ecdfd20..1e396c50b7 100644 --- a/mindspore/lite/src/weight_decoder.cc +++ b/mindspore/lite/src/weight_decoder.cc @@ -20,6 +20,167 @@ #include "src/huffman_decode.h" namespace mindspore::lite { +std::vector StringToBitVector(const std::string &str) { + std::vector vec(str.size() * 8); + size_t index = 0; + for (auto ch : str) { + for (size_t shift = 8; shift > 0; shift--) { + vec[index++] = (ch >> (shift - 1)) & 0x1; + } + } + return vec; +} + +STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { + MS_LOG(ERROR) << "un-index weight"; + auto bit_num = src_tensor.quantParams()->Get(0)->numBits(); + + std::string str(reinterpret_cast(src_tensor.data()->data()), src_tensor.data()->size()); + auto bit_vec = StringToBitVector(str); + size_t index = 0; + // parse unique_value_cnt + size_t unique_value_cnt = 0; + for (int i = 0; i < bit_num; i++) { + bool bit = bit_vec[index++]; + unique_value_cnt |= bit << (bit_num - i - 1); + } + if (unique_value_cnt == 0) { + unique_value_cnt = 1 << bit_num; + } + // parse unique_value_set; + std::vector unique_values; + for (size_t i = 0; i < 
unique_value_cnt; i++) { + int unique_value = 0; + for (int j = 0; j < bit_num; j++) { + bool bit = bit_vec[index++]; + unique_value |= bit << (bit_num - j - 1); + } + // unsigned to signed + unique_values.push_back(unique_value - (1 << (bit_num - 1))); + } + // parse index + std::vector unique_value_index_vec; + auto elem_cnt = dst_tensor->ElementsNum(); + size_t unique_value_bit = ceil(log2(unique_value_cnt)); + for (int i = 0; i < elem_cnt; i++) { + size_t unique_value_index = 0; + for (size_t j = 0; j < unique_value_bit; j++) { + bool bit = bit_vec[index++]; + unique_value_index |= bit << (unique_value_bit - j - 1); + } + unique_value_index_vec.push_back(unique_value_index); + } + + if (dst_tensor->data_c() != nullptr) { + MS_LOG(ERROR) << "data_c not null"; + return RET_ERROR; + } + auto ret = dst_tensor->MallocData(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Malloc tensor data failed"; + return RET_NULL_PTR; + } + auto dst_data = dst_tensor->data_c(); + if (bit_num <= 8) { + ret = UnIndexTensorData(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); + } else { + ret = UnIndexTensorData(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << "UnIndexTensorData error"; + return RET_ERROR; + } + return RET_OK; +} + +STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { + MS_LOG(ERROR) << "un-sparse weight"; + size_t bit_num = src_tensor.quantParams()->Get(0)->numBits(); + + std::string str(reinterpret_cast(src_tensor.data()->data()), src_tensor.data()->size()); + auto bit_vec = StringToBitVector(str); + size_t index = 0; + // parse coor_best_bit + size_t coor_best_bit = 0; + for (size_t i = 0; i < 8; i++) { + bool bit = bit_vec[index++]; + coor_best_bit |= bit << (8 - i - 1); + } + // parse nz_cnt + size_t nz_cnt = 0; + for (size_t i = 0; i < 32; i++) { + bool bit = bit_vec[index++]; + nz_cnt |= bit << (32 - i - 1); + } + // parse unique_value cnt + size_t unique_value_cnt = 0; + for (size_t i = 0; i < bit_num; i++) { + bool bit = bit_vec[index++]; + unique_value_cnt |= bit << (bit_num - i - 1); + } + if (unique_value_cnt == 0) { + unique_value_cnt = 1 << bit_num; + } + // parse unique_values + std::vector unique_values; + for (size_t i = 0; i < unique_value_cnt; i++) { + int unique_value = 0; + for (size_t j = 0; j < bit_num; j++) { + bool bit = bit_vec[index++]; + unique_value |= bit << (bit_num - j - 1); + } + // unsigned to signed + unique_values.push_back(unique_value - (1 << (bit_num - 1))); + } + // parse index + std::vector unique_value_index_vec; + auto elem_cnt = dst_tensor->ElementsNum(); + size_t unique_value_bit = ceil(log2(unique_value_cnt)); + for (size_t i = 0; i < nz_cnt; i++) { + size_t unique_value_index = 0; + for (size_t j = 0; j < unique_value_bit; j++) { + bool bit = bit_vec[index++]; + unique_value_index |= bit << (unique_value_bit - j - 1); + } + unique_value_index_vec.push_back(unique_value_index); + } + + // parse coors + std::vector coor_vec; + for (size_t i = 0; i < nz_cnt; i++) { + size_t coor = 0; + for (size_t j = 0; j < coor_best_bit; j++) { + bool bit = bit_vec[index++]; + coor |= bit << (coor_best_bit - j - 1); + } + coor_vec.push_back(coor); + } + + if (dst_tensor->data_c() != nullptr) { + MS_LOG(ERROR) << "data_c not null"; + return RET_ERROR; + } + auto ret = dst_tensor->MallocData(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Malloc tensor data failed"; + return RET_NULL_PTR; + } + auto dst_data = dst_tensor->data_c(); + + if (bit_num <= 8) { + 
ret = UnSparseTensorData(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), + elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); + } else { + ret = UnSparseTensorData(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), + elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << "UnSparseTensorData error"; + return RET_ERROR; + } + return RET_OK; +} + int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type) { MS_ASSERT(input_tensor != nullptr); if (input_tensor->data_type() != kNumberTypeInt8 && input_tensor->data_type() != kNumberTypeInt16) { diff --git a/mindspore/lite/src/weight_decoder.h b/mindspore/lite/src/weight_decoder.h index 05a8d593df..6a564a0cb8 100644 --- a/mindspore/lite/src/weight_decoder.h +++ b/mindspore/lite/src/weight_decoder.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include "nnacl/matmul_parameter.h" #include "src/lite_kernel.h" @@ -30,6 +32,94 @@ static constexpr int kPerTensor = 1; namespace mindspore::lite { + +template +STATUS UnIndexTensorData(const std::vector &unique_values, const std::vector &indices, void *dst_data, + size_t dst_data_size) { + std::vector un_indexed_data; + for (auto index : indices) { + if (index >= unique_values.size()) { + MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size(); + return RET_ERROR; + } + if (unique_values[index] > std::numeric_limits::max() || unique_values[index] < std::numeric_limits::min()) { + MS_LOG(ERROR) << "data: " << unique_values[index] << " max: " << std::numeric_limits::max() + << " min: " << std::numeric_limits::min(); + return RET_ERROR; + } + un_indexed_data.push_back(static_cast(unique_values[index])); + } + if (un_indexed_data.size() * sizeof(T) != dst_data_size) { + MS_LOG(ERROR) << "un idnexed data size: " << un_indexed_data.size() * sizeof(T) + << " expected by tensor: " << dst_data_size; + return false; + } + memcpy(dst_data, un_indexed_data.data(), un_indexed_data.size() * sizeof(T)); + + return RET_OK; +} + +template +STATUS UnSparseTensorData(const std::vector &unique_values, const std::vector &indices, + const std::vector &coors, + const flatbuffers::Vector> *quant_params, + size_t elem_cnt, size_t coor_best_bit, void *dst_data, size_t dst_data_size) { + std::vector un_sparsed_data; + size_t data_index = 0; + auto nz_cnt = indices.size(); + MS_ASSERT(nz_cnt == coors.size()); + auto channel_cnt = quant_params->size(); + auto elem_perchannel = elem_cnt / channel_cnt; + for (size_t i = 0; i < nz_cnt; i++) { + auto index = indices[i]; + if (index >= unique_values.size()) { + MS_LOG(ERROR) << "index: " << index << " size: " << unique_values.size(); + return RET_ERROR; + } + auto nz = unique_values[index]; + if (nz > std::numeric_limits::max() || nz < std::numeric_limits::min()) { + MS_LOG(ERROR) << "data: " << nz << " max: " << std::numeric_limits::max() + << " min: " << std::numeric_limits::min(); + return RET_ERROR; + } + auto coor = coors[i]; + auto cur_channel = data_index / elem_perchannel; + auto zp = quant_params->Get(cur_channel)->zeroPoint(); + for (size_t j = 0; j < coor; j++) { + un_sparsed_data.push_back(zp); + data_index++; + } + un_sparsed_data.push_back(static_cast(unique_values[index])); + data_index++; + } + if (un_sparsed_data.size() * sizeof(T) > dst_data_size) { + MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T) + << " tensor size: " << dst_data_size; + return 
false; + } else if (un_sparsed_data.size() * sizeof(T) < dst_data_size && + (un_sparsed_data.size() + (1 << coor_best_bit) - 1) * sizeof(T) < dst_data_size) { + MS_LOG(ERROR) << "un-sparsed data size: " << un_sparsed_data.size() * sizeof(T) << " tensor size: " << dst_data_size + << " coor_best_bit: " << coor_best_bit; + return false; + } + + for (; data_index < dst_data_size / sizeof(T); data_index++) { + auto cur_channel = data_index / elem_perchannel; + auto zp = quant_params->Get(cur_channel)->zeroPoint(); + un_sparsed_data.push_back(static_cast(zp)); + } + + memcpy(dst_data, un_sparsed_data.data(), un_sparsed_data.size() * sizeof(T)); + + return RET_OK; +} + +std::vector StringToBitVector(const std::string &str); + +STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor); + +STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor); + class WeightDecoder { public: static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); diff --git a/mindspore/lite/test/models_caffe_weightquant.cfg b/mindspore/lite/test/models_caffe_weightquant.cfg new file mode 100644 index 0000000000..212dc05f67 --- /dev/null +++ b/mindspore/lite/test/models_caffe_weightquant.cfg @@ -0,0 +1 @@ +ml_segmentation_matting 130 diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh index 0b57f3bc38..e858ea29a0 100644 --- a/mindspore/lite/test/run_benchmark_nets.sh +++ b/mindspore/lite/test/run_benchmark_nets.sh @@ -182,6 +182,23 @@ function Run_Converter() { fi done < ${models_tflite_weightquant_config} + # Convert caffe weightquant models: + while read line; do + weight_quant_line_info=${line} + if [[ ${weight_quant_line_info} == \#* ]]; then + continue + fi + model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` + echo ${model_name} >> "${run_converter_log_file}" + echo './converter_lite --fmk=CAFFE --modelFile='${models_path}'/'${model_name}'.prototxt --weightFile='${models_path}'/'${model_name}'.caffemodel --outputFile='${ms_models_path}'/'${model_name}_weightquant' --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}" + ./converter_lite --fmk=CAFFE --modelFile=${models_path}/${model_name}.prototxt --weightFile=${models_path}/${model_name}.caffemodel --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0 + if [ $? 
= 0 ]; then + converter_result='converter caffe_weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file} + else + converter_result='converter caffe_weight_quant '${model_name}' failed';echo ${converter_result} >> ${run_converter_result_file};return 1 + fi + done < ${models_caffe_weightquant_config} + # Convert mindir weightquant models: while read line; do weight_quant_line_info=${line} @@ -595,6 +612,24 @@ function Run_x86() { fi done < ${models_tflite_weightquant_config} + # Run caffe weightquant converted models: + while read line; do + weight_quant_line_info=${line} + if [[ ${weight_quant_line_info} == \#* ]]; then + continue + fi + model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` + accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'` + echo ${model_name} >> "${run_x86_log_file}" + echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile='${models_path}'/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile='${models_path}'/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}" + ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=${models_path}/input_output/input/${model_name}.ms.bin --benchmarkDataFile=${models_path}/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit}>> "${run_x86_log_file}" + if [ $? = 0 ]; then + run_result='x86: '${model_name}_weightquant' pass'; echo ${run_result} >> ${run_benchmark_result_file} + else + run_result='x86: '${model_name}_weightquant' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 + fi + done < ${models_caffe_weightquant_config} + # Run tf weightquant converted models: while read line; do weight_quant_line_info=${line} @@ -2423,6 +2458,7 @@ version=${file_name_array[2]} models_tflite_config=${basepath}/models_tflite.cfg models_tf_config=${basepath}/models_tf.cfg models_caffe_config=${basepath}/models_caffe.cfg +models_caffe_weightquant_config=${basepath}/models_caffe_weightquant.cfg models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg models_caffe_posttraining_config=${basepath}/models_caffe_posttraining.cfg diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.cc b/mindspore/lite/tools/anf_exporter/anf_exporter.cc index bf01fa9146..c8f8af40de 100644 --- a/mindspore/lite/tools/anf_exporter/anf_exporter.cc +++ b/mindspore/lite/tools/anf_exporter/anf_exporter.cc @@ -105,6 +105,31 @@ int AnfExporter::SetPostTrainOutputTensorType(const std::unique_ptr &dst_node) { + if (!tensor_input->quantParams.empty() && tensor_input->quantParams.front()->inited) { + int bit_num = tensor_input->quantParams.at(0)->numBits; + // Pack Repetition + auto repetition_packed = false; + MS_LOG(ERROR) << dst_node->name; + if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) { + if (bit_num <= 8) { + repetition_packed = PackRepetition(bit_num, tensor_input); + } else { + repetition_packed = PackRepetition(bit_num, tensor_input); + } + } + + if (bit_num != 8 && bit_num != 16 && !repetition_packed) { + auto status = DoBitPack(bit_num, tensor_input); + if (status != RET_OK) { + MS_LOG(ERROR) << "do bit pack failed. 
" << status; + return RET_ERROR; + } + } + } + return RET_OK; +} + int AnfExporter::ConvertQuantParam(const std::unique_ptr &meta_graph, const std::shared_ptr &primitive, const std::unique_ptr &dst_node) { @@ -146,16 +171,9 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr &me tensor_input->quantParams.emplace_back(std::move(input_quant_param_ptr)); } } - - if (!tensor_input->quantParams.empty()) { - int bit_num = tensor_input->quantParams.at(0)->numBits; - if (bit_num != 8 && bit_num != 16) { - auto status = DoBitPack(bit_num, tensor_input); - if (status != RET_OK) { - MS_LOG(ERROR) << "do bit pack failed. " << status; - return RET_ERROR; - } - } + if (CompressTensor(tensor_input, dst_node) != RET_OK) { + MS_LOG(ERROR) << "CompressTensor error"; + return RET_ERROR; } } diff --git a/mindspore/lite/tools/common/graph_util.cc b/mindspore/lite/tools/common/graph_util.cc index 1225a8f89c..d1c97d733e 100644 --- a/mindspore/lite/tools/common/graph_util.cc +++ b/mindspore/lite/tools/common/graph_util.cc @@ -694,5 +694,20 @@ std::vector GetTransposePerm(MetaGraphT *graph, const std::unique_ptr &bool_vec) { + size_t size_in_byte = ceil(bool_vec.size() / 8.0); + std::string str(size_in_byte, '\0'); + auto iter = str.begin(); + size_t shift = 8; + for (bool bit : bool_vec) { + *iter |= bit << (shift - 1); + if (--shift == 0) { + iter++; + shift = 8; + } + } + return str; +} + } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/common/graph_util.h b/mindspore/lite/tools/common/graph_util.h index 9d72e4ddef..6c412c84f3 100644 --- a/mindspore/lite/tools/common/graph_util.h +++ b/mindspore/lite/tools/common/graph_util.h @@ -23,6 +23,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include "include/errorcode.h" #include "schema/inner/model_generated.h" #include "src/common/graph_util.h" @@ -95,6 +101,210 @@ STATUS SetSubgraphTensorIndices(schema::MetaGraphT *meta_graphT); std::string GetModelName(const std::string &modelFile); std::vector GetTransposePerm(schema::MetaGraphT *graph, const std::unique_ptr &cnode); + +std::string BoolVectorToString(const std::vector &bool_vec); + +template +bool IndexingCompress(const std::set &quant_data_set, const std::map &unique_value_index_map, + size_t unique_value_bit, size_t unique_value_cnt, size_t pack_repetition_size_in_byte, + size_t bit_num, schema::TensorT *tensor) { + auto quant_data_array = reinterpret_cast(tensor->data.data()); + std::vector quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T)); + + std::vector bits(pack_repetition_size_in_byte * 8); + size_t index = 0; + // write unique_value_cnt: bit_num bit for unsigned + for (size_t i = 0; i < bit_num; i++) { + bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & (0x1); + } + // write the unique value set: each value has bit_num bit signed + for (auto unique_value : quant_data_set) { + for (size_t i = 0; i < bit_num; i++) { + bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1); + } + } + // write the index: each index has unique_value_bit unsigned + for (auto quant_value : quant_data) { + for (size_t i = 0; i < unique_value_bit; i++) { + bits[index++] = (unique_value_index_map.at(quant_value) >> (unique_value_bit - i - 1)) & (0x1); + } + } + if (index > pack_repetition_size_in_byte * 8) { + MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_repetition_size_in_byte * 8; + return false; + } + // update tensor data + auto 
new_data_str = BoolVectorToString(bits); + auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size()); + if (ret != EOK) { + MS_LOG(ERROR) << "memcpy error"; + return false; + } + tensor->data.resize(new_data_str.size()); + + tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING; + MS_LOG(ERROR) << "set WeightQunatCompressType_INDEXING"; + return true; +} + +template +bool SparsityCompress(const std::set &quant_data_set, const std::map &unique_value_index_map, + size_t unique_value_bit, size_t unique_value_cnt, size_t pack_sparsity_size_in_byte, + size_t nz_cnt, size_t coor_best_bit, size_t bit_num, schema::TensorT *tensor) { + auto quant_data_array = reinterpret_cast(tensor->data.data()); + std::vector quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T)); + auto &quant_params = tensor->quantParams; + auto elem_cnt = quant_data.size(); + auto channel_cnt = quant_params.size(); + auto elem_perchannel = elem_cnt / channel_cnt; + + std::vector bits(pack_sparsity_size_in_byte * 8); + int index = 0; + // coor_best_bit + for (size_t i = 0; i < 8; i++) { + bits[index++] = (coor_best_bit >> (8 - i - 1)) & 0x1; + } + // nz_cnt + for (size_t i = 0; i < 32; i++) { + bits[index++] = (nz_cnt >> (32 - i - 1)) & 0x1; + } + // unique_value cnt + for (size_t i = 0; i < bit_num; i++) { + bits[index++] = (unique_value_cnt >> (bit_num - i - 1)) & 0x1; + } + // unique_values + for (auto unique_value : quant_data_set) { + for (size_t i = 0; i < bit_num; i++) { + bits[index++] = ((unique_value + (1 << (bit_num - 1))) >> (bit_num - i - 1)) & (0x1); + } + } + // nz values indexing && get coor + std::vector coors(nz_cnt); + int coors_index = 0; + int prev_index = -1; + for (int di = 0; (unsigned int)di < elem_cnt; di++) { + auto cur_channel = di / elem_perchannel; + auto zp = quant_params[cur_channel]->zeroPoint; + auto nz_value = quant_data[di]; + if (nz_value != zp || (di - prev_index) >= (1 << coor_best_bit)) { + MS_ASSERT(coors_index < nz_cnt); + coors[coors_index++] = di - prev_index - 1; + prev_index = di; + for (size_t i = 0; i < unique_value_bit; i++) { + bits[index++] = (unique_value_index_map.at(nz_value) >> (unique_value_bit - i - 1)) & (0x1); + } + } + } + // write coor + for (auto coor : coors) { + for (size_t i = 0; i < coor_best_bit; i++) { + bits[index++] = (coor >> (coor_best_bit - i - 1)) & 0x1; + } + } + if ((unsigned int)index > pack_sparsity_size_in_byte * 8) { + MS_LOG(ERROR) << "unexpected index: " << index << " should not greater than " << pack_sparsity_size_in_byte * 8; + return false; + } + auto new_data_str = BoolVectorToString(bits); + auto ret = memcpy_s(tensor->data.data(), tensor->data.size(), new_data_str.c_str(), new_data_str.size()); + if (ret != EOK) { + MS_LOG(ERROR) << "memcpy error"; + return false; + } + tensor->data.resize(new_data_str.size()); + + tensor->weightQunatCompressType = schema::WeightQunatCompressType_SPARSE; + MS_LOG(ERROR) << "set WeightQunatCompressType_SPARSITY"; + return true; +} + +template +size_t CalCoorBestBit(const std::vector &quant_data, size_t elem_cnt, + const std::vector> &quant_params, int unique_value_bit, + size_t *coor_best_bit) { + size_t best_nn_cnt = 0; + size_t min_len_in_bit = std::numeric_limits::max(); + for (int bit = 2; bit <= 10; bit++) { + // search + size_t nn_cnt = 0; + int prev_index = -1; + auto channel_cnt = quant_params.size(); + auto elem_perchannel = elem_cnt / channel_cnt; + for (int i = 0; (unsigned int)i < elem_cnt; i++) { + auto 
cur_channel = i / elem_perchannel; + auto zp = quant_params[cur_channel]->zeroPoint; + if (quant_data[i] != zp || (i - prev_index) >= (1 << bit)) { + nn_cnt++; + prev_index = i; + } + } + + size_t len_in_bit = nn_cnt * bit + nn_cnt * unique_value_bit; + if (len_in_bit < min_len_in_bit) { + min_len_in_bit = len_in_bit; + *coor_best_bit = bit; + best_nn_cnt = nn_cnt; + } + } + return best_nn_cnt; +} + +template +bool PackRepetition(size_t bit_num, schema::TensorT *tensor) { + auto quant_data_array = reinterpret_cast(tensor->data.data()); + std::vector quant_data(quant_data_array, quant_data_array + tensor->data.size() / sizeof(T)); + auto elem_cnt = quant_data.size(); + auto dims = tensor->dims; + size_t elem_cnt_by_dims = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<>()); + if (elem_cnt != elem_cnt_by_dims) { + MS_LOG(ERROR) << "elem_cnt: " << elem_cnt << " not equal: " << elem_cnt_by_dims; + return false; + } + + auto &quant_params = tensor->quantParams; + + std::set quant_data_set; + for (auto quant_value : quant_data) { + quant_data_set.insert(quant_value); + } + std::map unique_value_index_map; + auto index = 0; + for (auto value : quant_data_set) { + unique_value_index_map[value] = index++; + } + + auto unique_value_cnt = quant_data_set.size(); + size_t unique_value_bit = ceil(log2(unique_value_cnt)); + auto pack_repetition_size_in_bit = bit_num + bit_num * unique_value_cnt + unique_value_bit * elem_cnt; + size_t pack_repetition_size_in_byte = ceil(pack_repetition_size_in_bit / 8.0); + size_t origin_size_in_byte = ceil(bit_num * elem_cnt / 8.0); + + size_t coor_best_bit = 0; + auto nz_cnt = CalCoorBestBit(quant_data, elem_cnt, quant_params, unique_value_bit, &coor_best_bit); + // 1. coor_best_bit 2. nz_cnt 3. quant_data_set size 4. unique_values 5. unique_value indexing 6. 
nz values coord + auto pack_sparsity_size_in_bit = + 1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit; + size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0); + MS_LOG(ERROR) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte + << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte; + auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte}); + if (min_byte_need == origin_size_in_byte) { + return false; + } else if (min_byte_need == pack_repetition_size_in_byte) { + MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte; + return IndexingCompress(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, + pack_repetition_size_in_byte, bit_num, tensor); + } else if (min_byte_need == pack_sparsity_size_in_byte) { + MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte; + return SparsityCompress(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, + pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor); + } else { + MS_LOG(ERROR) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " " + << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}"; + } + return false; +} + } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc index 79f6434367..2d0be9f058 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc @@ -311,7 +311,9 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl } const double zeroPointFromMin = quantMinFloat - mMin / scale; int zeroPoint = static_cast(std::round(zeroPointFromMin)); - + if (scale < SCALE_THREASHOLD) { + zeroPoint = 0; + } // The zero point should always be in the range of quantized value, // [qmin, qmax]. MS_ASSERT(zeroPoint >= quantMin); diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.h b/mindspore/lite/tools/converter/quantizer/quantize_util.h index 7bc02ef41d..6e0c04d354 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.h +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h @@ -47,7 +47,7 @@ namespace mindspore::lite::quant { static constexpr size_t UINT8_QUANTIZATION = 8; static constexpr size_t WEIGHT_INDEX = 1; - +static constexpr double SCALE_THREASHOLD = 1e-38; const char kMethodMaxMin[] = "MAX_MIN"; const char kMethodKL[] = "KL"; const char kMethodOutlier[] = "RemovalOutlier"; @@ -163,7 +163,9 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan const auto narrowRange = quantParam.narrowRange; const int maxLimit = quant_max; const int minLimit = quant_min; - + if (scale <= SCALE_THREASHOLD) { + return 0; + } return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { auto quant_data = std::round(originData / scale + zeroPoint); if (quant_data > maxLimit) {
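Notes on the packed formats introduced by this patch:

The INDEXING layout produced by IndexingCompress and read back by IndexingDecompress collects the distinct quantized values of a weight tensor, writes a small header (the count of distinct values in bit_num bits, with 0 standing for 2^bit_num, then each distinct value offset by 2^(bit_num-1) so it can be stored unsigned), and finally replaces every element by a ceil(log2(unique_value_cnt))-bit index into that table. The standalone sketch below mirrors that bit order (most-significant bit first) to show how the stream round-trips; PushBits, PopBits, the variable names, and the 4-bit example data are illustrative only and are not helpers from this patch.

#include <cmath>
#include <iostream>
#include <map>
#include <set>
#include <vector>

// Append `bits` bits of `value`, most-significant bit first (same order as the patch).
static void PushBits(std::vector<bool> *vec, size_t value, size_t bits) {
  for (size_t i = 0; i < bits; i++) {
    vec->push_back((value >> (bits - i - 1)) & 0x1);
  }
}

// Read `bits` bits, most-significant bit first, advancing *pos.
static size_t PopBits(const std::vector<bool> &vec, size_t *pos, size_t bits) {
  size_t value = 0;
  for (size_t i = 0; i < bits; i++) {
    value |= static_cast<size_t>(vec[(*pos)++]) << (bits - i - 1);
  }
  return value;
}

int main() {
  const size_t bit_num = 4;                         // hypothetical 4-bit weight quantization
  std::vector<int> weights = {-3, 0, 2, 2, -3, 0};  // signed quantized values

  // --- encode: header + value table + per-element indices ---
  std::set<int> uniques(weights.begin(), weights.end());
  std::map<int, size_t> index_of;
  size_t next = 0;
  for (int v : uniques) index_of[v] = next++;
  size_t unique_value_bit = static_cast<size_t>(std::ceil(std::log2(uniques.size())));

  std::vector<bool> stream;
  PushBits(&stream, uniques.size() % (1u << bit_num), bit_num);  // 0 stands for 2^bit_num
  for (int v : uniques) PushBits(&stream, v + (1 << (bit_num - 1)), bit_num);  // unsigned offset
  for (int v : weights) PushBits(&stream, index_of[v], unique_value_bit);

  // --- decode: rebuild the table, then look every element up ---
  size_t pos = 0;
  size_t cnt = PopBits(stream, &pos, bit_num);
  if (cnt == 0) cnt = 1u << bit_num;
  std::vector<int> table;
  for (size_t i = 0; i < cnt; i++) {
    table.push_back(static_cast<int>(PopBits(stream, &pos, bit_num)) - (1 << (bit_num - 1)));
  }
  for (size_t i = 0; i < weights.size(); i++) {
    std::cout << table[PopBits(stream, &pos, unique_value_bit)] << " ";  // prints -3 0 2 2 -3 0
  }
  std::cout << std::endl;
  return 0;
}

Compiled with any C++11 or later compiler, the sketch prints the original weights back, which is the property IndexingDecompress relies on when it rebuilds dst_tensor from the packed string.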
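The SPARSE layout handled by SparsityCompress and SparseDecompress puts a second header in front of the same value table: coor_best_bit (8 bits), nz_cnt (32 bits), and unique_value_cnt (bit_num bits), followed by the distinct values, one table index per stored element, and finally one coor_best_bit-wide "gap" per stored element giving the number of zero-point elements skipped since the previous stored element. Reconstruction therefore interleaves runs of the zero point with the stored values and pads the tail with the zero point. The sketch below shows only that reconstruction rule, under simplifying assumptions: a single zero point instead of per-channel ones, and no handling of the case where a gap would overflow coor_best_bit. UnSparse and its arguments are illustrative names, not the patch's UnSparseTensorData.

#include <cstdint>
#include <iostream>
#include <vector>

// Rebuild a dense buffer from the SPARSE payload: each stored element carries a "gap"
// saying how many zero-point elements precede it, and the tail is padded with the
// zero point. Per-channel zero points and gap-overflow handling are left out here.
std::vector<int8_t> UnSparse(const std::vector<int> &unique_values,
                             const std::vector<size_t> &nz_index,  // index into unique_values
                             const std::vector<size_t> &gaps,      // zero-point run before each value
                             int8_t zp, size_t elem_cnt) {
  std::vector<int8_t> dense;
  for (size_t i = 0; i < nz_index.size(); i++) {
    dense.insert(dense.end(), gaps[i], zp);                            // run of zero points
    dense.push_back(static_cast<int8_t>(unique_values[nz_index[i]]));  // the stored value
  }
  dense.resize(elem_cnt, zp);                                          // trailing zero points
  return dense;
}

int main() {
  // Value table {-5, 0, 7}; two stored elements landing at positions 2 and 5 of 8.
  auto dense = UnSparse({-5, 0, 7}, {0, 2}, {2, 2}, /*zp=*/0, /*elem_cnt=*/8);
  for (int v : dense) std::cout << v << " ";  // prints 0 0 -5 0 0 7 0 0
  std::cout << std::endl;
  return 0;
}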
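PackRepetition chooses between the three encodings purely by size, using the formulas visible in graph_util.h above. As a worked example with assumed numbers (an 8-bit tensor of 1000 elements, 16 distinct values, 200 elements that must be stored explicitly, coor_best_bit = 4), plain 8-bit packing needs 1000 bytes, INDEXING needs ceil((8 + 8*16 + 4*1000) / 8) = 517 bytes, and SPARSE needs ceil((8 + 32 + 8 + 8*16 + 4*200 + 200*4) / 8) = 222 bytes, so SPARSE would win. The snippet below just evaluates those same expressions; all of its numbers are made up for illustration.

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  // Assumed tensor statistics, not measured from a real model.
  const double bit_num = 8, elem_cnt = 1000, unique_cnt = 16, nz_cnt = 200, coor_bit = 4;
  const double unique_bit = std::ceil(std::log2(unique_cnt));  // 4 bits per index

  const double origin   = std::ceil(bit_num * elem_cnt / 8.0);                         // 1000 bytes
  const double indexing = std::ceil((bit_num + bit_num * unique_cnt +
                                     unique_bit * elem_cnt) / 8.0);                    // 517 bytes
  const double sparse   = std::ceil((8 + 32 + bit_num + bit_num * unique_cnt +
                                     unique_bit * nz_cnt + nz_cnt * coor_bit) / 8.0);  // 222 bytes

  const double best = std::min({origin, indexing, sparse});
  std::printf("origin=%.0f indexing=%.0f sparse=%.0f -> %s\n", origin, indexing, sparse,
              best == origin ? "no packing" : (best == indexing ? "INDEXING" : "SPARSE"));
  return 0;
}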