From: @jianghui58 Reviewed-by: @hangangqiang Signed-off-by:tags/v1.2.0-rc1
| @@ -74,7 +74,7 @@ void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lit | |||||
| dst_tensor->AddQuantParam(quant_arg); | dst_tensor->AddQuantParam(quant_arg); | ||||
| } | } | ||||
| } | } | ||||
| dst_tensor->SetEnableHuffmanCode(src_tensor->enableHuffmanCode()); | |||||
| dst_tensor->set_enable_huffman_code(src_tensor->enableHuffmanCode()); | |||||
| auto quant_clusters = src_tensor->quantClusters(); | auto quant_clusters = src_tensor->quantClusters(); | ||||
| if (quant_clusters != nullptr) { | if (quant_clusters != nullptr) { | ||||
| std::vector<float> clusters; | std::vector<float> clusters; | ||||
| @@ -451,9 +451,9 @@ void PrimitiveC::set_quant_type(const schema::QuantType &quant_type) { this->qua | |||||
| schema::QuantType PrimitiveC::quant_type() const { return quant_type_; } | schema::QuantType PrimitiveC::quant_type() const { return quant_type_; } | ||||
| bool PrimitiveC::IsEnableHuffmanCode() const { return enableHuffmanCode; } | |||||
| bool PrimitiveC::enable_huffman_code() const { return enable_huffman_code_; } | |||||
| void PrimitiveC::SetEnableHuffmanCode(bool enableHuffmanCode) { this->enableHuffmanCode = enableHuffmanCode; } | |||||
| void PrimitiveC::set_enable_huffman_code(bool enable_huffman_code) { this->enable_huffman_code_ = enable_huffman_code; } | |||||
| std::shared_ptr<PrimitiveC> GetReturnPrim() { | std::shared_ptr<PrimitiveC> GetReturnPrim() { | ||||
| auto return_primitiveT = new (std::nothrow) schema::PrimitiveT; | auto return_primitiveT = new (std::nothrow) schema::PrimitiveT; | ||||
| @@ -123,9 +123,9 @@ class PrimitiveC : public mindspore::Primitive { | |||||
| schema::QuantType quant_type() const; | schema::QuantType quant_type() const; | ||||
| bool IsEnableHuffmanCode() const; | |||||
| bool enable_huffman_code() const; | |||||
| void SetEnableHuffmanCode(bool enableHuffmanCode); | |||||
| void set_enable_huffman_code(bool enable_huffman_code); | |||||
| virtual int InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs); | virtual int InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs); | ||||
| @@ -158,7 +158,7 @@ class PrimitiveC : public mindspore::Primitive { | |||||
| schema::QuantType quant_type_{schema::QuantType_QUANT_NONE}; | schema::QuantType quant_type_{schema::QuantType_QUANT_NONE}; | ||||
| bool infer_flag_ = true; | bool infer_flag_ = true; | ||||
| int op_type_ = OP_TYPE_NOT_SET; | int op_type_ = OP_TYPE_NOT_SET; | ||||
| bool enableHuffmanCode = false; | |||||
| bool enable_huffman_code_ = false; | |||||
| }; | }; | ||||
| std::shared_ptr<PrimitiveC> GetReturnPrim(); | std::shared_ptr<PrimitiveC> GetReturnPrim(); | ||||
| @@ -366,9 +366,9 @@ std::vector<float> Tensor::quant_clusters() const { return this->quant_clusters_ | |||||
| void Tensor::set_quant_clusters(const std::vector<float> &clusters) { this->quant_clusters_ = clusters; } | void Tensor::set_quant_clusters(const std::vector<float> &clusters) { this->quant_clusters_ = clusters; } | ||||
| bool Tensor::IsEnableHuffmanCode() const { return enableHuffmanCode; } | |||||
| bool Tensor::enable_huffman_code() const { return enable_huffman_code_; } | |||||
| void Tensor::SetEnableHuffmanCode(bool enableHuffmanCode) { this->enableHuffmanCode = enableHuffmanCode; } | |||||
| void Tensor::set_enable_huffman_code(bool enable_huffman_code) { this->enable_huffman_code_ = enable_huffman_code; } | |||||
| std::vector<tensor::MSTensor *> TensorVectorCast(const std::vector<Tensor *> &src) { | std::vector<tensor::MSTensor *> TensorVectorCast(const std::vector<Tensor *> &src) { | ||||
| std::vector<tensor::MSTensor *> target(src.size()); | std::vector<tensor::MSTensor *> target(src.size()); | ||||
| @@ -149,9 +149,9 @@ class Tensor : public mindspore::tensor::MSTensor { | |||||
| void set_quant_clusters(const std::vector<float> &clusters); | void set_quant_clusters(const std::vector<float> &clusters); | ||||
| bool IsEnableHuffmanCode() const; | |||||
| bool enable_huffman_code() const; | |||||
| void SetEnableHuffmanCode(bool enableHuffmanCode); | |||||
| void set_enable_huffman_code(bool enable_huffman_code); | |||||
| virtual bool IsConst() const { | virtual bool IsConst() const { | ||||
| return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; | return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; | ||||
| @@ -202,7 +202,7 @@ class Tensor : public mindspore::tensor::MSTensor { | |||||
| std::vector<float> quant_clusters_; | std::vector<float> quant_clusters_; | ||||
| mindspore::lite::Allocator *allocator_ = nullptr; | mindspore::lite::Allocator *allocator_ = nullptr; | ||||
| Tensor *root_tensor_ = nullptr; | Tensor *root_tensor_ = nullptr; | ||||
| bool enableHuffmanCode = false; | |||||
| bool enable_huffman_code_ = false; | |||||
| }; | }; | ||||
| inline size_t DataTypeSize(const TypeId type) { | inline size_t DataTypeSize(const TypeId type) { | ||||
| @@ -1 +1 @@ | |||||
| efficientnet.mindir 40.64 9.98 | |||||
| efficientnet.mindir 41.37 9.98 | |||||
| @@ -1,3 +1,3 @@ | |||||
| retinaface_732_1280_iod.mindir | |||||
| mobilefacenet_iod.mindir | |||||
| retinaface_732_1280_iod.mindir 16.9 | |||||
| mobilefacenet_iod.mindir 13.5 | |||||
| #effnet_iod.mindir | #effnet_iod.mindir | ||||
| @@ -1,2 +1,2 @@ | |||||
| ml_face_openclose.tflite 0.5 | ml_face_openclose.tflite 0.5 | ||||
| hiai_ghostnet.tflite 5 | |||||
| hiai_ghostnet.tflite 4.7 | |||||
| @@ -238,10 +238,11 @@ function Run_Converter() { | |||||
| # Convert mindir weightquant models: | # Convert mindir weightquant models: | ||||
| while read line; do | while read line; do | ||||
| model_name=${line} | |||||
| if [[ $model_name == \#* ]]; then | |||||
| weight_quant_line_info=${line} | |||||
| if [[ $weight_quant_line_info == \#* ]]; then | |||||
| continue | continue | ||||
| fi | fi | ||||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||||
| echo ${model_name} >> "${run_converter_log_file}" | echo ${model_name} >> "${run_converter_log_file}" | ||||
| echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}' --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}" | echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}' --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}" | ||||
| ./converter_lite --fmk=MINDIR --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16 | ./converter_lite --fmk=MINDIR --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16 | ||||
| @@ -538,15 +539,17 @@ function Run_x86() { | |||||
| # Run mindir weight quantization converted models: | # Run mindir weight quantization converted models: | ||||
| while read line; do | while read line; do | ||||
| model_name=${line} | |||||
| if [[ $model_name == \#* ]]; then | |||||
| weight_quant_line_info=${line} | |||||
| if [[ $weight_quant_line_info == \#* ]]; then | |||||
| continue | continue | ||||
| fi | fi | ||||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||||
| accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'` | |||||
| echo ${model_name} >> "${run_x86_log_file}" | echo ${model_name} >> "${run_x86_log_file}" | ||||
| echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64' >> "${run_x86_log_file}" | echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64' >> "${run_x86_log_file}" | ||||
| cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64 || return 1 | cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64 || return 1 | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}" | |||||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}" | |||||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}" | |||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='x86: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='x86: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| else | else | ||||
| @@ -809,15 +812,17 @@ function Run_x86_sse() { | |||||
| # Run mindir weight quantization converted models: | # Run mindir weight quantization converted models: | ||||
| while read line; do | while read line; do | ||||
| model_name=${line} | |||||
| if [[ $model_name == \#* ]]; then | |||||
| weight_quant_line_info=${line} | |||||
| if [[ $weight_quant_line_info == \#* ]]; then | |||||
| continue | continue | ||||
| fi | fi | ||||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||||
| accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'` | |||||
| echo ${model_name} >> "${run_x86_sse_log_file}" | echo ${model_name} >> "${run_x86_sse_log_file}" | ||||
| echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-sse' >> "${run_x86_sse_log_file}" | echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-sse' >> "${run_x86_sse_log_file}" | ||||
| cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-sse || return 1 | cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-sse || return 1 | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}" | |||||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_sse_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_sse_log_file}" | |||||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}" | |||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| else | else | ||||
| @@ -1081,15 +1086,17 @@ function Run_x86_avx() { | |||||
| # Run mindir weight quantization converted models: | # Run mindir weight quantization converted models: | ||||
| while read line; do | while read line; do | ||||
| model_name=${line} | |||||
| if [[ $model_name == \#* ]]; then | |||||
| weight_quant_line_info=${line} | |||||
| if [[ $weight_quant_line_info == \#* ]]; then | |||||
| continue | continue | ||||
| fi | fi | ||||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||||
| accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'` | |||||
| echo ${model_name} >> "${run_x86_avx_log_file}" | echo ${model_name} >> "${run_x86_avx_log_file}" | ||||
| echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-avx' >> "${run_x86_avx_log_file}" | echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-avx' >> "${run_x86_avx_log_file}" | ||||
| cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-avx || return 1 | cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-avx || return 1 | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_avx_log_file}" | |||||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_avx_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_avx_log_file}" | |||||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_avx_log_file}" | |||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| else | else | ||||
| @@ -1637,14 +1644,16 @@ function Run_arm64() { | |||||
| # Run mindir weightquant converted train models: | # Run mindir weightquant converted train models: | ||||
| while read line; do | while read line; do | ||||
| model_name=${line} | |||||
| if [[ $model_name == \#* ]]; then | |||||
| weight_quant_line_info=${line} | |||||
| if [[ $weight_quant_line_info == \#* ]]; then | |||||
| continue | continue | ||||
| fi | fi | ||||
| echo ${model_name}'_train' >> "${run_arm64_log_file}" | |||||
| model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` | |||||
| accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'` | |||||
| echo ${model_name} >> "${run_arm64_log_file}" | |||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.weightquant.ms.out --loopCount=1' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.weightquant.ms.out --loopCount=1' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --loopCount=1 --accuracyThreshold='${accuracy_limit} >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --loopCount=1 --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -508,7 +508,7 @@ int AnfExporter::ConvertInputParameter(const std::shared_ptr<AnfNode> &input_ano | |||||
| } | } | ||||
| paramTensor->name = input_name; | paramTensor->name = input_name; | ||||
| if (primitive_c->IsEnableHuffmanCode() && paramTensor->dataType == kNumberTypeInt8) { | |||||
| if (primitive_c->enable_huffman_code() && paramTensor->dataType == kNumberTypeInt8) { | |||||
| paramTensor->enableHuffmanCode = true; | paramTensor->enableHuffmanCode = true; | ||||
| } | } | ||||
| node_id_map_[input_name] = meta_graphT->allTensors.size(); | node_id_map_[input_name] = meta_graphT->allTensors.size(); | ||||
| @@ -52,7 +52,6 @@ | |||||
| #include "tools/optimizer/graph/inputs_adjust_pass.h" | #include "tools/optimizer/graph/inputs_adjust_pass.h" | ||||
| #include "tools/converter/quantizer/post_training_quantizer.h" | #include "tools/converter/quantizer/post_training_quantizer.h" | ||||
| #include "tools/converter/quantizer/quant_cast.h" | #include "tools/converter/quantizer/quant_cast.h" | ||||
| #include "tools/converter/quantizer/huffman_encode.h" | |||||
| #include "tools/converter/quantizer/weight_quantizer.h" | #include "tools/converter/quantizer/weight_quantizer.h" | ||||
| using std::string; | using std::string; | ||||
| @@ -243,24 +242,6 @@ int AnfTransform::DoQuantize(const FuncGraphPtr &old_graph, const converter::Fla | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int AnfTransform::DoHuffmanEncode(const converter::Flags *config, const FuncGraphPtr &new_graph, | |||||
| bool enableHuffmanCode) { | |||||
| if (config->quantType == schema::QuantType_WeightQuant && enableHuffmanCode) { | |||||
| if (config->bitNum < 16 && config->bitNum > 8) { | |||||
| MS_LOG(WARNING) << "don't support huffman encode when 8 < bitNum < 16 currently."; | |||||
| return RET_OK; | |||||
| } | |||||
| auto huffman_encode = std::make_unique<lite::HuffmanEncode>(); | |||||
| auto status = huffman_encode->DoHuffmanEncode(new_graph, config->bitNum); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "Huffman encode failed."; | |||||
| ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_graph, const converter::Flags *config) { | FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_graph, const converter::Flags *config) { | ||||
| MS_ASSERT(nullptr != old_graph); | MS_ASSERT(nullptr != old_graph); | ||||
| if (config == nullptr) { | if (config == nullptr) { | ||||
| @@ -315,12 +296,6 @@ FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_grap | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| status = DoHuffmanEncode(config, new_graph, false); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "Do HuffmanCode failed."; | |||||
| return nullptr; | |||||
| } | |||||
| return new_graph; | return new_graph; | ||||
| } | } | ||||
| @@ -58,8 +58,6 @@ class AnfTransform { | |||||
| int RunTFAdjustPass(const FuncGraphPtr &old_graph, const converter::Flags *config); | int RunTFAdjustPass(const FuncGraphPtr &old_graph, const converter::Flags *config); | ||||
| int DoQuantize(const FuncGraphPtr &old_graph, const converter::Flags *config, const FuncGraphPtr &new_graph); | int DoQuantize(const FuncGraphPtr &old_graph, const converter::Flags *config, const FuncGraphPtr &new_graph); | ||||
| int DoHuffmanEncode(const converter::Flags *config, const FuncGraphPtr &new_graph, bool enableHuffmanCode); | |||||
| }; | }; | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -227,7 +227,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) { | |||||
| quantNodeOptimizer.AddPass(dTypeTransPass); | quantNodeOptimizer.AddPass(dTypeTransPass); | ||||
| quantNodeOptimizer.AddPass(new (std::nothrow) QuantCastFusionPass()); | quantNodeOptimizer.AddPass(new (std::nothrow) QuantCastFusionPass()); | ||||
| quantNodeOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass()); | quantNodeOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass()); | ||||
| quantNodeOptimizer.AddPass(new (std::nothrow) SetUnusedQuantParamToDefaultPass()); | |||||
| quantNodeOptimizer.AddPass(new (std::nothrow) SubgraphNodePass(old_nodes2)); | quantNodeOptimizer.AddPass(new (std::nothrow) SubgraphNodePass(old_nodes2)); | ||||
| status = quantNodeOptimizer.Run(graphDefT); | status = quantNodeOptimizer.Run(graphDefT); | ||||
| if (status != RET_OK && status != RET_NO_CHANGE) { | if (status != RET_OK && status != RET_NO_CHANGE) { | ||||
| @@ -287,6 +286,15 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) { | |||||
| } | } | ||||
| } | } | ||||
| { | |||||
| Optimizer quantNodeOptimizer; | |||||
| quantNodeOptimizer.AddPass(new (std::nothrow) SetUnusedQuantParamToDefaultPass()); | |||||
| status = quantNodeOptimizer.Run(graphDefT); | |||||
| if (status != RET_OK && status != RET_NO_CHANGE) { | |||||
| MS_LOG(ERROR) << "Run quantNodeOptimizer graphPasses Failed"; | |||||
| return status; | |||||
| } | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } // namespace mindspore::lite | } // namespace mindspore::lite | ||||
| } // namespace mindspore::lite | } // namespace mindspore::lite | ||||
| @@ -24,120 +24,49 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| STATUS HuffmanEncode::GetParamValueLitePtr(const std::shared_ptr<AnfNode> &input_node, ParamValueLitePtr *param_value) { | |||||
| if (!input_node->isa<Parameter>()) { | |||||
| return RET_CONTINUE; | |||||
| } | |||||
| auto abstract_base = input_node->abstract(); | |||||
| if (abstract_base == nullptr) { | |||||
| MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << input_node->fullname_with_scope(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (!utils::isa<abstract::AbstractTensorPtr>(abstract_base)) { | |||||
| MS_LOG(ERROR) << "Abstract of parameter should be abstract tensor, " << input_node->fullname_with_scope(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto abstract_tensor = utils::cast<abstract::AbstractTensorPtr>(abstract_base); | |||||
| if (abstract_tensor->element() == nullptr) { | |||||
| MS_LOG(ERROR) << "abstract tensor element is nullptr, " << input_node->fullname_with_scope(); | |||||
| STATUS HuffmanEncode::DoHuffmanEncode(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, | |||||
| void *quant_datas, const size_t &bit_num) { | |||||
| if (quant_datas == nullptr) { | |||||
| MS_LOG(ERROR) << "quant data is nullptr"; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto tensor_type = abstract_tensor->element()->GetTypeTrack(); | |||||
| MS_ASSERT(tensor_type != nullptr); | |||||
| auto tensor_type_id = tensor_type->type_id(); | |||||
| if (tensor_type_id != kNumberTypeInt8) { | |||||
| return RET_CONTINUE; | |||||
| } | |||||
| auto param_node = input_node->cast<ParameterPtr>(); | |||||
| if (param_node == nullptr) { | |||||
| MS_LOG(ERROR) << "parameter node is nullptr, " << input_node->fullname_with_scope(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (!param_node->has_default()) { | |||||
| MS_LOG(WARNING) << "param_node don't have default: " << input_node->fullname_with_scope(); | |||||
| return RET_CONTINUE; | |||||
| } | |||||
| *param_value = std::static_pointer_cast<ParamValueLite>(param_node->default_param()); | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS HuffmanEncode::DoHuffmanEncode(const FuncGraphPtr &func_graph, const int &bit_num) { | |||||
| auto cnodes = func_graph->GetOrderedCnodes(); | |||||
| for (auto &cnode : cnodes) { | |||||
| auto primitive_c = GetValueNode<std::shared_ptr<PrimitiveC>>(cnode->input(0)); | |||||
| if (primitive_c == nullptr) { | |||||
| MS_LOG(ERROR) << "primitive_c is nullptr: " << cnode->fullname_with_scope(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (primitive_c->quant_type() != schema::QuantType_WeightQuant) { | |||||
| continue; | |||||
| auto *raw_datas = static_cast<int8_t *>(quant_datas); | |||||
| size_t elem_count = weight->tensor_shape_size(); | |||||
| size_t packed_size = elem_count * bit_num; | |||||
| HuffmanPriorityQueue pq; | |||||
| auto status = GetHuffmanPriorityQueue(raw_datas, elem_count, &pq); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "GetHuffmanPriorityQueue failed"; | |||||
| return status; | |||||
| } | |||||
| status = BuildHuffmanTree(&pq); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "BuildHuffmanTree failed"; | |||||
| return status; | |||||
| } | |||||
| status = DoHuffmanCompress(raw_datas, elem_count); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "DoHuffmanCompress failed"; | |||||
| return status; | |||||
| } | |||||
| size_t ch_size = huffman_encoded_str_.length(); | |||||
| if (ch_size < packed_size) { | |||||
| auto encode_data = new (std::nothrow) char[ch_size]; | |||||
| if (encode_data == nullptr) { | |||||
| MS_LOG(ERROR) << "new char[] failed."; | |||||
| return RET_MEMORY_FAILED; | |||||
| } | } | ||||
| for (size_t i = 1; i < cnode->inputs().size(); i++) { | |||||
| auto input_node = cnode->input(i); | |||||
| ParamValueLitePtr param_value; | |||||
| auto status = GetParamValueLitePtr(input_node, ¶m_value); | |||||
| if (status == RET_CONTINUE) { | |||||
| continue; | |||||
| } else if (status == RET_ERROR) { | |||||
| MS_LOG(ERROR) << "Get param value lite ptr failed. " << cnode->fullname_with_scope(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| size_t elem_count = param_value->tensor_shape_size(); | |||||
| size_t packed_size = param_value->tensor_size(); | |||||
| auto *raw_datas = static_cast<int8_t *>(param_value->tensor_addr()); | |||||
| if (raw_datas == nullptr) { | |||||
| MS_LOG(ERROR) << "rawDatas is nullptr"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (bit_num < 8 && bit_num > 0) { | |||||
| auto dst_data = new (std::nothrow) int8_t[elem_count]; | |||||
| if (dst_data == nullptr) { | |||||
| MS_LOG(ERROR) << "new int8_t[] failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| DequantUtil::UnpackUtil<int8_t, uint8_t>(raw_datas, packed_size, bit_num, dst_data); | |||||
| if (memcpy_s(raw_datas, elem_count, dst_data, elem_count) != EOK) { | |||||
| MS_LOG(ERROR) << "memcpy_s failed."; | |||||
| return RET_MEMORY_FAILED; | |||||
| } | |||||
| } | |||||
| HuffmanPriorityQueue pq; | |||||
| status = GetHuffmanPriorityQueue(raw_datas, elem_count, &pq); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "GetHuffmanPriorityQueue failed"; | |||||
| return status; | |||||
| } | |||||
| status = BuildHuffmanTree(&pq); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "BuildHuffmanTree failed"; | |||||
| return status; | |||||
| } | |||||
| status = DoHuffmanCompress(raw_datas, elem_count); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "DoHuffmanCompress failed"; | |||||
| return status; | |||||
| } | |||||
| size_t ch_size = huffman_encoded_str_.length(); | |||||
| if (ch_size < packed_size) { | |||||
| auto encode_data = new (std::nothrow) char[ch_size]; | |||||
| if (encode_data == nullptr) { | |||||
| MS_LOG(ERROR) << "new char[] failed."; | |||||
| delete[] raw_datas; | |||||
| return RET_MEMORY_FAILED; | |||||
| } | |||||
| delete[] raw_datas; | |||||
| if (memcpy_s(encode_data, ch_size, huffman_encoded_str_.c_str(), ch_size) != EOK) { | |||||
| MS_LOG(ERROR) << "memcpy_s failed."; | |||||
| delete[] encode_data; | |||||
| return RET_MEMORY_FAILED; | |||||
| } | |||||
| param_value->SetTensorData(encode_data, ch_size); | |||||
| primitive_c->SetEnableHuffmanCode(true); | |||||
| } | |||||
| huffman_encoded_str_.clear(); | |||||
| huffman_table_.clear(); | |||||
| if (memcpy_s(encode_data, ch_size, huffman_encoded_str_.c_str(), ch_size) != EOK) { | |||||
| MS_LOG(ERROR) << "memcpy_s failed."; | |||||
| delete[] encode_data; | |||||
| return RET_MEMORY_FAILED; | |||||
| } | } | ||||
| weight->SetTensorData(encode_data, ch_size); | |||||
| primitive_c->set_enable_huffman_code(true); | |||||
| } | } | ||||
| huffman_encoded_str_.clear(); | |||||
| huffman_table_.clear(); | |||||
| return RET_SUCCESS; | return RET_SUCCESS; | ||||
| } | } | ||||
| @@ -60,7 +60,8 @@ class HuffmanEncode { | |||||
| STATUS GetParamValueLitePtr(const std::shared_ptr<AnfNode> &input_node, ParamValueLitePtr *param_value); | STATUS GetParamValueLitePtr(const std::shared_ptr<AnfNode> &input_node, ParamValueLitePtr *param_value); | ||||
| STATUS DoHuffmanEncode(const FuncGraphPtr &func_graph, const int &bit_num); | |||||
| STATUS DoHuffmanEncode(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, | |||||
| void *quant_datas, const size_t &bit_num); | |||||
| private: | private: | ||||
| std::map<int, std::string> huffman_table_; | std::map<int, std::string> huffman_table_; | ||||
| @@ -36,6 +36,7 @@ | |||||
| #include "base/base.h" | #include "base/base.h" | ||||
| #include "ir/primitive.h" | #include "ir/primitive.h" | ||||
| #include "abstract/dshape.h" | #include "abstract/dshape.h" | ||||
| #include "tools/converter/quantizer/huffman_encode.h" | |||||
| #include "tools/converter/quantizer/bitpacking.h" | #include "tools/converter/quantizer/bitpacking.h" | ||||
| #include "src/lite_session.h" | #include "src/lite_session.h" | ||||
| #include "tools/converter/graphdef_transform.h" | #include "tools/converter/graphdef_transform.h" | ||||
| @@ -92,6 +93,7 @@ class QuantStrategy { | |||||
| constexpr float delta = 0.1; | constexpr float delta = 0.1; | ||||
| constexpr float ratio = 10.0; | constexpr float ratio = 10.0; | ||||
| constexpr int percent = 10; | constexpr int percent = 10; | ||||
| constexpr int quant_param_size = 32 * 8; | |||||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | ||||
| int quant_min, int num_bits); | int quant_min, int num_bits); | ||||
| @@ -158,163 +160,159 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan | |||||
| } | } | ||||
| template <typename T> | template <typename T> | ||||
| STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, QuantType quantType, | |||||
| int quant_max, int quant_min, size_t bitNum, bool per_channel, int index = 1, bool k_means = false) { | |||||
| MS_ASSERT(weight != nullptr); | |||||
| MS_ASSERT(primitive_c != nullptr); | |||||
| STATUS DoPerChannelQuant(const ParamValueLitePtr &weight, const QuantType &quant_type, | |||||
| std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min, | |||||
| const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, | |||||
| std::vector<float> *dequant_datas) { | |||||
| auto dims = weight->tensor_shape(); | auto dims = weight->tensor_shape(); | ||||
| if (per_channel) { | |||||
| if (dims.size() <= 1) { | |||||
| MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel"; | |||||
| per_channel = false; | |||||
| } | |||||
| } | |||||
| std::vector<schema::QuantParamT> quant_params; | |||||
| size_t elem_count = weight->tensor_shape_size(); | size_t elem_count = weight->tensor_shape_size(); | ||||
| auto *raw_datas = static_cast<float *>(weight->tensor_addr()); | auto *raw_datas = static_cast<float *>(weight->tensor_addr()); | ||||
| if (raw_datas == nullptr) { | |||||
| MS_LOG(ERROR) << "rawDatas is nullptr"; | |||||
| auto channels = dims[0]; | |||||
| if (channels == 0) { | |||||
| MS_LOG(ERROR) << "channels is zero"; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::vector<T> quant_datas(elem_count); | |||||
| std::vector<float> dequant_datas(elem_count); | |||||
| if (per_channel) { | |||||
| // notice: assume Con2D\DepthwiseConv2D's weight format are same: KHWC | |||||
| // channel at first | |||||
| auto channels = dims[0]; | |||||
| if (channels == 0) { | |||||
| MS_LOG(ERROR) << "channels is zero"; | |||||
| return RET_ERROR; | |||||
| size_t one_filter_size = elem_count / channels; | |||||
| bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size; | |||||
| if (!do_quant && quant_type == QuantType_WeightQuant) { | |||||
| MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size; | |||||
| return RET_CONTINUE; | |||||
| } | |||||
| for (int i = 0; i < channels; i++) { | |||||
| float min = FLT_MAX; | |||||
| float max = -FLT_MAX; | |||||
| // find min and max | |||||
| for (size_t j = 0; j < one_filter_size; j++) { | |||||
| auto index = j + i * one_filter_size; | |||||
| if (index >= elem_count) { | |||||
| MS_LOG(ERROR) << "over flow!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| min = std::min(min, raw_datas[index]); | |||||
| max = std::max(max, raw_datas[index]); | |||||
| } | } | ||||
| size_t one_filter_size = elem_count / channels; | |||||
| for (int i = 0; i < channels; i++) { | |||||
| float min = FLT_MAX; | |||||
| float max = -FLT_MAX; | |||||
| // find min and max | |||||
| for (size_t j = 0; j < one_filter_size; j++) { | |||||
| auto index = j + i * one_filter_size; | |||||
| if (index >= elem_count) { | |||||
| MS_LOG(ERROR) << "over flow!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| min = std::min(min, raw_datas[index]); | |||||
| max = std::max(max, raw_datas[index]); | |||||
| schema::QuantParamT quant_param; | |||||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||||
| return status; | |||||
| } | |||||
| // do quantization | |||||
| double average_dequant = 0; | |||||
| double average_raw = 0; | |||||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||||
| auto index = j + i * one_filter_size; | |||||
| if (index >= elem_count) { | |||||
| MS_LOG(ERROR) << "over flow!"; | |||||
| return RET_ERROR; | |||||
| } | } | ||||
| schema::QuantParamT quant_param; | |||||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||||
| return status; | |||||
| float raw_data = raw_datas[index]; | |||||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||||
| (*quant_datas)[index] = quant_data; | |||||
| if (quant_type == QuantType_WeightQuant) { | |||||
| float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint); | |||||
| (*dequant_datas)[index] = dequant_data; | |||||
| average_dequant += dequant_data; | |||||
| average_raw += raw_data; | |||||
| } | } | ||||
| // do quantization | |||||
| double average_dequant = 0; | |||||
| double average_raw = 0; | |||||
| } | |||||
| if (quant_type == QuantType_WeightQuant && !k_means) { | |||||
| // mean | |||||
| average_dequant = average_dequant / one_filter_size; | |||||
| average_raw = average_raw / one_filter_size; | |||||
| // std | |||||
| double variance_dequant = 0; | |||||
| double variance_raw = 0; | |||||
| for (uint32_t j = 0; j < one_filter_size; j++) { | for (uint32_t j = 0; j < one_filter_size; j++) { | ||||
| auto index = j + i * one_filter_size; | auto index = j + i * one_filter_size; | ||||
| if (index >= elem_count) { | if (index >= elem_count) { | ||||
| MS_LOG(ERROR) << "over flow!"; | MS_LOG(ERROR) << "over flow!"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| float raw_data = raw_datas[index]; | |||||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||||
| quant_datas[index] = quant_data; | |||||
| if (quantType == QuantType_WeightQuant) { | |||||
| float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint); | |||||
| dequant_datas[index] = dequant_data; | |||||
| average_dequant += dequant_data; | |||||
| average_raw += raw_data; | |||||
| } | |||||
| variance_dequant += std::pow((*dequant_datas)[index] - average_dequant, 2); | |||||
| variance_raw += std::pow(raw_datas[index] - average_raw, 2); | |||||
| } | } | ||||
| if (quantType == QuantType_WeightQuant && !k_means) { | |||||
| // mean | |||||
| average_dequant = average_dequant / one_filter_size; | |||||
| average_raw = average_raw / one_filter_size; | |||||
| // std | |||||
| double variance_dequant = 0; | |||||
| double variance_raw = 0; | |||||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||||
| auto index = j + i * one_filter_size; | |||||
| if (index >= elem_count) { | |||||
| MS_LOG(ERROR) << "over flow!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| variance_dequant += std::pow(dequant_datas[index] - average_dequant, 2); | |||||
| variance_raw += std::pow(raw_datas[index] - average_raw, 2); | |||||
| } | |||||
| variance_dequant = std::sqrt(variance_dequant / one_filter_size); | |||||
| variance_raw = std::sqrt(variance_raw / one_filter_size); | |||||
| quant_param.varCorr = 1; | |||||
| if (variance_raw != 0 && variance_dequant != 0) { | |||||
| auto temp_var_corr = variance_raw / variance_dequant; | |||||
| if (temp_var_corr > 0 && temp_var_corr < 10) { | |||||
| quant_param.varCorr = temp_var_corr; | |||||
| } else { | |||||
| MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr; | |||||
| } | |||||
| variance_dequant = std::sqrt(variance_dequant / one_filter_size); | |||||
| variance_raw = std::sqrt(variance_raw / one_filter_size); | |||||
| quant_param.varCorr = 1; | |||||
| if (variance_raw != 0 && variance_dequant != 0) { | |||||
| auto temp_var_corr = variance_raw / variance_dequant; | |||||
| if (temp_var_corr > 0 && temp_var_corr < 10) { | |||||
| quant_param.varCorr = temp_var_corr; | |||||
| } else { | |||||
| MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr; | |||||
| } | } | ||||
| quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr; | |||||
| } | } | ||||
| quant_params.emplace_back(quant_param); | |||||
| quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr; | |||||
| } | } | ||||
| auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T)); | |||||
| quant_params->emplace_back(quant_param); | |||||
| } | |||||
| auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T)); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| template <typename T> | |||||
| STATUS DoPerLayerQuant(const ParamValueLitePtr &weight, const QuantType &quant_type, | |||||
| std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min, | |||||
| const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas) { | |||||
| auto dims = weight->tensor_shape(); | |||||
| size_t elem_count = weight->tensor_shape_size(); | |||||
| auto *raw_datas = static_cast<float *>(weight->tensor_addr()); | |||||
| float min = FLT_MAX; | |||||
| float max = -FLT_MIN; | |||||
| for (uint32_t i = 0; i < elem_count; i++) { | |||||
| // find max min | |||||
| min = std::min(min, raw_datas[i]); | |||||
| max = std::max(max, raw_datas[i]); | |||||
| } | |||||
| schema::QuantParamT quant_param; | |||||
| if (!k_means) { | |||||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num); | |||||
| if (status != RET_OK) { | if (status != RET_OK) { | ||||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } else { | |||||
| // per layer | |||||
| float min = FLT_MAX; | |||||
| float max = -FLT_MIN; | |||||
| for (uint32_t i = 0; i < elem_count; i++) { | |||||
| // find max min | |||||
| min = std::min(min, raw_datas[i]); | |||||
| max = std::max(max, raw_datas[i]); | |||||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||||
| return status; | |||||
| } | } | ||||
| schema::QuantParamT quant_param; | |||||
| } | |||||
| quant_params->emplace_back(quant_param); | |||||
| // update data and datatype | |||||
| for (uint32_t i = 0; i < elem_count; i++) { | |||||
| float raw_data = raw_datas[i]; | |||||
| if (!k_means) { | if (!k_means) { | ||||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||||
| return status; | |||||
| } | |||||
| } | |||||
| quant_params.emplace_back(quant_param); | |||||
| // update data and datatype | |||||
| for (uint32_t i = 0; i < elem_count; i++) { | |||||
| float raw_data = raw_datas[i]; | |||||
| if (!k_means) { | |||||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||||
| quant_datas[i] = quant_data; | |||||
| } | |||||
| } | |||||
| auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T)); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||||
| return RET_ERROR; | |||||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||||
| (*quant_datas)[i] = quant_data; | |||||
| } | } | ||||
| } | } | ||||
| // do bit pack | |||||
| if (bitNum != 8 && bitNum != 16) { | |||||
| auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T)); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| template <typename T> | |||||
| STATUS DoBitPack(const ParamValueLitePtr &weight, const size_t &bit_num, const std::vector<T> &quant_datas) { | |||||
| if (bit_num != 8 && bit_num != 16) { | |||||
| std::vector<T> data{}; | std::vector<T> data{}; | ||||
| for (size_t i = 0; i < quant_datas.size(); ++i) { | for (size_t i = 0; i < quant_datas.size(); ++i) { | ||||
| data.emplace_back((static_cast<T>(quant_datas[i]))); | data.emplace_back((static_cast<T>(quant_datas[i]))); | ||||
| } | } | ||||
| if (bitNum > 0 && bitNum < 8) { | |||||
| if (bit_num > 0 && bit_num < 8) { | |||||
| std::vector<uint8_t> pack_data{}; | std::vector<uint8_t> pack_data{}; | ||||
| BitPack::BitPacking<T, uint8_t>(bitNum, data, &pack_data); | |||||
| BitPack::BitPacking<T, uint8_t>(bit_num, data, &pack_data); | |||||
| auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint8_t)); | auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint8_t)); | ||||
| if (status != RET_OK) { | if (status != RET_OK) { | ||||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } else if (bitNum > 8 && bitNum < 16) { | |||||
| } else if (bit_num > 8 && bit_num < 16) { | |||||
| std::vector<uint16_t> pack_data{}; | std::vector<uint16_t> pack_data{}; | ||||
| BitPack::BitPacking<T, uint16_t>(bitNum, data, &pack_data); | |||||
| BitPack::BitPacking<T, uint16_t>(bit_num, data, &pack_data); | |||||
| auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint16_t)); | auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint16_t)); | ||||
| if (status != RET_OK) { | if (status != RET_OK) { | ||||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | ||||
| @@ -322,17 +320,79 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return RET_OK; | |||||
| } | |||||
| template <typename T> | |||||
| STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, | |||||
| QuantType quant_type, int quant_max, int quant_min, size_t bit_num, bool per_channel, int index = 1, | |||||
| bool k_means = false) { | |||||
| MS_ASSERT(weight != nullptr); | |||||
| MS_ASSERT(primitive_c != nullptr); | |||||
| auto dims = weight->tensor_shape(); | |||||
| if (per_channel) { | |||||
| if (dims.size() <= 1) { | |||||
| MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel"; | |||||
| per_channel = false; | |||||
| } | |||||
| } | |||||
| std::vector<schema::QuantParamT> quant_params; | |||||
| size_t elem_count = weight->tensor_shape_size(); | |||||
| auto *raw_datas = static_cast<float *>(weight->tensor_addr()); | |||||
| if (raw_datas == nullptr) { | |||||
| MS_LOG(ERROR) << "rawDatas is nullptr"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::vector<T> quant_datas(elem_count); | |||||
| std::vector<float> dequant_datas(elem_count); | |||||
| int ret = RET_OK; | |||||
| if (per_channel) { | |||||
| // notice: assume Con2D\DepthwiseConv2D's weight format are same: KHWC | |||||
| // channel at first | |||||
| ret = DoPerChannelQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_datas, | |||||
| &dequant_datas); | |||||
| if (ret == RET_CONTINUE) { | |||||
| return ret; | |||||
| } else if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Do per channel quant failed."; | |||||
| return ret; | |||||
| } | |||||
| } else { | |||||
| ret = DoPerLayerQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_datas); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Do per layer quant failed."; | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| #ifdef HUFFMAN_ENCODE | |||||
| auto huffman_encode = std::make_unique<lite::HuffmanEncode>(); | |||||
| ret = huffman_encode->DoHuffmanEncode(weight, primitive_c, quant_datas.data(), bit_num); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Do huffman encode failed."; | |||||
| return ret; | |||||
| } | |||||
| #else | |||||
| // do bit pack | |||||
| ret = DoBitPack(weight, bit_num, quant_datas); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Do bit pack failed."; | |||||
| return ret; | |||||
| } | |||||
| #endif | |||||
| if (quant_params.empty()) { | if (quant_params.empty()) { | ||||
| MS_LOG(ERROR) << "quant_params empty"; | MS_LOG(ERROR) << "quant_params empty"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (quantType == QuantType_PostTraining) { | |||||
| if (quant_type == QuantType_PostTraining) { | |||||
| primitive_c->AddInputQuantParam(quant_params); | primitive_c->AddInputQuantParam(quant_params); | ||||
| } else { | } else { | ||||
| primitive_c->set_input_quant_param(index, quant_params); | primitive_c->set_input_quant_param(index, quant_params); | ||||
| } | } | ||||
| return RET_OK; | |||||
| return ret; | |||||
| } | } | ||||
| // utils | // utils | ||||
| @@ -109,7 +109,9 @@ STATUS WeightQuantizer::DoConvQuantize(CNodePtr cnode) { | |||||
| status = | status = | ||||
| QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true); | QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true); | ||||
| } | } | ||||
| if (status != RET_OK) { | |||||
| if (status == RET_CONTINUE) { | |||||
| return RET_OK; | |||||
| } else if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "QuantFilter failed : " << status; | MS_LOG(ERROR) << "QuantFilter failed : " << status; | ||||
| return status; | return status; | ||||
| } | } | ||||
| @@ -173,7 +175,9 @@ STATUS WeightQuantizer::DoMulQuantize(CNodePtr cnode) { | |||||
| status = | status = | ||||
| QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true); | QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true); | ||||
| } | } | ||||
| if (status != RET_OK) { | |||||
| if (status == RET_CONTINUE) { | |||||
| return RET_OK; | |||||
| } else if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "QuantFilter failed : " << status; | MS_LOG(ERROR) << "QuantFilter failed : " << status; | ||||
| return status; | return status; | ||||
| } | } | ||||
| @@ -246,7 +250,9 @@ STATUS WeightQuantizer::DoGatherQuantize(CNodePtr cnode) { | |||||
| status = | status = | ||||
| QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false, 0); | QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false, 0); | ||||
| } | } | ||||
| if (status != RET_OK) { | |||||
| if (status == RET_CONTINUE) { | |||||
| return RET_OK; | |||||
| } else if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "QuantFilter failed : " << status; | MS_LOG(ERROR) << "QuantFilter failed : " << status; | ||||
| return status; | return status; | ||||
| } | } | ||||
| @@ -286,7 +292,9 @@ STATUS WeightQuantizer::ProcessLstmWeightByIndex(const CNodePtr &cnode, const st | |||||
| status = QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, | status = QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, | ||||
| false, index - 1); | false, index - 1); | ||||
| } | } | ||||
| if (status != RET_OK) { | |||||
| if (status == RET_CONTINUE) { | |||||
| return RET_OK; | |||||
| } else if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "QuantFilter failed : " << status; | MS_LOG(ERROR) << "QuantFilter failed : " << status; | ||||
| return status; | return status; | ||||
| } | } | ||||
| @@ -503,7 +511,9 @@ STATUS WeightQuantizer::TryQuant(const int &bit_num_t, const ParameterPtr ¶m | |||||
| MS_LOG(ERROR) << "unexpected type_id_: " << type_id_; | MS_LOG(ERROR) << "unexpected type_id_: " << type_id_; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (status != RET_OK) { | |||||
| if (status == RET_CONTINUE) { | |||||
| return RET_OK; | |||||
| } else if (status != RET_OK) { | |||||
| MS_LOG(ERROR) << "quant filter failed."; | MS_LOG(ERROR) << "quant filter failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||