diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 964d7d383..702fd275b 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -2,7 +2,17 @@ add_subdirectory(caffe) add_subdirectory(mxnet) add_subdirectory(onnx) -# add_subdirectory(quantize) + +find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) +if(NOT OpenCV_FOUND) + find_package(OpenCV QUIET COMPONENTS core highgui imgproc) +endif() + +if(OpenCV_FOUND) + add_subdirectory(quantize) +else() + message(WARNING "OpenCV not found, quantize tools won't be built") +endif() add_executable(ncnn2mem ncnn2mem.cpp) diff --git a/tools/quantize/ncnn2int8.cpp b/tools/quantize/ncnn2int8.cpp index ae2678c23..73557f173 100755 --- a/tools/quantize/ncnn2int8.cpp +++ b/tools/quantize/ncnn2int8.cpp @@ -12,18 +12,15 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -#include -#include -#include -#include -#include +#ifdef _MSC_VER +#define _CRT_SECURE_NO_DEPRECATE +#endif -#include +#include +#include #include #include -#include #include -#include // ncnn public header #include "net.h" @@ -90,57 +87,57 @@ static bool read_int8scale_table(const char* filepath, std::map scales; - char *line = NULL; - char *pch = NULL; + std::vectorline(102400); + char *pch = nullptr; size_t len = 0; - ssize_t read; - while ((read = getline(&line, &len, fp)) != -1) + while (nullptr != std::fgets(line.data(), static_cast(line.size()), fp)) { - float scale = 1.f; char key[256]; - line[strcspn(line, "\r\n")] = 0; - pch = strtok (line, " "); - if (pch == NULL) break; + line[strcspn(line.data(), "\r\n")] = 0; + + pch = strtok(line.data(), " "); - bool iskey = 1; - while (pch != NULL) + if (pch == nullptr) break; + + bool is_key = true; + while (pch != nullptr) { - if (iskey) + if (is_key) { sscanf(pch, "%255s", key); - keystr = key; - iskey = 0; + + key_str = key; + is_key = false; } else { sscanf(pch, "%f", &scale); + scales.push_back(scale); } - pch = strtok (NULL, " "); + pch = strtok(nullptr, " "); } // XYZ_param_N pattern - if (strstr(keystr.c_str(), "_param_")) + if (strstr(key_str.c_str(), "_param_")) { - weight_int8scale_table[ keystr ] = scales; + weight_int8scale_table[key_str] = scales; } else { - blob_int8scale_table[ keystr ] = scales; + blob_int8scale_table[key_str] = scales; } - keystr.clear(); + key_str.clear(); scales.clear(); } @@ -155,7 +152,7 @@ public: // 0=fp32 1=fp16 2=int8 int storage_type; std::map > blob_int8scale_table; - std::map > weight_int8scale_table; + std::map > weight_int8scale_table; public: int quantize_convolution(); @@ -174,29 +171,30 @@ public: int NetQuantize::quantize_convolution() { - const int layer_count = layers.size(); - for (int i=0; i(layers.size()); + for (int i = 0; i < layer_count; i++) { // find convoultion layer if (layers[i]->type != "Convolution") continue; // find convolution layer - std::map >::iterator iter_data = blob_int8scale_table.find(layers[i]->name); + auto iter_data = blob_int8scale_table.find(layers[i]->name); if (iter_data == blob_int8scale_table.end()) continue; char key[256]; sprintf(key, "%s_param_0", layers[i]->name.c_str()); - std::map >::iterator iter = weight_int8scale_table.find(key); + + auto iter = weight_int8scale_table.find(key); if (iter == weight_int8scale_table.end()) { fprintf(stderr, "this layer need to be quantized, but no scale param!\n"); return -1; } - + // Convolution - quantize weight from fp32 to int8 - ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i]; + auto convolution = (ncnn::Convolution*)layers[i]; std::vector weight_data_int8_scales = iter->second; @@ -210,7 +208,7 @@ int NetQuantize::quantize_convolution() const int weight_data_size_output = convolution->weight_data_size / convolution->num_output; // quantize weight to int8 - for (int n=0; nnum_output; n++) + for (int n = 0; n < convolution->num_output; n++) { ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize); @@ -240,29 +238,30 @@ int NetQuantize::quantize_convolution() int NetQuantize::quantize_convolutiondepthwise() { - const int layer_count = layers.size(); - for (int i=0; i(layers.size()); + for (int i = 0; i < layer_count; i++) { // find convoultion layer if (layers[i]->type != "ConvolutionDepthWise") continue; // find convolutiondepthwise layer - std::map >::iterator iter_data = blob_int8scale_table.find(layers[i]->name); + auto iter_data = blob_int8scale_table.find(layers[i]->name); if (iter_data == blob_int8scale_table.end()) continue; char key[256]; sprintf(key, "%s_param_0", layers[i]->name.c_str()); - std::map >::iterator iter = weight_int8scale_table.find(key); + + auto iter = weight_int8scale_table.find(key); if (iter == weight_int8scale_table.end()) { fprintf(stderr, "this layer need to be quantized, but no scale param!\n"); return -1; } - + // Convolution - quantize weight from fp32 to int8 - ncnn::ConvolutionDepthWise* convdw = (ncnn::ConvolutionDepthWise*)layers[i]; + auto convdw = (ncnn::ConvolutionDepthWise*)layers[i]; std::vector weight_data_int8_scales = iter->second; @@ -276,7 +275,7 @@ int NetQuantize::quantize_convolutiondepthwise() const int weight_data_size_output = convdw->weight_data_size / convdw->group; // quantize weight to int8 - for (int n=0; ngroup; n++) + for (int n = 0; n < convdw->group; n++) { ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize); @@ -306,29 +305,30 @@ int NetQuantize::quantize_convolutiondepthwise() int NetQuantize::quantize_innerproduct() { - const int layer_count = layers.size(); - for (int i=0; i(layers.size()); + for (int i = 0; i < layer_count; i++) { // find convoultion layer if (layers[i]->type != "InnerProduct") continue; // find InnerProduct layer - std::map >::iterator iter_data = blob_int8scale_table.find(layers[i]->name); + auto iter_data = blob_int8scale_table.find(layers[i]->name); if (iter_data == blob_int8scale_table.end()) continue; char key[256]; sprintf(key, "%s_param_0", layers[i]->name.c_str()); - std::map >::iterator iter = weight_int8scale_table.find(key); + + auto iter = weight_int8scale_table.find(key); if (iter == weight_int8scale_table.end()) { fprintf(stderr, "this layer need to be quantized, but no scale param!\n"); return -1; } - + // InnerProduct - quantize weight from fp32 to int8 - ncnn::InnerProduct* fc = (ncnn::InnerProduct*)layers[i]; + auto fc = (ncnn::InnerProduct*)layers[i]; std::vector weight_data_int8_scales = iter->second; @@ -342,7 +342,7 @@ int NetQuantize::quantize_innerproduct() const int weight_data_size_output = fc->weight_data_size / fc->num_output; // quantize weight to int8 - for (int n=0; nnum_output; n++) + for (int n = 0; n < fc->num_output; n++) { ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize); @@ -376,7 +376,7 @@ int NetQuantize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp) const int* ptr = m; fprintf(pp, " -%d=%d", 23300 + id, count); - for (int i=0; i(alignSize(nwrite, 4)); + unsigned char padding[4] = { 0x00, 0x00, 0x00, 0x00 }; fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp); return 0; @@ -433,8 +433,8 @@ int NetQuantize::fwrite_weight_data(const ncnn::Mat& data, FILE* bp) // padding to 32bit align int nwrite = ftell(bp) - p0; - int nalign = alignSize(nwrite, 4); - unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00}; + int nalign = static_cast(alignSize(nwrite, 4)); + unsigned char padding[4] = { 0x00, 0x00, 0x00, 0x00 }; fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp); return 0; @@ -447,11 +447,11 @@ int NetQuantize::save(const char* parampath, const char* binpath) fprintf(pp, "7767517\n"); - const int layer_count = layers.size(); + const int layer_count = static_cast(layers.size()); int layer_count_fused = 0; std::set blob_names; - for (int i=0; itype == "ncnnfused") @@ -459,42 +459,42 @@ int NetQuantize::save(const char* parampath, const char* binpath) layer_count_fused++; - int bottom_count = layer->bottoms.size(); - for (int j=0; j(layer->bottoms.size()); + for (int j = 0; j < bottom_count; j++) { int bottom_blob_index = layer->bottoms[j]; blob_names.insert(blobs[bottom_blob_index].name); } - int top_count = layer->tops.size(); - for (int j=0; j(layer->tops.size()); + for (int j = 0; j < top_count; j++) { int top_blob_index = layer->tops[j]; blob_names.insert(blobs[top_blob_index].name); } } - int blob_count_fused = blob_names.size(); + int blob_count_fused = static_cast(blob_names.size()); fprintf(pp, "%d %d\n", layer_count_fused, blob_count_fused); - for (int i=0; itype == "ncnnfused") continue; - int bottom_count = layer->bottoms.size(); - int top_count = layer->tops.size(); + int bottom_count = static_cast(layer->bottoms.size()); + int top_count = static_cast(layer->tops.size()); fprintf(pp, "%-24s %-24s %d %d", layer->type.c_str(), layer->name.c_str(), bottom_count, top_count); - for (int j=0; jbottoms[j]; fprintf(pp, " %s", blobs[bottom_blob_index].name.c_str()); } - for (int j=0; jtops[j]; fprintf(pp, " %s", blobs[top_blob_index].name.c_str()); @@ -581,12 +581,13 @@ int NetQuantize::save(const char* parampath, const char* binpath) // write int8_scale data if (op->int8_scale_term) - { + { std::vector weight_int8scale; std::vector blob_int8scale; char key[256]; - sprintf(key, "%s_param_0", layer->name.c_str()); + sprintf(key, "%s_param_0", layers[i]->name.c_str()); + if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end()) { weight_int8scale = weight_int8scale_table[std::string(key)]; @@ -630,12 +631,13 @@ int NetQuantize::save(const char* parampath, const char* binpath) // write int8_scale data if (op->int8_scale_term) - { + { std::vector weight_int8scale; std::vector blob_int8scale; char key[256]; - sprintf(key, "%s_param_0", layer->name.c_str()); + sprintf(key, "%s_param_0", layers[i]->name.c_str()); + if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end()) { weight_int8scale = weight_int8scale_table[std::string(key)]; @@ -649,7 +651,7 @@ int NetQuantize::save(const char* parampath, const char* binpath) // write int8_scale data fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp); fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp); - } + } } else if (layer->type == "Crop") { @@ -781,12 +783,13 @@ int NetQuantize::save(const char* parampath, const char* binpath) // write int8_scale data if (op->int8_scale_term) - { + { std::vector weight_int8scale; std::vector blob_int8scale; char key[256]; - sprintf(key, "%s_param_0", layer->name.c_str()); + sprintf(key, "%s_param_0", layers[i]->name.c_str()); + if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end()) { weight_int8scale = weight_int8scale_table[std::string(key)]; @@ -800,7 +803,7 @@ int NetQuantize::save(const char* parampath, const char* binpath) // write int8_scale data fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp); fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp); - } + } } else if (layer->type == "Input") { @@ -880,7 +883,7 @@ int NetQuantize::save(const char* parampath, const char* binpath) fprintf_param_value(" 3=%d", scale_data_size) fprintf_param_value(" 4=%d", across_channel) - fwrite_weight_data(op->scale_data, bp); + fwrite_weight_data(op->scale_data, bp); } else if (layer->type == "Padding") { @@ -1170,7 +1173,7 @@ int main(int argc, char** argv) quantizer.load_param(inparam); quantizer.load_model(inbin); - + quantizer.quantize_convolution(); quantizer.quantize_convolutiondepthwise(); quantizer.quantize_innerproduct(); diff --git a/tools/quantize/ncnn2table.cpp b/tools/quantize/ncnn2table.cpp index 40d3d0efc..8a7339e8f 100755 --- a/tools/quantize/ncnn2table.cpp +++ b/tools/quantize/ncnn2table.cpp @@ -1,912 +1,944 @@ -// BUG1989 is pleased to support the open source community by supporting ncnn available. -// -// author:BUG1989 (https://github.com/BUG1989/) Long-term support. -// author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration. -// -// Copyright (C) 2019 BUG1989. All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except -// in compliance with the License. You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// ncnn public header -#include "platform.h" -#include "net.h" -#include "cpu.h" -#include "benchmark.h" - -// ncnn private header -#include "layer/convolution.h" -#include "layer/convolutiondepthwise.h" -#include "layer/innerproduct.h" - -static ncnn::Option g_default_option; -static ncnn::UnlockedPoolAllocator g_blob_pool_allocator; -static ncnn::PoolAllocator g_workspace_pool_allocator; - -// Get the filenames from direct path -int parse_images_dir(const char *base_path, std::vector& file_path) -{ - DIR *dir; - struct dirent *ptr; - - if ((dir=opendir(base_path)) == NULL) - { - perror("Open dir error..."); - exit(1); - } - - while ((ptr=readdir(dir)) != NULL) - { - if(strcmp(ptr->d_name,".")==0 || strcmp(ptr->d_name,"..")==0) ///current dir OR parrent dir - { - continue; - } - - std::string path = base_path; - file_path.push_back(path + ptr->d_name); - } - closedir(dir); - - return 0; -} - -class QuantNet : public ncnn::Net -{ -public: - int get_conv_names(); - int get_conv_bottom_blob_names(); - int get_conv_weight_blob_scales(); - int get_input_names(); - -public: - std::vector conv_names; - std::map conv_bottom_blob_names; - std::map > weight_scales; - std::vector input_names; -}; - -int QuantNet::get_input_names() -{ - for (size_t i=0; itype == "Input") - { - for (size_t j=0; jtops.size(); j++) - { - int blob_index = layer->tops[j]; - std::string name = blobs[blob_index].name.c_str(); - input_names.push_back(name); - } - } - } - - return 0; -} - -int QuantNet::get_conv_names() -{ - for (size_t i=0; itype == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct") - { - std::string name = layer->name; - conv_names.push_back(name); - } - } - - return 0; -} - -int QuantNet::get_conv_bottom_blob_names() -{ - // find conv bottom name or index - for (size_t i=0; itype == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct") - { - std::string name = layer->name; - std::string bottom_blob_name = blobs[layer->bottoms[0]].name; - conv_bottom_blob_names[name] = bottom_blob_name; - } - } - - return 0; -} - -int QuantNet::get_conv_weight_blob_scales() -{ - for (size_t i=0; itype == "Convolution") - { - std::string name = layer->name; - const int weight_data_size_output = ((ncnn::Convolution*)layer)->weight_data_size / ((ncnn::Convolution*)layer)->num_output; - std::vector scales; - - // int8 winograd F43 needs weight data to use 6bit quantization - bool quant_6bit = false; - int kernel_w = ((ncnn::Convolution*)layer)->kernel_w; - int kernel_h = ((ncnn::Convolution*)layer)->kernel_h; - int dilation_w = ((ncnn::Convolution*)layer)->dilation_w; - int dilation_h = ((ncnn::Convolution*)layer)->dilation_h; - int stride_w = ((ncnn::Convolution*)layer)->stride_w; - int stride_h = ((ncnn::Convolution*)layer)->stride_h; - - if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) - quant_6bit = true; - - for (int n=0; n<((ncnn::Convolution*)layer)->num_output; n++) - { - const ncnn::Mat weight_data_n = ((ncnn::Convolution*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output); - const float *data_n = weight_data_n; - float max_value = std::numeric_limits::min(); - - for (int i = 0; i < weight_data_size_output; i++) - max_value = std::max(max_value, std::fabs(data_n[i])); - - if (quant_6bit) - scales.push_back(31 / max_value); - else - scales.push_back(127 / max_value); - } - - weight_scales[name] = scales; - } - - if (layer->type == "ConvolutionDepthWise") - { - std::string name = layer->name; - const int weight_data_size_output = ((ncnn::ConvolutionDepthWise*)layer)->weight_data_size / ((ncnn::ConvolutionDepthWise*)layer)->group; - std::vector scales; - - for (int n=0; n<((ncnn::ConvolutionDepthWise*)layer)->group; n++) - { - const ncnn::Mat weight_data_n = ((ncnn::ConvolutionDepthWise*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output); - const float *data_n = weight_data_n; - float max_value = std::numeric_limits::min(); - - for (int i = 0; i < weight_data_size_output; i++) - max_value = std::max(max_value, std::fabs(data_n[i])); - - scales.push_back(127 / max_value); - } - - weight_scales[name] = scales; - } - - if (layer->type == "InnerProduct") - { - std::string name = layer->name; - const int weight_data_size_output = ((ncnn::InnerProduct*)layer)->weight_data_size / ((ncnn::InnerProduct*)layer)->num_output; - std::vector scales; - - for (int n=0; n<((ncnn::InnerProduct*)layer)->num_output; n++) - { - const ncnn::Mat weight_data_n = ((ncnn::InnerProduct*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output); - const float *data_n = weight_data_n; - float max_value = std::numeric_limits::min(); - - for (int i = 0; i < weight_data_size_output; i++) - max_value = std::max(max_value, std::fabs(data_n[i])); - - scales.push_back(127 / max_value); - } - - weight_scales[name] = scales; - } - } - - return 0; -} - -class QuantizeData -{ -public: - QuantizeData(std::string layer_name, int num); - - int initial_blob_max(ncnn::Mat data); - int initial_histogram_interval(); - int initial_histogram_value(); - - int normalize_histogram(); - int update_histogram(ncnn::Mat data); - - float compute_kl_divergence(const std::vector &dist_a, const std::vector &dist_b); - int threshold_distribution(const std::vector &distribution, const int target_bin=128); - float get_data_blob_scale(); - -public: - std::string name; - - float max_value; - int num_bins; - float histogram_interval; - std::vector histogram; - - float threshold; - int threshold_bin; - float scale; -}; - -QuantizeData::QuantizeData(std::string layer_name, int num) -{ - name = layer_name; - max_value = 0.0; - num_bins = num; - histogram_interval = 0.0; - histogram.resize(num_bins); - initial_histogram_value(); -} - -int QuantizeData::initial_blob_max(ncnn::Mat data) -{ - int channel_num = data.c; - int size = data.w * data.h; - - for (int q=0; q(std::abs(data_n[i]) / histogram_interval), 2047); - - histogram[index]++; - } - } - - return 0; -} - -float QuantizeData::compute_kl_divergence(const std::vector &dist_a, const std::vector &dist_b) -{ - const int length = dist_a.size(); - assert(dist_b.size() == length); - float result = 0; - - for (int i=0; i &distribution, const int target_bin) -{ - int target_threshold = target_bin; - float min_kl_divergence = 1000; - const int length = distribution.size(); - - std::vector quantize_distribution(target_bin); - - float threshold_sum = 0; - for (int threshold=target_bin; threshold t_distribution(distribution.begin(), distribution.begin()+threshold); - - t_distribution[threshold-1] += threshold_sum; - threshold_sum -= distribution[threshold]; - - // get P - fill(quantize_distribution.begin(), quantize_distribution.end(), 0); - - const float num_per_bin = static_cast(threshold) / target_bin; - - for (int i=0; i start) - { - const float left_scale = left_upper - start; - quantize_distribution[i] += left_scale * distribution[left_upper - 1]; - } - - const int right_lower = floor(end); - - if (right_lower < end) - { - - const float right_scale = end - right_lower; - quantize_distribution[i] += right_scale * distribution[right_lower]; - } - - for (int j=left_upper; j expand_distribution(threshold, 0); - - for (int i=0; i start) - { - left_scale = left_upper - start; - if (distribution[left_upper - 1] != 0) - { - count += left_scale; - } - } - - const int right_lower = floor(end); - float right_scale = 0; - if (right_lower < end) - { - right_scale = end - right_lower; - if (distribution[right_lower] != 0) - { - count += right_scale; - } - } - - for (int j=left_upper; j start) - { - if (distribution[left_upper - 1] != 0) - { - expand_distribution[left_upper - 1] += expand_value * left_scale; - } - } - if (right_lower < end) - { - if (distribution[right_lower] != 0) - { - expand_distribution[right_lower] += expand_value * right_scale; - } - } - for (int j=left_upper; j filenames, const char* param_path, const char* bin_path, const char* table_path, struct PreParam per_param) -{ - int size = filenames.size(); - - QuantNet net; - net.opt = g_default_option; - - net.load_param(param_path); - net.load_model(bin_path); - - float mean_vals[3], norm_vals[3]; - int weith = per_param.weith; - int height = per_param.height; - bool swapRB = per_param.swapRB; - - mean_vals[0] = per_param.mean[0]; - mean_vals[1] = per_param.mean[1]; - mean_vals[2] = per_param.mean[2]; - - norm_vals[0] = per_param.norm[0]; - norm_vals[1] = per_param.norm[1]; - norm_vals[2] = per_param.norm[2]; - - g_blob_pool_allocator.clear(); - g_workspace_pool_allocator.clear(); - - net.get_input_names(); - net.get_conv_names(); - net.get_conv_bottom_blob_names(); - net.get_conv_weight_blob_scales(); - - if (net.input_names.size() <= 0) - { - fprintf(stderr, "not found [Input] Layer, Check your ncnn.param \n"); - return -1; - } - - FILE *fp=fopen(table_path, "w"); - - // save quantization scale of weight - printf("====> Quantize the parameters.\n"); - for (size_t i=0; i weight_scale_n = net.weight_scales[layer_name]; - - fprintf(fp, "%s_param_0 ", layer_name.c_str()); - for (size_t j=0; j quantize_datas; - - for (size_t i=0; i Quantize the activation.\n"); - printf(" ====> step 1 : find the max value.\n"); - - for (size_t i=0; i 2 - cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR); -#else - cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR); -#endif - if (bgr.empty()) - { - fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); - return -1; - } - - ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, weith, height); - in.substract_mean_normalize(mean_vals, norm_vals); - - ncnn::Extractor ex = net.create_extractor(); - ex.input(net.input_names[0].c_str(), in); - - for (size_t i=0; i step 2 : generate the histogram_interval.\n"); - for (size_t i=0; i step 3 : generate the histogram.\n"); - for (size_t i=0; i 2 - cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR); -#else - cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR); -#endif - if (bgr.empty()) - { - fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); - return -1; - } - - ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, weith, height); - in.substract_mean_normalize(mean_vals, norm_vals); - - ncnn::Extractor ex = net.create_extractor(); - ex.input(net.input_names[0].c_str(), in); - - for (size_t i=0; i step 4 : using kld to find the best threshold value.\n"); - for (size_t i=0; i Save the calibration table done.\n"); - - return 0; -} - -// usage -void showUsage() -{ - std::cout << "usage: ncnn2table [-h] [-p] [-b] [-o] [-m] [-n] [-s] [-t]" << std::endl; - std::cout << " -h, --help show this help message and exit" << std::endl; - std::cout << " -p, --param path to ncnn.param file" << std::endl; - std::cout << " -b, --bin path to ncnn.bin file" << std::endl; - std::cout << " -i, --images path to calibration images" << std::endl; - std::cout << " -o, --output path to output calibration tbale file" << std::endl; - std::cout << " -m, --mean value of mean" << std::endl; - std::cout << " -n, --norm value of normalize(scale value,defualt is 1)" << std::endl; - std::cout << " -s, --size the size of input image(using the resize the original image,default is w=224,h=224)" << std::endl; - std::cout << " -c --swapRB flag which indicates that swap first and last channels in 3-channel image is necessary" << std::endl; - std::cout << " -t, --thread number of threads(defalut is 1)" << std::endl; - std::cout << "example: ./ncnn2table --param squeezenet-fp32.param --bin squeezenet-fp32.bin --images images/ --output squeezenet.table --mean 104,117,123 --norm 1,1,1 --size 227,227 --swapRB --thread 2" << std::endl; -} - -// string.split('x') -std::vector split(const std::string &str,const std::string &pattern) -{ - //const char* convert to char* - char * strc = new char[strlen(str.c_str())+1]; - strcpy(strc, str.c_str()); - std::vector resultVec; - char* tmpStr = strtok(strc, pattern.c_str()); - while (tmpStr != NULL) - { - resultVec.push_back(std::string(tmpStr)); - tmpStr = strtok(NULL, pattern.c_str()); - } - - delete[] strc; - - return resultVec; -} - -int main(int argc, char** argv) -{ - std::cout << "--- ncnn post training quantization tool --- " << __TIME__ << " " << __DATE__ << std::endl; - - char* imagepath = NULL; - char* parampath = NULL; - char* binpath = NULL; - char* tablepath = NULL; - int num_threads = 1; - - struct PreParam pre_param = { - .mean = {104.f, 117.f, 103.f}, - .norm = {1.f, 1.f, 1.f}, - .weith = 224, - .height =224, - .swapRB = false - }; - - int c; - - while (1) - { - int option_index = 0; - static struct option long_options[] = - { - {"param", required_argument, 0, 'p' }, - {"bin", required_argument, 0, 'b' }, - {"images", required_argument, 0, 'i' }, - {"output", required_argument, 0, 'o' }, - {"mean", required_argument, 0, 'm' }, - {"norm", required_argument, 0, 'n' }, - {"size", required_argument, 0, 's' }, - {"swapRB", no_argument, 0, 'c' }, - {"thread", required_argument, 0, 't' }, - {"help", no_argument, 0, 'h' }, - {0, 0, 0, 0 } - }; - - c = getopt_long(argc, argv, "p:b:i:o:m:n:s:ct:h", long_options, &option_index); - if (c == -1) - break; - - switch (c) - { - case 'p': - printf("param = '%s'\n", optarg); - parampath = optarg; - break; - - case 'b': - printf("bin = '%s'\n", optarg); - binpath = optarg; - break; - - case 'i': - printf("images = '%s'\n", optarg); - imagepath = optarg; - break; - - case 'o': - printf("output = '%s'\n", optarg); - tablepath = optarg; - break; - - case 'm': - { - printf("mean = '%s'\n", optarg); - std::string temp(optarg); - std::vector array = split(temp, ","); - pre_param.mean[0] = atof(array[0].c_str()); - pre_param.mean[1] = atof(array[1].c_str()); - pre_param.mean[2] = atof(array[2].c_str()); - } - break; - - case 'n': - { - printf("norm = '%s'\n", optarg); - std::string temp(optarg); - std::vector array = split(temp, ","); - pre_param.norm[0] = atof(array[0].c_str()); - pre_param.norm[1] = atof(array[1].c_str()); - pre_param.norm[2] = atof(array[2].c_str()); - } - break; - - case 's': - { - printf("size = '%s'\n", optarg); - std::string temp(optarg); - std::vector array = split(temp, ","); - pre_param.weith = atoi(array[0].c_str()); - pre_param.height = atoi(array[1].c_str()); - } - break; - - case 'c': - { - printf("swapRB = '%s'\n", "true"); - pre_param.swapRB = true; - } - break; - case 't': - printf("thread = '%s'\n", optarg); - num_threads = atoi(optarg); - break; - - case 'h': - case '?': - showUsage(); - return 0; - - default: - showUsage(); - } - } - - // check the input param - if (imagepath == NULL || parampath == NULL || binpath == NULL || tablepath == NULL) - { - fprintf(stderr, "someone path maybe empty,please check it and try again.\n"); - return 0; - } - - g_blob_pool_allocator.set_size_compare_ratio(0.0f); - g_workspace_pool_allocator.set_size_compare_ratio(0.5f); - - // default option - g_default_option.lightmode = true; - g_default_option.num_threads = num_threads; - g_default_option.blob_allocator = &g_blob_pool_allocator; - g_default_option.workspace_allocator = &g_workspace_pool_allocator; - - g_default_option.use_winograd_convolution = true; - g_default_option.use_sgemm_convolution = true; - g_default_option.use_int8_inference = true; - g_default_option.use_fp16_packed = true; - g_default_option.use_fp16_storage = true; - g_default_option.use_fp16_arithmetic = true; - g_default_option.use_int8_storage = true; - g_default_option.use_int8_arithmetic = true; - - ncnn::set_cpu_powersave(2); - ncnn::set_omp_dynamic(0); - ncnn::set_omp_num_threads(num_threads); - - std::vector filenames; - - // parse the image file. - parse_images_dir(imagepath, filenames); - - // get the calibration table file, and save it. - int ret = post_training_quantize(filenames, parampath, binpath, tablepath, pre_param); - if (!ret) - fprintf(stderr, "\nNCNN Int8 Calibration table create success, best wish for your INT8 inference has a low accuracy loss...\\(^▽^)/...233...\n"); - - return 0; -} +// BUG1989 is pleased to support the open source community by supporting ncnn available. +// +// author:BUG1989 (https://github.com/BUG1989/) Long-term support. +// author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration. +// +// Copyright (C) 2019 BUG1989. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_DEPRECATE +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include + +// ncnn public header +#include "net.h" +#include "cpu.h" +#include "benchmark.h" + +// ncnn private header +#include "layer/convolution.h" +#include "layer/convolutiondepthwise.h" +#include "layer/innerproduct.h" + +static ncnn::Option g_default_option; +static ncnn::UnlockedPoolAllocator g_blob_pool_allocator; +static ncnn::PoolAllocator g_workspace_pool_allocator; + +// Get the file names from direct path +int parse_images_dir(const std::string& base_path, std::vector& file_path) +{ + file_path.clear(); + + const cv::String base_path_str(base_path); + std::vector image_list; + + cv::glob(base_path_str, image_list, true); + + for (auto& image_path : image_list) + { + file_path.emplace_back(image_path); + } + + return 0; +} + +class QuantNet : public ncnn::Net +{ +public: + int get_conv_names(); + int get_conv_bottom_blob_names(); + int get_conv_weight_blob_scales(); + int get_input_names(); + +public: + std::vector conv_names; + std::map conv_bottom_blob_names; + std::map > weight_scales; + std::vector input_names; +}; + +int QuantNet::get_input_names() +{ + for (auto layer : layers) + { + if (layer->type == "Input") + { + for (int blob_index : layer->tops) + { + std::string name = blobs[blob_index].name; + input_names.push_back(name); + } + } + } + + return 0; +} + +int QuantNet::get_conv_names() +{ + for (size_t i = 0; i < layers.size(); i++) + { + const auto layer = layers[i]; + + if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct") + { + std::string name = layer->name; + conv_names.push_back(name); + } + } + + return 0; +} + +int QuantNet::get_conv_bottom_blob_names() +{ + // find conv bottom name or index + for (auto layer : layers) + { + if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct") + { + auto name = layer->name; + const auto bottom_blob_name = blobs[layer->bottoms[0]].name; + conv_bottom_blob_names[name] = bottom_blob_name; + } + } + + return 0; +} + +int QuantNet::get_conv_weight_blob_scales() +{ + for (auto layer : layers) + { + if (layer->type == "Convolution") + { + std::string name = layer->name; + const int weight_data_size_output = static_cast(layer)->weight_data_size / static_cast(layer)->num_output; + std::vector scales; + + // int8 winograd F43 needs weight data to use 6bit quantization + bool quant_6bit = false; + int kernel_w = static_cast(layer)->kernel_w; + int kernel_h = static_cast(layer)->kernel_h; + int dilation_w = static_cast(layer)->dilation_w; + int dilation_h = static_cast(layer)->dilation_h; + int stride_w = static_cast(layer)->stride_w; + int stride_h = static_cast(layer)->stride_h; + + if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) + quant_6bit = true; + + for (int n = 0; n < static_cast(layer)->num_output; n++) + { + const ncnn::Mat weight_data_n = static_cast(layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output); + const float *data_n = weight_data_n; + float max_value = std::numeric_limits::min(); + + for (int k = 0; k < weight_data_size_output; k++) + { + max_value = std::max(max_value, std::fabs(data_n[k])); + } + + if (quant_6bit) + { + scales.push_back(31 / max_value); + } + else + { + scales.push_back(127 / max_value); + } + } + + weight_scales[name] = scales; + } + + if (layer->type == "ConvolutionDepthWise") + { + std::string name = layer->name; + const int weight_data_size_output = static_cast(layer)->weight_data_size / static_cast(layer)->group; + std::vector scales; + + for (int n = 0; n < static_cast(layer)->group; n++) + { + const ncnn::Mat weight_data_n = static_cast(layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output); + const float *data_n = weight_data_n; + float max_value = std::numeric_limits::min(); + + for (int k = 0; k < weight_data_size_output; k++) + { + max_value = std::max(max_value, std::fabs(data_n[k])); + } + + scales.push_back(127 / max_value); + } + + weight_scales[name] = scales; + } + + if (layer->type == "InnerProduct") + { + std::string name = layer->name; + const int weight_data_size_output = static_cast(layer)->weight_data_size / static_cast(layer)->num_output; + std::vector scales; + + for (int n = 0; n < static_cast(layer)->num_output; n++) + { + const ncnn::Mat weight_data_n = static_cast(layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output); + const float *data_n = weight_data_n; + float max_value = std::numeric_limits::min(); + + for (int k = 0; k < weight_data_size_output; k++) + max_value = std::max(max_value, std::fabs(data_n[k])); + + scales.push_back(127 / max_value); + } + + weight_scales[name] = scales; + } + } + + return 0; +} + +class QuantizeData +{ +public: + QuantizeData(const std::string& layer_name, const int& num); + + int initial_blob_max(ncnn::Mat data); + int initial_histogram_interval(); + int initial_histogram_value(); + + int normalize_histogram(); + int update_histogram(ncnn::Mat data); + + float compute_kl_divergence(const std::vector &dist_a, const std::vector &dist_b) const; + int threshold_distribution(const std::vector &distribution, const int target_bin = 128) const; + float get_data_blob_scale(); + +public: + std::string name; + + float max_value; + int num_bins; + float histogram_interval; + std::vector histogram; + + float threshold; + int threshold_bin; + float scale; +}; + +QuantizeData::QuantizeData(const std::string& layer_name, const int& num) +{ + name = layer_name; + max_value = 0.f; + num_bins = num; + histogram_interval = 0.f; + histogram.resize(num_bins); + initial_histogram_value(); + + threshold = 0.f; + threshold_bin = 0; + scale = 1.0f; +} + +int QuantizeData::initial_blob_max(ncnn::Mat data) +{ + const int channel_num = data.c; + const int size = data.w * data.h; + + for (int q = 0; q < channel_num; q++) + { + const float *data_n = data.channel(q); + for (int i = 0; i < size; i++) + { + max_value = std::max(max_value, std::fabs(data_n[i])); + } + } + + return 0; +} + +int QuantizeData::initial_histogram_interval() +{ + histogram_interval = max_value / static_cast(num_bins); + + return 0; +} + +int QuantizeData::initial_histogram_value() +{ + for (float& i : histogram) + { + i = 0.00001f; + } + + return 0; +} + +int QuantizeData::normalize_histogram() +{ + const auto length = histogram.size(); + float sum = 0; + + for (size_t i = 0; i < length; i++) + sum += histogram[i]; + + for (size_t i = 0; i < length; i++) + histogram[i] /= sum; + + return 0; +} + +int QuantizeData::update_histogram(ncnn::Mat data) +{ + const int channel_num = data.c; + const int size = data.w * data.h; + + for (int q = 0; q < channel_num; q++) + { + const float *data_n = data.channel(q); + for (int i = 0; i < size; i++) + { + if (data_n[i] == 0) + continue; + + const int index = std::min(static_cast(std::abs(data_n[i]) / histogram_interval), 2047); + + histogram[index]++; + } + } + + return 0; +} + +float QuantizeData::compute_kl_divergence(const std::vector &dist_a, const std::vector &dist_b) const +{ + const auto length = dist_a.size(); + assert(dist_b.size() == length); + float result = 0; + + for (size_t i = 0; i < length; i++) + { + if (dist_a[i] != 0) + { + if (dist_b[i] == 0) + { + result += 1; + } + else + { + result += dist_a[i] * log(dist_a[i] / dist_b[i]); + } + } + } + + return result; +} + +int QuantizeData::threshold_distribution(const std::vector &distribution, const int target_bin) const +{ + int target_threshold = target_bin; + float min_kl_divergence = 1000; + const int length = static_cast(distribution.size()); + + std::vector quantize_distribution(target_bin); + + float threshold_sum = 0; + for (int threshold = target_bin; threshold < length; threshold++) + { + threshold_sum += distribution[threshold]; + } + + for (int threshold = target_bin; threshold < length; threshold++) + { + + std::vector t_distribution(distribution.begin(), distribution.begin() + threshold); + + t_distribution[threshold - 1] += threshold_sum; + threshold_sum -= distribution[threshold]; + + // get P + fill(quantize_distribution.begin(), quantize_distribution.end(), 0.0f); + + const auto num_per_bin = static_cast(threshold) / static_cast(target_bin); + + for (int i = 0; i < target_bin; i++) + { + const auto start = static_cast(i) * num_per_bin; + const auto end = start + num_per_bin; + + const auto left_upper = static_cast(ceil(start)); + if (static_cast(left_upper) > start) + { + const auto left_scale = static_cast(left_upper) - start; + quantize_distribution[i] += left_scale * distribution[left_upper - 1]; + } + + const auto right_lower = static_cast(floor(end)); + + if (static_cast(right_lower) < end) + { + + const auto right_scale = end - static_cast(right_lower); + quantize_distribution[i] += right_scale * distribution[right_lower]; + } + + for (int j = left_upper; j < right_lower; j++) + { + quantize_distribution[i] += distribution[j]; + } + } + + // get Q + std::vector expand_distribution(threshold, 0); + + for (int i = 0; i < target_bin; i++) + { + const auto start = static_cast(i) * num_per_bin; + const auto end = start + num_per_bin; + + float count = 0; + + const int left_upper = static_cast(ceil(start)); + float left_scale = 0; + if (static_cast(left_upper) > start) + { + left_scale = static_cast(left_upper) - start; + if (distribution[left_upper - 1] != 0) + { + count += left_scale; + } + } + + const int right_lower = static_cast(floor(end)); + float right_scale = 0; + if (static_cast(right_lower) < end) + { + right_scale = end - static_cast(right_lower); + if (distribution[right_lower] != 0) + { + count += right_scale; + } + } + + for (int j = left_upper; j < right_lower; j++) + { + if (distribution[j] != 0) + { + count++; + } + } + + const auto expand_value = quantize_distribution[i] / count; + + if (static_cast(left_upper) > start) + { + if (distribution[left_upper - 1] != 0) + { + expand_distribution[left_upper - 1] += expand_value * left_scale; + } + } + if (static_cast(right_lower) < end) + { + if (distribution[right_lower] != 0) + { + expand_distribution[right_lower] += expand_value * right_scale; + } + } + for (int j = left_upper; j < right_lower; j++) + { + if (distribution[j] != 0) + { + expand_distribution[j] += expand_value; + } + } + } + + // kl + const float kl_divergence = compute_kl_divergence(t_distribution, expand_distribution); + + // the best num of bin + if (kl_divergence < min_kl_divergence) + { + min_kl_divergence = kl_divergence; + target_threshold = threshold; + } + } + + return target_threshold; +} + +float QuantizeData::get_data_blob_scale() +{ + normalize_histogram(); + threshold_bin = threshold_distribution(histogram); + threshold = (static_cast(threshold_bin) + 0.5f) * histogram_interval; + scale = 127 / threshold; + return scale; +} + +struct PreParam +{ + float mean[3]; + float norm[3]; + int width; + int height; + bool swapRB; +}; + +static int post_training_quantize(const std::vector& image_list, const std::string& param_path, const std::string& bin_path, const std::string& table_path, struct PreParam& per_param) +{ + auto size = image_list.size(); + + QuantNet net; + net.opt = g_default_option; + + net.load_param(param_path.c_str()); + net.load_model(bin_path.c_str()); + + float mean_vals[3]; + float norm_vals[3]; + + int width = per_param.width; + int height = per_param.height; + bool swapRB = per_param.swapRB; + + mean_vals[0] = per_param.mean[0]; + mean_vals[1] = per_param.mean[1]; + mean_vals[2] = per_param.mean[2]; + + norm_vals[0] = per_param.norm[0]; + norm_vals[1] = per_param.norm[1]; + norm_vals[2] = per_param.norm[2]; + + g_blob_pool_allocator.clear(); + g_workspace_pool_allocator.clear(); + + net.get_input_names(); + net.get_conv_names(); + net.get_conv_bottom_blob_names(); + net.get_conv_weight_blob_scales(); + + if (net.input_names.empty()) + { + fprintf(stderr, "not found [Input] Layer, Check your ncnn.param \n"); + return -1; + } + + FILE *fp = fopen(table_path.c_str(), "w"); + + // save quantization scale of weight + printf("====> Quantize the parameters.\n"); + for (size_t i = 0; i < net.conv_names.size(); i++) + { + std::string layer_name = net.conv_names[i]; + std::string blob_name = net.conv_bottom_blob_names[layer_name]; + std::vector weight_scale_n = net.weight_scales[layer_name]; + + fprintf(fp, "%s_param_0 ", layer_name.c_str()); + for (float j : weight_scale_n) + { + fprintf(fp, "%f ", j); + } + fprintf(fp, "\n"); + } + + // initial quantization data + std::vector quantize_datas; + + for (size_t i = 0; i < net.conv_names.size(); i++) + { + std::string layer_name = net.conv_names[i]; + + QuantizeData quantize_data(layer_name, 2048); + quantize_datas.push_back(quantize_data); + } + + // step 1 count the max value + printf("====> Quantize the activation.\n"); + printf(" ====> step 1 : find the max value.\n"); + + for (size_t i = 0; i < image_list.size(); i++) + { + std::string img_name = image_list[i]; + + if ((i + 1) % 100 == 0) + { + fprintf(stderr, " %d/%d\n", static_cast(i + 1), static_cast(size)); + } + +#if OpenCV_VERSION_MAJOR > 2 + cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR); +#else + cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR); +#endif + if (bgr.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); + return -1; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height); + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = net.create_extractor(); + ex.input(net.input_names[0].c_str(), in); + + for (size_t j = 0; j < net.conv_names.size(); j++) + { + std::string layer_name = net.conv_names[j]; + std::string blob_name = net.conv_bottom_blob_names[layer_name]; + + ncnn::Mat out; + ex.extract(blob_name.c_str(), out); + + for (auto& quantize_data : quantize_datas) + { + if (quantize_data.name == layer_name) + { + quantize_data.initial_blob_max(out); + break; + } + } + } + } + + // step 2 histogram_interval + printf(" ====> step 2 : generate the histogram_interval.\n"); + for (size_t i = 0; i < net.conv_names.size(); i++) + { + std::string layer_name = net.conv_names[i]; + + for (auto& quantize_data : quantize_datas) + { + if (quantize_data.name == layer_name) + { + quantize_data.initial_histogram_interval(); + + fprintf(stderr, "%-20s : max = %-15f interval = %-10f\n", quantize_data.name.c_str(), quantize_data.max_value, quantize_data.histogram_interval); + break; + } + } + } + + // step 3 histogram + printf(" ====> step 3 : generate the histogram.\n"); + for (size_t i = 0; i < image_list.size(); i++) + { + std::string img_name = image_list[i]; + + if ((i + 1) % 100 == 0) + fprintf(stderr, " %d/%d\n", (int)(i + 1), (int)size); +#if OpenCV_VERSION_MAJOR > 2 + cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR); +#else + cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR); +#endif + if (bgr.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); + return -1; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height); + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = net.create_extractor(); + ex.input(net.input_names[0].c_str(), in); + + for (size_t k = 0; k < net.conv_names.size(); k++) + { + std::string layer_name = net.conv_names[k]; + std::string blob_name = net.conv_bottom_blob_names[layer_name]; + + ncnn::Mat out; + ex.extract(blob_name.c_str(), out); + + for (auto& quantize_data : quantize_datas) + { + if (quantize_data.name == layer_name) + { + quantize_data.update_histogram(out); + break; + } + } + } + } + + // step4 kld + printf(" ====> step 4 : using kld to find the best threshold value.\n"); + for (size_t i = 0; i < net.conv_names.size(); i++) + { + std::string layer_name = net.conv_names[i]; + std::string blob_name = net.conv_bottom_blob_names[layer_name]; + fprintf(stderr, "%-20s ", layer_name.c_str()); + + for (auto& quantize_data : quantize_datas) + { + if (quantize_data.name == layer_name) + { + quantize_data.get_data_blob_scale(); + fprintf(stderr, "bin : %-8d threshold : %-15f interval : %-10f scale : %-10f\n", + quantize_data.threshold_bin, + quantize_data.threshold, + quantize_data.histogram_interval, + quantize_data.scale); + + fprintf(fp, "%s %f\n", layer_name.c_str(), quantize_data.scale); + + break; + } + } + } + + fclose(fp); + printf("====> Save the calibration table done.\n"); + + return 0; +} + +// usage +void showUsage() +{ + std::cout << "example: ./ncnn2table --param=squeezenet-fp32.param --bin=squeezenet-fp32.bin --images=images/ --output=squeezenet.table --mean=104,117,123 --norm=1,1,1 --size=227,227 --swapRB --thread=2" << std::endl; +} + +int main(int argc, char** argv) +{ + std::cout << "--- ncnn post training quantization tool --- " << __TIME__ << " " << __DATE__ << std::endl; + + const cv::CommandLineParser parser(argc, argv, + { + + "{help h usage ? | | print this message }" + "{param p | | path to ncnn.param file }" + "{bin b | | path to ncnn.bin file }" + "{images i | | path to calibration images }" + "{output o | | path to output calibration table file }" + "{mean m | | value of mean }" + "{norm n | | value of normalize(scale value,default is 1 }" + "{size s | | the size of input image(using the resize the original image,default is w=224,h=224) }" + "{swapRB c | | flag which indicates that swap first and last channels in 3-channel image is necessary }" + "{thread t | 4 | count of processing threads }" + }); + + if (parser.has("help")) + { + parser.printMessage(); + showUsage(); + return 0; + } + + if (!parser.has("param") || !parser.has("bin") || !parser.has("images") || !parser.has("output") || !parser.has("mean") || !parser.has("norm")) + { + std::cout << "Inputs is does not include all needed param, pleas check..." << std::endl; + parser.printMessage(); + showUsage(); + return 0; + } + + const std::string image_folder_path = parser.get("images"); + const std::string ncnn_param_file_path = parser.get("param"); + const std::string ncnn_bin_file_path = parser.get("bin"); + const std::string saved_table_file_path = parser.get("output"); + + // check the input param + if (image_folder_path.empty() || ncnn_param_file_path.empty() || ncnn_bin_file_path.empty() || saved_table_file_path.empty()) + { + fprintf(stderr, "One or more path may be empty, please check and try again.\n"); + return 0; + } + + const auto num_threads = parser.get("thread"); + + struct PreParam pre_param { + {104.f, 117.f, 103.f}, + { 1.f, 1.f, 1.f }, + 224, + 224, + false + }; + + const auto find_all_value_in_string = [](const std::string& values_string, std::vector& value) + { + std::vector masks_pos; + + for (size_t i = 0; i < values_string.size(); i++) + { + if (',' == values_string[i]) + { + masks_pos.push_back(static_cast(i)); + } + } + + // check + if (masks_pos.empty()) + { + fprintf(stderr, "ERROR: Cannot find any ',' in string, please check.\n"); + return -1; + } + + if (2 != masks_pos.size()) + { + fprintf(stderr, "ERROR: Char ',' in fist of string, please check.\n"); + return -1; + } + + if (masks_pos.front() == 0) + { + fprintf(stderr, "ERROR: Char ',' in fist of string, please check.\n"); + return -1; + } + + if (masks_pos.back() == 0) + { + fprintf(stderr, "ERROR: Char ',' in last of string, please check.\n"); + return -1; + } + + for (size_t i = 0; i < masks_pos.size(); i++) + { + if (i > 0) + { + if (!(masks_pos[i] - masks_pos[i - 1] > 1)) + { + fprintf(stderr, "ERROR: Neighbouring char ',' was found.\n"); + return -1; + } + } + } + + const cv::String ch0_val_str = values_string.substr(0, masks_pos[0]); + const cv::String ch1_val_str = values_string.substr(masks_pos[0] + 1, masks_pos[1] - masks_pos[0] - 1); + const cv::String ch2_val_str = values_string.substr(masks_pos[1] + 1, values_string.size() - masks_pos[1] - 1); + + value.emplace_back(static_cast(std::atof(std::string(ch0_val_str).c_str()))); + value.emplace_back(static_cast(std::atof(std::string(ch1_val_str).c_str()))); + value.emplace_back(static_cast(std::atof(std::string(ch2_val_str).c_str()))); + + return 0; + }; + + if (parser.has("mean")) + { + const std::string mean_str = parser.get("mean"); + + std::vector mean_values; + const auto ret = find_all_value_in_string(mean_str, mean_values); + if (0 != ret && 3 != mean_values.size()) + { + fprintf(stderr, "ERROR: Searching mean value from --mean was failed.\n"); + + return -1; + } + + pre_param.mean[0] = mean_values[0]; + pre_param.mean[1] = mean_values[1]; + pre_param.mean[2] = mean_values[2]; + } + + if (parser.has("norm")) + { + const std::string norm_str = parser.get("norm"); + + std::vector norm_values; + const auto ret = find_all_value_in_string(norm_str, norm_values); + if (0 != ret && 3 != norm_values.size()) + { + fprintf(stderr, "ERROR: Searching mean value from --mean was failed, please check --mean param.\n"); + + return -1; + } + + pre_param.norm[0] = norm_values[0]; + pre_param.norm[1] = norm_values[1]; + pre_param.norm[2] = norm_values[2]; + } + + if (parser.has("size")) + { + cv::String size_str = parser.get("size"); + + auto sep_pos = size_str.find_first_of(','); + + if (cv::String::npos != sep_pos && sep_pos < size_str.size()) + { + cv::String width_value_str; + cv::String height_value_str; + + width_value_str = size_str.substr(0, sep_pos); + height_value_str = size_str.substr(sep_pos + 1, size_str.size() - sep_pos - 1); + + pre_param.width = static_cast(std::atoi(std::string(width_value_str).c_str())); + pre_param.height = static_cast(std::atoi(std::string(height_value_str).c_str())); + } + else + { + fprintf(stderr, "ERROR: Searching size value from --size was failed, please check --size param.\n"); + + return -1; + } + } + + if (parser.has("swapRB")) + { + pre_param.swapRB = true; + } + + g_blob_pool_allocator.set_size_compare_ratio(0.0f); + g_workspace_pool_allocator.set_size_compare_ratio(0.5f); + + // default option + g_default_option.lightmode = true; + g_default_option.num_threads = num_threads; + g_default_option.blob_allocator = &g_blob_pool_allocator; + g_default_option.workspace_allocator = &g_workspace_pool_allocator; + + g_default_option.use_winograd_convolution = true; + g_default_option.use_sgemm_convolution = true; + g_default_option.use_int8_inference = true; + g_default_option.use_fp16_packed = true; + g_default_option.use_fp16_storage = true; + g_default_option.use_fp16_arithmetic = true; + g_default_option.use_int8_storage = true; + g_default_option.use_int8_arithmetic = true; + + ncnn::set_cpu_powersave(2); + ncnn::set_omp_dynamic(0); + ncnn::set_omp_num_threads(num_threads); + + std::vector image_file_path_list; + + // parse the image file. + parse_images_dir(image_folder_path, image_file_path_list); + + // get the calibration table file, and save it. + const auto ret = post_training_quantize(image_file_path_list, ncnn_param_file_path, ncnn_bin_file_path, saved_table_file_path, pre_param); + if (!ret) + { + fprintf(stderr, "\nNCNN Int8 Calibration table create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/...233...\n"); + } + + return 0; +}