diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 964d7d383..702fd275b 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -2,7 +2,17 @@
 add_subdirectory(caffe)
 add_subdirectory(mxnet)
 add_subdirectory(onnx)
-# add_subdirectory(quantize)
+
+find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs)  # first attempt: require the imgcodecs component as well
+if(NOT OpenCV_FOUND)
+    find_package(OpenCV QUIET COMPONENTS core highgui imgproc)  # retry without imgcodecs (presumably for OpenCV 2.x, which has no separate imgcodecs module -- TODO confirm)
+endif()
+
+if(OpenCV_FOUND)
+    add_subdirectory(quantize)  # the quantize tools are only added to the build when OpenCV was located
+else()
+    message(WARNING "OpenCV not found, quantize tools won't be built")
+endif()
 
 add_executable(ncnn2mem ncnn2mem.cpp)
 
diff --git a/tools/quantize/ncnn2int8.cpp b/tools/quantize/ncnn2int8.cpp
index ae2678c23..73557f173 100755
--- a/tools/quantize/ncnn2int8.cpp
+++ b/tools/quantize/ncnn2int8.cpp
@@ -12,18 +12,15 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-#include <math.h>
-#include <assert.h>
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
 
-#include <fstream>
+#include <cstdio>
+#include <cstring>
 #include <vector>
 #include <set>
-#include <limits>
 #include <map>
-#include <algorithm>
 
 // ncnn public header
 #include "net.h"
@@ -90,57 +87,57 @@ static bool read_int8scale_table(const char* filepath, std::map<std::string, std
     FILE* fp = fopen(filepath, "rb");
     if (!fp)
     {
-        fprintf(stderr, "fopen %s failed\n", filepath);
+        fprintf(stderr, "Open %s failed.\n", filepath);
         return false;
     }
 
-    bool in_scale_vector = false;
-
-    std::string keystr;
+    std::string key_str;
     std::vector<float> scales;
 
-    char *line = NULL;
-    char *pch = NULL;
+    std::vector<char>line(102400);
+    char *pch = nullptr;
     size_t len = 0;
-    ssize_t read;
 
-    while ((read = getline(&line, &len, fp)) != -1)
+    while (nullptr != std::fgets(line.data(), static_cast<int>(line.size()), fp))
     {
-
         float scale = 1.f;
         char key[256];
-        line[strcspn(line, "\r\n")] = 0;
-        pch = strtok (line, " ");
-        if (pch == NULL) break;
+        line[strcspn(line.data(), "\r\n")] = 0;
+
+        pch = strtok(line.data(), " ");
 
-        bool iskey = 1;
-        while (pch != NULL)
+        if (pch == nullptr) break;
+
+        bool is_key = true;
+        while (pch != nullptr)
         {
-            if (iskey)
+            if (is_key)
             {
                 sscanf(pch, "%255s", key);
-                keystr = key;
-                iskey = 0;
+
+                key_str = key;
+                is_key = false;
             }
             else
             {
                 sscanf(pch, "%f", &scale);
+
                 scales.push_back(scale);
             }
 
-            pch = strtok (NULL, " ");
+            pch = strtok(nullptr, " ");
         }
 
         // XYZ_param_N pattern
-        if (strstr(keystr.c_str(), "_param_"))
+        if (strstr(key_str.c_str(), "_param_"))
         {
-            weight_int8scale_table[ keystr ] = scales;
+            weight_int8scale_table[key_str] = scales;
         }
         else
         {
-            blob_int8scale_table[ keystr ] = scales;
+            blob_int8scale_table[key_str] = scales;
         }
-        keystr.clear();
+        key_str.clear();
         scales.clear();
     }
 
@@ -155,7 +152,7 @@ public:
     // 0=fp32 1=fp16 2=int8
     int storage_type;
     std::map<std::string, std::vector<float> > blob_int8scale_table;
-    std::map<std::string, std::vector<float> > weight_int8scale_table; 
+    std::map<std::string, std::vector<float> > weight_int8scale_table;
 
 public:
     int quantize_convolution();
@@ -174,29 +171,30 @@ public:
 
 int NetQuantize::quantize_convolution()
 {
-    const int layer_count = layers.size();
-    for (int i=0; i<layer_count; i++)
+    const int layer_count = static_cast<int>(layers.size());
+    for (int i = 0; i < layer_count; i++)
     {
         // find convoultion layer
         if (layers[i]->type != "Convolution")
             continue;
 
         // find convolution layer
-        std::map<std::string, std::vector<float> >::iterator iter_data = blob_int8scale_table.find(layers[i]->name);
+        auto iter_data = blob_int8scale_table.find(layers[i]->name);
         if (iter_data == blob_int8scale_table.end())
             continue;
 
         char key[256];
         sprintf(key, "%s_param_0", layers[i]->name.c_str());
-        std::map<std::string, std::vector<float> >::iterator iter = weight_int8scale_table.find(key);
+
+        auto iter = weight_int8scale_table.find(key);
         if (iter == weight_int8scale_table.end())
         {
             fprintf(stderr, "this layer need to be quantized, but no scale param!\n");
             return -1;
         }
-            
+
         // Convolution - quantize weight from fp32 to int8
-        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
+        auto convolution = (ncnn::Convolution*)layers[i];
 
         std::vector<float> weight_data_int8_scales = iter->second;
 
@@ -210,7 +208,7 @@ int NetQuantize::quantize_convolution()
             const int weight_data_size_output = convolution->weight_data_size / convolution->num_output;
 
             // quantize weight to int8
-            for (int n=0; n<convolution->num_output; n++)
+            for (int n = 0; n < convolution->num_output; n++)
             {
                 ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);
 
@@ -240,29 +238,30 @@ int NetQuantize::quantize_convolution()
 
 int NetQuantize::quantize_convolutiondepthwise()
 {
-    const int layer_count = layers.size();
-    for (int i=0; i<layer_count; i++)
+    const int layer_count = static_cast<int>(layers.size());
+    for (int i = 0; i < layer_count; i++)
     {
         // find convoultion layer
         if (layers[i]->type != "ConvolutionDepthWise")
             continue;
 
         // find convolutiondepthwise layer
-        std::map<std::string, std::vector<float> >::iterator iter_data = blob_int8scale_table.find(layers[i]->name);
+        auto iter_data = blob_int8scale_table.find(layers[i]->name);
         if (iter_data == blob_int8scale_table.end())
             continue;
 
         char key[256];
         sprintf(key, "%s_param_0", layers[i]->name.c_str());
-        std::map<std::string, std::vector<float> >::iterator iter = weight_int8scale_table.find(key);
+
+        auto iter = weight_int8scale_table.find(key);
         if (iter == weight_int8scale_table.end())
         {
             fprintf(stderr, "this layer need to be quantized, but no scale param!\n");
             return -1;
         }
-            
+
         // Convolution - quantize weight from fp32 to int8
-        ncnn::ConvolutionDepthWise* convdw = (ncnn::ConvolutionDepthWise*)layers[i];
+        auto convdw = (ncnn::ConvolutionDepthWise*)layers[i];
 
         std::vector<float> weight_data_int8_scales = iter->second;
 
@@ -276,7 +275,7 @@ int NetQuantize::quantize_convolutiondepthwise()
             const int weight_data_size_output = convdw->weight_data_size / convdw->group;
 
             // quantize weight to int8
-            for (int n=0; n<convdw->group; n++)
+            for (int n = 0; n < convdw->group; n++)
             {
                 ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);
 
@@ -306,29 +305,30 @@ int NetQuantize::quantize_convolutiondepthwise()
 
 int NetQuantize::quantize_innerproduct()
 {
-    const int layer_count = layers.size();
-    for (int i=0; i<layer_count; i++)
+    const int layer_count = static_cast<int>(layers.size());
+    for (int i = 0; i < layer_count; i++)
     {
         // find convoultion layer
         if (layers[i]->type != "InnerProduct")
             continue;
 
         // find InnerProduct layer
-        std::map<std::string, std::vector<float> >::iterator iter_data = blob_int8scale_table.find(layers[i]->name);
+        auto iter_data = blob_int8scale_table.find(layers[i]->name);
         if (iter_data == blob_int8scale_table.end())
             continue;
 
         char key[256];
         sprintf(key, "%s_param_0", layers[i]->name.c_str());
-        std::map<std::string, std::vector<float> >::iterator iter = weight_int8scale_table.find(key);
+
+        auto iter = weight_int8scale_table.find(key);
         if (iter == weight_int8scale_table.end())
         {
             fprintf(stderr, "this layer need to be quantized, but no scale param!\n");
             return -1;
         }
-            
+
         // InnerProduct - quantize weight from fp32 to int8
-        ncnn::InnerProduct* fc = (ncnn::InnerProduct*)layers[i];
+        auto fc = (ncnn::InnerProduct*)layers[i];
 
         std::vector<float> weight_data_int8_scales = iter->second;
 
@@ -342,7 +342,7 @@ int NetQuantize::quantize_innerproduct()
             const int weight_data_size_output = fc->weight_data_size / fc->num_output;
 
             // quantize weight to int8
-            for (int n=0; n<fc->num_output; n++)
+            for (int n = 0; n < fc->num_output; n++)
             {
                 ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);
 
@@ -376,7 +376,7 @@ int NetQuantize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp)
     const int* ptr = m;
 
     fprintf(pp, " -%d=%d", 23300 + id, count);
-    for (int i=0; i<count; i++)
+    for (int i = 0; i < count; i++)
     {
         fprintf(pp, ",%d", ptr[i]);
     }
@@ -390,7 +390,7 @@ int NetQuantize::fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp)
     const float* ptr = m;
 
     fprintf(pp, " -%d=%d", 23300 + id, count);
-    for (int i=0; i<count; i++)
+    for (int i = 0; i < count; i++)
     {
         fprintf(pp, ",%f", ptr[i]);
     }
@@ -400,7 +400,7 @@ int NetQuantize::fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp)
 
 static inline size_t alignSize(size_t sz, int n)
 {
-    return (sz + n-1) & -n;
+    return (sz + n - 1) & -n; // rounds sz up to the next multiple of n; the "& -n" mask is only correct when n is a power of two
 }
 
 int NetQuantize::fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp)
@@ -417,8 +417,8 @@ int NetQuantize::fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp
 
     // padding to 32bit align
     int nwrite = ftell(bp) - p0;
-    int nalign = alignSize(nwrite, 4);
-    unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
+    int nalign = static_cast<int>(alignSize(nwrite, 4));
+    unsigned char padding[4] = { 0x00, 0x00, 0x00, 0x00 };
     fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
 
     return 0;
@@ -433,8 +433,8 @@ int NetQuantize::fwrite_weight_data(const ncnn::Mat& data, FILE* bp)
 
     // padding to 32bit align
     int nwrite = ftell(bp) - p0;
-    int nalign = alignSize(nwrite, 4);
-    unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
+    int nalign = static_cast<int>(alignSize(nwrite, 4));
+    unsigned char padding[4] = { 0x00, 0x00, 0x00, 0x00 };
     fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
 
     return 0;
@@ -447,11 +447,11 @@ int NetQuantize::save(const char* parampath, const char* binpath)
 
     fprintf(pp, "7767517\n");
 
-    const int layer_count = layers.size();
+    const int layer_count = static_cast<int>(layers.size());
 
     int layer_count_fused = 0;
     std::set<std::string> blob_names;
-    for (int i=0; i<layer_count; i++)
+    for (int i = 0; i < layer_count; i++)
     {
         const ncnn::Layer* layer = layers[i];
         if (layer->type == "ncnnfused")
@@ -459,42 +459,42 @@ int NetQuantize::save(const char* parampath, const char* binpath)
 
         layer_count_fused++;
 
-        int bottom_count = layer->bottoms.size();
-        for (int j=0; j<bottom_count; j++)
+        int bottom_count = static_cast<int>(layer->bottoms.size());
+        for (int j = 0; j < bottom_count; j++)
         {
             int bottom_blob_index = layer->bottoms[j];
             blob_names.insert(blobs[bottom_blob_index].name);
         }
 
-        int top_count = layer->tops.size();
-        for (int j=0; j<top_count; j++)
+        int top_count = static_cast<int>(layer->tops.size());
+        for (int j = 0; j < top_count; j++)
         {
             int top_blob_index = layer->tops[j];
             blob_names.insert(blobs[top_blob_index].name);
         }
     }
 
-    int blob_count_fused = blob_names.size();
+    int blob_count_fused = static_cast<int>(blob_names.size());
 
     fprintf(pp, "%d %d\n", layer_count_fused, blob_count_fused);
 
-    for (int i=0; i<layer_count; i++)
+    for (int i = 0; i < layer_count; i++)
     {
         const ncnn::Layer* layer = layers[i];
         if (layer->type == "ncnnfused")
             continue;
 
-        int bottom_count = layer->bottoms.size();
-        int top_count = layer->tops.size();
+        int bottom_count = static_cast<int>(layer->bottoms.size());
+        int top_count = static_cast<int>(layer->tops.size());
 
         fprintf(pp, "%-24s %-24s %d %d", layer->type.c_str(), layer->name.c_str(), bottom_count, top_count);
 
-        for (int j=0; j<bottom_count; j++)
+        for (int j = 0; j < bottom_count; j++)
         {
             int bottom_blob_index = layer->bottoms[j];
             fprintf(pp, " %s", blobs[bottom_blob_index].name.c_str());
         }
-        for (int j=0; j<top_count; j++)
+        for (int j = 0; j < top_count; j++)
         {
             int top_blob_index = layer->tops[j];
             fprintf(pp, " %s", blobs[top_blob_index].name.c_str());
@@ -581,12 +581,13 @@ int NetQuantize::save(const char* parampath, const char* binpath)
 
             // write int8_scale data
             if (op->int8_scale_term)
-            {            
+            {
                 std::vector<float> weight_int8scale;
                 std::vector<float> blob_int8scale;
 
                 char key[256];
-                sprintf(key, "%s_param_0", layer->name.c_str());
+                sprintf(key, "%s_param_0", layers[i]->name.c_str());
+
                 if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
                 {
                     weight_int8scale = weight_int8scale_table[std::string(key)];
@@ -630,12 +631,13 @@ int NetQuantize::save(const char* parampath, const char* binpath)
 
             // write int8_scale data
             if (op->int8_scale_term)
-            {            
+            {
                 std::vector<float> weight_int8scale;
                 std::vector<float> blob_int8scale;
 
                 char key[256];
-                sprintf(key, "%s_param_0", layer->name.c_str());
+                sprintf(key, "%s_param_0", layers[i]->name.c_str());
+
                 if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
                 {
                     weight_int8scale = weight_int8scale_table[std::string(key)];
@@ -649,7 +651,7 @@ int NetQuantize::save(const char* parampath, const char* binpath)
                 // write int8_scale data
                 fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
                 fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
-            }            
+            }
         }
         else if (layer->type == "Crop")
         {
@@ -781,12 +783,13 @@ int NetQuantize::save(const char* parampath, const char* binpath)
 
             // write int8_scale data
             if (op->int8_scale_term)
-            {            
+            {
                 std::vector<float> weight_int8scale;
                 std::vector<float> blob_int8scale;
 
                 char key[256];
-                sprintf(key, "%s_param_0", layer->name.c_str());
+                sprintf(key, "%s_param_0", layers[i]->name.c_str());
+
                 if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
                 {
                     weight_int8scale = weight_int8scale_table[std::string(key)];
@@ -800,7 +803,7 @@ int NetQuantize::save(const char* parampath, const char* binpath)
                 // write int8_scale data
                 fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
                 fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
-            }            
+            }
         }
         else if (layer->type == "Input")
         {
@@ -880,7 +883,7 @@ int NetQuantize::save(const char* parampath, const char* binpath)
             fprintf_param_value(" 3=%d", scale_data_size)
             fprintf_param_value(" 4=%d", across_channel)
 
-            fwrite_weight_data(op->scale_data, bp);
+                fwrite_weight_data(op->scale_data, bp);
         }
         else if (layer->type == "Padding")
         {
@@ -1170,7 +1173,7 @@ int main(int argc, char** argv)
 
     quantizer.load_param(inparam);
     quantizer.load_model(inbin);
-    
+
     quantizer.quantize_convolution();
     quantizer.quantize_convolutiondepthwise();
     quantizer.quantize_innerproduct();
diff --git a/tools/quantize/ncnn2table.cpp b/tools/quantize/ncnn2table.cpp
index 40d3d0efc..8a7339e8f 100755
--- a/tools/quantize/ncnn2table.cpp
+++ b/tools/quantize/ncnn2table.cpp
@@ -1,912 +1,944 @@
-// BUG1989 is pleased to support the open source community by supporting ncnn available.
-//
-// author:BUG1989 (https://github.com/BUG1989/) Long-term support.
-// author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
-//
-// Copyright (C) 2019 BUG1989. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#include <stdio.h>
-#include <unistd.h>
-#include <getopt.h>
-#include <string.h>
-#include <vector>
-#include <iostream>
-#include <fstream>
-#include <dirent.h>
-#include <stdlib.h>
-#include <algorithm>
-#include <map>
-#include <opencv2/core/core.hpp>
-#include <opencv2/highgui/highgui.hpp>
-
-// ncnn public header
-#include "platform.h"
-#include "net.h"
-#include "cpu.h"
-#include "benchmark.h"
-
-// ncnn private header
-#include "layer/convolution.h"
-#include "layer/convolutiondepthwise.h"
-#include "layer/innerproduct.h"
-
-static ncnn::Option g_default_option;
-static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
-static ncnn::PoolAllocator g_workspace_pool_allocator;
-
-// Get the filenames from direct path
-int parse_images_dir(const char *base_path, std::vector<std::string>& file_path)
-{
-    DIR *dir;
-    struct dirent *ptr;
-
-    if ((dir=opendir(base_path)) == NULL)
-    {
-        perror("Open dir error...");
-        exit(1);
-    }
-
-    while ((ptr=readdir(dir)) != NULL)
-    {
-        if(strcmp(ptr->d_name,".")==0 || strcmp(ptr->d_name,"..")==0)    ///current dir OR parrent dir
-        {
-            continue;
-        } 
-
-        std::string path = base_path;
-        file_path.push_back(path + ptr->d_name);
-    }
-    closedir(dir);
-
-    return 0;
-}
-
-class QuantNet : public ncnn::Net
-{
-public:
-    int get_conv_names();
-    int get_conv_bottom_blob_names();
-    int get_conv_weight_blob_scales();
-    int get_input_names();
-
-public:
-    std::vector<std::string> conv_names;
-    std::map<std::string,std::string> conv_bottom_blob_names;
-    std::map<std::string,std::vector<float> > weight_scales;
-    std::vector<std::string> input_names;
-};
-
-int QuantNet::get_input_names()
-{
-    for (size_t i=0; i<layers.size(); i++)
-    {
-        ncnn::Layer* layer = layers[i];
-        if (layer->type == "Input")
-        {
-            for (size_t  j=0; j<layer->tops.size(); j++)
-            {
-                int blob_index = layer->tops[j];
-                std::string name = blobs[blob_index].name.c_str();
-                input_names.push_back(name);
-            }
-        }
-    }
-
-    return 0;
-}
-
-int QuantNet::get_conv_names()
-{
-    for (size_t i=0; i<layers.size(); i++)
-    {
-        ncnn::Layer* layer = layers[i];
-        
-        if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
-        {
-            std::string name = layer->name;
-            conv_names.push_back(name);
-        }
-    }        
-
-    return 0;
-}
-
-int QuantNet::get_conv_bottom_blob_names()
-{
-    // find conv bottom name or index
-    for (size_t i=0; i<layers.size(); i++)
-    {
-        ncnn::Layer* layer = layers[i];
-        
-        if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
-        {
-            std::string name = layer->name;
-            std::string bottom_blob_name = blobs[layer->bottoms[0]].name;
-            conv_bottom_blob_names[name] = bottom_blob_name;
-        }
-    }
-
-    return 0;
-}
-
-int QuantNet::get_conv_weight_blob_scales()
-{
-    for (size_t i=0; i<layers.size(); i++)
-    {
-        ncnn::Layer* layer = layers[i];
-        
-        if (layer->type == "Convolution")
-        {
-            std::string name = layer->name;
-            const int weight_data_size_output = ((ncnn::Convolution*)layer)->weight_data_size / ((ncnn::Convolution*)layer)->num_output;
-            std::vector<float> scales;
-
-            // int8 winograd F43 needs weight data to use 6bit quantization
-            bool quant_6bit = false;
-            int kernel_w = ((ncnn::Convolution*)layer)->kernel_w;
-            int kernel_h = ((ncnn::Convolution*)layer)->kernel_h;
-            int dilation_w = ((ncnn::Convolution*)layer)->dilation_w;
-            int dilation_h = ((ncnn::Convolution*)layer)->dilation_h;
-            int stride_w = ((ncnn::Convolution*)layer)->stride_w;
-            int stride_h = ((ncnn::Convolution*)layer)->stride_h;
-
-            if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
-                quant_6bit = true;
-
-            for (int n=0; n<((ncnn::Convolution*)layer)->num_output; n++)
-            {
-                const ncnn::Mat weight_data_n = ((ncnn::Convolution*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
-                const float *data_n = weight_data_n;
-                float max_value = std::numeric_limits<float>::min();
-
-                for (int i = 0; i < weight_data_size_output; i++)
-                    max_value = std::max(max_value, std::fabs(data_n[i]));
-
-                if (quant_6bit)
-                    scales.push_back(31 / max_value);
-                else
-                    scales.push_back(127 / max_value);
-            }
-
-            weight_scales[name] = scales;
-        }
-        
-        if (layer->type == "ConvolutionDepthWise")
-        {
-            std::string name = layer->name;
-            const int weight_data_size_output = ((ncnn::ConvolutionDepthWise*)layer)->weight_data_size / ((ncnn::ConvolutionDepthWise*)layer)->group;
-            std::vector<float> scales;
-
-            for (int n=0; n<((ncnn::ConvolutionDepthWise*)layer)->group; n++)
-            {
-                const ncnn::Mat weight_data_n = ((ncnn::ConvolutionDepthWise*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
-                const float *data_n = weight_data_n;
-                float max_value = std::numeric_limits<float>::min();
-
-                for (int i = 0; i < weight_data_size_output; i++)
-                    max_value = std::max(max_value, std::fabs(data_n[i]));
-
-                scales.push_back(127 / max_value); 
-            }
-
-            weight_scales[name] = scales;                
-        }
-
-        if (layer->type == "InnerProduct")
-        {
-            std::string name = layer->name;
-            const int weight_data_size_output = ((ncnn::InnerProduct*)layer)->weight_data_size / ((ncnn::InnerProduct*)layer)->num_output;
-            std::vector<float> scales;
-
-            for (int n=0; n<((ncnn::InnerProduct*)layer)->num_output; n++)
-            {
-                const ncnn::Mat weight_data_n = ((ncnn::InnerProduct*)layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
-                const float *data_n = weight_data_n;
-                float max_value = std::numeric_limits<float>::min();
-
-                for (int i = 0; i < weight_data_size_output; i++)
-                    max_value = std::max(max_value, std::fabs(data_n[i]));
-
-                scales.push_back(127 / max_value);
-            }
-
-            weight_scales[name] = scales;            
-        }
-    }              
-
-    return 0;
-}
-
-class QuantizeData
-{
-public:
-    QuantizeData(std::string layer_name, int num);
-
-    int initial_blob_max(ncnn::Mat data);
-    int initial_histogram_interval();
-    int initial_histogram_value();
-
-    int normalize_histogram(); 
-    int update_histogram(ncnn::Mat data);
-
-    float compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b);
-    int threshold_distribution(const std::vector<float> &distribution, const int target_bin=128);
-    float get_data_blob_scale();
-
-public:
-    std::string name;
-
-    float max_value;
-    int num_bins;
-    float histogram_interval;
-    std::vector<float> histogram;
-    
-    float threshold;
-    int threshold_bin;
-    float scale;
-};
-
-QuantizeData::QuantizeData(std::string layer_name, int num)
-{
-    name = layer_name;
-    max_value = 0.0;
-    num_bins = num;
-    histogram_interval = 0.0;
-    histogram.resize(num_bins);
-    initial_histogram_value();
-}
-
-int QuantizeData::initial_blob_max(ncnn::Mat data)
-{
-    int channel_num = data.c;
-    int size = data.w * data.h;
-
-    for (int q=0; q<channel_num; q++)
-    {
-        const float *data_n = data.channel(q);
-        for(int i=0; i<size; i++)
-        {
-            max_value = std::max(max_value, std::fabs(data_n[i]));
-        }
-    }
-
-    return 0;
-}
-
-int QuantizeData::initial_histogram_interval()
-{
-    histogram_interval = max_value / num_bins;
-
-    return 0;
-}
-
-int QuantizeData::initial_histogram_value()
-{
-    for (size_t i=0; i<histogram.size(); i++)
-    {
-        histogram[i] = 0.00001;
-    }
-
-    return 0;
-}
-
-int QuantizeData::normalize_histogram() 
-{
-    const int length = histogram.size();
-    float sum = 0;
-    
-    for (int i=0; i<length; i++)
-        sum += histogram[i];
-
-    for (int i=0; i<length; i++) 
-        histogram[i] /= sum;
-
-    return 0;
-}
-
-int QuantizeData::update_histogram(ncnn::Mat data)
-{
-    int channel_num = data.c;
-    int size = data.w * data.h;
-
-    for (int q=0; q<channel_num; q++)
-    {
-        const float *data_n = data.channel(q);
-        for(int i=0; i<size; i++)
-        {
-            if (data_n[i] == 0)
-                continue;
-
-            int index = std::min(static_cast<int>(std::abs(data_n[i]) / histogram_interval), 2047);
-
-            histogram[index]++;
-        }
-    }        
-
-    return 0;
-}
-
-float QuantizeData::compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b) 
-{
-    const int length = dist_a.size();
-    assert(dist_b.size() == length);
-    float result = 0;
-
-    for (int i=0; i<length; i++) 
-    {
-        if (dist_a[i] != 0) 
-        {
-            if (dist_b[i] == 0) 
-            {
-                result += 1;
-            } 
-            else 
-            {
-                result += dist_a[i] * log(dist_a[i] / dist_b[i]);
-            }
-        }
-    }
-
-    return result;
-}
-
-int QuantizeData::threshold_distribution(const std::vector<float> &distribution, const int target_bin) 
-{
-    int target_threshold = target_bin;
-    float min_kl_divergence = 1000;
-    const int length = distribution.size();
-
-    std::vector<float> quantize_distribution(target_bin);
-
-    float threshold_sum = 0;
-    for (int threshold=target_bin; threshold<length; threshold++) 
-    {
-        threshold_sum += distribution[threshold];
-    }
-
-    for (int threshold=target_bin; threshold<length; threshold++) 
-    {
-
-        std::vector<float> t_distribution(distribution.begin(), distribution.begin()+threshold);
-        
-        t_distribution[threshold-1] += threshold_sum; 
-        threshold_sum -= distribution[threshold];
-
-        // get P
-        fill(quantize_distribution.begin(), quantize_distribution.end(), 0);
-        
-        const float num_per_bin = static_cast<float>(threshold) / target_bin;
-
-        for (int i=0; i<target_bin; i++) 
-        {
-            const float start = i * num_per_bin;
-            const float end = start + num_per_bin;
-
-            const int left_upper = ceil(start);
-            if (left_upper > start) 
-            {
-                const float left_scale = left_upper - start;
-                quantize_distribution[i] += left_scale * distribution[left_upper - 1];
-            }
-
-            const int right_lower = floor(end);
-
-            if (right_lower < end) 
-            {
-
-                const float right_scale = end - right_lower;
-                quantize_distribution[i] += right_scale * distribution[right_lower];
-            }
-
-            for (int j=left_upper; j<right_lower; j++) 
-            {
-                quantize_distribution[i] += distribution[j];
-            }
-        }
-
-        // get Q
-        std::vector<float> expand_distribution(threshold, 0);
-
-        for (int i=0; i<target_bin; i++) 
-        {
-            const float start = i * num_per_bin;
-            const float end = start + num_per_bin;
-
-            float count = 0;
-
-            const int left_upper = ceil(start);
-            float left_scale = 0;
-            if (left_upper > start) 
-            {
-                left_scale = left_upper - start;
-                if (distribution[left_upper - 1] != 0) 
-                {
-                    count += left_scale;
-                }
-            }
-
-            const int right_lower = floor(end);
-            float right_scale = 0;
-            if (right_lower < end) 
-            {
-                right_scale = end - right_lower;
-                if (distribution[right_lower] != 0) 
-                {
-                    count += right_scale;
-                }
-            }
-
-            for (int j=left_upper; j<right_lower; j++) 
-            {
-                if (distribution[j] != 0) 
-                {
-                    count++;
-                }
-            }
-
-            const float expand_value = quantize_distribution[i] / count;
-
-            if (left_upper > start) 
-            {
-                if (distribution[left_upper - 1] != 0) 
-                {
-                    expand_distribution[left_upper - 1] += expand_value * left_scale;
-                }
-            }
-            if (right_lower < end) 
-            {
-                if (distribution[right_lower] != 0) 
-                {
-                    expand_distribution[right_lower] += expand_value * right_scale;
-                }
-            }
-            for (int j=left_upper; j<right_lower; j++) 
-            {
-                if (distribution[j] != 0) 
-                {
-                    expand_distribution[j] += expand_value;
-                }
-            }
-        }
-
-        // kl
-        float kl_divergence = compute_kl_divergence(t_distribution, expand_distribution);
-
-        // the best num of bin
-        if (kl_divergence < min_kl_divergence) 
-        {
-            min_kl_divergence = kl_divergence;
-            target_threshold = threshold;
-        }
-    }
-
-    return target_threshold;
-}
-
-float QuantizeData::get_data_blob_scale()
-{   
-    normalize_histogram();
-    threshold_bin = threshold_distribution(histogram);
-    threshold = (threshold_bin + 0.5) * histogram_interval;
-    scale = 127 / threshold;
-    return scale;
-}
-
-struct PreParam
-{
-    float mean[3];
-    float norm[3];
-    int weith;
-    int height;
-    bool swapRB;
-};
-
-static int post_training_quantize(const std::vector<std::string> filenames, const char* param_path, const char* bin_path, const char* table_path, struct PreParam per_param)
-{
-    int size = filenames.size();
-
-    QuantNet net;
-    net.opt = g_default_option;
-
-    net.load_param(param_path);
-    net.load_model(bin_path);
-
-    float mean_vals[3], norm_vals[3];
-    int weith = per_param.weith;
-    int height = per_param.height;
-    bool swapRB = per_param.swapRB;
-
-    mean_vals[0] = per_param.mean[0];
-    mean_vals[1] = per_param.mean[1];
-    mean_vals[2] = per_param.mean[2];
-
-    norm_vals[0] = per_param.norm[0];
-    norm_vals[1] = per_param.norm[1];
-    norm_vals[2] = per_param.norm[2];
-
-    g_blob_pool_allocator.clear();
-    g_workspace_pool_allocator.clear();
-
-    net.get_input_names();
-    net.get_conv_names();
-    net.get_conv_bottom_blob_names();
-    net.get_conv_weight_blob_scales();
-
-    if (net.input_names.size() <= 0)
-    {
-        fprintf(stderr, "not found [Input] Layer, Check your ncnn.param \n");
-        return -1;
-    }
-    
-    FILE *fp=fopen(table_path, "w");
-
-    // save quantization scale of weight 
-    printf("====> Quantize the parameters.\n");    
-    for (size_t i=0; i<net.conv_names.size(); i++)
-    {
-        std::string layer_name = net.conv_names[i];
-        std::string blob_name = net.conv_bottom_blob_names[layer_name];
-        std::vector<float> weight_scale_n = net.weight_scales[layer_name];
-
-        fprintf(fp, "%s_param_0 ", layer_name.c_str());
-        for (size_t j=0; j<weight_scale_n.size(); j++)
-            fprintf(fp, "%f ", weight_scale_n[j]);
-        fprintf(fp, "\n");        
-    }
-
-    // initial quantization data
-    std::vector<QuantizeData> quantize_datas;
-    
-    for (size_t i=0; i<net.conv_names.size(); i++)
-    {
-        std::string layer_name = net.conv_names[i];
-
-        QuantizeData quantize_data(layer_name, 2048);
-        quantize_datas.push_back(quantize_data);
-    }    
-
-    // step 1 count the max value
-    printf("====> Quantize the activation.\n"); 
-    printf("    ====> step 1 : find the max value.\n");
-
-    for (size_t i=0; i<filenames.size(); i++)
-    {
-        std::string img_name = filenames[i];
-
-        if ((i+1)%100 == 0)
-            fprintf(stderr, "          %d/%d\n", (int)(i+1), (int)size);
-
-#if OpenCV_VERSION_MAJOR > 2
-        cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
-#else
-        cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
-#endif
-        if (bgr.empty())
-        {
-            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
-            return -1;
-        }
-
-        ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, weith, height);
-        in.substract_mean_normalize(mean_vals, norm_vals);
-
-        ncnn::Extractor ex = net.create_extractor();
-        ex.input(net.input_names[0].c_str(), in);
-
-        for (size_t i=0; i<net.conv_names.size(); i++)
-        {
-            std::string layer_name = net.conv_names[i];
-            std::string blob_name = net.conv_bottom_blob_names[layer_name];
-
-            ncnn::Mat out;
-            ex.extract(blob_name.c_str(), out);  
-
-            for (size_t j=0; j<quantize_datas.size(); j++)
-            {
-                if (quantize_datas[j].name == layer_name)
-                {
-                    quantize_datas[j].initial_blob_max(out);
-                    break;
-                }
-            }
-        }         
-    }
-
-    // step 2 histogram_interval
-    printf("    ====> step 2 : generate the histogram_interval.\n");
-    for (size_t i=0; i<net.conv_names.size(); i++)
-    {
-        std::string layer_name = net.conv_names[i];
-
-        for (size_t j=0; j<quantize_datas.size(); j++)
-        {
-            if (quantize_datas[j].name == layer_name)
-            {
-                quantize_datas[j].initial_histogram_interval();
-
-                fprintf(stderr, "%-20s : max = %-15f interval = %-10f\n", quantize_datas[j].name.c_str(), quantize_datas[j].max_value, quantize_datas[j].histogram_interval);
-                break;
-            }
-        }
-    }    
-
-    // step 3 histogram
-    printf("    ====> step 3 : generate the histogram.\n");
-    for (size_t i=0; i<filenames.size(); i++)
-    {
-        std::string img_name = filenames[i];
-
-        if ((i+1)%100 == 0)
-            fprintf(stderr, "          %d/%d\n", (int)(i+1), (int)size);
-#if OpenCV_VERSION_MAJOR > 2
-        cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
-#else
-        cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
-#endif
-        if (bgr.empty())
-        {
-            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
-            return -1;
-        }  
-
-        ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, weith, height);
-        in.substract_mean_normalize(mean_vals, norm_vals);
-      
-        ncnn::Extractor ex = net.create_extractor();
-        ex.input(net.input_names[0].c_str(), in);
-
-        for (size_t i=0; i<net.conv_names.size(); i++)
-        {
-            std::string layer_name = net.conv_names[i];
-            std::string blob_name = net.conv_bottom_blob_names[layer_name];
-
-            ncnn::Mat out;
-            ex.extract(blob_name.c_str(), out);  
-
-            for (size_t j=0; j<quantize_datas.size(); j++)
-            {
-                if (quantize_datas[j].name == layer_name)
-                {
-                    quantize_datas[j].update_histogram(out);
-                    break;
-                }
-            }
-        }     
-    }
-
-    // step4 kld
-    printf("    ====> step 4 : using kld to find the best threshold value.\n");
-    for (size_t i=0; i<net.conv_names.size(); i++)
-    {
-        std::string layer_name = net.conv_names[i];
-        std::string blob_name = net.conv_bottom_blob_names[layer_name];
-        fprintf(stderr, "%-20s ", layer_name.c_str());
-
-        for (size_t j=0; j<quantize_datas.size(); j++)
-        {
-            if (quantize_datas[j].name == layer_name)
-            {
-                quantize_datas[j].get_data_blob_scale();
-                fprintf(stderr, "bin : %-8d threshold : %-15f interval : %-10f scale : %-10f\n", \
-                                                                quantize_datas[j].threshold_bin, \
-                                                                quantize_datas[j].threshold, \
-                                                                quantize_datas[j].histogram_interval, \
-                                                                quantize_datas[j].scale);
-
-                fprintf(fp, "%s %f\n", layer_name.c_str(), quantize_datas[j].scale);
-
-                break;
-            }
-        }
-    }
-
-    fclose(fp);
-    printf("====> Save the calibration table done.\n");
-
-    return 0;
-}
-
-// usage
-void showUsage() 
-{
-    std::cout << "usage: ncnn2table [-h] [-p] [-b] [-o] [-m] [-n] [-s] [-t]" << std::endl;
-    std::cout << " -h, --help       show this help message and exit" << std::endl;
-    std::cout << " -p, --param      path to ncnn.param file" << std::endl;
-    std::cout << " -b, --bin        path to ncnn.bin file" << std::endl;
-    std::cout << " -i, --images     path to calibration images" << std::endl;
-    std::cout << " -o, --output     path to output calibration tbale file" << std::endl;
-    std::cout << " -m, --mean       value of mean" << std::endl;
-    std::cout << " -n, --norm       value of normalize(scale value,defualt is 1)" << std::endl;
-    std::cout << " -s, --size       the size of input image(using the resize the original image,default is w=224,h=224)" << std::endl;
-    std::cout << " -c  --swapRB     flag which indicates that swap first and last channels in 3-channel image is necessary" << std::endl;
-    std::cout << " -t, --thread     number of threads(defalut is 1)" << std::endl;    
-    std::cout << "example: ./ncnn2table --param squeezenet-fp32.param --bin squeezenet-fp32.bin --images images/ --output squeezenet.table --mean 104,117,123 --norm 1,1,1 --size 227,227 --swapRB --thread 2" << std::endl;
-}
-
-// string.split('x')
-std::vector<std::string> split(const std::string &str,const std::string &pattern)
-{
-    //const char* convert to char*
-    char * strc = new char[strlen(str.c_str())+1];
-    strcpy(strc, str.c_str());
-    std::vector<std::string> resultVec;
-    char* tmpStr = strtok(strc, pattern.c_str());
-    while (tmpStr != NULL)
-    {
-        resultVec.push_back(std::string(tmpStr));
-        tmpStr = strtok(NULL, pattern.c_str());
-    }
-
-    delete[] strc;
-
-    return resultVec;
-}
-
-int main(int argc, char** argv)
-{
-    std::cout << "--- ncnn post training quantization tool --- " << __TIME__ << " " << __DATE__ << std::endl;
-
-    char* imagepath = NULL;
-    char* parampath = NULL;
-    char* binpath = NULL;
-    char* tablepath = NULL;
-    int num_threads = 1;
-
-    struct PreParam pre_param = {
-        .mean = {104.f, 117.f, 103.f}, 
-        .norm = {1.f, 1.f, 1.f}, 
-        .weith = 224, 
-        .height =224,
-        .swapRB = false
-    };
-
-    int c;
-
-    while (1) 
-    {
-        int option_index = 0;
-        static struct option long_options[] = 
-        {
-            {"param",   required_argument, 0,  'p' },
-            {"bin",     required_argument, 0,  'b' },
-            {"images",  required_argument, 0,  'i' },
-            {"output",  required_argument, 0,  'o' },
-            {"mean",    required_argument, 0,  'm' },
-            {"norm",    required_argument, 0,  'n' },
-            {"size",    required_argument, 0,  's' },
-            {"swapRB",  no_argument,       0,  'c' },
-            {"thread",  required_argument, 0,  't' },
-            {"help",    no_argument,       0,  'h' },
-            {0,         0,                 0,  0 }
-        };
-
-        c = getopt_long(argc, argv, "p:b:i:o:m:n:s:ct:h", long_options, &option_index);
-        if (c == -1)
-            break;
-
-        switch (c) 
-        {
-        case 'p':
-            printf("param = '%s'\n", optarg);
-            parampath = optarg;
-            break;
-
-        case 'b':
-            printf("bin = '%s'\n", optarg);
-            binpath = optarg;
-            break;
-
-        case 'i':
-            printf("images = '%s'\n", optarg);
-            imagepath = optarg;
-            break;
-
-        case 'o':
-            printf("output = '%s'\n", optarg);
-            tablepath = optarg;
-            break;
-
-        case 'm':
-        {
-            printf("mean = '%s'\n", optarg);
-            std::string temp(optarg);
-            std::vector<std::string> array = split(temp, ",");
-            pre_param.mean[0] = atof(array[0].c_str());
-            pre_param.mean[1] = atof(array[1].c_str());
-            pre_param.mean[2] = atof(array[2].c_str());
-        }
-            break;
-
-        case 'n':
-        {
-            printf("norm = '%s'\n", optarg);
-            std::string temp(optarg);
-            std::vector<std::string> array = split(temp, ",");
-            pre_param.norm[0] = atof(array[0].c_str());
-            pre_param.norm[1] = atof(array[1].c_str());
-            pre_param.norm[2] = atof(array[2].c_str());
-        }
-            break;
-
-        case 's':
-        {
-            printf("size = '%s'\n", optarg);
-            std::string temp(optarg);
-            std::vector<std::string> array = split(temp, ",");
-            pre_param.weith = atoi(array[0].c_str());
-            pre_param.height = atoi(array[1].c_str());
-        }
-            break;                        
-
-        case 'c':
-        {
-            printf("swapRB = '%s'\n", "true");
-            pre_param.swapRB = true;
-        }
-            break;
-        case 't':
-            printf("thread = '%s'\n", optarg);
-            num_threads = atoi(optarg);
-            break;            
-
-        case 'h':
-        case '?':
-            showUsage();
-            return 0;
-
-        default:
-            showUsage();
-        }
-    }
-
-    // check the input param
-    if (imagepath == NULL || parampath == NULL || binpath == NULL || tablepath == NULL)
-    {
-        fprintf(stderr, "someone path maybe empty,please check it and try again.\n");
-        return 0;
-    }
-
-    g_blob_pool_allocator.set_size_compare_ratio(0.0f);
-    g_workspace_pool_allocator.set_size_compare_ratio(0.5f);
-
-    // default option
-    g_default_option.lightmode = true;
-    g_default_option.num_threads = num_threads;
-    g_default_option.blob_allocator = &g_blob_pool_allocator;
-    g_default_option.workspace_allocator = &g_workspace_pool_allocator;
-
-    g_default_option.use_winograd_convolution = true;
-    g_default_option.use_sgemm_convolution = true;
-    g_default_option.use_int8_inference = true;
-    g_default_option.use_fp16_packed = true;
-    g_default_option.use_fp16_storage = true;
-    g_default_option.use_fp16_arithmetic = true;
-    g_default_option.use_int8_storage = true;
-    g_default_option.use_int8_arithmetic = true;
-
-    ncnn::set_cpu_powersave(2);
-    ncnn::set_omp_dynamic(0);
-    ncnn::set_omp_num_threads(num_threads);  
-
-    std::vector<std::string> filenames;
-
-    // parse the image file.
-    parse_images_dir(imagepath, filenames);
-
-    // get the calibration table file, and save it.
-    int ret = post_training_quantize(filenames, parampath, binpath, tablepath, pre_param);
-    if (!ret)
-        fprintf(stderr, "\nNCNN Int8 Calibration table create success, best wish for your INT8 inference has a low accuracy loss...\\(^▽^)/...233...\n");
-
-    return 0;
-}
+// BUG1989 is pleased to support the open source community by supporting ncnn available.
+//
+// author:BUG1989 (https://github.com/BUG1989/) Long-term support.
+// author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
+//
+// Copyright (C) 2019 BUG1989. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#include <cstdio>
+#include <cstring>
+#include <vector>
+#include <iostream>
+#include <cstdlib>
+#include <algorithm>
+#include <map>
+
+#include <opencv2/opencv.hpp>
+
+// ncnn public header
+#include "net.h"
+#include "cpu.h"
+#include "benchmark.h"
+
+// ncnn private header
+#include "layer/convolution.h"
+#include "layer/convolutiondepthwise.h"
+#include "layer/innerproduct.h"
+
+static ncnn::Option g_default_option;
+static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
+static ncnn::PoolAllocator g_workspace_pool_allocator;
+
+// Collect every path that cv::glob reports for base_path (recursive search) into file_path.
+int parse_images_dir(const std::string& base_path, std::vector<std::string>& file_path)
+{
+    file_path.clear();
+
+    // The third argument (true) is cv::glob's "recursive" flag; matches come back as cv::String.
+    std::vector<cv::String> matches;
+    cv::glob(cv::String(base_path), matches, true);
+
+    // Copy each match into the caller's vector; the function always reports success.
+    for (size_t idx = 0; idx < matches.size(); idx++)
+    {
+        file_path.emplace_back(matches[idx]);
+    }
+
+    return 0;
+}
+
+class QuantNet : public ncnn::Net
+{
+public:
+    // Collectors: each one walks `layers`, fills the matching member below and returns 0.
+    int get_input_names();
+    int get_conv_names();
+    int get_conv_bottom_blob_names();
+    int get_conv_weight_blob_scales();
+    // Results of the collectors above.
+    std::vector<std::string> conv_names;                       // Convolution / ConvolutionDepthWise / InnerProduct layer names
+    std::map<std::string, std::string> conv_bottom_blob_names; // layer name -> name of its first bottom blob
+    std::map<std::string, std::vector<float> > weight_scales;  // layer name -> weight scales (127 or 31 over max |weight|)
+    std::vector<std::string> input_names;                      // top blob names of "Input" layers
+};
+
+int QuantNet::get_input_names()
+{
+    // Append the name of every top blob of every "Input" layer to input_names.
+    for (size_t li = 0; li < layers.size(); li++)
+    {
+        const auto candidate = layers[li];
+        if (!(candidate->type == "Input"))
+            continue;
+
+        for (const int top_index : candidate->tops)
+        {
+            input_names.push_back(blobs[top_index].name);
+        }
+    }
+    return 0;
+}
+
+int QuantNet::get_conv_names()
+{
+    // Record, in the order they appear in `layers`, the names of all Convolution-like layers.
+    for (const auto candidate : layers)
+    {
+        const bool wanted = candidate->type == "Convolution"
+                            || candidate->type == "ConvolutionDepthWise"
+                            || candidate->type == "InnerProduct";
+        if (wanted)
+        {
+            conv_names.push_back(candidate->name);
+        }
+    }
+    return 0;
+}
+
+int QuantNet::get_conv_bottom_blob_names()
+{
+    // Map each Convolution / ConvolutionDepthWise / InnerProduct layer name to its first bottom blob's name.
+    for (size_t idx = 0; idx < layers.size(); idx++)
+    {
+        const auto current = layers[idx];
+        if (!(current->type == "Convolution" || current->type == "ConvolutionDepthWise" || current->type == "InnerProduct"))
+            continue;
+
+        const int first_bottom = current->bottoms[0];
+        conv_bottom_blob_names[current->name] = blobs[first_bottom].name;
+    }
+
+    return 0;
+}
+
+int QuantNet::get_conv_weight_blob_scales()
+{
+    // For every Convolution, ConvolutionDepthWise and InnerProduct layer, store in weight_scales[layer name]
+    // one scale per output channel (per group for depthwise): 127 / max|w|, or 31 / max|w| in the 6-bit case.
+    for (auto layer : layers)
+    {
+        if (layer->type == "Convolution")
+        {
+            std::string name = layer->name;
+            const int weight_data_size_output = static_cast<ncnn::Convolution*>(layer)->weight_data_size / static_cast<ncnn::Convolution*>(layer)->num_output;
+            std::vector<float> scales;
+            // int8 winograd F43 needs weight data to use 6bit quantization
+            bool quant_6bit = false;
+            int kernel_w = static_cast<ncnn::Convolution*>(layer)->kernel_w;
+            int kernel_h = static_cast<ncnn::Convolution*>(layer)->kernel_h;
+            int dilation_w = static_cast<ncnn::Convolution*>(layer)->dilation_w;
+            int dilation_h = static_cast<ncnn::Convolution*>(layer)->dilation_h;
+            int stride_w = static_cast<ncnn::Convolution*>(layer)->stride_w;
+            int stride_h = static_cast<ncnn::Convolution*>(layer)->stride_h;
+            // 6-bit scaling is selected only for 3x3 kernels with stride 1 and dilation 1.
+            if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
+                quant_6bit = true;
+            for (int n = 0; n < static_cast<ncnn::Convolution*>(layer)->num_output; n++)
+            {
+                const ncnn::Mat weight_data_n = static_cast<ncnn::Convolution*>(layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
+                const float *data_n = weight_data_n;
+                float max_value = std::numeric_limits<float>::min();
+                // Largest absolute weight among this output channel's weight_data_size_output values.
+                for (int k = 0; k < weight_data_size_output; k++)
+                {
+                    max_value = std::max(max_value, std::fabs(data_n[k]));
+                }
+
+                if (quant_6bit)
+                {
+                    scales.push_back(31 / max_value);
+                }
+                else
+                {
+                    scales.push_back(127 / max_value);
+                }
+            }
+
+            weight_scales[name] = scales;
+        }
+
+        if (layer->type == "ConvolutionDepthWise")
+        {
+            std::string name = layer->name;
+            const int weight_data_size_output = static_cast<ncnn::ConvolutionDepthWise*>(layer)->weight_data_size / static_cast<ncnn::ConvolutionDepthWise*>(layer)->group;
+            std::vector<float> scales;
+
+            for (int n = 0; n < static_cast<ncnn::ConvolutionDepthWise*>(layer)->group; n++)
+            {
+                const ncnn::Mat weight_data_n = static_cast<ncnn::ConvolutionDepthWise*>(layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
+                const float *data_n = weight_data_n;
+                float max_value = std::numeric_limits<float>::min();
+                // Largest absolute weight within this group.
+                for (int k = 0; k < weight_data_size_output; k++)
+                {
+                    max_value = std::max(max_value, std::fabs(data_n[k]));
+                }
+
+                scales.push_back(127 / max_value);
+            }
+
+            weight_scales[name] = scales;
+        }
+
+        if (layer->type == "InnerProduct")
+        {
+            std::string name = layer->name;
+            const int weight_data_size_output = static_cast<ncnn::InnerProduct*>(layer)->weight_data_size / static_cast<ncnn::InnerProduct*>(layer)->num_output;
+            std::vector<float> scales;
+
+            for (int n = 0; n < static_cast<ncnn::InnerProduct*>(layer)->num_output; n++)
+            {
+                const ncnn::Mat weight_data_n = static_cast<ncnn::InnerProduct*>(layer)->weight_data.range(weight_data_size_output * n, weight_data_size_output);
+                const float *data_n = weight_data_n;
+                float max_value = std::numeric_limits<float>::min();
+                // Largest absolute weight among this output's weight_data_size_output values.
+                for (int k = 0; k < weight_data_size_output; k++)
+                    max_value = std::max(max_value, std::fabs(data_n[k]));
+
+                scales.push_back(127 / max_value);
+            }
+
+            weight_scales[name] = scales;
+        }
+    }
+
+    return 0;
+}
+
+class QuantizeData
+{
+public:
+    QuantizeData(const std::string& layer_name, const int& num);
+    int initial_histogram_value(); // called by the constructor to seed every bin
+
+    // Track the largest |value| seen, then derive the bin width from it.
+    int initial_blob_max(ncnn::Mat data);
+    int initial_histogram_interval();
+
+    // Count |value| occurrences per bin; get_data_blob_scale() normalizes before searching.
+    int update_histogram(ncnn::Mat data);
+    int normalize_histogram();
+
+    // KL-divergence threshold search and the resulting scale (127 / threshold).
+    float compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b) const;
+    int threshold_distribution(const std::vector<float> &distribution, const int target_bin = 128) const;
+    float get_data_blob_scale();
+
+    std::string name;             // layer name passed to the constructor
+    float max_value;              // largest |value| recorded by initial_blob_max()
+    int num_bins;                 // number of histogram bins
+    float histogram_interval;     // bin width: max_value / num_bins
+    std::vector<float> histogram; // one entry per bin
+    float threshold;              // written by get_data_blob_scale()
+    int threshold_bin;            // written by get_data_blob_scale()
+    float scale;                  // 127 / threshold; 1.0f until computed
+};
+
+QuantizeData::QuantizeData(const std::string& layer_name, const int& num)
+    // Initializers follow the members' declaration order; the histogram gets `num` bins.
+    : name(layer_name),
+      max_value(0.f),
+      num_bins(num),
+      histogram_interval(0.f),
+      histogram(num),
+      threshold(0.f),
+      threshold_bin(0),
+      scale(1.0f)
+{
+    initial_histogram_value(); // overwrite every bin with the small non-zero starting value
+}
+
+int QuantizeData::initial_blob_max(ncnn::Mat data)
+{
+    // Raise max_value to the largest absolute value found in any channel of data; always returns 0.
+    const int plane_size = data.w * data.h;
+    for (int ch = 0; ch < data.c; ch++)
+    {
+        const float *plane = data.channel(ch);
+        for (int k = 0; k < plane_size; k++)
+        {
+            const float magnitude = std::fabs(plane[k]);
+            if (max_value < magnitude)
+                max_value = magnitude;
+        }
+    }
+    return 0;
+}
+
+int QuantizeData::initial_histogram_interval()
+{
+    const float bin_count = static_cast<float>(num_bins); // bin width = observed maximum / number of bins
+    histogram_interval = max_value / bin_count;
+    return 0;
+}
+
+int QuantizeData::initial_histogram_value()
+{
+    // Every bin starts at a small positive value rather than 0, so no bin is exactly
+    // zero before any sample has been counted.
+    const float initial_bin_value = 0.00001f;
+    std::fill(histogram.begin(), histogram.end(), initial_bin_value);
+
+    return 0;
+}
+
+int QuantizeData::normalize_histogram()
+{
+    // First pass: total mass of the histogram, accumulated in bin order.
+    float total = 0;
+    for (const float bin : histogram)
+        total += bin;
+
+    // Second pass: divide every bin by that total.
+    for (float& bin : histogram)
+        bin /= total;
+
+    return 0;
+}
+
+int QuantizeData::update_histogram(ncnn::Mat data)
+{
+    // Count each non-zero element of data in the bin chosen by |value| / histogram_interval; always returns 0.
+    const int channel_num = data.c;
+    const int size = data.w * data.h;
+
+    for (int q = 0; q < channel_num; q++)
+    {
+        const float *data_n = data.channel(q);
+        for (int i = 0; i < size; i++)
+        {
+            if (data_n[i] == 0)
+                continue;
+
+            // Clamp to the last bin (num_bins - 1, not a hard-coded 2047) so any bin count stays in range.
+            const int index = std::min(static_cast<int>(std::fabs(data_n[i]) / histogram_interval), num_bins - 1);
+            histogram[index]++;
+        }
+    }
+    return 0;
+}
+
+float QuantizeData::compute_kl_divergence(const std::vector<float> &dist_a, const std::vector<float> &dist_b) const
+{
+    // Sum of a * log(a / b) over all bins where a != 0; both inputs must have the same length.
+    assert(dist_b.size() == dist_a.size());
+    float divergence = 0;
+
+    for (size_t bin = 0; bin < dist_a.size(); bin++)
+    {
+        const float a = dist_a[bin];
+        // Bins with no mass in dist_a contribute nothing.
+        if (a == 0)
+            continue;
+
+        // A bin present in dist_a but empty in dist_b adds a fixed penalty of 1
+        // instead of evaluating the logarithm of a division by zero.
+        const float b = dist_b[bin];
+        if (b == 0)
+            divergence += 1;
+        else
+            divergence += a * log(a / b);
+    }
+    return divergence;
+}
+
+int QuantizeData::threshold_distribution(const std::vector<float> &distribution, const int target_bin) const
+{
+    // Tries every clipping point `threshold` in [target_bin, distribution.size()) and returns the one with the
+    // smallest KL divergence between the clipped histogram and its target_bin-bin quantized form (default: target_bin).
+    int target_threshold = target_bin;
+    float min_kl_divergence = 1000;
+    const int length = static_cast<int>(distribution.size());
+
+    std::vector<float> quantize_distribution(target_bin);
+    // Mass of every bin at or beyond the first candidate threshold, i.e. the tail that clipping cuts off.
+    float threshold_sum = 0;
+    for (int threshold = target_bin; threshold < length; threshold++)
+    {
+        threshold_sum += distribution[threshold];
+    }
+
+    for (int threshold = target_bin; threshold < length; threshold++)
+    {
+        std::vector<float> t_distribution(distribution.begin(), distribution.begin() + threshold);
+        // Fold the clipped tail into the last kept bin, then shrink the tail for the next candidate.
+        t_distribution[threshold - 1] += threshold_sum;
+        threshold_sum -= distribution[threshold];
+
+        // P: the first `threshold` bins of distribution merged down to target_bin bins
+        fill(quantize_distribution.begin(), quantize_distribution.end(), 0.0f);
+
+        const auto num_per_bin = static_cast<float>(threshold) / static_cast<float>(target_bin);
+        // Each quantized bin spans num_per_bin source bins; partially covered edge bins contribute proportionally.
+        for (int i = 0; i < target_bin; i++)
+        {
+            const auto start = static_cast<float>(i) * num_per_bin;
+            const auto end = start + num_per_bin;
+
+            const auto left_upper = static_cast<int>(ceil(start));
+            if (static_cast<float>(left_upper) > start)
+            {
+                const auto left_scale = static_cast<float>(left_upper) - start;
+                quantize_distribution[i] += left_scale * distribution[left_upper - 1];
+            }
+
+            const auto right_lower = static_cast<int>(floor(end));
+
+            if (static_cast<float>(right_lower) < end)
+            {
+                const auto right_scale = end - static_cast<float>(right_lower);
+                quantize_distribution[i] += right_scale * distribution[right_lower];
+            }
+
+            for (int j = left_upper; j < right_lower; j++)
+            {
+                quantize_distribution[i] += distribution[j];
+            }
+        }
+
+        // Q: quantize_distribution expanded back to `threshold` bins
+        std::vector<float> expand_distribution(threshold, 0);
+
+        for (int i = 0; i < target_bin; i++)
+        {
+            const auto start = static_cast<float>(i) * num_per_bin;
+            const auto end = start + num_per_bin;
+
+            float count = 0;
+
+            const int left_upper = static_cast<int>(ceil(start));
+            float left_scale = 0;
+            if (static_cast<float>(left_upper) > start)
+            {
+                left_scale = static_cast<float>(left_upper) - start;
+                if (distribution[left_upper - 1] != 0)
+                {
+                    count += left_scale;
+                }
+            }
+
+            const int right_lower = static_cast<int>(floor(end));
+            float right_scale = 0;
+            if (static_cast<float>(right_lower) < end)
+            {
+                right_scale = end - static_cast<float>(right_lower);
+                if (distribution[right_lower] != 0)
+                {
+                    count += right_scale;
+                }
+            }
+
+            for (int j = left_upper; j < right_lower; j++)
+            {
+                if (distribution[j] != 0)
+                {
+                    count++;
+                }
+            }
+            // Share the quantized bin's mass equally among the non-zero source bins it covers.
+            const auto expand_value = quantize_distribution[i] / count;
+
+            if (static_cast<float>(left_upper) > start)
+            {
+                if (distribution[left_upper - 1] != 0)
+                {
+                    expand_distribution[left_upper - 1] += expand_value * left_scale;
+                }
+            }
+            if (static_cast<float>(right_lower) < end)
+            {
+                if (distribution[right_lower] != 0)
+                {
+                    expand_distribution[right_lower] += expand_value * right_scale;
+                }
+            }
+            for (int j = left_upper; j < right_lower; j++)
+            {
+                if (distribution[j] != 0)
+                {
+                    expand_distribution[j] += expand_value;
+                }
+            }
+        }
+
+        // KL divergence between the clipped reference and the expanded quantized distribution
+        const float kl_divergence = compute_kl_divergence(t_distribution, expand_distribution);
+
+        // keep the candidate with the smallest divergence seen so far
+        if (kl_divergence < min_kl_divergence)
+        {
+            min_kl_divergence = kl_divergence;
+            target_threshold = threshold;
+        }
+    }
+
+    return target_threshold;
+}
+
+float QuantizeData::get_data_blob_scale()
+{
+    normalize_histogram(); // the threshold search runs on the normalized histogram
+    const int best_bin = threshold_distribution(histogram);
+    threshold_bin = best_bin;
+    threshold = (static_cast<float>(best_bin) + 0.5f) * histogram_interval; // centre of the chosen bin
+    return scale = 127 / threshold;
+}
+
+struct PreParam // image pre-processing settings consumed by post_training_quantize()
+{
+    float mean[3]; // per-channel values passed to substract_mean_normalize() as the mean
+    float norm[3]; // per-channel values passed to substract_mean_normalize() as the norm
+    int width;     // target width passed to from_pixels_resize()
+    int height;    // target height passed to from_pixels_resize()
+    bool swapRB;   // true selects PIXEL_BGR2RGB instead of PIXEL_BGR when converting pixels
+};
+
+// Builds the int8 calibration table for a model and writes it to table_path.
+//
+// The table gets one "<layer>_param_0 <scale> ..." line with the weight scales of
+// each layer in net.conv_names, then one "<layer> <scale>" line with the
+// activation scale of that layer's bottom blob. Activation scales are found in
+// four steps: track the max value over all images, derive the histogram
+// interval, fill the histograms in a second run over all images, and pick the
+// threshold with KL divergence (QuantizeData::get_data_blob_scale).
+//
+// image_list  paths of the calibration images; must not be empty
+// param_path  ncnn .param file of the model
+// bin_path    ncnn .bin file of the model
+// table_path  calibration table to create (opened with mode "w")
+// per_param   mean/norm values, network input size and channel swap flag
+//
+// Returns 0 on success and -1 when there are no images, the net has no input
+// name, the table cannot be opened, or an image cannot be read.
+static int post_training_quantize(const std::vector<std::string>& image_list, const std::string& param_path, const std::string& bin_path, const std::string& table_path, struct PreParam& per_param)
+{
+    const auto image_count = image_list.size();
+    if (image_count == 0)
+    {
+        // Without images no activation statistics can be collected.
+        fprintf(stderr, "no calibration images found, nothing to quantize\n");
+        return -1;
+    }
+
+    QuantNet net;
+    net.opt = g_default_option;
+
+    // NOTE(review): the results of load_param/load_model are ignored; presumably a
+    // failed load only surfaces through the input_names check below - confirm.
+    net.load_param(param_path.c_str());
+    net.load_model(bin_path.c_str());
+
+    // Preprocessing settings; mean/norm are read straight from per_param.
+    const float* mean_vals = per_param.mean;
+    const float* norm_vals = per_param.norm;
+    const int width = per_param.width;
+    const int height = per_param.height;
+    const bool swapRB = per_param.swapRB;
+
+    g_blob_pool_allocator.clear();
+    g_workspace_pool_allocator.clear();
+
+    net.get_input_names();
+    net.get_conv_names();
+    net.get_conv_bottom_blob_names();
+    net.get_conv_weight_blob_scales();
+
+    if (net.input_names.empty())
+    {
+        fprintf(stderr, "not found [Input] Layer, Check your ncnn.param \n");
+        return -1;
+    }
+
+    // Opened after the checks above so their early returns have nothing to close.
+    FILE* fp = fopen(table_path.c_str(), "w");
+    if (!fp)
+    {
+        fprintf(stderr, "Open %s failed.\n", table_path.c_str());
+        return -1;
+    }
+
+    // save quantization scale of weight
+    printf("====> Quantize the parameters.\n");
+    for (size_t i = 0; i < net.conv_names.size(); i++)
+    {
+        const std::string& layer_name = net.conv_names[i];
+        const std::vector<float>& weight_scale_n = net.weight_scales[layer_name];
+
+        fprintf(fp, "%s_param_0 ", layer_name.c_str());
+        for (float j : weight_scale_n)
+        {
+            fprintf(fp, "%f ", j);
+        }
+        fprintf(fp, "\n");
+    }
+
+    // initial quantization data; quantize_datas[i] belongs to net.conv_names[i]
+    std::vector<QuantizeData> quantize_datas;
+
+    for (size_t i = 0; i < net.conv_names.size(); i++)
+    {
+        QuantizeData quantize_data(net.conv_names[i], 2048);
+        quantize_datas.push_back(quantize_data);
+    }
+
+    // step 1 count the max value
+    printf("====> Quantize the activation.\n");
+    printf("    ====> step 1 : find the max value.\n");
+
+    for (size_t i = 0; i < image_count; i++)
+    {
+        const std::string& img_name = image_list[i];
+
+        if ((i + 1) % 100 == 0)
+        {
+            fprintf(stderr, "          %d/%d\n", static_cast<int>(i + 1), static_cast<int>(image_count));
+        }
+
+        // decode as a 3-channel BGR image
+#if OpenCV_VERSION_MAJOR > 2
+        cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
+#else
+        cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
+#endif
+        if (bgr.empty())
+        {
+            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+            // do not leak the table handle on this error path
+            fclose(fp);
+            return -1;
+        }
+
+        ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
+        in.substract_mean_normalize(mean_vals, norm_vals);
+
+        ncnn::Extractor ex = net.create_extractor();
+        ex.input(net.input_names[0].c_str(), in);
+
+        // feed the bottom blob of every layer to its statistics collector
+        for (size_t j = 0; j < net.conv_names.size(); j++)
+        {
+            const std::string& blob_name = net.conv_bottom_blob_names[net.conv_names[j]];
+
+            ncnn::Mat out;
+            ex.extract(blob_name.c_str(), out);
+
+            // direct index instead of a search by name: same order as conv_names
+            quantize_datas[j].initial_blob_max(out);
+        }
+    }
+
+    // step 2 histogram_interval
+    // runs once per layer, after every image has gone through step 1
+    printf("    ====> step 2 : generate the histogram_interval.\n");
+    for (auto& quantize_data : quantize_datas)
+    {
+        quantize_data.initial_histogram_interval();
+
+        fprintf(stderr, "%-20s : max = %-15f interval = %-10f\n", quantize_data.name.c_str(), quantize_data.max_value, quantize_data.histogram_interval);
+    }
+
+    // step 3 histogram
+    printf("    ====> step 3 : generate the histogram.\n");
+    for (size_t i = 0; i < image_count; i++)
+    {
+        const std::string& img_name = image_list[i];
+
+        if ((i + 1) % 100 == 0)
+        {
+            fprintf(stderr, "          %d/%d\n", static_cast<int>(i + 1), static_cast<int>(image_count));
+        }
+
+        // decode as a 3-channel BGR image
+#if OpenCV_VERSION_MAJOR > 2
+        cv::Mat bgr = cv::imread(img_name, cv::IMREAD_COLOR);
+#else
+        cv::Mat bgr = cv::imread(img_name, CV_LOAD_IMAGE_COLOR);
+#endif
+        if (bgr.empty())
+        {
+            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+            // do not leak the table handle on this error path
+            fclose(fp);
+            return -1;
+        }
+
+        ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, swapRB ? ncnn::Mat::PIXEL_BGR2RGB : ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, width, height);
+        in.substract_mean_normalize(mean_vals, norm_vals);
+
+        ncnn::Extractor ex = net.create_extractor();
+        ex.input(net.input_names[0].c_str(), in);
+
+        // feed the bottom blob of every layer to its statistics collector
+        for (size_t k = 0; k < net.conv_names.size(); k++)
+        {
+            const std::string& blob_name = net.conv_bottom_blob_names[net.conv_names[k]];
+
+            ncnn::Mat out;
+            ex.extract(blob_name.c_str(), out);
+
+            // direct index instead of a search by name: same order as conv_names
+            quantize_datas[k].update_histogram(out);
+        }
+    }
+
+    // step4 kld
+    // each layer's result is logged to stderr and appended to the table
+    printf("    ====> step 4 : using kld to find the best threshold value.\n");
+    for (auto& quantize_data : quantize_datas)
+    {
+        fprintf(stderr, "%-20s ", quantize_data.name.c_str());
+
+        quantize_data.get_data_blob_scale();
+        fprintf(stderr, "bin : %-8d threshold : %-15f interval : %-10f scale : %-10f\n",
+            quantize_data.threshold_bin,
+            quantize_data.threshold,
+            quantize_data.histogram_interval,
+            quantize_data.scale);
+
+        fprintf(fp, "%s %f\n", quantize_data.name.c_str(), quantize_data.scale);
+    }
+
+    fclose(fp);
+    printf("====> Save the calibration table done.\n");
+
+    return 0;
+}
+
+// Prints one complete sample command line for the tool to stdout.
+void showUsage()
+{
+    std::cout << "example: ./ncnn2table --param=squeezenet-fp32.param --bin=squeezenet-fp32.bin --images=images/ --output=squeezenet.table --mean=104,117,123 --norm=1,1,1 --size=227,227 --swapRB --thread=2" << '\n' << std::flush;
+}
+
+// Entry point: parses the command line, fills the preprocessing parameters and
+// global ncnn options, collects the calibration images and writes the table.
+// Returns 0 on success or when help is requested, -1 on any failure.
+int main(int argc, char** argv)
+{
+    std::cout << "--- ncnn post training quantization tool --- " << __TIME__ << " " << __DATE__ << std::endl;
+
+    const cv::CommandLineParser parser(argc, argv,
+        {
+
+            "{help h usage ? |   | print this message }"
+            "{param p        |   | path to ncnn.param file }"
+            "{bin b          |   | path to ncnn.bin file }"
+            "{images i       |   | path to calibration images }"
+            "{output o       |   | path to output calibration table file }"
+            "{mean m         |   | value of mean }"
+            "{norm n         |   | value of normalize(scale value,default is 1 }"
+            "{size s         |   | the size of input image(using the resize the original image,default is w=224,h=224) }"
+            "{swapRB c       |   | flag which indicates that swap first and last channels in 3-channel image is necessary }"
+            "{thread t       | 4 | count of processing threads }"
+        });
+
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        showUsage();
+        return 0;
+    }
+
+    if (!parser.has("param") || !parser.has("bin") || !parser.has("images") || !parser.has("output") || !parser.has("mean") || !parser.has("norm"))
+    {
+        std::cout << "Inputs is does not include all needed param, pleas check..." << std::endl;
+        parser.printMessage();
+        showUsage();
+        return -1;
+    }
+
+    const std::string image_folder_path = parser.get<cv::String>("images");
+    const std::string ncnn_param_file_path = parser.get<cv::String>("param");
+    const std::string ncnn_bin_file_path = parser.get<cv::String>("bin");
+    const std::string saved_table_file_path = parser.get<cv::String>("output");
+
+    // check the input param
+    if (image_folder_path.empty() || ncnn_param_file_path.empty() || ncnn_bin_file_path.empty() || saved_table_file_path.empty())
+    {
+        fprintf(stderr, "One or more path may be empty, please check and try again.\n");
+        return -1;
+    }
+
+    const auto num_threads = parser.get<int>("thread");
+
+    struct PreParam pre_param {
+        {104.f, 117.f, 103.f},
+        { 1.f, 1.f, 1.f },
+            224,
+            224,
+            false
+    };
+
+    // Splits "a,b,c" into exactly three floats that are appended to `value`.
+    // Returns 0 on success; prints the reason and returns -1 when the string does
+    // not hold exactly two commas or its first, middle or last field is empty.
+    const auto find_all_value_in_string = [](const std::string& values_string, std::vector<float>& value)
+    {
+        std::vector<int> masks_pos;
+
+        for (size_t i = 0; i < values_string.size(); i++)
+        {
+            if (',' == values_string[i])
+            {
+                masks_pos.push_back(static_cast<int>(i));
+            }
+        }
+
+        // check
+        if (masks_pos.empty())
+        {
+            fprintf(stderr, "ERROR: Cannot find any ',' in string, please check.\n");
+            return -1;
+        }
+
+        if (2 != masks_pos.size())
+        {
+            fprintf(stderr, "ERROR: Expected exactly 3 values separated by ',', please check.\n");
+            return -1;
+        }
+
+        if (masks_pos.front() == 0)
+        {
+            fprintf(stderr, "ERROR: Char ',' in fist of string, please check.\n");
+            return -1;
+        }
+
+        if (masks_pos.back() == static_cast<int>(values_string.size()) - 1)
+        {
+            fprintf(stderr, "ERROR: Char ',' in last of string, please check.\n");
+            return -1;
+        }
+
+        if (masks_pos[1] - masks_pos[0] < 2)
+        {
+            fprintf(stderr, "ERROR: Neighbouring char ',' was found.\n");
+            return -1;
+        }
+
+        const std::string ch0_val_str = values_string.substr(0, masks_pos[0]);
+        const std::string ch1_val_str = values_string.substr(masks_pos[0] + 1, masks_pos[1] - masks_pos[0] - 1);
+        const std::string ch2_val_str = values_string.substr(masks_pos[1] + 1);
+
+        value.emplace_back(static_cast<float>(std::atof(ch0_val_str.c_str())));
+        value.emplace_back(static_cast<float>(std::atof(ch1_val_str.c_str())));
+        value.emplace_back(static_cast<float>(std::atof(ch2_val_str.c_str())));
+
+        return 0;
+    };
+
+    if (parser.has("mean"))
+    {
+        const std::string mean_str = parser.get<std::string>("mean");
+
+        std::vector<float> mean_values;
+        const auto ret = find_all_value_in_string(mean_str, mean_values);
+        if (0 != ret || 3 != mean_values.size())
+        {
+            fprintf(stderr, "ERROR: Searching mean value from --mean was failed.\n");
+
+            return -1;
+        }
+
+        pre_param.mean[0] = mean_values[0];
+        pre_param.mean[1] = mean_values[1];
+        pre_param.mean[2] = mean_values[2];
+    }
+
+    if (parser.has("norm"))
+    {
+        const std::string norm_str = parser.get<std::string>("norm");
+
+        std::vector<float> norm_values;
+        const auto ret = find_all_value_in_string(norm_str, norm_values);
+        if (0 != ret || 3 != norm_values.size())
+        {
+            fprintf(stderr, "ERROR: Searching norm value from --norm was failed, please check --norm param.\n");
+
+            return -1;
+        }
+
+        pre_param.norm[0] = norm_values[0];
+        pre_param.norm[1] = norm_values[1];
+        pre_param.norm[2] = norm_values[2];
+    }
+
+    if (parser.has("size"))
+    {
+        const std::string size_str = parser.get<std::string>("size");
+
+        const auto sep_pos = size_str.find_first_of(',');
+
+        if (std::string::npos != sep_pos)
+        {
+            const std::string width_value_str = size_str.substr(0, sep_pos);
+            const std::string height_value_str = size_str.substr(sep_pos + 1);
+
+            pre_param.width = std::atoi(width_value_str.c_str());
+            pre_param.height = std::atoi(height_value_str.c_str());
+        }
+
+        // A missing separator or a non-positive/non-numeric dimension is rejected;
+        // the resize step needs a real target size.
+        if (std::string::npos == sep_pos || pre_param.width <= 0 || pre_param.height <= 0)
+        {
+            fprintf(stderr, "ERROR: Searching size value from --size was failed, please check --size param.\n");
+
+            return -1;
+        }
+    }
+
+    if (parser.has("swapRB"))
+    {
+        pre_param.swapRB = true;
+    }
+
+    g_blob_pool_allocator.set_size_compare_ratio(0.0f);
+    g_workspace_pool_allocator.set_size_compare_ratio(0.5f);
+
+    // default option
+    g_default_option.lightmode = true;
+    g_default_option.num_threads = num_threads;
+    g_default_option.blob_allocator = &g_blob_pool_allocator;
+    g_default_option.workspace_allocator = &g_workspace_pool_allocator;
+
+    g_default_option.use_winograd_convolution = true;
+    g_default_option.use_sgemm_convolution = true;
+    g_default_option.use_int8_inference = true;
+    g_default_option.use_fp16_packed = true;
+    g_default_option.use_fp16_storage = true;
+    g_default_option.use_fp16_arithmetic = true;
+    g_default_option.use_int8_storage = true;
+    g_default_option.use_int8_arithmetic = true;
+
+    ncnn::set_cpu_powersave(2);
+    ncnn::set_omp_dynamic(0);
+    ncnn::set_omp_num_threads(num_threads);
+
+    std::vector<std::string> image_file_path_list;
+
+    // parse the image file.
+    parse_images_dir(image_folder_path, image_file_path_list);
+
+    // get the calibration table file, and save it; its status becomes the exit code.
+    const auto ret = post_training_quantize(image_file_path_list, ncnn_param_file_path, ncnn_bin_file_path, saved_table_file_path, pre_param);
+    if (!ret)
+    {
+        fprintf(stderr, "\nNCNN Int8 Calibration table create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/...233...\n");
+    }
+
+    return ret;
+}