Browse Source

fix quantize tools cross-platform issues (#1505)

* 1. for cross-platform support, use cv::CommandLineParser instead of getopt.h
2. use cv::glob to search folders instead of dirent.h
3. fix some other warnings about functions from cstdlib
4. add some const and some auto

* 1. fix some other warnings about functions from cstdlib
2. add some const and some auto
3. I forgot...

* switch quantize tools on by default

* check for OpenCV first; if it is not found, disable quantize tools

* add _CRT_SECURE_NO_DEPRECATE for msvc and remove *_s functions

* add _CRT_SECURE_NO_DEPRECATE for msvc and remove *_s functions

* keep the one line style block

* folding bracket

* dynamic_cast -> static_cast
tags/20200226
kalcohol nihui 6 years ago
parent
commit
6916f45237
3 changed files with 1039 additions and 994 deletions
  1. +11
    -1
      tools/CMakeLists.txt
  2. +84
    -81
      tools/quantize/ncnn2int8.cpp
  3. +944
    -912
      tools/quantize/ncnn2table.cpp

+ 11
- 1
tools/CMakeLists.txt View File

@@ -2,7 +2,17 @@
add_subdirectory(caffe)
add_subdirectory(mxnet)
add_subdirectory(onnx)
# add_subdirectory(quantize)
find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs)
if(NOT OpenCV_FOUND)
find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
endif()
if(OpenCV_FOUND)
add_subdirectory(quantize)
else()
message(WARNING "OpenCV not found, quantize tools won't be built")
endif()
add_executable(ncnn2mem ncnn2mem.cpp)


+ 84
- 81
tools/quantize/ncnn2int8.cpp View File

@@ -12,18 +12,15 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <assert.h>
#ifdef _MSC_VER
#define _CRT_SECURE_NO_DEPRECATE
#endif

#include <fstream>
#include <cstdio>
#include <cstring>
#include <vector>
#include <set>
#include <limits>
#include <map>
#include <algorithm>

// ncnn public header
#include "net.h"
@@ -90,57 +87,57 @@ static bool read_int8scale_table(const char* filepath, std::map<std::string, std
FILE* fp = fopen(filepath, "rb");
if (!fp)
{
fprintf(stderr, "fopen %s failed\n", filepath);
fprintf(stderr, "Open %s failed.\n", filepath);
return false;
}

bool in_scale_vector = false;

std::string keystr;
std::string key_str;
std::vector<float> scales;

char *line = NULL;
char *pch = NULL;
std::vector<char>line(102400);
char *pch = nullptr;
size_t len = 0;
ssize_t read;

while ((read = getline(&line, &len, fp)) != -1)
while (nullptr != std::fgets(line.data(), static_cast<int>(line.size()), fp))
{

float scale = 1.f;
char key[256];
line[strcspn(line, "\r\n")] = 0;
pch = strtok (line, " ");
if (pch == NULL) break;
line[strcspn(line.data(), "\r\n")] = 0;
pch = strtok(line.data(), " ");

bool iskey = 1;
while (pch != NULL)
if (pch == nullptr) break;

bool is_key = true;
while (pch != nullptr)
{
if (iskey)
if (is_key)
{
sscanf(pch, "%255s", key);
keystr = key;
iskey = 0;

key_str = key;
is_key = false;
}
else
{
sscanf(pch, "%f", &scale);

scales.push_back(scale);
}

pch = strtok (NULL, " ");
pch = strtok(nullptr, " ");
}

// XYZ_param_N pattern
if (strstr(keystr.c_str(), "_param_"))
if (strstr(key_str.c_str(), "_param_"))
{
weight_int8scale_table[ keystr ] = scales;
weight_int8scale_table[key_str] = scales;
}
else
{
blob_int8scale_table[ keystr ] = scales;
blob_int8scale_table[key_str] = scales;
}
keystr.clear();
key_str.clear();
scales.clear();
}

@@ -155,7 +152,7 @@ public:
// 0=fp32 1=fp16 2=int8
int storage_type;
std::map<std::string, std::vector<float> > blob_int8scale_table;
std::map<std::string, std::vector<float> > weight_int8scale_table;
std::map<std::string, std::vector<float> > weight_int8scale_table;

public:
int quantize_convolution();
@@ -174,29 +171,30 @@ public:

int NetQuantize::quantize_convolution()
{
const int layer_count = layers.size();
for (int i=0; i<layer_count; i++)
const int layer_count = static_cast<int>(layers.size());
for (int i = 0; i < layer_count; i++)
{
// find convoultion layer
if (layers[i]->type != "Convolution")
continue;

// find convolution layer
std::map<std::string, std::vector<float> >::iterator iter_data = blob_int8scale_table.find(layers[i]->name);
auto iter_data = blob_int8scale_table.find(layers[i]->name);
if (iter_data == blob_int8scale_table.end())
continue;

char key[256];
sprintf(key, "%s_param_0", layers[i]->name.c_str());
std::map<std::string, std::vector<float> >::iterator iter = weight_int8scale_table.find(key);

auto iter = weight_int8scale_table.find(key);
if (iter == weight_int8scale_table.end())
{
fprintf(stderr, "this layer need to be quantized, but no scale param!\n");
return -1;
}
// Convolution - quantize weight from fp32 to int8
ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
auto convolution = (ncnn::Convolution*)layers[i];

std::vector<float> weight_data_int8_scales = iter->second;

@@ -210,7 +208,7 @@ int NetQuantize::quantize_convolution()
const int weight_data_size_output = convolution->weight_data_size / convolution->num_output;

// quantize weight to int8
for (int n=0; n<convolution->num_output; n++)
for (int n = 0; n < convolution->num_output; n++)
{
ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

@@ -240,29 +238,30 @@ int NetQuantize::quantize_convolution()

int NetQuantize::quantize_convolutiondepthwise()
{
const int layer_count = layers.size();
for (int i=0; i<layer_count; i++)
const int layer_count = static_cast<int>(layers.size());
for (int i = 0; i < layer_count; i++)
{
// find convoultion layer
if (layers[i]->type != "ConvolutionDepthWise")
continue;

// find convolutiondepthwise layer
std::map<std::string, std::vector<float> >::iterator iter_data = blob_int8scale_table.find(layers[i]->name);
auto iter_data = blob_int8scale_table.find(layers[i]->name);
if (iter_data == blob_int8scale_table.end())
continue;

char key[256];
sprintf(key, "%s_param_0", layers[i]->name.c_str());
std::map<std::string, std::vector<float> >::iterator iter = weight_int8scale_table.find(key);

auto iter = weight_int8scale_table.find(key);
if (iter == weight_int8scale_table.end())
{
fprintf(stderr, "this layer need to be quantized, but no scale param!\n");
return -1;
}
// Convolution - quantize weight from fp32 to int8
ncnn::ConvolutionDepthWise* convdw = (ncnn::ConvolutionDepthWise*)layers[i];
auto convdw = (ncnn::ConvolutionDepthWise*)layers[i];

std::vector<float> weight_data_int8_scales = iter->second;

@@ -276,7 +275,7 @@ int NetQuantize::quantize_convolutiondepthwise()
const int weight_data_size_output = convdw->weight_data_size / convdw->group;

// quantize weight to int8
for (int n=0; n<convdw->group; n++)
for (int n = 0; n < convdw->group; n++)
{
ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

@@ -306,29 +305,30 @@ int NetQuantize::quantize_convolutiondepthwise()

int NetQuantize::quantize_innerproduct()
{
const int layer_count = layers.size();
for (int i=0; i<layer_count; i++)
const int layer_count = static_cast<int>(layers.size());
for (int i = 0; i < layer_count; i++)
{
// find convoultion layer
if (layers[i]->type != "InnerProduct")
continue;

// find InnerProduct layer
std::map<std::string, std::vector<float> >::iterator iter_data = blob_int8scale_table.find(layers[i]->name);
auto iter_data = blob_int8scale_table.find(layers[i]->name);
if (iter_data == blob_int8scale_table.end())
continue;

char key[256];
sprintf(key, "%s_param_0", layers[i]->name.c_str());
std::map<std::string, std::vector<float> >::iterator iter = weight_int8scale_table.find(key);

auto iter = weight_int8scale_table.find(key);
if (iter == weight_int8scale_table.end())
{
fprintf(stderr, "this layer need to be quantized, but no scale param!\n");
return -1;
}
// InnerProduct - quantize weight from fp32 to int8
ncnn::InnerProduct* fc = (ncnn::InnerProduct*)layers[i];
auto fc = (ncnn::InnerProduct*)layers[i];

std::vector<float> weight_data_int8_scales = iter->second;

@@ -342,7 +342,7 @@ int NetQuantize::quantize_innerproduct()
const int weight_data_size_output = fc->weight_data_size / fc->num_output;

// quantize weight to int8
for (int n=0; n<fc->num_output; n++)
for (int n = 0; n < fc->num_output; n++)
{
ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

@@ -376,7 +376,7 @@ int NetQuantize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp)
const int* ptr = m;

fprintf(pp, " -%d=%d", 23300 + id, count);
for (int i=0; i<count; i++)
for (int i = 0; i < count; i++)
{
fprintf(pp, ",%d", ptr[i]);
}
@@ -390,7 +390,7 @@ int NetQuantize::fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp)
const float* ptr = m;

fprintf(pp, " -%d=%d", 23300 + id, count);
for (int i=0; i<count; i++)
for (int i = 0; i < count; i++)
{
fprintf(pp, ",%f", ptr[i]);
}
@@ -400,7 +400,7 @@ int NetQuantize::fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp)

// Rounds sz up to a multiple of n (sz is returned unchanged when it is already
// aligned) using the mask expression (sz + n - 1) & -n. The mask only yields a
// correct multiple when n is a power of two.
// The callers visible in this diff pass n = 4 to pad written data to a 32-bit
// boundary.
// NOTE(review): the two return lines below appear to be the before/after lines
// of this diff hunk; they differ only in spacing.
static inline size_t alignSize(size_t sz, int n)
{
return (sz + n-1) & -n;
return (sz + n - 1) & -n;
}

int NetQuantize::fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp)
@@ -417,8 +417,8 @@ int NetQuantize::fwrite_weight_tag_data(int tag, const ncnn::Mat& data, FILE* bp

// padding to 32bit align
int nwrite = ftell(bp) - p0;
int nalign = alignSize(nwrite, 4);
unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
int nalign = static_cast<int>(alignSize(nwrite, 4));
unsigned char padding[4] = { 0x00, 0x00, 0x00, 0x00 };
fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);

return 0;
@@ -433,8 +433,8 @@ int NetQuantize::fwrite_weight_data(const ncnn::Mat& data, FILE* bp)

// padding to 32bit align
int nwrite = ftell(bp) - p0;
int nalign = alignSize(nwrite, 4);
unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
int nalign = static_cast<int>(alignSize(nwrite, 4));
unsigned char padding[4] = { 0x00, 0x00, 0x00, 0x00 };
fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);

return 0;
@@ -447,11 +447,11 @@ int NetQuantize::save(const char* parampath, const char* binpath)

fprintf(pp, "7767517\n");

const int layer_count = layers.size();
const int layer_count = static_cast<int>(layers.size());

int layer_count_fused = 0;
std::set<std::string> blob_names;
for (int i=0; i<layer_count; i++)
for (int i = 0; i < layer_count; i++)
{
const ncnn::Layer* layer = layers[i];
if (layer->type == "ncnnfused")
@@ -459,42 +459,42 @@ int NetQuantize::save(const char* parampath, const char* binpath)

layer_count_fused++;

int bottom_count = layer->bottoms.size();
for (int j=0; j<bottom_count; j++)
int bottom_count = static_cast<int>(layer->bottoms.size());
for (int j = 0; j < bottom_count; j++)
{
int bottom_blob_index = layer->bottoms[j];
blob_names.insert(blobs[bottom_blob_index].name);
}

int top_count = layer->tops.size();
for (int j=0; j<top_count; j++)
int top_count = static_cast<int>(layer->tops.size());
for (int j = 0; j < top_count; j++)
{
int top_blob_index = layer->tops[j];
blob_names.insert(blobs[top_blob_index].name);
}
}

int blob_count_fused = blob_names.size();
int blob_count_fused = static_cast<int>(blob_names.size());

fprintf(pp, "%d %d\n", layer_count_fused, blob_count_fused);

for (int i=0; i<layer_count; i++)
for (int i = 0; i < layer_count; i++)
{
const ncnn::Layer* layer = layers[i];
if (layer->type == "ncnnfused")
continue;

int bottom_count = layer->bottoms.size();
int top_count = layer->tops.size();
int bottom_count = static_cast<int>(layer->bottoms.size());
int top_count = static_cast<int>(layer->tops.size());

fprintf(pp, "%-24s %-24s %d %d", layer->type.c_str(), layer->name.c_str(), bottom_count, top_count);

for (int j=0; j<bottom_count; j++)
for (int j = 0; j < bottom_count; j++)
{
int bottom_blob_index = layer->bottoms[j];
fprintf(pp, " %s", blobs[bottom_blob_index].name.c_str());
}
for (int j=0; j<top_count; j++)
for (int j = 0; j < top_count; j++)
{
int top_blob_index = layer->tops[j];
fprintf(pp, " %s", blobs[top_blob_index].name.c_str());
@@ -581,12 +581,13 @@ int NetQuantize::save(const char* parampath, const char* binpath)

// write int8_scale data
if (op->int8_scale_term)
{
{
std::vector<float> weight_int8scale;
std::vector<float> blob_int8scale;

char key[256];
sprintf(key, "%s_param_0", layer->name.c_str());
sprintf(key, "%s_param_0", layers[i]->name.c_str());

if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
{
weight_int8scale = weight_int8scale_table[std::string(key)];
@@ -630,12 +631,13 @@ int NetQuantize::save(const char* parampath, const char* binpath)

// write int8_scale data
if (op->int8_scale_term)
{
{
std::vector<float> weight_int8scale;
std::vector<float> blob_int8scale;

char key[256];
sprintf(key, "%s_param_0", layer->name.c_str());
sprintf(key, "%s_param_0", layers[i]->name.c_str());

if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
{
weight_int8scale = weight_int8scale_table[std::string(key)];
@@ -649,7 +651,7 @@ int NetQuantize::save(const char* parampath, const char* binpath)
// write int8_scale data
fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
}
}
}
else if (layer->type == "Crop")
{
@@ -781,12 +783,13 @@ int NetQuantize::save(const char* parampath, const char* binpath)

// write int8_scale data
if (op->int8_scale_term)
{
{
std::vector<float> weight_int8scale;
std::vector<float> blob_int8scale;

char key[256];
sprintf(key, "%s_param_0", layer->name.c_str());
sprintf(key, "%s_param_0", layers[i]->name.c_str());

if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
{
weight_int8scale = weight_int8scale_table[std::string(key)];
@@ -800,7 +803,7 @@ int NetQuantize::save(const char* parampath, const char* binpath)
// write int8_scale data
fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
}
}
}
else if (layer->type == "Input")
{
@@ -880,7 +883,7 @@ int NetQuantize::save(const char* parampath, const char* binpath)
fprintf_param_value(" 3=%d", scale_data_size)
fprintf_param_value(" 4=%d", across_channel)

fwrite_weight_data(op->scale_data, bp);
fwrite_weight_data(op->scale_data, bp);
}
else if (layer->type == "Padding")
{
@@ -1170,7 +1173,7 @@ int main(int argc, char** argv)

quantizer.load_param(inparam);
quantizer.load_model(inbin);
quantizer.quantize_convolution();
quantizer.quantize_convolutiondepthwise();
quantizer.quantize_innerproduct();


+ 944
- 912
tools/quantize/ncnn2table.cpp
File diff suppressed because it is too large
View File


Loading…
Cancel
Save