Browse Source

better ncnn2table multithreading, print parsed parameters, print progress

tags/20210720
nihui 5 years ago
parent
commit
71bc617a05
1 changed files with 137 additions and 8 deletions
  1. +137
    -8
      tools/quantize/ncnn2table.cpp

+ 137
- 8
tools/quantize/ncnn2table.cpp View File

@@ -224,6 +224,9 @@ int QuantNet::quantize_KL()

const int num_histogram_bins = 2048;

std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

// initialize conv weight scales
#pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_layer_count; i++)
@@ -323,11 +326,20 @@ int QuantNet::quantize_KL()
}

// count the absmax
#pragma omp parallel for num_threads(quantize_num_threads)
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++)
{
if (i % 100 == 0)
{
fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}

ncnn::Extractor ex = create_extractor();

const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);

for (int j = 0; j < input_blob_count; j++)
{
const std::string& imagepath = listspaths[j][i];
@@ -393,11 +405,20 @@ int QuantNet::quantize_KL()
}

// build histogram
#pragma omp parallel for num_threads(quantize_num_threads)
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++)
{
if (i % 100 == 0)
{
fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}

ncnn::Extractor ex = create_extractor();

const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);

for (int j = 0; j < input_blob_count; j++)
{
const std::string& imagepath = listspaths[j][i];
@@ -675,6 +696,9 @@ int QuantNet::quantize_ACIQ()
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
const int image_count = (int)listspaths[0].size();

std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

// initialize conv weight scales
#pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_layer_count; i++)
@@ -777,12 +801,21 @@ int QuantNet::quantize_ACIQ()
}
}

// count the absmax abssum
#pragma omp parallel for num_threads(quantize_num_threads)
// count the absmax
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++)
{
if (i % 100 == 0)
{
fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}

ncnn::Extractor ex = create_extractor();

const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);

for (int j = 0; j < input_blob_count; j++)
{
const std::string& imagepath = listspaths[j][i];
@@ -991,6 +1024,9 @@ int QuantNet::quantize_EQ()
const int conv_layer_count = (int)conv_layers.size();
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();

std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

// max 50 images for EQ
const int image_count = std::min((int)listspaths[0].size(), 50);

@@ -1015,11 +1051,20 @@ int QuantNet::quantize_EQ()

std::vector<double> avgsims(search_steps, 0.0);

#pragma omp parallel for num_threads(quantize_num_threads)
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int ii = 0; ii < image_count; ii++)
{
if (ii % 100 == 0)
{
fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
}

ncnn::Extractor ex = create_extractor();

const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);

for (int jj = 0; jj < input_blob_count; jj++)
{
const std::string& imagepath = listspaths[jj][ii];
@@ -1121,11 +1166,20 @@ int QuantNet::quantize_EQ()

std::vector<double> avgsims(search_steps, 0.0);

#pragma omp parallel for num_threads(quantize_num_threads)
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int ii = 0; ii < image_count; ii++)
{
if (ii % 100 == 0)
{
fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
}

ncnn::Extractor ex = create_extractor();

const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);

for (int jj = 0; jj < input_blob_count; jj++)
{
const std::string& imagepath = listspaths[jj][ii];
@@ -1454,6 +1508,64 @@ static std::vector<int> parse_comma_pixel_type_list(char* s)
return aps;
}

// Dump a list of float arrays to stderr for diagnostics.
//
// Each inner array is written as '[' v0 ',' v1 ... ']' with every value
// formatted through "%f"; consecutive arrays are joined by a single ','.
// Nothing is written for an empty list, an empty inner array is written as
// "[]", and no trailing newline is emitted (the caller adds its own).
static void print_float_array_list(const std::vector<std::vector<float> >& list)
{
    const size_t array_count = list.size();
    for (size_t a = 0; a < array_count; a++)
    {
        // the separator goes in front of every array except the first one
        if (a > 0)
            fprintf(stderr, ",");

        const std::vector<float>& values = list[a];
        const size_t value_count = values.size();

        fprintf(stderr, "[");
        for (size_t v = 0; v < value_count; v++)
        {
            // same rule for the values inside one array
            if (v > 0)
                fprintf(stderr, ",");
            fprintf(stderr, "%f", values[v]);
        }
        fprintf(stderr, "]");
    }
}

// Dump a list of int arrays to stderr for diagnostics.
//
// Output shape: "[a,b,c],[d],[]" - each inner array is bracketed, its values
// are formatted through "%d" and comma-separated, and the arrays themselves
// are comma-separated. An empty list writes nothing; no newline is appended.
static void print_int_array_list(const std::vector<std::vector<int> >& list)
{
    typedef std::vector<std::vector<int> >::const_iterator ArrayIter;
    typedef std::vector<int>::const_iterator ValueIter;

    for (ArrayIter array = list.begin(); array != list.end(); ++array)
    {
        // comma only between arrays, never before the first or after the last
        if (array != list.begin())
            fprintf(stderr, ",");

        fprintf(stderr, "[");
        for (ValueIter value = array->begin(); value != array->end(); ++value)
        {
            if (value != array->begin())
                fprintf(stderr, ",");
            fprintf(stderr, "%d", *value);
        }
        fprintf(stderr, "]");
    }
}

// Dump a list of pixel type codes to stderr as comma-separated names.
//
// The code -233 is written as "RAW"; the ncnn::Mat::PIXEL_* codes handled
// below are written as RGB / BGR / GRAY / RGBA / BGRA. Any other code writes
// no name at all (the separating commas are still emitted). No newline is
// appended.
static void print_pixel_type_list(const std::vector<int>& list)
{
    const size_t count = list.size();
    for (size_t k = 0; k < count; k++)
    {
        // comma only between entries, never before the first or after the last
        if (k > 0)
            fprintf(stderr, ",");

        switch (list[k])
        {
        case -233:
            fprintf(stderr, "RAW");
            break;
        case ncnn::Mat::PIXEL_RGB:
            fprintf(stderr, "RGB");
            break;
        case ncnn::Mat::PIXEL_BGR:
            fprintf(stderr, "BGR");
            break;
        case ncnn::Mat::PIXEL_GRAY:
            fprintf(stderr, "GRAY");
            break;
        case ncnn::Mat::PIXEL_RGBA:
            fprintf(stderr, "RGBA");
            break;
        case ncnn::Mat::PIXEL_BGRA:
            fprintf(stderr, "BGRA");
            break;
        default:
            // unrecognised code: nothing is written for it
            break;
        }
    }
}

static void show_usage()
{
fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
@@ -1523,8 +1635,6 @@ int main(int argc, char** argv)
const char* key = kv;
char* value = eqs + 1;

fprintf(stderr, "%s = %s\n", key, value);

// load mean norm shape
if (memcmp(key, "mean", 4) == 0)
net.means = parse_comma_float_array_list(value);
@@ -1573,6 +1683,25 @@ int main(int argc, char** argv)
return -1;
}

// print quantnet config
{
fprintf(stderr, "mean = ");
print_float_array_list(net.means);
fprintf(stderr, "\n");
fprintf(stderr, "norm = ");
print_float_array_list(net.norms);
fprintf(stderr, "\n");
fprintf(stderr, "shape = ");
print_int_array_list(net.shapes);
fprintf(stderr, "\n");
fprintf(stderr, "pixel = ");
print_pixel_type_list(net.type_to_pixels);
fprintf(stderr, "\n");
fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
fprintf(stderr, "method = %s\n", method.c_str());
fprintf(stderr, "---------------------------------------\n");
}

if (method == "kl")
{
net.quantize_KL();


Loading…
Cancel
Save