From 71bc617a05f073d8b623dcc95e88eb5e1eb16aa5 Mon Sep 17 00:00:00 2001
From: nihui
Date: Sat, 29 May 2021 21:43:29 +0800
Subject: [PATCH] better ncnn2table multithreading, print parsed parameters, print progress

---
 tools/quantize/ncnn2table.cpp | 145 ++++++++++++++++++++++++++++++++--
 1 file changed, 137 insertions(+), 8 deletions(-)

diff --git a/tools/quantize/ncnn2table.cpp b/tools/quantize/ncnn2table.cpp
index 00d6897fd..77807fc2c 100644
--- a/tools/quantize/ncnn2table.cpp
+++ b/tools/quantize/ncnn2table.cpp
@@ -224,6 +224,9 @@ int QuantNet::quantize_KL()
 
     const int num_histogram_bins = 2048;
 
+    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
+    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
+
     // initialize conv weight scales
     #pragma omp parallel for num_threads(quantize_num_threads)
     for (int i = 0; i < conv_layer_count; i++)
@@ -323,11 +326,20 @@ int QuantNet::quantize_KL()
     }
 
     // count the absmax
-    #pragma omp parallel for num_threads(quantize_num_threads)
+    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
     for (int i = 0; i < image_count; i++)
     {
+        if (i % 100 == 0)
+        {
+            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
+        }
+
         ncnn::Extractor ex = create_extractor();
 
+        const int thread_num = ncnn::get_omp_thread_num();
+        ex.set_blob_allocator(&blob_allocators[thread_num]);
+        ex.set_workspace_allocator(&workspace_allocators[thread_num]);
+
         for (int j = 0; j < input_blob_count; j++)
         {
             const std::string& imagepath = listspaths[j][i];
@@ -393,11 +405,20 @@ int QuantNet::quantize_KL()
     }
 
     // build histogram
-    #pragma omp parallel for num_threads(quantize_num_threads)
+    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
     for (int i = 0; i < image_count; i++)
     {
+        if (i % 100 == 0)
+        {
+            fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
+        }
+
         ncnn::Extractor ex = create_extractor();
 
+        const int thread_num = ncnn::get_omp_thread_num();
+        ex.set_blob_allocator(&blob_allocators[thread_num]);
+        ex.set_workspace_allocator(&workspace_allocators[thread_num]);
+
         for (int j = 0; j < input_blob_count; j++)
         {
             const std::string& imagepath = listspaths[j][i];
@@ -675,6 +696,9 @@ int QuantNet::quantize_ACIQ()
     const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
     const int image_count = (int)listspaths[0].size();
 
+    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
+    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
+
     // initialize conv weight scales
     #pragma omp parallel for num_threads(quantize_num_threads)
     for (int i = 0; i < conv_layer_count; i++)
@@ -777,12 +801,21 @@ int QuantNet::quantize_ACIQ()
         }
     }
 
-    // count the absmax abssum
-    #pragma omp parallel for num_threads(quantize_num_threads)
+    // count the absmax
+    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
     for (int i = 0; i < image_count; i++)
     {
+        if (i % 100 == 0)
+        {
+            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
+        }
+
         ncnn::Extractor ex = create_extractor();
 
+        const int thread_num = ncnn::get_omp_thread_num();
+        ex.set_blob_allocator(&blob_allocators[thread_num]);
+        ex.set_workspace_allocator(&workspace_allocators[thread_num]);
+
         for (int j = 0; j < input_blob_count; j++)
         {
             const std::string& imagepath = listspaths[j][i];
@@ -991,6 +1024,9 @@ int QuantNet::quantize_EQ()
     const int conv_layer_count = (int)conv_layers.size();
     const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
 
+    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
+    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
+
     // max 50 images for EQ
     const int image_count = std::min((int)listspaths[0].size(), 50);
 
@@ -1015,11 +1051,20 @@ int QuantNet::quantize_EQ()
 
             std::vector<double> avgsims(search_steps, 0.0);
 
-            #pragma omp parallel for num_threads(quantize_num_threads)
+            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
             for (int ii = 0; ii < image_count; ii++)
             {
+                if (ii % 100 == 0)
+                {
+                    fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
+                }
+
                 ncnn::Extractor ex = create_extractor();
 
+                const int thread_num = ncnn::get_omp_thread_num();
+                ex.set_blob_allocator(&blob_allocators[thread_num]);
+                ex.set_workspace_allocator(&workspace_allocators[thread_num]);
+
                 for (int jj = 0; jj < input_blob_count; jj++)
                 {
                     const std::string& imagepath = listspaths[jj][ii];
@@ -1121,11 +1166,20 @@ int QuantNet::quantize_EQ()
 
             std::vector<double> avgsims(search_steps, 0.0);
 
-            #pragma omp parallel for num_threads(quantize_num_threads)
+            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
             for (int ii = 0; ii < image_count; ii++)
             {
+                if (ii % 100 == 0)
+                {
+                    fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
+                }
+
                 ncnn::Extractor ex = create_extractor();
 
+                const int thread_num = ncnn::get_omp_thread_num();
+                ex.set_blob_allocator(&blob_allocators[thread_num]);
+                ex.set_workspace_allocator(&workspace_allocators[thread_num]);
+
                 for (int jj = 0; jj < input_blob_count; jj++)
                 {
                     const std::string& imagepath = listspaths[jj][ii];
@@ -1454,6 +1508,64 @@ static std::vector<int> parse_comma_pixel_type_list(char* s)
     return aps;
 }
 
+static void print_float_array_list(const std::vector<std::vector<float> >& list)
+{
+    for (size_t i = 0; i < list.size(); i++)
+    {
+        const std::vector<float>& array = list[i];
+        fprintf(stderr, "[");
+        for (size_t j = 0; j < array.size(); j++)
+        {
+            fprintf(stderr, "%f", array[j]);
+            if (j != array.size() - 1)
+                fprintf(stderr, ",");
+        }
+        fprintf(stderr, "]");
+        if (i != list.size() - 1)
+            fprintf(stderr, ",");
+    }
+}
+
+static void print_int_array_list(const std::vector<std::vector<int> >& list)
+{
+    for (size_t i = 0; i < list.size(); i++)
+    {
+        const std::vector<int>& array = list[i];
+        fprintf(stderr, "[");
+        for (size_t j = 0; j < array.size(); j++)
+        {
+            fprintf(stderr, "%d", array[j]);
+            if (j != array.size() - 1)
+                fprintf(stderr, ",");
+        }
+        fprintf(stderr, "]");
+        if (i != list.size() - 1)
+            fprintf(stderr, ",");
+    }
+}
+
+static void print_pixel_type_list(const std::vector<int>& list)
+{
+    for (size_t i = 0; i < list.size(); i++)
+    {
+        const int type = list[i];
+        if (type == -233)
+            fprintf(stderr, "RAW");
+        if (type == ncnn::Mat::PIXEL_RGB)
+            fprintf(stderr, "RGB");
+        if (type == ncnn::Mat::PIXEL_BGR)
+            fprintf(stderr, "BGR");
+        if (type == ncnn::Mat::PIXEL_GRAY)
+            fprintf(stderr, "GRAY");
+        if (type == ncnn::Mat::PIXEL_RGBA)
+            fprintf(stderr, "RGBA");
+        if (type == ncnn::Mat::PIXEL_BGRA)
+            fprintf(stderr, "BGRA");
+        if (i != list.size() - 1)
+            fprintf(stderr, ",");
+    }
+}
+
 static void show_usage()
 {
     fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
@@ -1523,8 +1635,6 @@ int main(int argc, char** argv)
         const char* key = kv;
         char* value = eqs + 1;
 
-        fprintf(stderr, "%s = %s\n", key, value);
-
         // load mean norm shape
         if (memcmp(key, "mean", 4) == 0)
             net.means = parse_comma_float_array_list(value);
@@ -1573,6 +1683,25 @@ int main(int argc, char** argv)
         return -1;
     }
 
+    // print quantnet config
+    {
+        fprintf(stderr, "mean = ");
+        print_float_array_list(net.means);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "norm = ");
+        print_float_array_list(net.norms);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "shape = ");
+        print_int_array_list(net.shapes);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "pixel = ");
+        print_pixel_type_list(net.type_to_pixels);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
+        fprintf(stderr, "method = %s\n", method.c_str());
+        fprintf(stderr, "---------------------------------------\n");
+    }
+
     if (method == "kl")
     {
         net.quantize_KL();