better ncnn2table multithreading, print parsed parameters, print progress

5 years ago · 71bc617a05
--- a/tools/quantize/ncnn2table.cpp
+++ b/tools/quantize/ncnn2table.cpp
@@ -224,6 +224,9 @@ int QuantNet::quantize_KL()

    const int num_histogram_bins = 2048;

    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

    // initialize conv weight scales
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_layer_count; i++)
@@ -323,11 +326,20 @@ int QuantNet::quantize_KL()
    }

    // count the absmax
    #pragma omp parallel for num_threads(quantize_num_threads)
    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
        if (i % 100 == 0)
        {
            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
        }

        ncnn::Extractor ex = create_extractor();

        const int thread_num = ncnn::get_omp_thread_num();
        ex.set_blob_allocator(&blob_allocators[thread_num]);
        ex.set_workspace_allocator(&workspace_allocators[thread_num]);

        for (int j = 0; j < input_blob_count; j++)
        {
            const std::string& imagepath = listspaths[j][i];
@@ -393,11 +405,20 @@ int QuantNet::quantize_KL()
    }

    // build histogram
    #pragma omp parallel for num_threads(quantize_num_threads)
    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
        if (i % 100 == 0)
        {
            fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
        }

        ncnn::Extractor ex = create_extractor();

        const int thread_num = ncnn::get_omp_thread_num();
        ex.set_blob_allocator(&blob_allocators[thread_num]);
        ex.set_workspace_allocator(&workspace_allocators[thread_num]);

        for (int j = 0; j < input_blob_count; j++)
        {
            const std::string& imagepath = listspaths[j][i];
@@ -675,6 +696,9 @@ int QuantNet::quantize_ACIQ()
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
    const int image_count = (int)listspaths[0].size();

    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

    // initialize conv weight scales
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_layer_count; i++)
@@ -777,12 +801,21 @@ int QuantNet::quantize_ACIQ()
        }
    }

    // count the absmax abssum
    #pragma omp parallel for num_threads(quantize_num_threads)
    // count the absmax
    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
        if (i % 100 == 0)
        {
            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
        }

        ncnn::Extractor ex = create_extractor();

        const int thread_num = ncnn::get_omp_thread_num();
        ex.set_blob_allocator(&blob_allocators[thread_num]);
        ex.set_workspace_allocator(&workspace_allocators[thread_num]);

        for (int j = 0; j < input_blob_count; j++)
        {
            const std::string& imagepath = listspaths[j][i];
@@ -991,6 +1024,9 @@ int QuantNet::quantize_EQ()
    const int conv_layer_count = (int)conv_layers.size();
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();

    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

    // max 50 images for EQ
    const int image_count = std::min((int)listspaths[0].size(), 50);

@@ -1015,11 +1051,20 @@ int QuantNet::quantize_EQ()

            std::vector<double> avgsims(search_steps, 0.0);

            #pragma omp parallel for num_threads(quantize_num_threads)
            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
            for (int ii = 0; ii < image_count; ii++)
            {
                if (ii % 100 == 0)
                {
                    fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
                }

                ncnn::Extractor ex = create_extractor();

                const int thread_num = ncnn::get_omp_thread_num();
                ex.set_blob_allocator(&blob_allocators[thread_num]);
                ex.set_workspace_allocator(&workspace_allocators[thread_num]);

                for (int jj = 0; jj < input_blob_count; jj++)
                {
                    const std::string& imagepath = listspaths[jj][ii];
@@ -1121,11 +1166,20 @@ int QuantNet::quantize_EQ()

            std::vector<double> avgsims(search_steps, 0.0);

            #pragma omp parallel for num_threads(quantize_num_threads)
            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
            for (int ii = 0; ii < image_count; ii++)
            {
                if (ii % 100 == 0)
                {
                    fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
                }

                ncnn::Extractor ex = create_extractor();

                const int thread_num = ncnn::get_omp_thread_num();
                ex.set_blob_allocator(&blob_allocators[thread_num]);
                ex.set_workspace_allocator(&workspace_allocators[thread_num]);

                for (int jj = 0; jj < input_blob_count; jj++)
                {
                    const std::string& imagepath = listspaths[jj][ii];
@@ -1454,6 +1508,64 @@ static std::vector<int> parse_comma_pixel_type_list(char* s)
    return aps;
 }

 // Write a list of float arrays to stderr on a single line, without a
 // trailing newline. Each inner array is rendered as "[v0,v1,...]" using the
 // "%f" conversion, and consecutive arrays are joined by a single comma.
 // An empty inner array is rendered as "[]"; an empty list prints nothing.
 static void print_float_array_list(const std::vector<std::vector<float> >& list)
 {
    const size_t array_count = list.size();
    for (size_t a = 0; a < array_count; a++)
    {
        // the separator is emitted in front of every array except the first
        if (a > 0)
            fprintf(stderr, ",");

        const std::vector<float>& values = list[a];
        const size_t value_count = values.size();

        fprintf(stderr, "[");
        for (size_t v = 0; v < value_count; v++)
        {
            // same rule inside an array: comma before all but the first value
            if (v > 0)
                fprintf(stderr, ",");
            fprintf(stderr, "%f", values[v]);
        }
        fprintf(stderr, "]");
    }
 }

 // Write a list of int arrays to stderr on a single line, without a trailing
 // newline. Each inner array is rendered as "[v0,v1,...]" using the "%d"
 // conversion, and consecutive arrays are joined by a single comma.
 // An empty inner array is rendered as "[]"; an empty list prints nothing.
 static void print_int_array_list(const std::vector<std::vector<int> >& list)
 {
    const size_t array_count = list.size();
    for (size_t a = 0; a < array_count; a++)
    {
        // the separator is emitted in front of every array except the first
        if (a > 0)
            fprintf(stderr, ",");

        const std::vector<int>& values = list[a];
        const size_t value_count = values.size();

        fprintf(stderr, "[");
        for (size_t v = 0; v < value_count; v++)
        {
            // same rule inside an array: comma before all but the first value
            if (v > 0)
                fprintf(stderr, ",");
            fprintf(stderr, "%d", values[v]);
        }
        fprintf(stderr, "]");
    }
 }

 // Write the pixel-type codes in `list` to stderr as comma-separated labels
 // on a single line, without a trailing newline. The code -233 is labelled
 // "RAW"; the ncnn::Mat::PIXEL_* constants are labelled RGB, BGR, GRAY, RGBA
 // and BGRA. A code that matches no table entry contributes no label, but
 // still takes part in the comma separation.
 static void print_pixel_type_list(const std::vector<int>& list)
 {
    struct PixelTypeLabel
    {
        int value;
        const char* label;
    };

    // probed top to bottom for every element; each entry is tested
    // independently, so every matching entry prints its label
    static const PixelTypeLabel known_types[] = {
        {-233, "RAW"},
        {ncnn::Mat::PIXEL_RGB, "RGB"},
        {ncnn::Mat::PIXEL_BGR, "BGR"},
        {ncnn::Mat::PIXEL_GRAY, "GRAY"},
        {ncnn::Mat::PIXEL_RGBA, "RGBA"},
        {ncnn::Mat::PIXEL_BGRA, "BGRA"},
    };
    const size_t known_count = sizeof(known_types) / sizeof(known_types[0]);

    const size_t type_count = list.size();
    for (size_t n = 0; n < type_count; n++)
    {
        // the separator is emitted in front of every element except the first
        if (n > 0)
            fprintf(stderr, ",");

        const int type = list[n];
        for (size_t k = 0; k < known_count; k++)
        {
            if (type == known_types[k].value)
                fputs(known_types[k].label, stderr);
        }
    }
 }

 static void show_usage()
 {
    fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
@@ -1523,8 +1635,6 @@ int main(int argc, char** argv)
        const char* key = kv;
        char* value = eqs + 1;

        fprintf(stderr, "%s = %s\n", key, value);

        // load mean norm shape
        if (memcmp(key, "mean", 4) == 0)
            net.means = parse_comma_float_array_list(value);
@@ -1573,6 +1683,25 @@ int main(int argc, char** argv)
        return -1;
    }

    // print quantnet config
    {
        fprintf(stderr, "mean = ");
        print_float_array_list(net.means);
        fprintf(stderr, "\n");
        fprintf(stderr, "norm = ");
        print_float_array_list(net.norms);
        fprintf(stderr, "\n");
        fprintf(stderr, "shape = ");
        print_int_array_list(net.shapes);
        fprintf(stderr, "\n");
        fprintf(stderr, "pixel = ");
        print_pixel_type_list(net.type_to_pixels);
        fprintf(stderr, "\n");
        fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
        fprintf(stderr, "method = %s\n", method.c_str());
        fprintf(stderr, "---------------------------------------\n");
    }

    if (method == "kl")
    {
        net.quantize_KL();