From 8fad97cc68610bd0f404efafcf2180c1bc120dcc Mon Sep 17 00:00:00 2001
From: Jimmy <jinmingyi1998@sina.cn>
Date: Thu, 24 Jun 2021 19:48:47 +0800
Subject: [PATCH] [ncnn2table]do not always resize image  (#3023)

* it's possible not to resize image
---
 .../quantized-int8-inference.md               |  7 ++
 tools/quantize/ncnn2table.cpp                 | 78 ++++++++++---------
 2 files changed, 49 insertions(+), 36 deletions(-)
diff --git a/docs/how-to-use-and-FAQ/quantized-int8-inference.md b/docs/how-to-use-and-FAQ/quantized-int8-inference.md
index b0e2dc454..cf8e05c20 100644
--- a/docs/how-to-use-and-FAQ/quantized-int8-inference.md
+++ b/docs/how-to-use-and-FAQ/quantized-int8-inference.md
@@ -25,6 +25,13 @@ find images/ -type f > imagelist.txt
 
 * mean and norm are the values you passed to ```Mat::substract_mean_normalize()```
 * shape is the blob shape of your model, [w,h] or [w,h,c]
+
+>
+    * if w and h are both given, the image will be resized to exactly that size.
+    * if w and h are both zero or negative, the image will not be resized.
+    * if only h is zero or negative, the image will be resized to width w, keeping the aspect ratio.
+    * if only w is zero or negative, the image will be resized to height h, keeping the aspect ratio.
+
 * pixel is the pixel format of your model, image pixels will be converted to this type before ```Extractor::input()```
 * thread is the CPU thread count that could be used for parallel inference
 * method is the post training quantization algorithm, kl and aciq are currently supported
diff --git a/tools/quantize/ncnn2table.cpp b/tools/quantize/ncnn2table.cpp
index c249fb6f4..54ad546c7 100644
--- a/tools/quantize/ncnn2table.cpp
+++ b/tools/quantize/ncnn2table.cpp
@@ -202,6 +202,42 @@ void QuantNet::print_quant_info() const
     }
 }
 
+/**
+ * Read an image and convert it to an ncnn::Mat, resizing it according to shape.
+ * shape is given as [w,h,...].
+ * If w and h are both positive, the image is resized to exactly w x h.
+ * If w and h are both zero or negative, the image is not resized.
+ * If only h is zero or negative, the image is resized to width w, keeping the aspect ratio.
+ * If only w is zero or negative, the image is resized to height h, keeping the aspect ratio.
+ * @return the converted ncnn::Mat, or an empty ncnn::Mat when the image cannot be read
+ */
+
+inline ncnn::Mat read_and_resize_image(const std::vector<int>& shape, const std::string& imagepath, int pixel_convert_type)
+{
+    cv::Mat bgr = cv::imread(imagepath, 1);
+    if (bgr.empty())
+    {
+        fprintf(stderr, "cv::imread %s failed\n", imagepath.c_str());
+        return ncnn::Mat();
+    }
+    int target_w = shape[0];
+    int target_h = shape[1];
+    if (target_h <= 0 && target_w <= 0)
+    {
+        return ncnn::Mat::from_pixels(bgr.data, pixel_convert_type, bgr.cols, bgr.rows);
+    }
+    // derive the missing side in double precision: a float scale turns e.g. 100x100 with w=300 into h=299
+    if (target_h <= 0)
+    {
+        target_h = int(1.0 * bgr.rows * target_w / bgr.cols);
+    }
+    else if (target_w <= 0)
+    {
+        target_w = int(1.0 * bgr.cols * target_h / bgr.rows);
+    }
+    return ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
+}
+
 static float compute_kl_divergence(const std::vector<float>& a, const std::vector<float>& b)
 {
     const size_t length = a.size();
@@ -342,8 +378,6 @@ int QuantNet::quantize_KL()
 
         for (int j = 0; j < input_blob_count; j++)
         {
-            const std::string& imagepath = listspaths[j][i];
-            const std::vector<int>& shape = shapes[j];
             const int type_to_pixel = type_to_pixels[j];
             const std::vector<float>& mean_vals = means[j];
             const std::vector<float>& norm_vals = norms[j];
@@ -353,12 +387,8 @@ int QuantNet::quantize_KL()
             {
                 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
             }
-            const int target_w = shape[0];
-            const int target_h = shape[1];
-
-            cv::Mat bgr = cv::imread(imagepath, 1);
 
-            ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
+            ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
 
             in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
 
@@ -421,8 +451,6 @@ int QuantNet::quantize_KL()
 
         for (int j = 0; j < input_blob_count; j++)
         {
-            const std::string& imagepath = listspaths[j][i];
-            const std::vector<int>& shape = shapes[j];
             const int type_to_pixel = type_to_pixels[j];
             const std::vector<float>& mean_vals = means[j];
             const std::vector<float>& norm_vals = norms[j];
@@ -432,12 +460,8 @@ int QuantNet::quantize_KL()
             {
                 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
             }
-            const int target_w = shape[0];
-            const int target_h = shape[1];
 
-            cv::Mat bgr = cv::imread(imagepath, 1);
-
-            ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
+            ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
 
             in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
 
@@ -818,8 +842,6 @@ int QuantNet::quantize_ACIQ()
 
         for (int j = 0; j < input_blob_count; j++)
         {
-            const std::string& imagepath = listspaths[j][i];
-            const std::vector<int>& shape = shapes[j];
             const int type_to_pixel = type_to_pixels[j];
             const std::vector<float>& mean_vals = means[j];
             const std::vector<float>& norm_vals = norms[j];
@@ -829,12 +851,8 @@ int QuantNet::quantize_ACIQ()
             {
                 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
             }
-            const int target_w = shape[0];
-            const int target_h = shape[1];
-
-            cv::Mat bgr = cv::imread(imagepath, 1);
 
-            ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
+            ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
 
             in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
 
@@ -1067,8 +1085,6 @@ int QuantNet::quantize_EQ()
 
                 for (int jj = 0; jj < input_blob_count; jj++)
                 {
-                    const std::string& imagepath = listspaths[jj][ii];
-                    const std::vector<int>& shape = shapes[jj];
                     const int type_to_pixel = type_to_pixels[jj];
                     const std::vector<float>& mean_vals = means[jj];
                     const std::vector<float>& norm_vals = norms[jj];
@@ -1078,12 +1094,8 @@ int QuantNet::quantize_EQ()
                     {
                         pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
                     }
-                    const int target_w = shape[0];
-                    const int target_h = shape[1];
 
-                    cv::Mat bgr = cv::imread(imagepath, 1);
-
-                    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
+                    ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
 
                     in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
 
@@ -1182,8 +1194,6 @@ int QuantNet::quantize_EQ()
 
                 for (int jj = 0; jj < input_blob_count; jj++)
                 {
-                    const std::string& imagepath = listspaths[jj][ii];
-                    const std::vector<int>& shape = shapes[jj];
                     const int type_to_pixel = type_to_pixels[jj];
                     const std::vector<float>& mean_vals = means[jj];
                     const std::vector<float>& norm_vals = norms[jj];
@@ -1193,12 +1203,8 @@ int QuantNet::quantize_EQ()
                     {
                         pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
                     }
-                    const int target_w = shape[0];
-                    const int target_h = shape[1];
-
-                    cv::Mat bgr = cv::imread(imagepath, 1);
 
-                    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
+                    ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
 
                     in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
 
@@ -1571,7 +1577,7 @@ static void show_usage()
     fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
     fprintf(stderr, "  mean=[104.0,117.0,123.0],...\n");
     fprintf(stderr, "  norm=[1.0,1.0,1.0],...\n");
-    fprintf(stderr, "  shape=[224,224,3],...[w,h,c] or [w,h]\n");
+    fprintf(stderr, "  shape=[224,224,3],...[w,h,c] or [w,h] **[0,0] will not resize\n");
     fprintf(stderr, "  pixel=RAW/RGB/BGR/GRAY/RGBA/BGRA,...\n");
     fprintf(stderr, "  thread=8\n");
     fprintf(stderr, "  method=kl/aciq/eq\n");