From e887f04d031c56d87bc37774d3aff76eeeacc5f3 Mon Sep 17 00:00:00 2001
From: Feng Wang <wangfeng19950315@163.com>
Date: Fri, 29 Oct 2021 21:19:02 +0800
Subject: [PATCH] update(example): preprocess logic of YOLOX (#3315)

---
 examples/yolox.cpp | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/examples/yolox.cpp b/examples/yolox.cpp
index 0d3dd4c75..d4707cf94 100644
--- a/examples/yolox.cpp
+++ b/examples/yolox.cpp
@@ -257,7 +257,9 @@ static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
     yolox.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator);
 
     // original pretrained model from https://github.com/Megvii-BaseDetection/YOLOX
-    // ncnn model param: https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_ncnn.tar.gz
+    // ncnn model param: https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_ncnn.tar.gz
+    // NOTE: the newest version of YOLOX removes normalization from the model (subtract mean, then divide by std),
+    // which might turn your model outputs into a total mess; please check carefully.
     yolox.load_param("yolox.param");
     yolox.load_model("yolox.bin");
 
@@ -279,7 +281,7 @@ static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
         h = YOLOX_TARGET_SIZE;
         w = w * scale;
     }
-    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);
+    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, w, h);
 
     // pad to YOLOX_TARGET_SIZE rectangle
     int wpad = YOLOX_TARGET_SIZE - w;
@@ -289,13 +291,6 @@ static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
     // which means users don't need to extra padding info to decode boxes coordinate.
     ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f);
 
-    // python 0-1 input tensor with rgb_means = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225)
-    // so for 0-255 input image, rgb_mean should multiply 255 and norm should div by std.
-    const float mean_vals[3] = {255.f * 0.485f, 255.f * 0.456, 255.f * 0.406f};
-    const float norm_vals[3] = {1 / (255.f * 0.229f), 1 / (255.f * 0.224f), 1 / (255.f * 0.225f)};
-
-    in_pad.substract_mean_normalize(mean_vals, norm_vals);
-
     ncnn::Extractor ex = yolox.create_extractor();
 
     ex.input("images", in_pad);