From 60d4ff09d00632ef15b54f1f6cef7bfd54e78e59 Mon Sep 17 00:00:00 2001 From: yehao <527282401@qq.com> Date: Sun, 27 Oct 2019 21:51:38 +0800 Subject: [PATCH] add mobilenetv3-ssd (#1335) * update * Update mobilenetv3ssdlite.cpp * Update mobilenetv3ssdlite.cpp * Update mobilenetv3ssdlite.cpp --- examples/mobilenetv3ssdlite.cpp | 178 ++++++++++++++++++++++++++++++++ src/layer/priorbox.cpp | 11 ++ src/layer/priorbox.h | 2 + 3 files changed, 191 insertions(+) create mode 100644 examples/mobilenetv3ssdlite.cpp diff --git a/examples/mobilenetv3ssdlite.cpp b/examples/mobilenetv3ssdlite.cpp new file mode 100644 index 000000000..5a9e3c173 --- /dev/null +++ b/examples/mobilenetv3ssdlite.cpp @@ -0,0 +1,178 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include +#include +#include +#include +#include + +#include "platform.h" +#include "net.h" +#if NCNN_VULKAN +#include "gpu.h" +#endif // NCNN_VULKAN + +template +const T& clamp(const T&v, const T& lo, const T& hi) +{ + assert(!(hi < lo)); + return v < lo ? lo : hi < v ? hi : v; +} + +struct Object +{ + cv::Rect_ rect; + int label; + float prob; +}; + +static int detect_mobilenetv3(const cv::Mat& bgr, std::vector& objects) +{ + ncnn::Net mobilenetv3; + +#if NCNN_VULKAN + mobilenetv3.opt.use_vulkan_compute = true; +#endif // NCNN_VULKAN + + // converted ncnn model from https://github.com/ujsyehao/mobilenetv3-ssd + mobilenetv3.load_param("./mobilenetv3_ssdlite_voc.param"); + mobilenetv3.load_model("./mobilenetv3_ssdlite_voc.bin"); + + const int target_size = 300; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_size, target_size); + + const float mean_vals[3] = {123.675f, 116.28f, 103.53f}; + const float norm_vals[3] = {1.0f, 1.0f, 1.0f}; + in.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = mobilenetv3.create_extractor(); + ex.set_light_mode(true); + ex.set_num_threads(4); + + ex.input("input", in); + + ncnn::Mat out; + ex.extract("detection_out",out); + +// printf("%d %d %d\n", out.w, out.h, out.c); + objects.clear(); + for (int i=0; i& objects) +{ + static const char* class_names[] = {"background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor"}; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < objects.size(); i++) + { + if (objects[i].prob > 0.6) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + +#if NCNN_VULKAN + ncnn::create_gpu_instance(); +#endif // NCNN_VULKAN + + std::vector objects; + detect_mobilenetv3(m, objects); + +#if NCNN_VULKAN + ncnn::destroy_gpu_instance(); +#endif // NCNN_VULKAN + + draw_objects(m, objects); + + return 0; +} diff --git a/src/layer/priorbox.cpp b/src/layer/priorbox.cpp index a104186ea..858785cc5 100644 --- a/src/layer/priorbox.cpp +++ b/src/layer/priorbox.cpp @@ -42,6 +42,8 @@ int PriorBox::load_param(const ParamDict& pd) step_width = pd.get(11, -233.f); step_height = pd.get(12, -233.f); offset = pd.get(13, 0.f); + step_mmdetection = pd.get(14, 0); + center_mmdetection = pd.get(15, 0); return 0; } @@ -137,8 +139,12 @@ int PriorBox::forward(const std::vector& bottom_blobs, std::vector& to float step_h = step_height; if (step_w == -233) step_w = (float)image_w / w; + if (step_mmdetection) + step_w = ceil((float)image_w / w); if (step_h == -233) step_h = (float)image_h / h; + if (step_mmdetection) + step_h = ceil((float)image_h / h); int num_min_size = min_sizes.w; int num_max_size = max_sizes.w; @@ -160,6 +166,11 @@ int PriorBox::forward(const std::vector& bottom_blobs, std::vector& to float center_x = offset * step_w; float center_y = offset * step_h + i * step_h; + if (center_mmdetection) + { + center_x = offset * (step_w - 1); + center_y = offset * (step_h - 1) + i * step_h; + } for (int j = 0; j < w; j++) { diff --git a/src/layer/priorbox.h b/src/layer/priorbox.h index 249a65f25..5e734e1b4 100644 --- a/src/layer/priorbox.h +++ b/src/layer/priorbox.h @@ -40,6 +40,8 @@ public: float step_width; float step_height; float offset; + bool step_mmdetection; + bool center_mmdetection; }; } // namespace ncnn