From 4e4f0baa73e3700807ede6bb7e171af513a620ad Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 23 Jul 2020 14:20:54 +0800
Subject: [PATCH] set openmp blocktime 20 for reducing power consumption,
 blocktime option

---
 src/cpu.cpp    | 20 +++++++++++++++++++-
 src/cpu.h      |  3 +++
 src/net.cpp    | 11 +++++++++++
 src/option.cpp |  2 ++
 src/option.h   |  5 +++++
 5 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/cpu.cpp b/src/cpu.cpp
index a0f522de1..f4e84eba4 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -602,11 +602,29 @@ void set_omp_dynamic(int dynamic)
 
 int get_omp_thread_num()
 {
-#if _OPENMP
+#ifdef _OPENMP
     return omp_get_thread_num();
 #else
     return 0;
 #endif
 }
 
+int get_kmp_blocktime() // current kmp blocktime, 0 when the kmp api is not compiled in
+{
+#if defined(_OPENMP) && defined(__clang__) // kmp_* calls are only compiled for clang openmp builds
+    return kmp_get_blocktime();
+#else
+    return 0; // no kmp api in this build
+#endif
+}
+
+void set_kmp_blocktime(int time_ms) // forward time_ms to the kmp runtime, no-op when the kmp api is not compiled in
+{
+#if defined(_OPENMP) && defined(__clang__) // kmp_* calls are only compiled for clang openmp builds
+    kmp_set_blocktime(time_ms);
+#else
+    (void)time_ms; // silence the unused parameter warning
+#endif
+}
+
 } // namespace ncnn
diff --git a/src/cpu.h b/src/cpu.h
index 5e10c4e47..8c8c977b0 100644
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -59,6 +59,9 @@ void set_omp_dynamic(int dynamic);
 
 int get_omp_thread_num();
 
+int get_kmp_blocktime(); // returns kmp_get_blocktime() on clang openmp builds, 0 otherwise
+void set_kmp_blocktime(int time_ms); // calls kmp_set_blocktime(time_ms) on clang openmp builds, no-op otherwise
+
 } // namespace ncnn
 
 #endif // NCNN_CPU_H
diff --git a/src/net.cpp b/src/net.cpp
index a664549c8..efaae8d63 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -16,6 +16,7 @@
 
 #include "convolution.h"
 #include "convolutiondepthwise.h"
+#include "cpu.h"
 #include "datareader.h"
 #include "layer_type.h"
 #include "modelbin.h"
@@ -2556,6 +2557,9 @@ int Extractor::extract(int blob_index, Mat& feat)
     if (blob_index < 0 || blob_index >= (int)blob_mats.size())
         return -1;
 
+    int old_blocktime = get_kmp_blocktime();
+    set_kmp_blocktime(opt.openmp_blocktime);
+
     int ret = 0;
 
     if (blob_mats[blob_index].dims == 0)
@@ -2660,6 +2664,8 @@ int Extractor::extract(int blob_index, Mat& feat)
         feat = bottom_blob_unpacked;
     }
 
+    set_kmp_blocktime(old_blocktime);
+
     return ret;
 }
 
@@ -2751,6 +2757,9 @@ int Extractor::extract(int blob_index, VkImageMat& feat, VkCompute& cmd)
     if (blob_index < 0 || blob_index >= (int)blob_mats.size())
         return -1;
 
+    int old_blocktime = get_kmp_blocktime();
+    set_kmp_blocktime(opt.openmp_blocktime);
+
     int ret = 0;
 
     if (blob_mats_gpu_image[blob_index].dims == 0)
@@ -2767,6 +2776,8 @@ int Extractor::extract(int blob_index, VkImageMat& feat, VkCompute& cmd)
 
     feat = blob_mats_gpu_image[blob_index];
 
+    set_kmp_blocktime(old_blocktime);
+
     return ret;
 }
 #endif // NCNN_VULKAN
diff --git a/src/option.cpp b/src/option.cpp
index 9cfa643a6..6d23a3974 100644
--- a/src/option.cpp
+++ b/src/option.cpp
@@ -32,6 +32,8 @@ Option::Option()
     pipeline_cache = 0;
 #endif // NCNN_VULKAN
 
+    openmp_blocktime = 20;
+
     use_winograd_convolution = true;
     use_sgemm_convolution = true;
     use_int8_inference = true;
diff --git a/src/option.h b/src/option.h
index 3e82080d6..9e72df4f8 100644
--- a/src/option.h
+++ b/src/option.h
@@ -61,6 +61,11 @@ public:
     PipelineCache* pipeline_cache;
 #endif // NCNN_VULKAN
 
+    // time in milliseconds that openmp threads busy-wait for more work before going to sleep
+    // the default value of 20ms keeps the cores enabled for a short while
+    // without consuming too much extra power once the work is done
+    int openmp_blocktime;
+
     // enable winograd convolution optimization
     // improve convolution 3x3 stride1 performace, may consume more memory
     // changes should be applied before loading network structure and weight