From daa18abb191e5ae6cae8d22096fb9768ed056d34 Mon Sep 17 00:00:00 2001 From: DC Technology <412200533@qq.com> Date: Tue, 22 Jul 2025 01:49:44 +0800 Subject: [PATCH 01/11] add threadUtillTools --- .gitignore | 1 + CMakeLists.txt | 1 + src/CMakeLists.txt | 12 ++++ src/TheadInfo.cpp | 68 ++++++++++++++++++++ src/TheadInfo.h | 29 +++++++++ src/cpu.cpp | 34 ++++++++++ src/cpu.h | 5 ++ src/layer.h | 5 ++ src/layer/absval.cpp | 46 ++++++++++++++ src/layer/absval.h | 1 + src/layer/batchnorm.h | 2 + src/platform.h.in | 1 + src/thread.cpp | 143 ++++++++++++++++++++++++++++++++++++++++++ src/thread.h | 39 ++++++++++++ 14 files changed, 387 insertions(+) create mode 100644 src/TheadInfo.cpp create mode 100644 src/TheadInfo.h create mode 100644 src/thread.cpp create mode 100644 src/thread.h diff --git a/.gitignore b/.gitignore index cd69c526f..97e44879b 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,4 @@ python/setup.py # Xmake .xmake/ +CMakePresets.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 800bf47ca..9a86c8ca5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ option(NCNN_PYTHON "build python api" OFF) option(NCNN_INT8 "int8 inference" ON) option(NCNN_BF16 "bf16 inference" ON) option(NCNN_FORCE_INLINE "force inline some function" ON) +option(NCNN_MUTITHREAD "enable multi thread bata" ON) if(ANDROID OR IOS OR NCNN_SIMPLESTL) option(NCNN_DISABLE_RTTI "disable rtti" ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 261221104..57f1fbf42 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,6 +48,13 @@ if(ANDROID) list(APPEND ncnn_SRCS mat_pixel_android.cpp) endif() +if(NCNN_MUTITHREAD) + list(APPEND ncnn_SRCS thread.cpp) + if(WIN32) + list(APPEND ncnn_SRCS ThreadInfo.cpp) + endif() +endif() + ncnn_src_group(ncnn_SRCS "sources") include_directories("${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}") @@ -266,6 +273,11 @@ if(NCNN_THREADS) target_link_libraries(ncnn PUBLIC pthread) endif() endif() +if(NCNN_MUTITHREAD) + if(NOT WIN32 AND (NOT NCNN_SIMPLEOMP) AND (NOT NCNN_SIMPLESTL)) + target_link_libraries(ncnn PUBLIC -pthread) + endif() +endif() if(NCNN_VULKAN) if(NCNN_SIMPLEVK) diff --git a/src/TheadInfo.cpp b/src/TheadInfo.cpp new file mode 100644 index 000000000..01b3c9b92 --- /dev/null +++ b/src/TheadInfo.cpp @@ -0,0 +1,68 @@ +#ifdef NCNN_MUTITHREAD +#ifdef _WIN32 + +#include "TheadInfo.h" +namespace ncnn +{ + +// 初始化静态成员 +ThreadInfo* ThreadInfo::thread_info = nullptr; + +ThreadInfo::ThreadInfo(/* args */) +{ + int groupCount = GetActiveProcessorGroupCount(); + for (WORD group = 0; group < groupCount; group++) { + DWORD processorsInGroup = GetActiveProcessorCount(group); + for (int i = 0; i < static_cast(processorsInGroup); i++) { + CoreInfo info; + info.group = group; + info.id = i + core_infos.size(); + info.affinity = (static_cast(1) << i); + core_infos.push_back(info); + } + } +} + +ThreadInfo* ThreadInfo::get() +{ + static Mutex lock; + AutoLock guard(lock); + + if (!thread_info) + { + thread_info = new ThreadInfo(); + } + return thread_info; +} + +CoreInfo ThreadInfo::getCurrentCore() +{ + // 获取当前线程运行的CPU核心(支持多处理器组) + DWORD_PTR process_affinity, system_affinity; + GetProcessAffinityMask(GetCurrentProcess(), &process_affinity, &system_affinity); + + // 使用扩展API获取处理器组信息 + PROCESSOR_NUMBER proc_num; + GetCurrentProcessorNumberEx(&proc_num); + + for (const auto& core : core_infos) + { + // 匹配组号和组内核心编号 + if (core.group == proc_num.Group && (core.affinity & (1ULL << proc_num.Number))) + { + return core; + } + } + + // 未找到时返回默认值 + return { -1, -1, 0 }; +} + +void ThreadInfo::getAllCore(std::vector& out) +{ + out = core_infos; +} +} + +#endif +#endif diff --git a/src/TheadInfo.h b/src/TheadInfo.h new file mode 100644 index 000000000..6dd0669bf --- /dev/null +++ b/src/TheadInfo.h @@ -0,0 +1,29 @@ +#ifndef THREAD_INFO_H +#define THREAD_INFO_H +#ifdef NCNN_MUTITHREAD +#if defined _WIN32 +#include "cpu.h" +namespace ncnn +{ +struct CoreInfo{ + public: + int id; + int group; + DWORD_PTR affinity; +}; +class TheadInfo +{ +private: + static ThreadInfo* thread_info; + std::vector core_infos; + TheadInfo(/* args */); +public: + static ThreadInfo* get(); + CoreInfo getCurrentCore(); + void getAllCore(std::vector& out); +}; +} + +#endif +#endif +#endif \ No newline at end of file diff --git a/src/cpu.cpp b/src/cpu.cpp index 9f91812b9..021aa5a1f 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -3266,4 +3266,38 @@ int set_flush_denormals(int flush_denormals) #endif } +int get_multi_thread_batch(){ +#if defined(_NCNN_MUTITHREAD) + #if defined _WIN32 + DWORD length = 0; + GetLogicalProcessorInformation(NULL, &length); + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + return 0; + + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length); + + int count = 0; + if (GetLogicalProcessorInformation(buffer, &length)) + { + DWORD offset = 0; + while (offset < length) + { + if (buffer->Relationship == RelationProcessorCore) + count++; + + offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + buffer++; + } + } + free(buffer); + return count; + #else + return get_cpu_count(); + #endif +#else + return get_cpu_count(); +#endif +} + } // namespace ncnn diff --git a/src/cpu.h b/src/cpu.h index cbf417111..7ffd2f6fc 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -8,6 +8,7 @@ #if defined _WIN32 #define WIN32_LEAN_AND_MEAN +#define _WIN32_WINNT 0x0601 // Windows 7+ #include #endif #if defined __ANDROID__ || defined __linux__ @@ -172,6 +173,10 @@ NCNN_EXPORT void set_kmp_blocktime(int time_ms); NCNN_EXPORT int get_flush_denormals(); NCNN_EXPORT int set_flush_denormals(int flush_denormals); + +// multi thread batch inference +NCNN_EXPORT int get_multi_thread_batch(); + } // namespace ncnn #endif // NCNN_CPU_H diff --git a/src/layer.h b/src/layer.h index 5351de1c0..c65656d12 100644 --- a/src/layer.h +++ b/src/layer.h @@ -94,6 +94,10 @@ public: // return 0 if success virtual int forward_inplace(std::vector& bottom_top_blobs, const Option& opt) const; virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + /// @brief mutithread work function + /// @param workspace thread infomation + /// @return 0 if success + virtual int forward_thread(void* workspace); #if NCNN_VULKAN public: @@ -139,6 +143,7 @@ public: // layer factory function typedef Layer* (*layer_creator_func)(void*); typedef void (*layer_destroyer_func)(Layer*, void*); +typedef int (*layer_work_func)(Layer*,void*); struct layer_registry_entry { diff --git a/src/layer/absval.cpp b/src/layer/absval.cpp index 2f38d3520..2d1cea4c5 100644 --- a/src/layer/absval.cpp +++ b/src/layer/absval.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: BSD-3-Clause #include "absval.h" +#include "thread.h" namespace ncnn { @@ -17,6 +18,15 @@ int AbsVal::forward_inplace(Mat& bottom_top_blob, const Option& opt) const int h = bottom_top_blob.h; int channels = bottom_top_blob.c; int size = w * h; + if(opt.num_threads > 64){ + ThreadWorkspace workspace; + workspace.layer = (Layer*)this; + MutilThread thread(workspace,opt); + std::vector workspace_blobs; + workspace_blobs.push_back(bottom_top_blob); + thread.join(workspace_blobs); + return 0; + } #pragma omp parallel for num_threads(opt.num_threads) for (int q = 0; q < channels; q++) @@ -33,4 +43,40 @@ int AbsVal::forward_inplace(Mat& bottom_top_blob, const Option& opt) const return 0; } +int AbsVal::forward_thread(void* workspace) +{ + ThreadInfoExc* info = (ThreadInfoExc*)workspace; + Mat& bottom_top_blob = info->mats->at(0); + if(bottom_top_blob.elemsize==1){ + int8_t* ptr = (int8_t*)bottom_top_blob.data; + const int8_t flag = 1<<7; + for (size_t i = info->start_index; i < info->end_index; i++) + { + if(ptr[i]&flag){ + ptr[i] = -ptr[i]; + } + } + }else if (bottom_top_blob.elemsize==2) + { + int16_t* ptr = (int16_t*)bottom_top_blob.data; + const int16_t flag = 1<<15; + for (size_t i = info->start_index; i < info->end_index; i++) + { + if(ptr[i]&flag){ + ptr[i] = -ptr[i]; + } + } + }else{ + float* ptr = (float*)bottom_top_blob.data; + for (size_t i = info->start_index; i < info->end_index; i++) + { + if(ptr[i]<0){ + ptr[i] = -ptr[i]; + } + } + } + + return 0; +} + } // namespace ncnn diff --git a/src/layer/absval.h b/src/layer/absval.h index deb9540d0..619cfeb64 100644 --- a/src/layer/absval.h +++ b/src/layer/absval.h @@ -14,6 +14,7 @@ public: AbsVal(); virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + virtual int forward_thread(void* workspace); }; } // namespace ncnn diff --git a/src/layer/batchnorm.h b/src/layer/batchnorm.h index 6043d0e41..0deedba46 100644 --- a/src/layer/batchnorm.h +++ b/src/layer/batchnorm.h @@ -19,6 +19,8 @@ public: virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + virtual int forward_thread(void* workspace); + public: // param int channels; diff --git a/src/platform.h.in b/src/platform.h.in index 8b7357eec..79a79db04 100644 --- a/src/platform.h.in +++ b/src/platform.h.in @@ -57,6 +57,7 @@ #cmakedefine01 NCNN_INT8 #cmakedefine01 NCNN_BF16 #cmakedefine01 NCNN_FORCE_INLINE +#cmakedefine01 NCNN_MUTITHREAD #cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@" diff --git a/src/thread.cpp b/src/thread.cpp new file mode 100644 index 000000000..8da515902 --- /dev/null +++ b/src/thread.cpp @@ -0,0 +1,143 @@ +#include "thread.h" +#include "cpu.h" + +#if defined _WIN32 +DWORD WINAPI winWorker(LPVOID lpParam) +{ + ncnn::ThreadInfoExc* info = (ncnn::ThreadInfoExc*)lpParam; + if (info->coreinfo->group >= 0 && info->coreinfo->affinity != 0) { + GROUP_AFFINITY groupAffinity; + ZeroMemory(&groupAffinity, sizeof(groupAffinity)); + groupAffinity.Group = static_cast(info->coreinfo->group); + groupAffinity.Mask = info->coreinfo->affinity; + + return SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL) != 0; + } + info->workspace->layer->forward_thread(info); + info->manager->threadsComplete[info->threadid] = true; + delete info; + return 0; +} +#else +void* pthreadWorker(void* lpParam) +{ + ncnn::ThreadInfoExc* info = (ncnn::ThreadInfoExc*)lpParam; + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(info->threadid, &cpuset); + + // 绑定到指定核心 + pthread_t current_thread = pthread_self(); + pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); + info->workspace->layer->forward_thread(info); + info->manager->threadsComplete[info->threadid] = true; + delete info; + return nullptr; +} +#endif +namespace ncnn +{ +MutilThread::MutilThread(ThreadWorkspace _workspace, const Option& opt) +{ + workspace = _workspace; + m_opt = opt; + threadsComplete.resize(opt.num_threads); + for(int i=0;i& mats) +{ + #if defined _WIN32 + Mat mat = mats[0]; + CoreInfo cur = TheadInfo::get()->getCurrentCore(); + std::vector cores; + TheadInfo::get()->getAllCore(cores); + std::vector handles; + ThreadInfoExc* curinfo = nullptr; + size_t workersize = ((mat.w*mat.h*mat.d)/m_opt.num_threads +1)*mat.c*mat.elemsize; + size_t matlen = mats.size(); + for(int i=0;ithreadid = i; + info->start_index = i*workersize; + info->end_index = (i+1)*workersize; + if(info->end_index>matlen){ + info->end_index = matlen; + } + info->workspace = &workspace; + info->mats = &mats; + info->opt = &m_opt; + info->coreinfo = &cores[i]; + threadsComplete[i] = false; + info->manager = this; + if(cur.id==cores[i].id){ + helpid = i; + threadsComplete[i] = true; + handles.push_back(nullptr); + curinfo = info; + continue; + } + handles.push_back(CreateThread(nullptr,0,winWorker,info,0,nullptr)); + } + workspace.layer->forward_inplace(curinfo); + delete curinfo; + bool check = true; + do{ + check = false; + for(int i=0;i pthread_handles; + ThreadInfoExc* curinfo = nullptr; + size_t workersize = ((mat.w*mat.h*mat.d)/m_opt.num_threads +1)*mat.c*mat.elemsize; + size_t matlen = mats.size(); + for(int i=0;ithreadid = i; + info->start_index = i*workersize; + info->end_index = (i+1)*workersize; + if(info->end_index>matlen){ + info->end_index = matlen; + } + info->workspace = &workspace; + info->mats = &mats; + info->opt = &m_opt; + threadsComplete[i] = false; + info->manager = this; + if(cur.id==cores[i].id){ + helpid = i; + threadsComplete[i] = true; + curinfo = info; + continue; + } + pthread_handles.push_back(pthread_create(&pthread_handles[i], nullptr, pthreadWorker, info)); + } + workspace.layer->forward_inplace(curinfo); + delete curinfo; + for (size_t i = 0; i < pthread_handles.size(); i++) + { + pthread_join(pthread_handles[i], nullptr); + } + #endif +} +} // namespace ncnn \ No newline at end of file diff --git a/src/thread.h b/src/thread.h new file mode 100644 index 000000000..7e6a43773 --- /dev/null +++ b/src/thread.h @@ -0,0 +1,39 @@ +#ifndef THREAD_H +#define THREAD_H +#include "layer.h" +#include "TheadInfo.h" +#if defined __ANDROID__ || defined __linux__ || defined __APPLE__ +#include +#endif +namespace ncnn +{ + struct ThreadInfoExc{ + int threadid; + size_t start_index; + size_t end_index; + ThreadWorkspace* workspace; + std::vector* mats; + Option* opt; + MutilThread* manager; + #if defined _WIN32 + CoreInfo* coreinfo; + #endif + }; + struct ThreadWorkspace{ + Layer* layer; + }; + class MutilThread + { + private: + Option m_opt; + volatile int helpid; + ThreadWorkspace workspace; + public: + MutilThread(ThreadWorkspace _workspace,const Option& opt); + void join(std::vector& mats); + std::vector threadsComplete; + ~MutilThread(); + }; + +} // namespace ncnn +#endif From 9c5280034a386385e5c4373506099d7f5d4bd35a Mon Sep 17 00:00:00 2001 From: DaChengTechnology <12637177+DaChengTechnology@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:12:26 +0000 Subject: [PATCH 02/11] apply code-format changes --- src/TheadInfo.cpp | 21 +++++++------ src/TheadInfo.h | 9 +++--- src/cpu.cpp | 18 +++++------ src/cpu.h | 1 - src/layer.h | 2 +- src/layer/absval.cpp | 30 +++++++++++------- src/thread.cpp | 73 +++++++++++++++++++++++++------------------- src/thread.h | 60 ++++++++++++++++++------------------ 8 files changed, 118 insertions(+), 96 deletions(-) diff --git a/src/TheadInfo.cpp b/src/TheadInfo.cpp index 01b3c9b92..f49f6eb8a 100644 --- a/src/TheadInfo.cpp +++ b/src/TheadInfo.cpp @@ -2,8 +2,7 @@ #ifdef _WIN32 #include "TheadInfo.h" -namespace ncnn -{ +namespace ncnn { // 初始化静态成员 ThreadInfo* ThreadInfo::thread_info = nullptr; @@ -11,9 +10,11 @@ ThreadInfo* ThreadInfo::thread_info = nullptr; ThreadInfo::ThreadInfo(/* args */) { int groupCount = GetActiveProcessorGroupCount(); - for (WORD group = 0; group < groupCount; group++) { + for (WORD group = 0; group < groupCount; group++) + { DWORD processorsInGroup = GetActiveProcessorCount(group); - for (int i = 0; i < static_cast(processorsInGroup); i++) { + for (int i = 0; i < static_cast(processorsInGroup); i++) + { CoreInfo info; info.group = group; info.id = i + core_infos.size(); @@ -27,7 +28,7 @@ ThreadInfo* ThreadInfo::get() { static Mutex lock; AutoLock guard(lock); - + if (!thread_info) { thread_info = new ThreadInfo(); @@ -40,11 +41,11 @@ CoreInfo ThreadInfo::getCurrentCore() // 获取当前线程运行的CPU核心(支持多处理器组) DWORD_PTR process_affinity, system_affinity; GetProcessAffinityMask(GetCurrentProcess(), &process_affinity, &system_affinity); - + // 使用扩展API获取处理器组信息 PROCESSOR_NUMBER proc_num; GetCurrentProcessorNumberEx(&proc_num); - + for (const auto& core : core_infos) { // 匹配组号和组内核心编号 @@ -53,16 +54,16 @@ CoreInfo ThreadInfo::getCurrentCore() return core; } } - + // 未找到时返回默认值 - return { -1, -1, 0 }; + return {-1, -1, 0}; } void ThreadInfo::getAllCore(std::vector& out) { out = core_infos; } -} +} // namespace ncnn #endif #endif diff --git a/src/TheadInfo.h b/src/TheadInfo.h index 6dd0669bf..25f1b74c3 100644 --- a/src/TheadInfo.h +++ b/src/TheadInfo.h @@ -3,10 +3,10 @@ #ifdef NCNN_MUTITHREAD #if defined _WIN32 #include "cpu.h" -namespace ncnn +namespace ncnn { +struct CoreInfo { -struct CoreInfo{ - public: +public: int id; int group; DWORD_PTR affinity; @@ -17,12 +17,13 @@ private: static ThreadInfo* thread_info; std::vector core_infos; TheadInfo(/* args */); + public: static ThreadInfo* get(); CoreInfo getCurrentCore(); void getAllCore(std::vector& out); }; -} +} // namespace ncnn #endif #endif diff --git a/src/cpu.cpp b/src/cpu.cpp index 021aa5a1f..022981259 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -3266,17 +3266,17 @@ int set_flush_denormals(int flush_denormals) #endif } -int get_multi_thread_batch(){ +int get_multi_thread_batch() +{ #if defined(_NCNN_MUTITHREAD) - #if defined _WIN32 - DWORD length = 0; +#if defined _WIN32 + DWORD length = 0; GetLogicalProcessorInformation(NULL, &length); if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) return 0; - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = - (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length); - + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length); + int count = 0; if (GetLogicalProcessorInformation(buffer, &length)) { @@ -3285,16 +3285,16 @@ int get_multi_thread_batch(){ { if (buffer->Relationship == RelationProcessorCore) count++; - + offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); buffer++; } } free(buffer); return count; - #else +#else return get_cpu_count(); - #endif +#endif #else return get_cpu_count(); #endif diff --git a/src/cpu.h b/src/cpu.h index 7ffd2f6fc..cf0f8e87e 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -173,7 +173,6 @@ NCNN_EXPORT void set_kmp_blocktime(int time_ms); NCNN_EXPORT int get_flush_denormals(); NCNN_EXPORT int set_flush_denormals(int flush_denormals); - // multi thread batch inference NCNN_EXPORT int get_multi_thread_batch(); diff --git a/src/layer.h b/src/layer.h index c65656d12..5bfd58742 100644 --- a/src/layer.h +++ b/src/layer.h @@ -143,7 +143,7 @@ public: // layer factory function typedef Layer* (*layer_creator_func)(void*); typedef void (*layer_destroyer_func)(Layer*, void*); -typedef int (*layer_work_func)(Layer*,void*); +typedef int (*layer_work_func)(Layer*, void*); struct layer_registry_entry { diff --git a/src/layer/absval.cpp b/src/layer/absval.cpp index 2d1cea4c5..61e355dda 100644 --- a/src/layer/absval.cpp +++ b/src/layer/absval.cpp @@ -18,10 +18,11 @@ int AbsVal::forward_inplace(Mat& bottom_top_blob, const Option& opt) const int h = bottom_top_blob.h; int channels = bottom_top_blob.c; int size = w * h; - if(opt.num_threads > 64){ + if (opt.num_threads > 64) + { ThreadWorkspace workspace; workspace.layer = (Layer*)this; - MutilThread thread(workspace,opt); + MutilThread thread(workspace, opt); std::vector workspace_blobs; workspace_blobs.push_back(bottom_top_blob); thread.join(workspace_blobs); @@ -47,35 +48,42 @@ int AbsVal::forward_thread(void* workspace) { ThreadInfoExc* info = (ThreadInfoExc*)workspace; Mat& bottom_top_blob = info->mats->at(0); - if(bottom_top_blob.elemsize==1){ + if (bottom_top_blob.elemsize == 1) + { int8_t* ptr = (int8_t*)bottom_top_blob.data; - const int8_t flag = 1<<7; + const int8_t flag = 1 << 7; for (size_t i = info->start_index; i < info->end_index; i++) { - if(ptr[i]&flag){ + if (ptr[i] & flag) + { ptr[i] = -ptr[i]; } } - }else if (bottom_top_blob.elemsize==2) + } + else if (bottom_top_blob.elemsize == 2) { int16_t* ptr = (int16_t*)bottom_top_blob.data; - const int16_t flag = 1<<15; + const int16_t flag = 1 << 15; for (size_t i = info->start_index; i < info->end_index; i++) { - if(ptr[i]&flag){ + if (ptr[i] & flag) + { ptr[i] = -ptr[i]; } } - }else{ + } + else + { float* ptr = (float*)bottom_top_blob.data; for (size_t i = info->start_index; i < info->end_index; i++) { - if(ptr[i]<0){ + if (ptr[i] < 0) + { ptr[i] = -ptr[i]; } } } - + return 0; } diff --git a/src/thread.cpp b/src/thread.cpp index 8da515902..587c67666 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -5,15 +5,16 @@ DWORD WINAPI winWorker(LPVOID lpParam) { ncnn::ThreadInfoExc* info = (ncnn::ThreadInfoExc*)lpParam; - if (info->coreinfo->group >= 0 && info->coreinfo->affinity != 0) { + if (info->coreinfo->group >= 0 && info->coreinfo->affinity != 0) + { GROUP_AFFINITY groupAffinity; ZeroMemory(&groupAffinity, sizeof(groupAffinity)); groupAffinity.Group = static_cast(info->coreinfo->group); groupAffinity.Mask = info->coreinfo->affinity; - + return SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL) != 0; } - info->workspace->layer->forward_thread(info); + info->workspace->layer->forward_thread(info); info->manager->threadsComplete[info->threadid] = true; delete info; return 0; @@ -25,24 +26,24 @@ void* pthreadWorker(void* lpParam) cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(info->threadid, &cpuset); - + // 绑定到指定核心 pthread_t current_thread = pthread_self(); pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); - info->workspace->layer->forward_thread(info); + info->workspace->layer->forward_thread(info); info->manager->threadsComplete[info->threadid] = true; delete info; return nullptr; } #endif -namespace ncnn -{ +namespace ncnn { MutilThread::MutilThread(ThreadWorkspace _workspace, const Option& opt) { workspace = _workspace; m_opt = opt; threadsComplete.resize(opt.num_threads); - for(int i=0;i& mats) { - #if defined _WIN32 +#if defined _WIN32 Mat mat = mats[0]; CoreInfo cur = TheadInfo::get()->getCurrentCore(); std::vector cores; TheadInfo::get()->getAllCore(cores); std::vector handles; ThreadInfoExc* curinfo = nullptr; - size_t workersize = ((mat.w*mat.h*mat.d)/m_opt.num_threads +1)*mat.c*mat.elemsize; + size_t workersize = ((mat.w * mat.h * mat.d) / m_opt.num_threads + 1) * mat.c * mat.elemsize; size_t matlen = mats.size(); - for(int i=0;ithreadid = i; - info->start_index = i*workersize; - info->end_index = (i+1)*workersize; - if(info->end_index>matlen){ + info->start_index = i * workersize; + info->end_index = (i + 1) * workersize; + if (info->end_index > matlen) + { info->end_index = matlen; } info->workspace = &workspace; @@ -78,45 +81,52 @@ void MutilThread::join(std::vector& mats) info->coreinfo = &cores[i]; threadsComplete[i] = false; info->manager = this; - if(cur.id==cores[i].id){ + if (cur.id == cores[i].id) + { helpid = i; threadsComplete[i] = true; handles.push_back(nullptr); curinfo = info; continue; } - handles.push_back(CreateThread(nullptr,0,winWorker,info,0,nullptr)); + handles.push_back(CreateThread(nullptr, 0, winWorker, info, 0, nullptr)); } workspace.layer->forward_inplace(curinfo); delete curinfo; bool check = true; - do{ + do + { check = false; - for(int i=0;i pthread_handles; ThreadInfoExc* curinfo = nullptr; - size_t workersize = ((mat.w*mat.h*mat.d)/m_opt.num_threads +1)*mat.c*mat.elemsize; + size_t workersize = ((mat.w * mat.h * mat.d) / m_opt.num_threads + 1) * mat.c * mat.elemsize; size_t matlen = mats.size(); - for(int i=0;ithreadid = i; - info->start_index = i*workersize; - info->end_index = (i+1)*workersize; - if(info->end_index>matlen){ + info->start_index = i * workersize; + info->end_index = (i + 1) * workersize; + if (info->end_index > matlen) + { info->end_index = matlen; } info->workspace = &workspace; @@ -124,7 +134,8 @@ void MutilThread::join(std::vector& mats) info->opt = &m_opt; threadsComplete[i] = false; info->manager = this; - if(cur.id==cores[i].id){ + if (cur.id == cores[i].id) + { helpid = i; threadsComplete[i] = true; curinfo = info; @@ -138,6 +149,6 @@ void MutilThread::join(std::vector& mats) { pthread_join(pthread_handles[i], nullptr); } - #endif +#endif } } // namespace ncnn \ No newline at end of file diff --git a/src/thread.h b/src/thread.h index 7e6a43773..163555cfe 100644 --- a/src/thread.h +++ b/src/thread.h @@ -5,35 +5,37 @@ #if defined __ANDROID__ || defined __linux__ || defined __APPLE__ #include #endif -namespace ncnn +namespace ncnn { +struct ThreadInfoExc { - struct ThreadInfoExc{ - int threadid; - size_t start_index; - size_t end_index; - ThreadWorkspace* workspace; - std::vector* mats; - Option* opt; - MutilThread* manager; - #if defined _WIN32 - CoreInfo* coreinfo; - #endif - }; - struct ThreadWorkspace{ - Layer* layer; - }; - class MutilThread - { - private: - Option m_opt; - volatile int helpid; - ThreadWorkspace workspace; - public: - MutilThread(ThreadWorkspace _workspace,const Option& opt); - void join(std::vector& mats); - std::vector threadsComplete; - ~MutilThread(); - }; - + int threadid; + size_t start_index; + size_t end_index; + ThreadWorkspace* workspace; + std::vector* mats; + Option* opt; + MutilThread* manager; +#if defined _WIN32 + CoreInfo* coreinfo; +#endif +}; +struct ThreadWorkspace +{ + Layer* layer; +}; +class MutilThread +{ +private: + Option m_opt; + volatile int helpid; + ThreadWorkspace workspace; + +public: + MutilThread(ThreadWorkspace _workspace, const Option& opt); + void join(std::vector& mats); + std::vector threadsComplete; + ~MutilThread(); +}; + } // namespace ncnn #endif From 2dc940de7fade4cfc5f7aba6d8784b4784080855 Mon Sep 17 00:00:00 2001 From: DC Technology <412200533@qq.com> Date: Tue, 22 Jul 2025 02:13:36 +0800 Subject: [PATCH 03/11] fixt hread.h --- src/thread.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/thread.h b/src/thread.h index 7e6a43773..62f0c1c44 100644 --- a/src/thread.h +++ b/src/thread.h @@ -7,6 +7,9 @@ #endif namespace ncnn { + struct ThreadWorkspace{ + Layer* layer; + }; struct ThreadInfoExc{ int threadid; size_t start_index; @@ -19,9 +22,6 @@ namespace ncnn CoreInfo* coreinfo; #endif }; - struct ThreadWorkspace{ - Layer* layer; - }; class MutilThread { private: From 79a768dbdd64804307a11d7bc487c6d0c5faea23 Mon Sep 17 00:00:00 2001 From: DC Technology <412200533@qq.com> Date: Tue, 22 Jul 2025 02:17:47 +0800 Subject: [PATCH 04/11] fix --- src/thread.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/thread.h b/src/thread.h index 788e86c05..bb97cb7d6 100644 --- a/src/thread.h +++ b/src/thread.h @@ -6,8 +6,6 @@ #include #endif namespace ncnn { -struct ThreadInfoExc -{ struct ThreadWorkspace{ Layer* layer; }; From 6d98cc39ce86118ee899171fca6f4349dc8f6e73 Mon Sep 17 00:00:00 2001 From: DaChengTechnology <12637177+DaChengTechnology@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:19:57 +0000 Subject: [PATCH 05/11] apply code-format changes --- src/thread.h | 59 +++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/thread.h b/src/thread.h index bb97cb7d6..6e6fc30a2 100644 --- a/src/thread.h +++ b/src/thread.h @@ -6,33 +6,36 @@ #include #endif namespace ncnn { - struct ThreadWorkspace{ - Layer* layer; - }; - struct ThreadInfoExc{ - int threadid; - size_t start_index; - size_t end_index; - ThreadWorkspace* workspace; - std::vector* mats; - Option* opt; - MutilThread* manager; - #if defined _WIN32 - CoreInfo* coreinfo; - #endif - }; - class MutilThread - { - private: - Option m_opt; - volatile int helpid; - ThreadWorkspace workspace; - public: - MutilThread(ThreadWorkspace _workspace,const Option& opt); - void join(std::vector& mats); - std::vector threadsComplete; - ~MutilThread(); - }; - +struct ThreadWorkspace +{ + Layer* layer; +}; +struct ThreadInfoExc +{ + int threadid; + size_t start_index; + size_t end_index; + ThreadWorkspace* workspace; + std::vector* mats; + Option* opt; + MutilThread* manager; +#if defined _WIN32 + CoreInfo* coreinfo; +#endif +}; +class MutilThread +{ +private: + Option m_opt; + volatile int helpid; + ThreadWorkspace workspace; + +public: + MutilThread(ThreadWorkspace _workspace, const Option& opt); + void join(std::vector& mats); + std::vector threadsComplete; + ~MutilThread(); +}; + } // namespace ncnn #endif From 1d2bf3ca5939c214f0d49e0738919bcbf066a87c Mon Sep 17 00:00:00 2001 From: DC Technology <412200533@qq.com> Date: Tue, 22 Jul 2025 13:14:30 +0800 Subject: [PATCH 06/11] add define --- src/thread.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/thread.h b/src/thread.h index bb97cb7d6..cf97ed6fe 100644 --- a/src/thread.h +++ b/src/thread.h @@ -6,6 +6,7 @@ #include #endif namespace ncnn { + class MutilThread; struct ThreadWorkspace{ Layer* layer; }; From dd710680457f83399ee11762f634aa01b7de4fde Mon Sep 17 00:00:00 2001 From: DaChengTechnology <12637177+DaChengTechnology@users.noreply.github.com> Date: Tue, 22 Jul 2025 05:29:44 +0000 Subject: [PATCH 07/11] apply code-format changes --- src/thread.h | 61 +++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/src/thread.h b/src/thread.h index cf97ed6fe..9f12c71d4 100644 --- a/src/thread.h +++ b/src/thread.h @@ -6,34 +6,37 @@ #include #endif namespace ncnn { - class MutilThread; - struct ThreadWorkspace{ - Layer* layer; - }; - struct ThreadInfoExc{ - int threadid; - size_t start_index; - size_t end_index; - ThreadWorkspace* workspace; - std::vector* mats; - Option* opt; - MutilThread* manager; - #if defined _WIN32 - CoreInfo* coreinfo; - #endif - }; - class MutilThread - { - private: - Option m_opt; - volatile int helpid; - ThreadWorkspace workspace; - public: - MutilThread(ThreadWorkspace _workspace,const Option& opt); - void join(std::vector& mats); - std::vector threadsComplete; - ~MutilThread(); - }; - +class MutilThread; +struct ThreadWorkspace +{ + Layer* layer; +}; +struct ThreadInfoExc +{ + int threadid; + size_t start_index; + size_t end_index; + ThreadWorkspace* workspace; + std::vector* mats; + Option* opt; + MutilThread* manager; +#if defined _WIN32 + CoreInfo* coreinfo; +#endif +}; +class MutilThread +{ +private: + Option m_opt; + volatile int helpid; + ThreadWorkspace workspace; + +public: + MutilThread(ThreadWorkspace _workspace, const Option& opt); + void join(std::vector& mats); + std::vector threadsComplete; + ~MutilThread(); +}; + } // namespace ncnn #endif From a88e5549d2face1bfaebd2820ad7a7fac0d255f1 Mon Sep 17 00:00:00 2001 From: DC Technology <412200533@qq.com> Date: Tue, 22 Jul 2025 16:10:05 +0800 Subject: [PATCH 08/11] surpport android and like linux class.not surport ios ,macos and hormonyOS --- src/thread.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/thread.cpp b/src/thread.cpp index 587c67666..1b7568f13 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -1,5 +1,9 @@ #include "thread.h" #include "cpu.h" +#if defined __ANDROID__ || defined __linux__ +#include +#endif + #if defined _WIN32 DWORD WINAPI winWorker(LPVOID lpParam) @@ -23,13 +27,14 @@ DWORD WINAPI winWorker(LPVOID lpParam) void* pthreadWorker(void* lpParam) { ncnn::ThreadInfoExc* info = (ncnn::ThreadInfoExc*)lpParam; + #if defined __ANDROID__ || defined __linux__ cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(info->threadid, &cpuset); - // 绑定到指定核心 pthread_t current_thread = pthread_self(); pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); + #endif info->workspace->layer->forward_thread(info); info->manager->threadsComplete[info->threadid] = true; delete info; @@ -115,6 +120,12 @@ void MutilThread::join(std::vector& mats) } handles.clear(); #else + Mat mat = mats[0]; + int curid = -1; + #if defined __ANDROID__ || defined __linux__ + curid = sched_getcpu(); + #endif + std::vector pthread_handles; ThreadInfoExc* curinfo = nullptr; size_t workersize = ((mat.w * mat.h * mat.d) / m_opt.num_threads + 1) * mat.c * mat.elemsize; @@ -134,7 +145,7 @@ void MutilThread::join(std::vector& mats) info->opt = &m_opt; threadsComplete[i] = false; info->manager = this; - if (cur.id == cores[i].id) + if (curid == cores[i].id && curid > 1) { helpid = i; threadsComplete[i] = true; From 8caa9c36f895a39134643d645231855665f5c316 Mon Sep 17 00:00:00 2001 From: DaChengTechnology <12637177+DaChengTechnology@users.noreply.github.com> Date: Tue, 22 Jul 2025 08:20:25 +0000 Subject: [PATCH 09/11] apply code-format changes --- src/thread.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/thread.cpp b/src/thread.cpp index 1b7568f13..62d60f13d 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -4,7 +4,6 @@ #include #endif - #if defined _WIN32 DWORD WINAPI winWorker(LPVOID lpParam) { @@ -27,14 +26,14 @@ DWORD WINAPI winWorker(LPVOID lpParam) void* pthreadWorker(void* lpParam) { ncnn::ThreadInfoExc* info = (ncnn::ThreadInfoExc*)lpParam; - #if defined __ANDROID__ || defined __linux__ +#if defined __ANDROID__ || defined __linux__ cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(info->threadid, &cpuset); // 绑定到指定核心 pthread_t current_thread = pthread_self(); pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); - #endif +#endif info->workspace->layer->forward_thread(info); info->manager->threadsComplete[info->threadid] = true; delete info; @@ -122,9 +121,9 @@ void MutilThread::join(std::vector& mats) #else Mat mat = mats[0]; int curid = -1; - #if defined __ANDROID__ || defined __linux__ +#if defined __ANDROID__ || defined __linux__ curid = sched_getcpu(); - #endif +#endif std::vector pthread_handles; ThreadInfoExc* curinfo = nullptr; From e695869d7512c0ec325ce5fd8d51b2ba3ae56b03 Mon Sep 17 00:00:00 2001 From: DC Technology <412200533@qq.com> Date: Tue, 29 Jul 2025 21:04:00 +0800 Subject: [PATCH 10/11] add test and fix build --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 2 +- src/TheadInfo.h | 4 +- src/cpu.cpp | 20 ++++++-- src/cpu.h | 1 + src/layer.cpp | 5 ++ src/thread.cpp | 12 ++--- tests/CMakeLists.txt | 1 + tests/test_thread.cpp | 112 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 146 insertions(+), 13 deletions(-) create mode 100644 tests/test_thread.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a86c8ca5..a4ed26be1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,7 +81,7 @@ option(NCNN_SIMPLEVK "minimal in-house vulkan loader" ON) option(NCNN_SYSTEM_GLSLANG "use system glslang library" OFF) option(NCNN_RUNTIME_CPU "runtime dispatch cpu routines" ON) option(NCNN_DISABLE_PIC "disable position-independent code" OFF) -option(NCNN_BUILD_TESTS "build tests" OFF) +option(NCNN_BUILD_TESTS "build tests" ON) option(NCNN_COVERAGE "build for coverage" OFF) option(NCNN_ASAN "build for address sanitizer" OFF) option(NCNN_BUILD_BENCHMARK "build benchmark" ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 57f1fbf42..a704c0b55 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -51,7 +51,7 @@ endif() if(NCNN_MUTITHREAD) list(APPEND ncnn_SRCS thread.cpp) if(WIN32) - list(APPEND ncnn_SRCS ThreadInfo.cpp) + list(APPEND ncnn_SRCS TheadInfo.cpp) endif() endif() diff --git a/src/TheadInfo.h b/src/TheadInfo.h index 25f1b74c3..7ab03b697 100644 --- a/src/TheadInfo.h +++ b/src/TheadInfo.h @@ -11,12 +11,12 @@ public: int group; DWORD_PTR affinity; }; -class TheadInfo +class ThreadInfo { private: static ThreadInfo* thread_info; std::vector core_infos; - TheadInfo(/* args */); + ThreadInfo(/* args */); public: static ThreadInfo* get(); diff --git a/src/cpu.cpp b/src/cpu.cpp index 022981259..93680f46b 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1424,12 +1424,21 @@ static std::vector get_max_freq_mhz() static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) { + #ifdef _WIN32 + GROUP_AFFINITY groupAffinity; + ZeroMemory(&groupAffinity, sizeof(groupAffinity)); + groupAffinity.Group = static_cast(thread_affinity_mask.cpu_group); + groupAffinity.Mask = thread_affinity_mask.mask; + + SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL); + #else DWORD_PTR prev_mask = SetThreadAffinityMask(GetCurrentThread(), thread_affinity_mask.mask); if (prev_mask == 0) { NCNN_LOGE("SetThreadAffinityMask failed %d", GetLastError()); return -1; } + #endif return 0; } @@ -2266,22 +2275,27 @@ CpuSet::CpuSet() void CpuSet::enable(int cpu) { - mask |= ((ULONG_PTR)1 << cpu); + cpu_group = cpu/64; + mask |= ((ULONG_PTR)1 << (cpu-cpu_group*64)); } void CpuSet::disable(int cpu) { - mask &= ~((ULONG_PTR)1 << cpu); + cpu_group = cpu/64; + mask &= ~((ULONG_PTR)1 << (cpu-cpu_group*64)); } void CpuSet::disable_all() { + cpu_group = 0; mask = 0; } bool CpuSet::is_enabled(int cpu) const { - return mask & ((ULONG_PTR)1 << cpu); + if (cpu_group != cpu/64) + return false; + return mask & ((ULONG_PTR)1 << (cpu-cpu_group*64)); } int CpuSet::num_enabled() const diff --git a/src/cpu.h b/src/cpu.h index cf0f8e87e..0c8761a53 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -31,6 +31,7 @@ public: public: #if defined _WIN32 + int cpu_group; ULONG_PTR mask; #endif #if defined __ANDROID__ || defined __linux__ diff --git a/src/layer.cpp b/src/layer.cpp index f1b849dad..4792c7231 100644 --- a/src/layer.cpp +++ b/src/layer.cpp @@ -98,6 +98,11 @@ int Layer::forward_inplace(Mat& /*bottom_top_blob*/, const Option& /*opt*/) cons return -1; } +int Layer::forward_thread(void* /*info*/) const +{ + return -1; +} + #if NCNN_VULKAN int Layer::upload_model(VkTransfer& /*cmd*/, const Option& /*opt*/) { diff --git a/src/thread.cpp b/src/thread.cpp index 1b7568f13..fa382713f 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -16,7 +16,7 @@ DWORD WINAPI winWorker(LPVOID lpParam) groupAffinity.Group = static_cast(info->coreinfo->group); groupAffinity.Mask = info->coreinfo->affinity; - return SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL) != 0; + SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL); } info->workspace->layer->forward_thread(info); info->manager->threadsComplete[info->threadid] = true; @@ -63,9 +63,9 @@ void MutilThread::join(std::vector& mats) { #if defined _WIN32 Mat mat = mats[0]; - CoreInfo cur = TheadInfo::get()->getCurrentCore(); + CoreInfo cur = ThreadInfo::get()->getCurrentCore(); std::vector cores; - TheadInfo::get()->getAllCore(cores); + ThreadInfo::get()->getAllCore(cores); std::vector handles; ThreadInfoExc* curinfo = nullptr; size_t workersize = ((mat.w * mat.h * mat.d) / m_opt.num_threads + 1) * mat.c * mat.elemsize; @@ -96,7 +96,7 @@ void MutilThread::join(std::vector& mats) } handles.push_back(CreateThread(nullptr, 0, winWorker, info, 0, nullptr)); } - workspace.layer->forward_inplace(curinfo); + workspace.layer->forward_thread(curinfo); delete curinfo; bool check = true; do @@ -145,7 +145,7 @@ void MutilThread::join(std::vector& mats) info->opt = &m_opt; threadsComplete[i] = false; info->manager = this; - if (curid == cores[i].id && curid > 1) + if (curid == cores[i].id && curid > -1) { helpid = i; threadsComplete[i] = true; @@ -154,7 +154,7 @@ void MutilThread::join(std::vector& mats) } pthread_handles.push_back(pthread_create(&pthread_handles[i], nullptr, pthreadWorker, info)); } - workspace.layer->forward_inplace(curinfo); + workspace.layer->forward_thread(curinfo); delete curinfo; for (size_t i = 0; i < pthread_handles.size(); i++) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9d5b6517e..25c92367c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -62,6 +62,7 @@ ncnn_add_test(c_api) ncnn_add_test(cpu) ncnn_add_test(expression) ncnn_add_test(paramdict) +ncnn_add_test(thread) if(NCNN_VULKAN) ncnn_add_test(command) diff --git a/tests/test_thread.cpp b/tests/test_thread.cpp new file mode 100644 index 000000000..9b96169df --- /dev/null +++ b/tests/test_thread.cpp @@ -0,0 +1,112 @@ +#include "testutil.h" +#include "thread.h" + +class TestLayer : public ncnn::Layer +{ +public: + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) + { + ThreadWorkspace workspace; + workspace.layer = (Layer*)this; + MutilThread thread(workspace, opt); + std::vector workspace_blobs; + workspace_blobs.push_back(bottom_top_blob); + thread.join(workspace_blobs); + return 0; + } + virtual int forward_thread(void* workspace) + { + ThreadInfoExc* info = (ThreadInfoExc*)workspace; + Mat& bottom_top_blob = info->mats->at(0); + if (bottom_top_blob.elemsize == 1) + { + int8_t* ptr = (int8_t*)bottom_top_blob.data; + const int8_t flag = 1 << 7; + for (size_t i = info->start_index; i < info->end_index; i++) + { + if (ptr[i] & flag) + { + ptr[i] = -ptr[i]; + } + } + } + else if (bottom_top_blob.elemsize == 2) + { + int16_t* ptr = (int16_t*)bottom_top_blob.data; + const int16_t flag = 1 << 15; + for (size_t i = info->start_index; i < info->end_index; i++) + { + if (ptr[i] & flag) + { + ptr[i] = -ptr[i]; + } + } + } + else + { + float* ptr = (float*)bottom_top_blob.data; + for (size_t i = info->start_index; i < info->end_index; i++) + { + if (ptr[i] < 0) + { + ptr[i] = -ptr[i]; + } + } + } + + return 0; + } +}; + +static int test_thread(const ncnn::Mat& a) +{ + ncnn::ParamDict pd; + + std::vector weights(0); + + int ret = test_layer("TestLayer", pd, weights, a); + if (ret != 0) + { + fprintf(stderr, "test_thread failed a.dims=%d a=(%d %d %d %d)\n", a.dims, a.w, a.h, a.d, a.c); + } + + return ret; +} + +static int test_thread_0(){ + return 0 + || test_thread(RandomMat(5,6,7,24)) + || test_thread(RandomMat(5,6,7,12)) + || test_thread(RandomMat(5,6,7,13)); + +} + +static int test_thread_1(){ + return 0 + || test_thread(RandomMat(5,7,24)) + || test_thread(RandomMat(5,6,24)) + || test_thread(RandomMat(7,9,24)); +} + +static int test_thread_2(){ + return 0 + || test_thread(RandomMat(7,12)) + || test_thread(RandomMat(5,12)) + || test_thread(RandomMat(9,12)); +} + +static int test_thread_3(){ + return 0 + || test_thread(RandomMat(7)) + || test_thread(RandomMat(128)) + || test_thread(RandomMat(256)); +} + +int main() +{ + return 0 + || test_thread_0() + || test_thread_1() + || test_thread_2() + || test_thread_3(); +} \ No newline at end of file From 75e90b47c451170902f6829a21ad4c7674f2b968 Mon Sep 17 00:00:00 2001 From: DaChengTechnology <12637177+DaChengTechnology@users.noreply.github.com> Date: Tue, 29 Jul 2025 13:21:50 +0000 Subject: [PATCH 11/11] apply code-format changes --- src/cpu.cpp | 30 +++++++++++++++--------------- src/thread.cpp | 2 +- tests/test_thread.cpp | 39 +++++++++++++++++++++------------------ 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 93680f46b..f623f3b1a 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1424,21 +1424,21 @@ static std::vector get_max_freq_mhz() static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) { - #ifdef _WIN32 - GROUP_AFFINITY groupAffinity; - ZeroMemory(&groupAffinity, sizeof(groupAffinity)); - groupAffinity.Group = static_cast(thread_affinity_mask.cpu_group); - groupAffinity.Mask = thread_affinity_mask.mask; - - SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL); - #else +#ifdef _WIN32 + GROUP_AFFINITY groupAffinity; + ZeroMemory(&groupAffinity, sizeof(groupAffinity)); + groupAffinity.Group = static_cast(thread_affinity_mask.cpu_group); + groupAffinity.Mask = thread_affinity_mask.mask; + + SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL); +#else DWORD_PTR prev_mask = SetThreadAffinityMask(GetCurrentThread(), thread_affinity_mask.mask); if (prev_mask == 0) { NCNN_LOGE("SetThreadAffinityMask failed %d", GetLastError()); return -1; } - #endif +#endif return 0; } @@ -2275,14 +2275,14 @@ CpuSet::CpuSet() void CpuSet::enable(int cpu) { - cpu_group = cpu/64; - mask |= ((ULONG_PTR)1 << (cpu-cpu_group*64)); + cpu_group = cpu / 64; + mask |= ((ULONG_PTR)1 << (cpu - cpu_group * 64)); } void CpuSet::disable(int cpu) { - cpu_group = cpu/64; - mask &= ~((ULONG_PTR)1 << (cpu-cpu_group*64)); + cpu_group = cpu / 64; + mask &= ~((ULONG_PTR)1 << (cpu - cpu_group * 64)); } void CpuSet::disable_all() @@ -2293,9 +2293,9 @@ void CpuSet::disable_all() bool CpuSet::is_enabled(int cpu) const { - if (cpu_group != cpu/64) + if (cpu_group != cpu / 64) return false; - return mask & ((ULONG_PTR)1 << (cpu-cpu_group*64)); + return mask & ((ULONG_PTR)1 << (cpu - cpu_group * 64)); } int CpuSet::num_enabled() const diff --git a/src/thread.cpp b/src/thread.cpp index e3df4ef64..779f46c87 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -64,7 +64,7 @@ void MutilThread::join(std::vector& mats) Mat mat = mats[0]; CoreInfo cur = ThreadInfo::get()->getCurrentCore(); std::vector cores; - ThreadInfo::get()->getAllCore(cores); + ThreadInfo::get()->getAllCore(cores); std::vector handles; ThreadInfoExc* curinfo = nullptr; size_t workersize = ((mat.w * mat.h * mat.d) / m_opt.num_threads + 1) * mat.c * mat.elemsize; diff --git a/tests/test_thread.cpp b/tests/test_thread.cpp index 9b96169df..883d1e56b 100644 --- a/tests/test_thread.cpp +++ b/tests/test_thread.cpp @@ -73,38 +73,41 @@ static int test_thread(const ncnn::Mat& a) return ret; } -static int test_thread_0(){ +static int test_thread_0() +{ return 0 - || test_thread(RandomMat(5,6,7,24)) - || test_thread(RandomMat(5,6,7,12)) - || test_thread(RandomMat(5,6,7,13)); - + || test_thread(RandomMat(5, 6, 7, 24)) + || test_thread(RandomMat(5, 6, 7, 12)) + || test_thread(RandomMat(5, 6, 7, 13)); } -static int test_thread_1(){ +static int test_thread_1() +{ return 0 - || test_thread(RandomMat(5,7,24)) - || test_thread(RandomMat(5,6,24)) - || test_thread(RandomMat(7,9,24)); + || test_thread(RandomMat(5, 7, 24)) + || test_thread(RandomMat(5, 6, 24)) + || test_thread(RandomMat(7, 9, 24)); } -static int test_thread_2(){ +static int test_thread_2() +{ return 0 - || test_thread(RandomMat(7,12)) - || test_thread(RandomMat(5,12)) - || test_thread(RandomMat(9,12)); + || test_thread(RandomMat(7, 12)) + || test_thread(RandomMat(5, 12)) + || test_thread(RandomMat(9, 12)); } -static int test_thread_3(){ +static int test_thread_3() +{ return 0 - || test_thread(RandomMat(7)) - || test_thread(RandomMat(128)) - || test_thread(RandomMat(256)); + || test_thread(RandomMat(7)) + || test_thread(RandomMat(128)) + || test_thread(RandomMat(256)); } int main() { - return 0 + return 0 || test_thread_0() || test_thread_1() || test_thread_2()