diff --git a/src/cpu.cpp b/src/cpu.cpp
index 1ceae2644..49320bf94 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -142,6 +142,71 @@ static cpu_type_t g_hw_cputype = get_hw_cputype();
 static cpu_subtype_t g_hw_cpusubtype = get_hw_cpusubtype();
 #endif // __IOS__
 
+#if defined __ANDROID__ || defined __linux__
+CpuSet::CpuSet()
+{
+    disable_all();
+}
+
+void CpuSet::enable(int cpu)
+{
+    CPU_SET(cpu, &cpu_set);
+}
+
+void CpuSet::disable(int cpu)
+{
+    CPU_CLR(cpu, &cpu_set);
+}
+
+void CpuSet::disable_all()
+{
+    CPU_ZERO(&cpu_set);
+}
+
+bool CpuSet::is_enabled(int cpu) const
+{
+    return CPU_ISSET(cpu, &cpu_set);
+}
+
+int CpuSet::num_enabled() const
+{
+    int num_enabled = 0;
+    for (int i = 0; i < (int)sizeof(cpu_set_t) * 8; i++)
+    {
+        if (is_enabled(i))
+            num_enabled++;
+    }
+
+    return num_enabled;
+}
+#else // defined __ANDROID__ || defined __linux__
+CpuSet::CpuSet()
+{
+}
+
+void CpuSet::enable(int /* cpu */)
+{
+}
+
+void CpuSet::disable(int /* cpu */)
+{
+}
+
+void CpuSet::disable_all()
+{
+}
+
+bool CpuSet::is_enabled(int /* cpu */) const
+{
+    return true;
+}
+
+int CpuSet::num_enabled() const
+{
+    return get_cpu_count();
+}
+#endif // defined __ANDROID__ || defined __linux__
+
 int cpu_support_arm_neon()
 {
 #if defined __ANDROID__ || defined __linux__
@@ -290,11 +355,6 @@ static int get_cpucount()
     if (count < 1)
         count = 1;
 
-    if (count > (int)sizeof(size_t) * 8)
-    {
-        NCNN_LOGE("more than %d cpu detected, thread affinity may not work properly :(", (int)sizeof(size_t) * 8);
-    }
-
     return count;
 }
 
@@ -377,23 +437,8 @@ static int get_max_freq_khz(int cpuid)
     return max_freq_khz;
 }
 
-static int set_sched_affinity(size_t thread_affinity_mask)
+static int set_sched_affinity(const CpuSet& thread_affinity_mask)
 {
-    // cpu_set_t definition
-    // ref http://stackoverflow.com/questions/16319725/android-set-thread-affinity
-#define NCNN_CPU_SETSIZE 1024
-#define __NCNN_NCPUBITS (8 * sizeof(unsigned long))
-    typedef struct
-    {
-        unsigned long __bits[NCNN_CPU_SETSIZE / __NCNN_NCPUBITS];
-    } cpu_set_t;
-
-#define NCNN_CPU_SET(cpu, cpusetp) \
-    ((cpusetp)->__bits[(cpu) / __NCNN_NCPUBITS] |= (1UL << ((cpu) % __NCNN_NCPUBITS)))
-
-#define NCNN_CPU_ZERO(cpusetp) \
-    memset((cpusetp), 0, sizeof(cpu_set_t))
-
     // set affinity for thread
 #ifdef __GLIBC__
     pid_t pid = syscall(SYS_gettid);
@@ -404,17 +449,8 @@ static int set_sched_affinity(size_t thread_affinity_mask)
     pid_t pid = gettid();
 #endif
 #endif
-    cpu_set_t mask;
-    NCNN_CPU_ZERO(&mask);
-    for (int i = 0; i < (int)sizeof(size_t) * 8; i++)
-    {
-        if (thread_affinity_mask & (1ul << i))
-        {
-            NCNN_CPU_SET(i, &mask);
-        }
-    }
 
-    int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
+    int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(cpu_set_t), &thread_affinity_mask.cpu_set);
     if (syscallret)
     {
         NCNN_LOGE("syscall error %d", syscallret);
@@ -440,7 +476,7 @@ int set_cpu_powersave(int powersave)
         return -1;
     }
 
-    size_t thread_affinity_mask = get_cpu_thread_affinity_mask(powersave);
+    const CpuSet& thread_affinity_mask = get_cpu_thread_affinity_mask(powersave);
 
     int ret = set_cpu_thread_affinity(thread_affinity_mask);
     if (ret != 0)
@@ -451,13 +487,18 @@ int set_cpu_powersave(int powersave)
     return 0;
 }
 
-static size_t g_thread_affinity_mask_all = 0;
-static size_t g_thread_affinity_mask_little = 0;
-static size_t g_thread_affinity_mask_big = 0;
+static CpuSet g_thread_affinity_mask_all;
+static CpuSet g_thread_affinity_mask_little;
+static CpuSet g_thread_affinity_mask_big;
 
 static int setup_thread_affinity_masks()
 {
-    g_thread_affinity_mask_all = (1ul << g_cpucount) - 1;
+    g_thread_affinity_mask_all.disable_all();
+    // the "all" mask must contain every detected core, an empty mask is rejected by sched_setaffinity
+    for (int i = 0; i < g_cpucount; i++)
+    {
+        g_thread_affinity_mask_all.enable(i);
+    }
 
 #if defined __ANDROID__ || defined __linux__
     int max_freq_khz_min = INT_MAX;
@@ -480,7 +521,7 @@ static int setup_thread_affinity_masks()
     int max_freq_khz_medium = (max_freq_khz_min + max_freq_khz_max) / 2;
     if (max_freq_khz_medium == max_freq_khz_max)
     {
-        g_thread_affinity_mask_little = 0;
+        g_thread_affinity_mask_little.disable_all();
         g_thread_affinity_mask_big = g_thread_affinity_mask_all;
         return 0;
     }
@@ -488,32 +529,32 @@ static int setup_thread_affinity_masks()
     for (int i = 0; i < g_cpucount; i++)
     {
         if (cpu_max_freq_khz[i] < max_freq_khz_medium)
-            g_thread_affinity_mask_little |= (1ul << i);
+            g_thread_affinity_mask_little.enable(i);
         else
-            g_thread_affinity_mask_big |= (1ul << i);
+            g_thread_affinity_mask_big.enable(i);
     }
 #else
     // TODO implement me for other platforms
-    g_thread_affinity_mask_little = 0;
+    g_thread_affinity_mask_little.disable_all();
     g_thread_affinity_mask_big = g_thread_affinity_mask_all;
 #endif
 
     return 0;
 }
 
-size_t get_cpu_thread_affinity_mask(int powersave)
+const CpuSet& get_cpu_thread_affinity_mask(int powersave)
 {
-    if (g_thread_affinity_mask_all == 0)
+    if (g_thread_affinity_mask_all.num_enabled() == 0)
     {
         setup_thread_affinity_masks();
     }
 
-    if (g_thread_affinity_mask_little == 0)
+    if (g_thread_affinity_mask_little.num_enabled() == 0)
     {
         // SMP cpu powersave not supported
         // fallback to all cores anyway
         return g_thread_affinity_mask_all;
     }
 
     if (powersave == 0)
         return g_thread_affinity_mask_all;
@@ -530,15 +571,10 @@ size_t get_cpu_thread_affinity_mask(int powersave)
     return g_thread_affinity_mask_all;
 }
 
-int set_cpu_thread_affinity(size_t thread_affinity_mask)
+int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask)
 {
 #if defined __ANDROID__ || defined __linux__
-    int num_threads = 0;
-    for (int i = 0; i < (int)sizeof(size_t) * 8; i++)
-    {
-        if (thread_affinity_mask & (1ul << i))
-            num_threads++;
-    }
+    int num_threads = thread_affinity_mask.num_enabled();
 
 #ifdef _OPENMP
     // set affinity for each thread
diff --git a/src/cpu.h b/src/cpu.h
index 8c8c977b0..9877dbf52 100644
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -17,8 +17,30 @@
 
 #include <stddef.h>
 
+#if defined __ANDROID__ || defined __linux__
+#include <sched.h> // cpu_set_t
+#endif
+
 namespace ncnn {
 
+// set of cpu core indices, used as a thread affinity mask
+// holds a cpu_set_t on android/linux, no storage and no-op setters elsewhere
+class CpuSet
+{
+public:
+    CpuSet();
+    void enable(int cpu);
+    void disable(int cpu);
+    void disable_all();
+    bool is_enabled(int cpu) const;
+    int num_enabled() const;
+
+public:
+#if defined __ANDROID__ || defined __linux__
+    cpu_set_t cpu_set;
+#endif
+};
+
 // test optional cpu features
 // neon = armv7 neon or aarch64 asimd
 int cpu_support_arm_neon();
@@ -45,10 +67,10 @@ int get_cpu_powersave();
 int set_cpu_powersave(int powersave);
 
 // convenient wrapper
-size_t get_cpu_thread_affinity_mask(int powersave);
+const CpuSet& get_cpu_thread_affinity_mask(int powersave);
 
 // set explicit thread affinity
-int set_cpu_thread_affinity(size_t thread_affinity_mask);
+int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);
 
 // misc function wrapper for openmp routines
 int get_omp_num_threads();