|
|
|
@@ -142,6 +142,71 @@ static cpu_type_t g_hw_cputype = get_hw_cputype(); |
|
|
|
static cpu_subtype_t g_hw_cpusubtype = get_hw_cpusubtype(); |
|
|
|
#endif // __IOS__ |
|
|
|
|
|
|
|
#if defined __ANDROID__ || defined __linux__ |
|
|
|
// Construct an affinity set with no CPU enabled.
CpuSet::CpuSet()
{
    // start from a fully cleared mask, same effect as disable_all()
    CPU_ZERO(&cpu_set);
}
|
|
|
|
|
|
|
void CpuSet::enable(int cpu) |
|
|
|
{ |
|
|
|
CPU_SET(cpu, &cpu_set); |
|
|
|
} |
|
|
|
|
|
|
|
void CpuSet::disable(int cpu) |
|
|
|
{ |
|
|
|
CPU_CLR(cpu, &cpu_set); |
|
|
|
} |
|
|
|
|
|
|
|
void CpuSet::disable_all() |
|
|
|
{ |
|
|
|
CPU_ZERO(&cpu_set); |
|
|
|
} |
|
|
|
|
|
|
|
bool CpuSet::is_enabled(int cpu) const |
|
|
|
{ |
|
|
|
return CPU_ISSET(cpu, &cpu_set); |
|
|
|
} |
|
|
|
|
|
|
|
int CpuSet::num_enabled() const |
|
|
|
{ |
|
|
|
int num_enabled = 0; |
|
|
|
for (int i = 0; i < (int)sizeof(cpu_set_t) * 8; i++) |
|
|
|
{ |
|
|
|
if (is_enabled(i)) |
|
|
|
num_enabled++; |
|
|
|
} |
|
|
|
|
|
|
|
return num_enabled; |
|
|
|
} |
|
|
|
#else // defined __ANDROID__ || defined __linux__ |
|
|
|
// Fallback constructor for builds that are neither Android nor Linux:
// this branch keeps no affinity state, so there is nothing to initialize.
CpuSet::CpuSet()
{
    // intentionally empty
}
|
|
|
|
|
|
|
// Fallback for builds that are neither Android nor Linux: does nothing.
//   cpu: ignored
void CpuSet::enable(int cpu)
{
    (void)cpu; // unused in this branch
}
|
|
|
|
|
|
|
// Fallback for builds that are neither Android nor Linux: does nothing.
//   cpu: ignored
void CpuSet::disable(int cpu)
{
    (void)cpu; // unused in this branch
}
|
|
|
|
|
|
|
// Fallback for builds that are neither Android nor Linux: does nothing,
// since this branch keeps no affinity state to clear.
void CpuSet::disable_all()
{
    // intentionally empty
}
|
|
|
|
|
|
|
bool CpuSet::is_enabled(int /* cpu */) const |
|
|
|
{ |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
int CpuSet::num_enabled() const |
|
|
|
{ |
|
|
|
return get_cpu_count(); |
|
|
|
} |
|
|
|
#endif // defined __ANDROID__ || defined __linux__ |
|
|
|
|
|
|
|
int cpu_support_arm_neon() |
|
|
|
{ |
|
|
|
#if defined __ANDROID__ || defined __linux__ |
|
|
|
@@ -290,11 +355,6 @@ static int get_cpucount() |
|
|
|
if (count < 1) |
|
|
|
count = 1; |
|
|
|
|
|
|
|
if (count > (int)sizeof(size_t) * 8) |
|
|
|
{ |
|
|
|
NCNN_LOGE("more than %d cpu detected, thread affinity may not work properly :(", (int)sizeof(size_t) * 8); |
|
|
|
} |
|
|
|
|
|
|
|
return count; |
|
|
|
} |
|
|
|
|
|
|
|
@@ -377,23 +437,8 @@ static int get_max_freq_khz(int cpuid) |
|
|
|
return max_freq_khz; |
|
|
|
} |
|
|
|
|
|
|
|
static int set_sched_affinity(size_t thread_affinity_mask) |
|
|
|
static int set_sched_affinity(const CpuSet& thread_affinity_mask) |
|
|
|
{ |
|
|
|
// cpu_set_t definition |
|
|
|
// ref http://stackoverflow.com/questions/16319725/android-set-thread-affinity |
|
|
|
#define NCNN_CPU_SETSIZE 1024 |
|
|
|
#define __NCNN_NCPUBITS (8 * sizeof(unsigned long)) |
|
|
|
typedef struct |
|
|
|
{ |
|
|
|
unsigned long __bits[NCNN_CPU_SETSIZE / __NCNN_NCPUBITS]; |
|
|
|
} cpu_set_t; |
|
|
|
|
|
|
|
#define NCNN_CPU_SET(cpu, cpusetp) \ |
|
|
|
((cpusetp)->__bits[(cpu) / __NCNN_NCPUBITS] |= (1UL << ((cpu) % __NCNN_NCPUBITS))) |
|
|
|
|
|
|
|
#define NCNN_CPU_ZERO(cpusetp) \ |
|
|
|
memset((cpusetp), 0, sizeof(cpu_set_t)) |
|
|
|
|
|
|
|
// set affinity for thread |
|
|
|
#ifdef __GLIBC__ |
|
|
|
pid_t pid = syscall(SYS_gettid); |
|
|
|
@@ -404,17 +449,8 @@ static int set_sched_affinity(size_t thread_affinity_mask) |
|
|
|
pid_t pid = gettid(); |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
cpu_set_t mask; |
|
|
|
NCNN_CPU_ZERO(&mask); |
|
|
|
for (int i = 0; i < (int)sizeof(size_t) * 8; i++) |
|
|
|
{ |
|
|
|
if (thread_affinity_mask & (1ul << i)) |
|
|
|
{ |
|
|
|
NCNN_CPU_SET(i, &mask); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask); |
|
|
|
int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(cpu_set_t), &thread_affinity_mask.cpu_set); |
|
|
|
if (syscallret) |
|
|
|
{ |
|
|
|
NCNN_LOGE("syscall error %d", syscallret); |
|
|
|
@@ -440,7 +476,7 @@ int set_cpu_powersave(int powersave) |
|
|
|
return -1; |
|
|
|
} |
|
|
|
|
|
|
|
size_t thread_affinity_mask = get_cpu_thread_affinity_mask(powersave); |
|
|
|
const CpuSet& thread_affinity_mask = get_cpu_thread_affinity_mask(powersave); |
|
|
|
|
|
|
|
int ret = set_cpu_thread_affinity(thread_affinity_mask); |
|
|
|
if (ret != 0) |
|
|
|
@@ -451,13 +487,13 @@ int set_cpu_powersave(int powersave) |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
static size_t g_thread_affinity_mask_all = 0; |
|
|
|
static size_t g_thread_affinity_mask_little = 0; |
|
|
|
static size_t g_thread_affinity_mask_big = 0; |
|
|
|
static CpuSet g_thread_affinity_mask_all; |
|
|
|
static CpuSet g_thread_affinity_mask_little; |
|
|
|
static CpuSet g_thread_affinity_mask_big; |
|
|
|
|
|
|
|
static int setup_thread_affinity_masks() |
|
|
|
{ |
|
|
|
g_thread_affinity_mask_all = (1ul << g_cpucount) - 1; |
|
|
|
g_thread_affinity_mask_all.disable_all(); |
|
|
|
|
|
|
|
#if defined __ANDROID__ || defined __linux__ |
|
|
|
int max_freq_khz_min = INT_MAX; |
|
|
|
@@ -480,7 +516,7 @@ static int setup_thread_affinity_masks() |
|
|
|
int max_freq_khz_medium = (max_freq_khz_min + max_freq_khz_max) / 2; |
|
|
|
if (max_freq_khz_medium == max_freq_khz_max) |
|
|
|
{ |
|
|
|
g_thread_affinity_mask_little = 0; |
|
|
|
g_thread_affinity_mask_little.disable_all(); |
|
|
|
g_thread_affinity_mask_big = g_thread_affinity_mask_all; |
|
|
|
return 0; |
|
|
|
} |
|
|
|
@@ -488,32 +524,22 @@ static int setup_thread_affinity_masks() |
|
|
|
for (int i = 0; i < g_cpucount; i++) |
|
|
|
{ |
|
|
|
if (cpu_max_freq_khz[i] < max_freq_khz_medium) |
|
|
|
g_thread_affinity_mask_little |= (1ul << i); |
|
|
|
g_thread_affinity_mask_little.enable(i); |
|
|
|
else |
|
|
|
g_thread_affinity_mask_big |= (1ul << i); |
|
|
|
g_thread_affinity_mask_big.enable(i); |
|
|
|
} |
|
|
|
#else |
|
|
|
// TODO implement me for other platforms |
|
|
|
g_thread_affinity_mask_little = 0; |
|
|
|
g_thread_affinity_mask_little.disable_all(); |
|
|
|
g_thread_affinity_mask_big = g_thread_affinity_mask_all; |
|
|
|
#endif |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
size_t get_cpu_thread_affinity_mask(int powersave) |
|
|
|
const CpuSet& get_cpu_thread_affinity_mask(int powersave) |
|
|
|
{ |
|
|
|
if (g_thread_affinity_mask_all == 0) |
|
|
|
{ |
|
|
|
setup_thread_affinity_masks(); |
|
|
|
} |
|
|
|
|
|
|
|
if (g_thread_affinity_mask_little == 0) |
|
|
|
{ |
|
|
|
// SMP cpu powersave not supported |
|
|
|
// fallback to all cores anyway |
|
|
|
return g_thread_affinity_mask_all; |
|
|
|
} |
|
|
|
setup_thread_affinity_masks(); |
|
|
|
|
|
|
|
if (powersave == 0) |
|
|
|
return g_thread_affinity_mask_all; |
|
|
|
@@ -530,15 +556,10 @@ size_t get_cpu_thread_affinity_mask(int powersave) |
|
|
|
return g_thread_affinity_mask_all; |
|
|
|
} |
|
|
|
|
|
|
|
int set_cpu_thread_affinity(size_t thread_affinity_mask) |
|
|
|
int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask) |
|
|
|
{ |
|
|
|
#if defined __ANDROID__ || defined __linux__ |
|
|
|
int num_threads = 0; |
|
|
|
for (int i = 0; i < (int)sizeof(size_t) * 8; i++) |
|
|
|
{ |
|
|
|
if (thread_affinity_mask & (1ul << i)) |
|
|
|
num_threads++; |
|
|
|
} |
|
|
|
int num_threads = thread_affinity_mask.num_enabled(); |
|
|
|
|
|
|
|
#ifdef _OPENMP |
|
|
|
// set affinity for each thread |
|
|
|
|