You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

cpu.cpp 19 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "cpu.h"
  15. #include "platform.h"
  16. #include <limits.h>
  17. #include <stdio.h>
  18. #include <string.h>
  19. #ifdef _OPENMP
  20. #if NCNN_SIMPLEOMP
  21. #include "simpleomp.h"
  22. #else
  23. #include <omp.h>
  24. #endif
  25. #endif
  26. #ifdef _MSC_VER
  27. #include <intrin.h> // __cpuid()
  28. #include <immintrin.h> // _xgetbv()
  29. #endif
  30. #ifdef __EMSCRIPTEN__
  31. #include <emscripten/threading.h>
  32. #endif
  33. #if defined __ANDROID__ || defined __linux__
  34. #include <stdint.h>
  35. #include <sys/syscall.h>
  36. #include <unistd.h>
  37. #endif
  38. #if __APPLE__
  39. #include <mach/mach.h>
  40. #include <mach/machine.h>
  41. #include <mach/thread_act.h>
  42. #include <sys/sysctl.h>
  43. #include <sys/types.h>
  44. #include "TargetConditionals.h"
  45. #if TARGET_OS_IPHONE
  46. #define __IOS__ 1
  47. #endif
  // Fallback values for Apple cpu family identifiers, used only when the SDK
  // headers in use do not already define them.
  #if !defined(CPUFAMILY_ARM_HURRICANE)
  #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
  #endif

  #if !defined(CPUFAMILY_ARM_MONSOON_MISTRAL)
  #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
  #endif

  #if !defined(CPUFAMILY_ARM_VORTEX_TEMPEST)
  #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
  #endif

  #if !defined(CPUFAMILY_ARM_LIGHTNING_THUNDER)
  #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
  #endif

  #if !defined(CPUFAMILY_ARM_FIRESTORM_ICESTORM)
  #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
  #endif
  64. #endif
  65. namespace ncnn {
  66. #if defined __ANDROID__ || defined __linux__
  67. // extract the ELF HW capabilities bitmap from /proc/self/auxv
  68. static unsigned int get_elf_hwcap_from_proc_self_auxv()
  69. {
  70. FILE* fp = fopen("/proc/self/auxv", "rb");
  71. if (!fp)
  72. {
  73. return 0;
  74. }
  75. #define AT_HWCAP 16
  76. #define AT_HWCAP2 26
  77. #if __aarch64__
  78. struct
  79. {
  80. uint64_t tag;
  81. uint64_t value;
  82. } entry;
  83. #else
  84. struct
  85. {
  86. unsigned int tag;
  87. unsigned int value;
  88. } entry;
  89. #endif
  90. unsigned int result = 0;
  91. while (!feof(fp))
  92. {
  93. int nread = fread((char*)&entry, sizeof(entry), 1, fp);
  94. if (nread != 1)
  95. break;
  96. if (entry.tag == 0 && entry.value == 0)
  97. break;
  98. if (entry.tag == AT_HWCAP)
  99. {
  100. result = entry.value;
  101. break;
  102. }
  103. }
  104. fclose(fp);
  105. return result;
  106. }
  107. static unsigned int g_hwcaps = get_elf_hwcap_from_proc_self_auxv();
  108. #if __aarch64__
  109. // from arch/arm64/include/uapi/asm/hwcap.h
  110. #define HWCAP_ASIMD (1 << 1)
  111. #define HWCAP_ASIMDHP (1 << 10)
  112. #else
  113. // from arch/arm/include/uapi/asm/hwcap.h
  114. #define HWCAP_NEON (1 << 12)
  115. #define HWCAP_VFPv4 (1 << 16)
  116. #endif
  117. #endif // defined __ANDROID__ || defined __linux__
  118. #if __APPLE__
  // Query the "hw.cpufamily" sysctl. The result stays 0 when the call does not
  // write to the output buffer.
  static unsigned int get_hw_cpufamily()
  {
      unsigned int family = 0;
      size_t family_size = sizeof(family);

      sysctlbyname("hw.cpufamily", &family, &family_size, NULL, 0);

      return family;
  }
  // Query the "hw.cputype" sysctl. The result stays 0 when the call does not
  // write to the output buffer.
  static cpu_type_t get_hw_cputype()
  {
      cpu_type_t type = 0;
      size_t type_size = sizeof(type);

      sysctlbyname("hw.cputype", &type, &type_size, NULL, 0);

      return type;
  }
  // Query the "hw.cpusubtype" sysctl. The result stays 0 when the call does not
  // write to the output buffer.
  static cpu_subtype_t get_hw_cpusubtype()
  {
      cpu_subtype_t subtype = 0;
      size_t subtype_size = sizeof(subtype);

      sysctlbyname("hw.cpusubtype", &subtype, &subtype_size, NULL, 0);

      return subtype;
  }
  140. static unsigned int g_hw_cpufamily = get_hw_cpufamily();
  141. static cpu_type_t g_hw_cputype = get_hw_cputype();
  142. static cpu_subtype_t g_hw_cpusubtype = get_hw_cpusubtype();
  143. #endif // __APPLE__
  144. #if defined __ANDROID__ || defined __linux__
  145. CpuSet::CpuSet()
  146. {
  147. disable_all();
  148. }
  149. void CpuSet::enable(int cpu)
  150. {
  151. CPU_SET(cpu, &cpu_set);
  152. }
  153. void CpuSet::disable(int cpu)
  154. {
  155. CPU_CLR(cpu, &cpu_set);
  156. }
  157. void CpuSet::disable_all()
  158. {
  159. CPU_ZERO(&cpu_set);
  160. }
  161. bool CpuSet::is_enabled(int cpu) const
  162. {
  163. return CPU_ISSET(cpu, &cpu_set);
  164. }
  165. int CpuSet::num_enabled() const
  166. {
  167. int num_enabled = 0;
  168. for (int i = 0; i < (int)sizeof(cpu_set_t) * 8; i++)
  169. {
  170. if (is_enabled(i))
  171. num_enabled++;
  172. }
  173. return num_enabled;
  174. }
  175. #elif __APPLE__
  176. CpuSet::CpuSet()
  177. {
  178. disable_all();
  179. }
  180. void CpuSet::enable(int cpu)
  181. {
  182. policy |= (1 << cpu);
  183. }
  184. void CpuSet::disable(int cpu)
  185. {
  186. policy &= ~(1 << cpu);
  187. }
  188. void CpuSet::disable_all()
  189. {
  190. policy = 0;
  191. }
  192. bool CpuSet::is_enabled(int cpu) const
  193. {
  194. return policy & (1 << cpu);
  195. }
  196. int CpuSet::num_enabled() const
  197. {
  198. int num_enabled = 0;
  199. for (int i = 0; i < (int)sizeof(policy) * 8; i++)
  200. {
  201. if (is_enabled(i))
  202. num_enabled++;
  203. }
  204. return num_enabled;
  205. }
  206. #else
  207. CpuSet::CpuSet()
  208. {
  209. }
  210. void CpuSet::enable(int /* cpu */)
  211. {
  212. }
  213. void CpuSet::disable(int /* cpu */)
  214. {
  215. }
  216. void CpuSet::disable_all()
  217. {
  218. }
  219. bool CpuSet::is_enabled(int /* cpu */) const
  220. {
  221. return true;
  222. }
  223. int CpuSet::num_enabled() const
  224. {
  225. return get_cpu_count();
  226. }
  227. #endif
  228. int cpu_support_arm_neon()
  229. {
  230. #if defined __ANDROID__ || defined __linux__
  231. #if __aarch64__
  232. return g_hwcaps & HWCAP_ASIMD;
  233. #else
  234. return g_hwcaps & HWCAP_NEON;
  235. #endif
  236. #elif __APPLE__
  237. #if __aarch64__
  238. return g_hw_cputype == CPU_TYPE_ARM64;
  239. #else
  240. return g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7;
  241. #endif
  242. #else
  243. return 0;
  244. #endif
  245. }
  246. int cpu_support_arm_vfpv4()
  247. {
  248. #if defined __ANDROID__ || defined __linux__
  249. #if __aarch64__
  250. // neon always enable fma and fp16
  251. return g_hwcaps & HWCAP_ASIMD;
  252. #else
  253. return g_hwcaps & HWCAP_VFPv4;
  254. #endif
  255. #elif __APPLE__
  256. #if __aarch64__
  257. return g_hw_cputype == CPU_TYPE_ARM64;
  258. #else
  259. return g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7S;
  260. #endif
  261. #else
  262. return 0;
  263. #endif
  264. }
  // Non-zero when the running cpu reports ASIMDHP support.
  // Linux/Android aarch64: tests the ASIMDHP hwcap bit.
  // Apple aarch64: matches the cpu family against a fixed list.
  // Everything else: always 0.
  int cpu_support_arm_asimdhp()
  {
      int has_asimdhp = 0;

  #if defined __ANDROID__ || defined __linux__
  #if __aarch64__
      has_asimdhp = g_hwcaps & HWCAP_ASIMDHP;
  #endif
  #elif __APPLE__
  #if __aarch64__
      has_asimdhp = g_hw_cpufamily == CPUFAMILY_ARM_MONSOON_MISTRAL
                    || g_hw_cpufamily == CPUFAMILY_ARM_VORTEX_TEMPEST
                    || g_hw_cpufamily == CPUFAMILY_ARM_LIGHTNING_THUNDER
                    || g_hw_cpufamily == CPUFAMILY_ARM_FIRESTORM_ICESTORM;
  #endif
  #endif

      return has_asimdhp;
  }
  // Non-zero when the running x86 cpu (and, on the MSVC path, the OS) supports AVX2.
  // Always 0 on non-x86 builds and on x86 compilers without a known detection method.
  int cpu_support_x86_avx2()
  {
  #if (_M_AMD64 || __x86_64__) || (_M_IX86 || __i386__)
  #if defined(_MSC_VER)
      // TODO move to init function
      int regs[4];

      // leaf 0: the first output word is the highest leaf that can be queried
      __cpuid(regs, 0);
      const int max_leaf = regs[0];
      if (max_leaf < 7)
          return 0;

      // leaf 1: AVX, XSAVE and OSXSAVE must all be advertised
      __cpuid(regs, 1);
      const int required_bits = 0x10000000 | 0x04000000 | 0x08000000;
      if ((regs[2] & required_bits) != required_bits)
          return 0;

      // check XSAVE enabled by kernel
      if ((_xgetbv(0) & 6) != 6)
          return 0;

      // leaf 7: the AVX2 flag is bit 5 of the second output word
      __cpuid(regs, 7);
      return regs[1] & 0x00000020;
  #elif defined(__clang__)
  #if __clang_major__ >= 6
      __builtin_cpu_init();
  #endif
      const int has_avx2 = __builtin_cpu_supports("avx2");
      return has_avx2;
  #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
      __builtin_cpu_init();
      const int has_avx2 = __builtin_cpu_supports("avx2");
      return has_avx2;
  #else
      // TODO: other x86 compilers checking avx2 here
      NCNN_LOGE("AVX2 detection method is unknown for current compiler");
      return 0;
  #endif
  #else
      return 0;
  #endif
  }
  // Detect the number of logical cpus; the result is never less than 1.
  //
  // Emscripten: logical core count when threading is supported, otherwise 1.
  // Linux/Android: number of lines in /proc/cpuinfo that start with "processor"
  //                (1 when the file cannot be opened).
  // Apple: the "hw.ncpu" sysctl.
  // Otherwise: omp_get_max_threads() when built with OpenMP, else 1.
  static int get_cpucount()
  {
      int count = 0;
  #ifdef __EMSCRIPTEN__
      if (emscripten_has_threading_support())
          count = emscripten_num_logical_cores();
      else
          count = 1;
  #elif defined __ANDROID__ || defined __linux__
      // get cpu count from /proc/cpuinfo
      FILE* fp = fopen("/proc/cpuinfo", "rb");
      if (!fp)
          return 1;

      char line[1024];
      while (fgets(line, sizeof(line), fp))
      {
          // strncmp stops at the string terminator, so a line shorter than the
          // prefix is never compared against leftover bytes in the buffer
          if (strncmp(line, "processor", 9) == 0)
          {
              count++;
          }
      }

      fclose(fp);
  #elif __APPLE__
      size_t len = sizeof(count);
      sysctlbyname("hw.ncpu", &count, &len, NULL, 0);
  #else
  #ifdef _OPENMP
      count = omp_get_max_threads();
  #else
      count = 1;
  #endif // _OPENMP
  #endif

      if (count < 1)
          count = 1;

      return count;
  }
  358. static int g_cpucount = get_cpucount();
  359. int get_cpu_count()
  360. {
  361. return g_cpucount;
  362. }
  363. int get_little_cpu_count()
  364. {
  365. return get_cpu_thread_affinity_mask(1).num_enabled();
  366. }
  367. int get_big_cpu_count()
  368. {
  369. return get_cpu_thread_affinity_mask(2).num_enabled();
  370. }
  371. #if defined __ANDROID__ || defined __linux__
  372. static int get_max_freq_khz(int cpuid)
  373. {
  374. // first try, for all possible cpu
  375. char path[256];
  376. sprintf(path, "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuid);
  377. FILE* fp = fopen(path, "rb");
  378. if (!fp)
  379. {
  380. // second try, for online cpu
  381. sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuid);
  382. fp = fopen(path, "rb");
  383. if (fp)
  384. {
  385. int max_freq_khz = 0;
  386. while (!feof(fp))
  387. {
  388. int freq_khz = 0;
  389. int nscan = fscanf(fp, "%d %*d", &freq_khz);
  390. if (nscan != 1)
  391. break;
  392. if (freq_khz > max_freq_khz)
  393. max_freq_khz = freq_khz;
  394. }
  395. fclose(fp);
  396. if (max_freq_khz != 0)
  397. return max_freq_khz;
  398. fp = NULL;
  399. }
  400. if (!fp)
  401. {
  402. // third try, for online cpu
  403. sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuid);
  404. fp = fopen(path, "rb");
  405. if (!fp)
  406. return -1;
  407. int max_freq_khz = -1;
  408. int nscan = fscanf(fp, "%d", &max_freq_khz);
  409. if (nscan != 1)
  410. {
  411. NCNN_LOGE("fscanf cpuinfo_max_freq error %d", nscan);
  412. }
  413. fclose(fp);
  414. return max_freq_khz;
  415. }
  416. }
  417. int max_freq_khz = 0;
  418. while (!feof(fp))
  419. {
  420. int freq_khz = 0;
  421. int nscan = fscanf(fp, "%d %*d", &freq_khz);
  422. if (nscan != 1)
  423. break;
  424. if (freq_khz > max_freq_khz)
  425. max_freq_khz = freq_khz;
  426. }
  427. fclose(fp);
  428. return max_freq_khz;
  429. }
  430. static int set_sched_affinity(const CpuSet& thread_affinity_mask)
  431. {
  432. // set affinity for thread
  433. #if defined(__GLIBC__) || defined(__OHOS__)
  434. pid_t pid = syscall(SYS_gettid);
  435. #else
  436. #if defined(PI3) || (defined(__MUSL__) && __MUSL_MINOR__ <= 14)
  437. pid_t pid = getpid();
  438. #else
  439. pid_t pid = gettid();
  440. #endif
  441. #endif
  442. int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(cpu_set_t), &thread_affinity_mask.cpu_set);
  443. if (syscallret)
  444. {
  445. NCNN_LOGE("syscall error %d", syscallret);
  446. return -1;
  447. }
  448. return 0;
  449. }
  450. #endif // defined __ANDROID__ || defined __linux__
  451. #if __APPLE__
  452. static int set_sched_affinity(const CpuSet& thread_affinity_mask)
  453. {
  454. // https://developer.apple.com/library/archive/releasenotes/Performance/RN-AffinityAPI/index.html
  455. // http://www.hybridkernel.com/2015/01/18/binding_threads_to_cores_osx.html
  456. // https://gist.github.com/Coneko/4234842
  457. // This is a quite outdated document. Apple will not allow developers to set CPU affinity.
  458. // In OS X 10.5 it worked, later it became a suggestion to OS X, then in 10.10 or so (as well in later ones), macOS will ignore any affinity settings.
  459. // see https://github.com/Tencent/ncnn/pull/2335#discussion_r528233919 --- AmeAkio
  460. int affinity_tag = THREAD_AFFINITY_TAG_NULL;
  461. for (int i = 0; i < (int)sizeof(thread_affinity_mask.policy) * 8; i++)
  462. {
  463. if (thread_affinity_mask.is_enabled(i))
  464. {
  465. affinity_tag = i + 1;
  466. break;
  467. }
  468. }
  469. mach_port_t tid = pthread_mach_thread_np(pthread_self());
  470. thread_affinity_policy_data_t policy_data;
  471. policy_data.affinity_tag = affinity_tag;
  472. int ret = thread_policy_set(tid, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy_data, THREAD_AFFINITY_POLICY_COUNT);
  473. if (ret && ret != KERN_NOT_SUPPORTED)
  474. {
  475. NCNN_LOGE("thread_policy_set error %d", ret);
  476. return -1;
  477. }
  478. return 0;
  479. }
  480. #endif // __APPLE__
  // last powersave mode accepted by set_cpu_powersave(); starts at 0
  static int g_powersave = 0;

  // Return the currently recorded powersave mode.
  int get_cpu_powersave()
  {
      const int current_mode = g_powersave;
      return current_mode;
  }
  486. int set_cpu_powersave(int powersave)
  487. {
  488. if (powersave < 0 || powersave > 2)
  489. {
  490. NCNN_LOGE("powersave %d not supported", powersave);
  491. return -1;
  492. }
  493. const CpuSet& thread_affinity_mask = get_cpu_thread_affinity_mask(powersave);
  494. int ret = set_cpu_thread_affinity(thread_affinity_mask);
  495. if (ret != 0)
  496. return ret;
  497. g_powersave = powersave;
  498. return 0;
  499. }
  500. static CpuSet g_thread_affinity_mask_all;
  501. static CpuSet g_thread_affinity_mask_little;
  502. static CpuSet g_thread_affinity_mask_big;
  503. static int setup_thread_affinity_masks()
  504. {
  505. g_thread_affinity_mask_all.disable_all();
  506. #if defined __ANDROID__ || defined __linux__
  507. int max_freq_khz_min = INT_MAX;
  508. int max_freq_khz_max = 0;
  509. std::vector<int> cpu_max_freq_khz(g_cpucount);
  510. for (int i = 0; i < g_cpucount; i++)
  511. {
  512. int max_freq_khz = get_max_freq_khz(i);
  513. // NCNN_LOGE("%d max freq = %d khz", i, max_freq_khz);
  514. cpu_max_freq_khz[i] = max_freq_khz;
  515. if (max_freq_khz > max_freq_khz_max)
  516. max_freq_khz_max = max_freq_khz;
  517. if (max_freq_khz < max_freq_khz_min)
  518. max_freq_khz_min = max_freq_khz;
  519. }
  520. int max_freq_khz_medium = (max_freq_khz_min + max_freq_khz_max) / 2;
  521. if (max_freq_khz_medium == max_freq_khz_max)
  522. {
  523. g_thread_affinity_mask_little.disable_all();
  524. g_thread_affinity_mask_big = g_thread_affinity_mask_all;
  525. return 0;
  526. }
  527. for (int i = 0; i < g_cpucount; i++)
  528. {
  529. if (cpu_max_freq_khz[i] < max_freq_khz_medium)
  530. g_thread_affinity_mask_little.enable(i);
  531. else
  532. g_thread_affinity_mask_big.enable(i);
  533. }
  534. #elif __APPLE__
  535. // affinity info from cpu model
  536. if (g_hw_cpufamily == CPUFAMILY_ARM_MONSOON_MISTRAL)
  537. {
  538. // 2 + 4
  539. g_thread_affinity_mask_big.enable(0);
  540. g_thread_affinity_mask_big.enable(1);
  541. g_thread_affinity_mask_little.enable(2);
  542. g_thread_affinity_mask_little.enable(3);
  543. g_thread_affinity_mask_little.enable(4);
  544. g_thread_affinity_mask_little.enable(5);
  545. }
  546. else if (g_hw_cpufamily == CPUFAMILY_ARM_VORTEX_TEMPEST || g_hw_cpufamily == CPUFAMILY_ARM_LIGHTNING_THUNDER || g_hw_cpufamily == CPUFAMILY_ARM_FIRESTORM_ICESTORM)
  547. {
  548. // 2 + 4 or 4 + 4
  549. if (get_cpu_count() == 6)
  550. {
  551. g_thread_affinity_mask_big.enable(0);
  552. g_thread_affinity_mask_big.enable(1);
  553. g_thread_affinity_mask_little.enable(2);
  554. g_thread_affinity_mask_little.enable(3);
  555. g_thread_affinity_mask_little.enable(4);
  556. g_thread_affinity_mask_little.enable(5);
  557. }
  558. else
  559. {
  560. g_thread_affinity_mask_big.enable(0);
  561. g_thread_affinity_mask_big.enable(1);
  562. g_thread_affinity_mask_big.enable(2);
  563. g_thread_affinity_mask_big.enable(3);
  564. g_thread_affinity_mask_little.enable(4);
  565. g_thread_affinity_mask_little.enable(5);
  566. g_thread_affinity_mask_little.enable(6);
  567. g_thread_affinity_mask_little.enable(7);
  568. }
  569. }
  570. else
  571. {
  572. // smp models
  573. g_thread_affinity_mask_little.disable_all();
  574. g_thread_affinity_mask_big = g_thread_affinity_mask_all;
  575. }
  576. #else
  577. // TODO implement me for other platforms
  578. g_thread_affinity_mask_little.disable_all();
  579. g_thread_affinity_mask_big = g_thread_affinity_mask_all;
  580. #endif
  581. return 0;
  582. }
  583. const CpuSet& get_cpu_thread_affinity_mask(int powersave)
  584. {
  585. setup_thread_affinity_masks();
  586. if (powersave == 0)
  587. return g_thread_affinity_mask_all;
  588. if (powersave == 1)
  589. return g_thread_affinity_mask_little;
  590. if (powersave == 2)
  591. return g_thread_affinity_mask_big;
  592. NCNN_LOGE("powersave %d not supported", powersave);
  593. // fallback to all cores anyway
  594. return g_thread_affinity_mask_all;
  595. }
  596. int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask)
  597. {
  598. #if defined __ANDROID__ || defined __linux__
  599. int num_threads = thread_affinity_mask.num_enabled();
  600. #ifdef _OPENMP
  601. // set affinity for each thread
  602. set_omp_num_threads(num_threads);
  603. std::vector<int> ssarets(num_threads, 0);
  604. #pragma omp parallel for num_threads(num_threads)
  605. for (int i = 0; i < num_threads; i++)
  606. {
  607. ssarets[i] = set_sched_affinity(thread_affinity_mask);
  608. }
  609. for (int i = 0; i < num_threads; i++)
  610. {
  611. if (ssarets[i] != 0)
  612. return -1;
  613. }
  614. #else
  615. int ssaret = set_sched_affinity(thread_affinity_mask);
  616. if (ssaret != 0)
  617. return -1;
  618. #endif
  619. return 0;
  620. #elif __APPLE__
  621. #ifdef _OPENMP
  622. int num_threads = thread_affinity_mask.num_enabled();
  623. // set affinity for each thread
  624. set_omp_num_threads(num_threads);
  625. std::vector<int> ssarets(num_threads, 0);
  626. #pragma omp parallel for num_threads(num_threads)
  627. for (int i = 0; i < num_threads; i++)
  628. {
  629. // assign one core for each thread
  630. int core = -1 - i;
  631. for (int j = 0; j < (int)sizeof(thread_affinity_mask.policy) * 8; j++)
  632. {
  633. if (thread_affinity_mask.is_enabled(j))
  634. {
  635. if (core == -1)
  636. {
  637. core = j;
  638. break;
  639. }
  640. else
  641. {
  642. core++;
  643. }
  644. }
  645. }
  646. CpuSet this_thread_affinity_mask;
  647. if (core != -1 - i)
  648. {
  649. this_thread_affinity_mask.enable(core);
  650. }
  651. ssarets[i] = set_sched_affinity(this_thread_affinity_mask);
  652. }
  653. for (int i = 0; i < num_threads; i++)
  654. {
  655. if (ssarets[i] != 0)
  656. return -1;
  657. }
  658. #else
  659. int ssaret = set_sched_affinity(thread_affinity_mask);
  660. if (ssaret != 0)
  661. return -1;
  662. #endif
  663. return 0;
  664. #else
  665. // TODO
  666. (void)thread_affinity_mask;
  667. return -1;
  668. #endif
  669. }
  // Return omp_get_num_threads() when built with OpenMP, otherwise 1.
  int get_omp_num_threads()
  {
      int thread_count = 1;
  #ifdef _OPENMP
      thread_count = omp_get_num_threads();
  #endif
      return thread_count;
  }
  // Forward `num_threads` to omp_set_num_threads() when built with OpenMP;
  // otherwise the argument is ignored.
  void set_omp_num_threads(int num_threads)
  {
  #ifndef _OPENMP
      (void)num_threads;
  #else
      omp_set_num_threads(num_threads);
  #endif
  }
  // Return omp_get_dynamic() when built with OpenMP, otherwise 0.
  int get_omp_dynamic()
  {
      int dynamic_enabled = 0;
  #ifdef _OPENMP
      dynamic_enabled = omp_get_dynamic();
  #endif
      return dynamic_enabled;
  }
  // Forward `dynamic` to omp_set_dynamic() when built with OpenMP;
  // otherwise the argument is ignored.
  void set_omp_dynamic(int dynamic)
  {
  #ifndef _OPENMP
      (void)dynamic;
  #else
      omp_set_dynamic(dynamic);
  #endif
  }
  // Return omp_get_thread_num() when built with OpenMP, otherwise 0.
  int get_omp_thread_num()
  {
      int thread_index = 0;
  #ifdef _OPENMP
      thread_index = omp_get_thread_num();
  #endif
      return thread_index;
  }
  // Return kmp_get_blocktime() when built with OpenMP under clang, otherwise 0.
  int get_kmp_blocktime()
  {
      int blocktime = 0;
  #if defined(_OPENMP) && __clang__
      blocktime = kmp_get_blocktime();
  #endif
      return blocktime;
  }
  // Forward `time_ms` to kmp_set_blocktime() when built with OpenMP under clang;
  // otherwise the argument is ignored.
  void set_kmp_blocktime(int time_ms)
  {
  #if !(defined(_OPENMP) && __clang__)
      (void)time_ms;
  #else
      kmp_set_blocktime(time_ms);
  #endif
  }
  726. } // namespace ncnn