You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cpu.cpp 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "cpu.h"
  15. #include <stdio.h>
  16. #include <string.h>
  17. #include <vector>
  18. #ifdef _OPENMP
  19. #include <omp.h>
  20. #endif
  21. #ifdef __ANDROID__
  22. #include <sys/syscall.h>
  23. #include <unistd.h>
  24. #include <stdint.h>
  25. #endif
  26. #if __APPLE__
  27. #include "TargetConditionals.h"
  28. #if TARGET_OS_IPHONE
  29. #include <sys/types.h>
  30. #include <sys/sysctl.h>
  31. #include <mach/machine.h>
  32. #define __IOS__ 1
  33. #endif
  34. #endif
  35. namespace ncnn {
  36. #ifdef __ANDROID__
  // Read the AT_HWCAP entry (ELF hardware capability bitmap) from /proc/self/auxv.
  // Returns 0 when the file cannot be opened or no AT_HWCAP entry is reached.
  static unsigned int get_elf_hwcap_from_proc_self_auxv()
  {
      FILE* auxv = fopen("/proc/self/auxv", "rb");
      if (auxv == NULL)
          return 0;

  #define AT_HWCAP 16
  #define AT_HWCAP2 26

      // one auxv record is a (tag, value) pair: 64-bit fields on aarch64,
      // unsigned int fields otherwise
  #if __aarch64__
      struct { uint64_t tag; uint64_t value; } record;
  #else
      struct { unsigned int tag; unsigned int value; } record;
  #endif

      unsigned int hwcap = 0;
      for (;;)
      {
          if (feof(auxv))
              break;

          // stop on a short read
          if (fread((char*)&record, sizeof(record), 1, auxv) != 1)
              break;

          // an all-zero record ends the scan
          if (record.tag == 0 && record.value == 0)
              break;

          if (record.tag == AT_HWCAP)
          {
              hwcap = record.value;
              break;
          }
      }

      fclose(auxv);
      return hwcap;
  }
  69. static unsigned int g_hwcaps = get_elf_hwcap_from_proc_self_auxv();
  70. #if __aarch64__
  71. // from arch/arm64/include/uapi/asm/hwcap.h
  72. #define HWCAP_ASIMD (1 << 1)
  73. #define HWCAP_ASIMDHP (1 << 10)
  74. #else
  75. // from arch/arm/include/uapi/asm/hwcap.h
  76. #define HWCAP_NEON (1 << 12)
  77. #define HWCAP_VFPv4 (1 << 16)
  78. #endif
  79. #endif // __ANDROID__
  80. #if __IOS__
  81. static unsigned int get_hw_cpufamily()
  82. {
  83. unsigned int value = 0;
  84. size_t len = sizeof(value);
  85. sysctlbyname("hw.cpufamily", &value, &len, NULL, 0);
  86. return value;
  87. }
  88. static cpu_type_t get_hw_cputype()
  89. {
  90. cpu_type_t value = 0;
  91. size_t len = sizeof(value);
  92. sysctlbyname("hw.cputype", &value, &len, NULL, 0);
  93. return value;
  94. }
  95. static cpu_subtype_t get_hw_cpusubtype()
  96. {
  97. cpu_subtype_t value = 0;
  98. size_t len = sizeof(value);
  99. sysctlbyname("hw.cpusubtype", &value, &len, NULL, 0);
  100. return value;
  101. }
  102. static unsigned int g_hw_cpufamily = get_hw_cpufamily();
  103. static cpu_type_t g_hw_cputype = get_hw_cputype();
  104. static cpu_subtype_t g_hw_cpusubtype = get_hw_cpusubtype();
  105. #endif // __IOS__
  // Report whether ARM NEON is available.
  // Android: the masked hwcap bit (ASIMD on aarch64, NEON otherwise), non-zero when set.
  // iOS: 1 or 0 depending on the cpu type / subtype comparison.
  // Any other platform: always 0.
  int cpu_support_arm_neon()
  {
  #if defined(__ANDROID__)
  #if __aarch64__
      const unsigned int neon_bit = HWCAP_ASIMD;
  #else
      const unsigned int neon_bit = HWCAP_NEON;
  #endif
      return g_hwcaps & neon_bit;
  #elif __IOS__
  #if __aarch64__
      const bool has_neon = (g_hw_cputype == CPU_TYPE_ARM64);
  #else
      const bool has_neon = (g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7);
  #endif
      return has_neon;
  #else
      return 0;
  #endif
  }
  // Report whether VFPv4 is available.
  // Android: the masked hwcap bit (ASIMD on aarch64, VFPv4 otherwise), non-zero when set.
  // iOS: 1 or 0 depending on the cpu type / subtype comparison.
  // Any other platform: always 0.
  int cpu_support_arm_vfpv4()
  {
  #if defined(__ANDROID__)
  #if __aarch64__
      // neon always enable fma and fp16
      const unsigned int vfpv4_bit = HWCAP_ASIMD;
  #else
      const unsigned int vfpv4_bit = HWCAP_VFPv4;
  #endif
      return g_hwcaps & vfpv4_bit;
  #elif __IOS__
  #if __aarch64__
      const bool has_vfpv4 = (g_hw_cputype == CPU_TYPE_ARM64);
  #else
      const bool has_vfpv4 = (g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7S);
  #endif
      return has_vfpv4;
  #else
      return 0;
  #endif
  }
  // Report whether the ASIMD half-precision extension is available.
  // Android aarch64: the masked ASIMDHP hwcap bit, non-zero when set.
  // iOS aarch64: 1 only when the cpu family equals one of the two families listed below.
  // Everything else: always 0.
  int cpu_support_arm_asimdhp()
  {
  #if defined(__ANDROID__)
  #if __aarch64__
      const unsigned int asimdhp_bit = HWCAP_ASIMDHP;
      return g_hwcaps & asimdhp_bit;
  #else
      return 0;
  #endif
  #elif __IOS__
  #if __aarch64__
      // provide the family ids when the SDK headers do not
  #ifndef CPUFAMILY_ARM_HURRICANE
  #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
  #endif
  #ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
  #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
  #endif
      if (g_hw_cpufamily == CPUFAMILY_ARM_HURRICANE)
          return 1;
      return g_hw_cpufamily == CPUFAMILY_ARM_MONSOON_MISTRAL;
  #else
      return 0;
  #endif
  #else
      return 0;
  #endif
  }
  // Determine the number of cpus; the result is never below 1 on Android and iOS.
  // Android: counts the lines of /proc/cpuinfo that start with "processor".
  // iOS: reads the "hw.ncpu" sysctl.
  // Elsewhere: omp_get_max_threads() when built with OpenMP, otherwise 1.
  static int get_cpucount()
  {
  #if defined(__ANDROID__)
      FILE* cpuinfo = fopen("/proc/cpuinfo", "rb");
      if (cpuinfo == NULL)
          return 1;

      int processors = 0;
      char text[1024];
      while (!feof(cpuinfo) && fgets(text, 1024, cpuinfo) != NULL)
      {
          if (memcmp(text, "processor", 9) != 0)
              continue;

          processors++;
      }

      fclose(cpuinfo);

      return processors < 1 ? 1 : processors;
  #elif __IOS__
      int ncpu = 0;
      size_t ncpu_size = sizeof(ncpu);
      sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0);

      return ncpu < 1 ? 1 : ncpu;
  #else
  #ifndef _OPENMP
      return 1;
  #else
      return omp_get_max_threads();
  #endif // _OPENMP
  #endif
  }
  205. static int g_cpucount = get_cpucount();
  206. int get_cpu_count()
  207. {
  208. return g_cpucount;
  209. }
  210. #ifdef __ANDROID__
  // Scan an open time_in_state stream made of "<freq_khz> <number>" pairs and
  // return the largest frequency seen, or 0 when no pair could be parsed.
  // The caller keeps ownership of fp and must close it.
  static int read_max_freq_from_time_in_state(FILE* fp)
  {
      int max_freq_khz = 0;
      while (!feof(fp))
      {
          int freq_khz = 0;
          if (fscanf(fp, "%d %*d", &freq_khz) != 1)
              break;

          if (freq_khz > max_freq_khz)
              max_freq_khz = freq_khz;
      }
      return max_freq_khz;
  }

  // Look up the maximum frequency (in kHz, per the sysfs file contents) of cpu `cpuid`.
  // Three sysfs sources are tried in order:
  //   1. cpufreq/stats/cpuN/time_in_state   (result returned as-is, may be 0)
  //   2. cpuN/cpufreq/stats/time_in_state   (used only when it yields a non-zero value)
  //   3. cpuN/cpufreq/cpuinfo_max_freq
  // Returns -1 when none of the files can be opened or the last one cannot be parsed.
  static int get_max_freq_khz(int cpuid)
  {
      char path[256];

      // first try, for all possible cpu
      snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuid);
      FILE* fp = fopen(path, "rb");
      if (fp)
      {
          int max_freq_khz = read_max_freq_from_time_in_state(fp);
          fclose(fp);
          return max_freq_khz;
      }

      // second try, for online cpu
      snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuid);
      fp = fopen(path, "rb");
      if (fp)
      {
          int max_freq_khz = read_max_freq_from_time_in_state(fp);
          fclose(fp);

          // an empty or unparsable file falls through to the third source
          if (max_freq_khz != 0)
              return max_freq_khz;
      }

      // third try, for online cpu
      snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuid);
      fp = fopen(path, "rb");
      if (!fp)
          return -1;

      int max_freq_khz = -1;
      if (fscanf(fp, "%d", &max_freq_khz) != 1)
          max_freq_khz = -1;

      fclose(fp);

      return max_freq_khz;
  }
  // Restrict the calling thread to the cpus listed in `cpuids` through the
  // sched_setaffinity syscall.
  // Ids outside [0, 1024) cannot be represented in the mask; they are reported
  // on stderr and skipped instead of writing outside the mask.
  // Returns 0 on success, -1 when the syscall reports an error.
  static int set_sched_affinity(const std::vector<int>& cpuids)
  {
      // local 1024-bit cpu mask stored in unsigned long words; kept private to
      // this function so nothing clashes with a system <sched.h>
      // ref http://stackoverflow.com/questions/16319725/android-set-thread-affinity
      enum { kMaxCpus = 1024 };
      const size_t bits_per_word = 8 * sizeof(unsigned long);
      struct
      {
          unsigned long bits[kMaxCpus / (8 * sizeof(unsigned long))];
      } mask;
      memset(&mask, 0, sizeof(mask));

      // set affinity for thread
  #ifdef __GLIBC__
      pid_t pid = syscall(SYS_gettid);
  #else
  #ifdef PI3
      pid_t pid = getpid();
  #else
      pid_t pid = gettid();
  #endif
  #endif

      for (size_t i = 0; i < cpuids.size(); i++)
      {
          const int cpu = cpuids[i];
          if (cpu < 0 || cpu >= kMaxCpus)
          {
              fprintf(stderr, "cpu id %d out of range, ignored\n", cpu);
              continue;
          }

          mask.bits[cpu / bits_per_word] |= (1UL << (cpu % bits_per_word));
      }

      int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
      if (syscallret)
      {
          fprintf(stderr, "syscall error %d\n", syscallret);
          return -1;
      }

      return 0;
  }
  303. static int sort_cpuid_by_max_frequency(std::vector<int>& cpuids, int* little_cluster_offset)
  304. {
  305. const int cpu_count = cpuids.size();
  306. *little_cluster_offset = 0;
  307. if (cpu_count == 0)
  308. return 0;
  309. std::vector<int> cpu_max_freq_khz;
  310. cpu_max_freq_khz.resize(cpu_count);
  311. for (int i=0; i<cpu_count; i++)
  312. {
  313. int max_freq_khz = get_max_freq_khz(i);
  314. // printf("%d max freq = %d khz\n", i, max_freq_khz);
  315. cpuids[i] = i;
  316. cpu_max_freq_khz[i] = max_freq_khz;
  317. }
  318. // sort cpuid as big core first
  319. // simple bubble sort
  320. for (int i=0; i<cpu_count; i++)
  321. {
  322. for (int j=i+1; j<cpu_count; j++)
  323. {
  324. if (cpu_max_freq_khz[i] < cpu_max_freq_khz[j])
  325. {
  326. // swap
  327. int tmp = cpuids[i];
  328. cpuids[i] = cpuids[j];
  329. cpuids[j] = tmp;
  330. tmp = cpu_max_freq_khz[i];
  331. cpu_max_freq_khz[i] = cpu_max_freq_khz[j];
  332. cpu_max_freq_khz[j] = tmp;
  333. }
  334. }
  335. }
  336. // SMP
  337. int mid_max_freq_khz = (cpu_max_freq_khz.front() + cpu_max_freq_khz.back()) / 2;
  338. if (mid_max_freq_khz == cpu_max_freq_khz.back())
  339. return 0;
  340. for (int i=0; i<cpu_count; i++)
  341. {
  342. if (cpu_max_freq_khz[i] < mid_max_freq_khz)
  343. {
  344. *little_cluster_offset = i;
  345. break;
  346. }
  347. }
  348. return 0;
  349. }
  350. #endif // __ANDROID__
  // powersave mode last applied successfully by set_cpu_powersave, initially 0
  static int g_powersave = 0;

  // Return the powersave mode currently stored in g_powersave.
  int get_cpu_powersave()
  {
      const int current_mode = g_powersave;
      return current_mode;
  }
  // Bind threads to a cpu cluster according to `powersave`:
  //   0 = all cpus, 1 = cpus from the little-cluster offset onward,
  //   2 = cpus before the little-cluster offset.
  // On Android the cpu list is sorted by max frequency on first use. When the
  // little-cluster offset is 0, any non-zero mode is replaced by 0 with a message.
  // Returns 0 on success and records the mode in g_powersave; returns -1 for an
  // unsupported mode, an affinity failure, or any non-Android platform.
  int set_cpu_powersave(int powersave)
  {
  #if defined(__ANDROID__)
      static std::vector<int> sorted_cpuids;
      static int little_cluster_offset = 0;

      if (sorted_cpuids.empty())
      {
          // 0 ~ g_cpucount
          sorted_cpuids.resize(g_cpucount);
          for (int id = 0; id < g_cpucount; id++)
              sorted_cpuids[id] = id;

          // descent sort by max frequency
          sort_cpuid_by_max_frequency(sorted_cpuids, &little_cluster_offset);
      }

      const bool no_little_cluster = (little_cluster_offset == 0);
      if (no_little_cluster && powersave != 0)
      {
          powersave = 0;
          fprintf(stderr, "SMP cpu powersave not supported\n");
      }

      // prepare affinity cpuid
      std::vector<int> cpuids;
      switch (powersave)
      {
      case 0:
          cpuids = sorted_cpuids;
          break;
      case 1:
          cpuids = std::vector<int>(sorted_cpuids.begin() + little_cluster_offset, sorted_cpuids.end());
          break;
      case 2:
          cpuids = std::vector<int>(sorted_cpuids.begin(), sorted_cpuids.begin() + little_cluster_offset);
          break;
      default:
          fprintf(stderr, "powersave %d not supported\n", powersave);
          return -1;
      }

  #ifdef _OPENMP
      // set affinity for each thread
      int num_threads = cpuids.size();
      omp_set_num_threads(num_threads);

      std::vector<int> results(num_threads, 0);
      #pragma omp parallel for
      for (int t = 0; t < num_threads; t++)
      {
          results[t] = set_sched_affinity(cpuids);
      }

      // any single failure fails the whole request
      for (int t = 0; t < num_threads; t++)
      {
          if (results[t] != 0)
              return -1;
      }
  #else
      if (set_sched_affinity(cpuids) != 0)
          return -1;
  #endif

      g_powersave = powersave;

      return 0;
  #elif __IOS__
      // thread affinity not supported on ios
      return -1;
  #else
      // TODO
      (void) powersave; // Avoid unused parameter warning.
      return -1;
  #endif
  }
  // Return the number of threads OpenMP will use for the next parallel region,
  // or 1 when built without OpenMP.
  // omp_get_max_threads() is queried rather than omp_get_num_threads(): the
  // latter reports the size of the current team and is therefore 1 whenever it
  // is called outside a parallel region, regardless of set_omp_num_threads().
  int get_omp_num_threads()
  {
  #ifdef _OPENMP
      return omp_get_max_threads();
  #else
      return 1;
  #endif
  }
  // Forward `num_threads` to omp_set_num_threads; does nothing without OpenMP.
  void set_omp_num_threads(int num_threads)
  {
  #ifndef _OPENMP
      (void)num_threads;
  #else
      omp_set_num_threads(num_threads);
  #endif
  }
  // Return omp_get_dynamic(), or 0 when built without OpenMP.
  int get_omp_dynamic()
  {
      int dynamic_enabled = 0;
  #ifdef _OPENMP
      dynamic_enabled = omp_get_dynamic();
  #endif
      return dynamic_enabled;
  }
  // Forward `dynamic` to omp_set_dynamic; does nothing without OpenMP.
  void set_omp_dynamic(int dynamic)
  {
  #ifndef _OPENMP
      (void)dynamic;
  #else
      omp_set_dynamic(dynamic);
  #endif
  }
  463. } // namespace ncnn