You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

cpu.cpp 9.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  #include "cpu.h"
  #include <stdio.h>
  #include <string.h>
  #include <vector>
  #ifdef _OPENMP
  #include <omp.h>
  #endif
  #ifdef __ANDROID__
  #include <sys/syscall.h>
  #include <unistd.h>
  #endif
  #if __APPLE__
  #include "TargetConditionals.h"
  #if TARGET_OS_IPHONE
  #include <sys/types.h>
  #include <sys/sysctl.h>
  #include <mach/machine.h>
  #define __IOS__ 1
  #endif
  #endif
  33. namespace ncnn {
  34. #ifdef __ANDROID__
  // Extract the ELF hardware-capability bitmap (AT_HWCAP) from /proc/self/auxv.
  //
  // The auxiliary vector is a sequence of (tag, value) records whose fields are
  // native words: 32 bits each in a 32-bit process, 64 bits each in a 64-bit
  // one. unsigned long has the matching width on both Linux ABIs, so the record
  // is read with unsigned long fields; a fixed pair of 32-bit fields would
  // misparse every record on aarch64.
  //
  // Returns the low 32 bits of the AT_HWCAP value, or 0 when the file cannot be
  // opened or holds no AT_HWCAP record.
  static unsigned int get_elf_hwcap_from_proc_self_auxv()
  {
      FILE* fp = fopen("/proc/self/auxv", "rb");
      if (!fp)
      {
          return 0;
      }

      // auxiliary vector tag that carries the hardware capability word
      const unsigned long at_hwcap = 16;

      struct { unsigned long tag; unsigned long value; } entry;

      unsigned int result = 0;
      while (fread(&entry, sizeof(entry), 1, fp) == 1)
      {
          // an all-zero record terminates the vector
          if (entry.tag == 0 && entry.value == 0)
              break;

          if (entry.tag == at_hwcap)
          {
              result = (unsigned int)entry.value;
              break;
          }
      }

      fclose(fp);
      return result;
  }
  63. static unsigned int g_hwcaps = get_elf_hwcap_from_proc_self_auxv();
  64. #if __aarch64__
  65. // from arch/arm64/include/uapi/asm/hwcap.h
  66. #define HWCAP_ASIMD (1 << 1)
  67. #define HWCAP_ASIMDHP (1 << 10)
  68. #else
  69. // from arch/arm/include/uapi/asm/hwcap.h
  70. #define HWCAP_NEON (1 << 12)
  71. #define HWCAP_VFPv4 (1 << 16)
  72. #endif
  73. #endif // __ANDROID__
  74. #if __IOS__
  75. static cpu_type_t get_hw_cputype()
  76. {
  77. cpu_type_t value = 0;
  78. size_t len = sizeof(value);
  79. sysctlbyname("hw.cputype", &value, &len, NULL, 0);
  80. return value;
  81. }
  82. static cpu_subtype_t get_hw_cpusubtype()
  83. {
  84. cpu_subtype_t value = 0;
  85. size_t len = sizeof(value);
  86. sysctlbyname("hw.cpusubtype", &value, &len, NULL, 0);
  87. return value;
  88. }
  89. static cpu_type_t g_hw_cputype = get_hw_cputype();
  90. static cpu_subtype_t g_hw_cpusubtype = get_hw_cpusubtype();
  91. #endif // __IOS__
  // Report whether the running cpu offers NEON.
  // Returns non-zero when it does and 0 otherwise; builds that target neither
  // Android nor iOS always report 0.
  int cpu_support_arm_neon()
  {
  #if defined(__ANDROID__) && __aarch64__
      // 64-bit Android: test the ASIMD hwcap bit
      return g_hwcaps & HWCAP_ASIMD;
  #elif defined(__ANDROID__)
      // 32-bit Android: test the NEON hwcap bit
      return g_hwcaps & HWCAP_NEON;
  #elif __IOS__ && __aarch64__
      return g_hw_cputype == CPU_TYPE_ARM64;
  #elif __IOS__
      // 32-bit iOS: ARM cpu whose subtype value is above CPU_SUBTYPE_ARM_V7
      return g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7;
  #else
      return 0;
  #endif
  }
  // Report whether the running cpu offers VFPv4.
  // Returns non-zero when it does and 0 otherwise; builds that target neither
  // Android nor iOS always report 0.
  int cpu_support_arm_vfpv4()
  {
  #if defined(__ANDROID__) && __aarch64__
      // neon always enable fma and fp16, so the ASIMD bit answers this too
      return g_hwcaps & HWCAP_ASIMD;
  #elif defined(__ANDROID__)
      return g_hwcaps & HWCAP_VFPv4;
  #elif __IOS__ && __aarch64__
      return g_hw_cputype == CPU_TYPE_ARM64;
  #elif __IOS__
      // 32-bit iOS: ARM cpu whose subtype value is above CPU_SUBTYPE_ARM_V7S
      return g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7S;
  #else
      return 0;
  #endif
  }
  // Report whether the running cpu offers the ASIMDHP capability.
  // Only 64-bit Android builds consult the hwcap word; every other
  // configuration (32-bit Android, iOS, anything else) reports 0.
  int cpu_support_arm_asimdhp()
  {
  #if defined(__ANDROID__) && __aarch64__
      return g_hwcaps & HWCAP_ASIMDHP;
  #else
      return 0;
  #endif
  }
  // Count the cpus of the device.
  //   Android: number of lines in /proc/cpuinfo that begin with "processor"
  //   iOS:     value of the "hw.ncpu" sysctl
  //   other:   always 1
  // The result is never smaller than 1, also when the source cannot be read.
  static int get_cpucount()
  {
  #ifdef __ANDROID__
      // get cpu count from /proc/cpuinfo
      FILE* fp = fopen("/proc/cpuinfo", "rb");
      if (!fp)
          return 1;

      int count = 0;
      char line[1024];
      while (fgets(line, sizeof(line), fp))
      {
          // strncmp stops at the terminator, so a line shorter than the
          // keyword is never compared against bytes fgets did not write
          if (strncmp(line, "processor", 9) == 0)
          {
              count++;
          }
      }

      fclose(fp);

      if (count < 1)
          count = 1;

      return count;
  #elif __IOS__
      int count = 0;
      size_t len = sizeof(count);
      sysctlbyname("hw.ncpu", &count, &len, NULL, 0);

      if (count < 1)
          count = 1;

      return count;
  #else
      return 1;
  #endif
  }
  181. static int g_cpucount = get_cpucount();
  182. int get_cpu_count()
  183. {
  184. return g_cpucount;
  185. }
  186. #ifdef __ANDROID__
  // Scan a cpufreq "time_in_state" table (rows of "<frequency> <time>") and
  // return the largest frequency listed, 0 when no row could be parsed, or -1
  // when the file cannot be opened.
  static int read_max_freq_from_time_in_state(const char* path)
  {
      FILE* fp = fopen(path, "rb");
      if (!fp)
          return -1;

      int max_freq_khz = 0;
      int freq_khz = 0;
      // the second column is parsed but discarded
      while (fscanf(fp, "%d %*d", &freq_khz) == 1)
      {
          if (freq_khz > max_freq_khz)
              max_freq_khz = freq_khz;
      }

      fclose(fp);
      return max_freq_khz;
  }

  // Best-effort lookup of the maximum frequency of cpu `cpuid`.
  //
  // Sources are tried in order and the first positive value wins:
  //   1. /sys/devices/system/cpu/cpufreq/stats/cpu<N>/time_in_state
  //   2. /sys/devices/system/cpu/cpu<N>/cpufreq/stats/time_in_state
  //   3. /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq
  // Probing only the first location reports -1 on kernels that expose the
  // per-cpu cpufreq directory instead, hence the fallbacks.
  //
  // Returns the frequency found, 0 when only an empty time_in_state table was
  // readable, or -1 when no source produced anything.
  static int get_max_freq_khz(int cpuid)
  {
      char path[256];
      int fallback = -1;

      snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuid);
      int max_freq_khz = read_max_freq_from_time_in_state(path);
      if (max_freq_khz > 0)
          return max_freq_khz;
      if (max_freq_khz == 0)
          fallback = 0;

      snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuid);
      max_freq_khz = read_max_freq_from_time_in_state(path);
      if (max_freq_khz > 0)
          return max_freq_khz;
      if (max_freq_khz == 0)
          fallback = 0;

      snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuid);
      FILE* fp = fopen(path, "rb");
      if (fp)
      {
          int reported_khz = -1;
          const int nscan = fscanf(fp, "%d", &reported_khz);
          fclose(fp);

          if (nscan == 1 && reported_khz > 0)
              return reported_khz;
      }

      return fallback;
  }
  // Apply a cpu affinity mask built from `cpuids` to the calling thread.
  // Returns 0 on success; when the sched_setaffinity syscall returns non-zero
  // the value is logged to stderr and -1 is returned.
  static int set_sched_affinity(const std::vector<int>& cpuids)
  {
      // hand-rolled 1024-bit cpu mask standing in for cpu_set_t
      // ref http://stackoverflow.com/questions/16319725/android-set-thread-affinity
      const size_t bits_per_word = 8 * sizeof(unsigned long);
      struct
      {
          unsigned long words[1024 / (8 * sizeof(unsigned long))];
      } affinity;
      memset(&affinity, 0, sizeof(affinity));

      // raise one bit per requested cpu
      for (size_t k = 0; k < cpuids.size(); k++)
      {
          const size_t cpu = cpuids[k];
          affinity.words[cpu / bits_per_word] |= (1UL << (cpu % bits_per_word));
      }

      // set affinity for thread
      const pid_t tid = gettid();
      const int rc = syscall(__NR_sched_setaffinity, tid, sizeof(affinity), &affinity);
      if (rc != 0)
      {
          fprintf(stderr, "syscall error %d\n", rc);
          return -1;
      }

      return 0;
  }
  237. static int sort_cpuid_by_max_frequency(std::vector<int>& cpuids, int* little_cluster_offset)
  238. {
  239. const int cpu_count = cpuids.size();
  240. *little_cluster_offset = 0;
  241. if (cpu_count == 0)
  242. return 0;
  243. std::vector<int> cpu_max_freq_khz;
  244. cpu_max_freq_khz.resize(cpu_count);
  245. for (int i=0; i<cpu_count; i++)
  246. {
  247. int max_freq_khz = get_max_freq_khz(i);
  248. // printf("%d max freq = %d khz\n", i, max_freq_khz);
  249. cpuids[i] = i;
  250. cpu_max_freq_khz[i] = max_freq_khz;
  251. }
  252. // sort cpuid as big core first
  253. // simple bubble sort
  254. for (int i=0; i<cpu_count; i++)
  255. {
  256. for (int j=i+1; j<cpu_count; j++)
  257. {
  258. if (cpu_max_freq_khz[i] < cpu_max_freq_khz[j])
  259. {
  260. // swap
  261. int tmp = cpuids[i];
  262. cpuids[i] = cpuids[j];
  263. cpuids[j] = tmp;
  264. tmp = cpu_max_freq_khz[i];
  265. cpu_max_freq_khz[i] = cpu_max_freq_khz[j];
  266. cpu_max_freq_khz[j] = tmp;
  267. }
  268. }
  269. }
  270. // SMP
  271. int mid_max_freq_khz = (cpu_max_freq_khz.front() + cpu_max_freq_khz.back()) / 2;
  272. if (mid_max_freq_khz == cpu_max_freq_khz.back())
  273. return 0;
  274. for (int i=0; i<cpu_count; i++)
  275. {
  276. if (cpu_max_freq_khz[i] < mid_max_freq_khz)
  277. {
  278. *little_cluster_offset = i;
  279. break;
  280. }
  281. }
  282. return 0;
  283. }
  284. #endif // __ANDROID__
  // powersave mode recorded by set_cpu_powersave() on success; starts at 0
  static int g_powersave = 0;

  // Return the recorded powersave mode.
  int get_cpu_powersave()
  {
      return g_powersave;
  }
  // Bind the worker threads to a subset of the cpus.
  //   powersave 0: all cpus
  //   powersave 1: the cpus from the little-cluster offset onwards (slow ones)
  //   powersave 2: the cpus before the little-cluster offset (fast ones)
  // Returns 0 on success and records the mode. Returns -1 when no fast/slow
  // split was detected, when `powersave` is any other value, when applying the
  // affinity fails, or on builds that do not target Android.
  int set_cpu_powersave(int powersave)
  {
  #ifdef __ANDROID__
      // cpu ids ordered fast to slow; computed on the first call and then reused
      static std::vector<int> sorted_cpuids;
      static int little_cluster_offset = 0;

      if (sorted_cpuids.empty())
      {
          // 0 ~ g_cpucount
          sorted_cpuids.resize(g_cpucount);
          for (int cpu = 0; cpu < g_cpucount; cpu++)
              sorted_cpuids[cpu] = cpu;

          // descent sort by max frequency
          sort_cpuid_by_max_frequency(sorted_cpuids, &little_cluster_offset);
      }

      if (little_cluster_offset == 0)
      {
          fprintf(stderr, "SMP cpu powersave not supported\n");
          return -1;
      }

      // prepare affinity cpuid
      std::vector<int> cpuids;
      switch (powersave)
      {
      case 0:
          cpuids = sorted_cpuids;
          break;
      case 1:
          cpuids.assign(sorted_cpuids.begin() + little_cluster_offset, sorted_cpuids.end());
          break;
      case 2:
          cpuids.assign(sorted_cpuids.begin(), sorted_cpuids.begin() + little_cluster_offset);
          break;
      default:
          fprintf(stderr, "powersave %d not supported\n", powersave);
          return -1;
      }

  #ifdef _OPENMP
      // The loop has as many iterations as threads were requested; every
      // iteration applies the mask to whichever thread executes it.
      const int num_threads = cpuids.size();
      omp_set_num_threads(num_threads);

      std::vector<int> ssarets(num_threads, 0);
      #pragma omp parallel for
      for (int t = 0; t < num_threads; t++)
      {
          ssarets[t] = set_sched_affinity(cpuids);
      }

      for (int t = 0; t < num_threads; t++)
      {
          if (ssarets[t] != 0)
              return -1;
      }
  #else
      if (set_sched_affinity(cpuids) != 0)
          return -1;
  #endif

      g_powersave = powersave;
      return 0;
  #else
      // iOS: thread affinity not supported; other platforms: TODO
      (void)powersave;
      return -1;
  #endif
  }
  // Return omp_get_num_threads() when built with OpenMP, 1 otherwise.
  // NOTE(review): omp_get_num_threads() reports the size of the current team,
  // which is 1 outside a parallel region - confirm callers do not expect the
  // value configured through set_omp_num_threads().
  int get_omp_num_threads()
  {
      int team_size = 1;
  #ifdef _OPENMP
      team_size = omp_get_num_threads();
  #endif
      return team_size;
  }
  // Forward `num_threads` to omp_set_num_threads(); a no-op without OpenMP.
  void set_omp_num_threads(int num_threads)
  {
  #ifndef _OPENMP
      // nothing to configure; the cast only silences the unused-parameter warning
      (void)num_threads;
  #else
      omp_set_num_threads(num_threads);
  #endif
  }
  // Return omp_get_dynamic() when built with OpenMP, 0 otherwise.
  int get_omp_dynamic()
  {
      int dynamic = 0;
  #ifdef _OPENMP
      dynamic = omp_get_dynamic();
  #endif
      return dynamic;
  }
  // Forward `dynamic` to omp_set_dynamic(); a no-op without OpenMP.
  void set_omp_dynamic(int dynamic)
  {
  #ifndef _OPENMP
      // nothing to configure; the cast only silences the unused-parameter warning
      (void)dynamic;
  #else
      omp_set_dynamic(dynamic);
  #endif
  }
  396. } // namespace ncnn

No Description

Contributors (1)