You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

README.md 13 kB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. benchncnn can be used to test neural network inference performance
  2. Only the network definition files (ncnn param) are required.
  3. The large model binary files (ncnn bin) are not loaded but generated randomly for speed testing.
  4. More model networks may be added later.
  5. ---
  6. Build
  7. ```
  8. # assume you have already built the ncnn library successfully
  9. # uncomment the following line in <ncnn-root-dir>/CMakeLists.txt with your favorite editor
  10. # add_subdirectory(benchmark)
  11. $ cd <ncnn-root-dir>/<your-build-dir>
  12. $ make -j4
  13. # you can find benchncnn binary in <ncnn-root-dir>/<your-build-dir>/benchmark
  14. ```
  15. Usage
  16. ```
  17. # copy all param files to the current directory
  18. $ ./benchncnn [loop count] [num threads] [powersave] [gpu device]
  19. ```
  20. Run benchncnn on an Android device
  21. ```
  22. # to run on an Android device, upload to the /data/local/tmp/ folder
  23. $ adb push benchncnn /data/local/tmp/
  24. $ adb push <ncnn-root-dir>/benchmark/*.param /data/local/tmp/
  25. $ adb shell
  26. # executed in android adb shell
  27. $ cd /data/local/tmp/
  28. $ ./benchncnn [loop count] [num threads] [powersave] [gpu device]
  29. ```
  30. Parameter
  31. |param|options|default|
  32. |---|---|---|
  33. |loop count|1~N|4|
  34. |num threads|1~N|max_cpu_count|
  35. |powersave|0=all cores, 1=little cores only, 2=big cores only|0|
  36. |gpu device|-1=cpu-only, 0=gpu0, 1=gpu1 ...|-1|
  37. ---
  38. Typical output (executed in android adb shell)
  39. Qualcomm MSM8996 Snapdragon 820 (Kryo 2.15GHz x 2 + Kryo 1.6GHz x 2)
  40. ```
  41. root@msm8996:/data/local/tmp/ncnn # ./benchncnn 8 4 0
  42. loop_count = 8
  43. num_threads = 4
  44. powersave = 0
  45. squeezenet min = 23.20 max = 24.06 avg = 23.63
  46. mobilenet min = 35.89 max = 36.41 avg = 36.09
  47. mobilenet_v2 min = 27.04 max = 28.62 avg = 27.39
  48. shufflenet min = 15.47 max = 16.45 avg = 16.00
  49. googlenet min = 85.42 max = 86.15 avg = 85.81
  50. resnet18 min = 76.82 max = 79.63 avg = 78.50
  51. alexnet min = 147.66 max = 156.92 avg = 152.95
  52. vgg16 min = 493.50 max = 515.03 avg = 507.34
  53. squeezenet-ssd min = 56.31 max = 59.35 avg = 57.49
  54. mobilenet-ssd min = 68.95 max = 74.24 avg = 71.39
  55. mobilenet-yolo min = 142.52 max = 149.72 avg = 148.23
  56. root@msm8996:/data/local/tmp/ncnn # ./benchncnn 8 1 2
  57. loop_count = 8
  58. num_threads = 1
  59. powersave = 2
  60. squeezenet min = 53.26 max = 53.37 avg = 53.31
  61. mobilenet min = 96.37 max = 97.09 avg = 96.63
  62. mobilenet_v2 min = 63.00 max = 63.25 avg = 63.09
  63. shufflenet min = 28.22 max = 28.88 avg = 28.48
  64. googlenet min = 226.21 max = 228.31 avg = 227.22
  65. resnet18 min = 197.35 max = 198.55 avg = 197.84
  66. alexnet min = 445.32 max = 449.62 avg = 446.65
  67. vgg16 min = 1416.39 max = 1450.95 avg = 1440.63
  68. squeezenet-ssd min = 119.37 max = 119.77 avg = 119.56
  69. mobilenet-ssd min = 183.04 max = 185.12 avg = 183.59
  70. mobilenet-yolo min = 366.91 max = 369.87 avg = 368.40
  71. ```
  72. Qualcomm MSM8994 Snapdragon 810 (Cortex-A57 2.0GHz x 4 + Cortex-A53 1.55GHz x 4)
  73. ```
  74. angler:/data/local/tmp $ ./benchncnn 8 8 0
  75. loop_count = 8
  76. num_threads = 8
  77. powersave = 0
  78. squeezenet min = 35.57 max = 36.56 avg = 36.13
  79. mobilenet min = 44.80 max = 56.80 avg = 47.91
  80. mobilenet_v2 min = 46.80 max = 64.64 avg = 50.34
  81. shufflenet min = 28.24 max = 30.27 avg = 29.36
  82. googlenet min = 118.82 max = 132.80 avg = 123.74
  83. resnet18 min = 119.55 max = 141.99 avg = 126.78
  84. alexnet min = 104.52 max = 125.98 avg = 110.17
  85. vgg16 min = 815.12 max = 930.98 avg = 878.57
  86. squeezenet-ssd min = 111.05 max = 130.23 avg = 119.43
  87. mobilenet-ssd min = 88.88 max = 108.96 avg = 98.38
  88. mobilenet-yolo min = 220.57 max = 263.42 avg = 241.03
  89. ```
  90. Qualcomm MSM8916 Snapdragon 410 (Cortex-A53 1.2GHz x 4)
  91. ```
  92. HM2014812:/data/local/tmp # ./benchncnn 8 4 0
  93. loop_count = 8
  94. num_threads = 4
  95. powersave = 0
  96. squeezenet min = 79.70 max = 85.42 avg = 82.22
  97. mobilenet min = 119.87 max = 125.63 avg = 123.46
  98. mobilenet_v2 min = 125.65 max = 131.16 avg = 128.20
  99. shufflenet min = 60.95 max = 66.03 avg = 63.03
  100. googlenet min = 237.47 max = 256.79 avg = 245.65
  101. resnet18 min = 239.73 max = 250.41 avg = 245.87
  102. alexnet min = 248.66 max = 279.08 avg = 267.41
  103. vgg16 min = 1429.50 max = 1510.46 avg = 1465.25
  104. squeezenet-ssd min = 203.33 max = 213.85 avg = 209.81
  105. mobilenet-ssd min = 215.26 max = 224.23 avg = 219.73
  106. mobilenet-yolo min = 506.41 max = 520.50 avg = 513.30
  107. ```
  108. Raspberry Pi 3 Model B+ Broadcom BCM2837B0, Cortex-A53 (ARMv8) (1.4GHz x 4)
  109. ```
  110. pi@raspberrypi:~ $ ./benchncnn 8 4 0
  111. loop_count = 8
  112. num_threads = 4
  113. powersave = 0
  114. squeezenet min = 108.66 max = 109.24 avg = 108.96
  115. mobilenet min = 151.78 max = 152.92 avg = 152.31
  116. mobilenet_v2 min = 193.14 max = 195.56 avg = 194.50
  117. shufflenet min = 91.41 max = 92.19 avg = 91.75
  118. googlenet min = 302.02 max = 304.08 avg = 303.24
  119. resnet18 min = 411.93 max = 423.14 avg = 416.54
  120. alexnet min = 275.54 max = 276.50 avg = 276.13
  121. vgg16 min = 1845.36 max = 1925.95 avg = 1902.28
  122. squeezenet-ssd min = 313.86 max = 317.35 avg = 315.28
  123. mobilenet-ssd min = 262.91 max = 264.92 avg = 263.85
  124. mobilenet-yolo min = 638.73 max = 641.27 avg = 639.87
  125. ```
  126. Rockchip RK3399 (Cortex-A72 1.8GHz x 2 + Cortex-A53 1.5GHz x 4)
  127. ```
  128. rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 6 0
  129. loop_count = 8
  130. num_threads = 6
  131. powersave = 0
  132. squeezenet min = 47.28 max = 70.41 avg = 53.37
  133. mobilenet min = 68.74 max = 176.25 avg = 82.80
  134. mobilenet_v2 min = 71.72 max = 180.24 avg = 86.19
  135. shufflenet min = 34.90 max = 36.14 avg = 35.54
  136. googlenet min = 158.35 max = 301.30 avg = 191.26
  137. resnet18 min = 190.96 max = 274.38 avg = 214.78
  138. alexnet min = 199.21 max = 334.18 avg = 227.98
  139. vgg16 min = 988.46 max = 1019.90 avg = 1000.14
  140. squeezenet-ssd min = 134.83 max = 223.23 avg = 148.35
  141. mobilenet-ssd min = 121.47 max = 235.44 avg = 149.53
  142. mobilenet-yolo min = 295.01 max = 413.26 avg = 327.84
  143. rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 2 2
  144. loop_count = 8
  145. num_threads = 2
  146. powersave = 2
  147. squeezenet min = 51.64 max = 55.08 avg = 52.36
  148. mobilenet min = 88.23 max = 91.07 avg = 88.89
  149. mobilenet_v2 min = 84.98 max = 86.21 avg = 85.74
  150. shufflenet min = 36.04 max = 38.40 avg = 36.82
  151. googlenet min = 185.42 max = 188.76 avg = 186.77
  152. resnet18 min = 202.72 max = 212.27 avg = 206.91
  153. alexnet min = 203.89 max = 222.28 avg = 215.28
  154. vgg16 min = 901.60 max = 1013.80 avg = 948.13
  155. squeezenet-ssd min = 139.85 max = 147.36 avg = 142.18
  156. mobilenet-ssd min = 156.35 max = 161.21 avg = 157.96
  157. mobilenet-yolo min = 365.75 max = 380.79 avg = 371.31
  158. rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 1 2
  159. loop_count = 8
  160. num_threads = 1
  161. powersave = 2
  162. squeezenet min = 83.73 max = 86.78 avg = 84.94
  163. mobilenet min = 142.90 max = 147.71 avg = 144.64
  164. mobilenet_v2 min = 119.18 max = 132.26 avg = 123.92
  165. shufflenet min = 52.81 max = 55.84 avg = 53.63
  166. googlenet min = 316.69 max = 324.03 avg = 319.34
  167. resnet18 min = 318.96 max = 331.31 avg = 322.68
  168. alexnet min = 340.86 max = 365.09 avg = 348.99
  169. vgg16 min = 1593.88 max = 1611.65 avg = 1602.36
  170. squeezenet-ssd min = 199.00 max = 209.26 avg = 204.65
  171. mobilenet-ssd min = 268.03 max = 275.70 avg = 270.74
  172. mobilenet-yolo min = 589.43 max = 605.75 avg = 595.67
  173. rk3399_firefly_box:/data/local/tmp/ncnn # ./benchncnn 8 1 1
  174. loop_count = 8
  175. num_threads = 1
  176. powersave = 1
  177. squeezenet min = 167.48 max = 173.60 avg = 169.23
  178. mobilenet min = 272.88 max = 278.71 avg = 274.73
  179. mobilenet_v2 min = 235.35 max = 239.87 avg = 237.05
  180. shufflenet min = 111.79 max = 127.11 avg = 114.13
  181. googlenet min = 669.47 max = 673.68 avg = 671.23
  182. resnet18 min = 701.96 max = 714.85 avg = 708.56
  183. alexnet min = 989.36 max = 990.63 avg = 989.96
  184. vgg16 min = 3746.20 max = 3835.75 avg = 3788.90
  185. squeezenet-ssd min = 445.71 max = 455.03 avg = 449.07
  186. mobilenet-ssd min = 511.59 max = 520.00 avg = 514.59
  187. mobilenet-yolo min = 1088.56 max = 1093.53 avg = 1090.39
  188. ```
  189. Rockchip RK3288 (Cortex-A17 1.8GHz x 4)
  190. ```
  191. root@rk3288:/data/local/tmp/ncnn # ./benchncnn 8 4 0
  192. loop_count = 8
  193. num_threads = 4
  194. powersave = 0
  195. squeezenet min = 51.43 max = 74.02 avg = 55.91
  196. mobilenet min = 102.06 max = 125.67 avg = 106.02
  197. mobilenet_v2 min = 80.09 max = 99.23 avg = 85.40
  198. shufflenet min = 34.91 max = 35.75 avg = 35.25
  199. googlenet min = 181.72 max = 252.12 avg = 210.67
  200. resnet18 min = 198.86 max = 240.69 avg = 214.87
  201. alexnet min = 154.68 max = 208.60 avg = 168.75
  202. vgg16 min = 1019.49 max = 1231.92 avg = 1129.09
  203. squeezenet-ssd min = 133.38 max = 241.11 avg = 167.77
  204. mobilenet-ssd min = 156.71 max = 216.70 avg = 175.31
  205. mobilenet-yolo min = 396.78 max = 482.60 avg = 433.34
  206. root@rk3288:/data/local/tmp/ncnn # ./benchncnn 8 1 0
  207. loop_count = 8
  208. num_threads = 1
  209. powersave = 0
  210. squeezenet min = 137.93 max = 140.76 avg = 138.71
  211. mobilenet min = 244.01 max = 248.27 avg = 246.24
  212. mobilenet_v2 min = 177.94 max = 181.57 avg = 179.24
  213. shufflenet min = 77.61 max = 78.30 avg = 77.94
  214. googlenet min = 548.75 max = 559.40 avg = 553.00
  215. resnet18 min = 493.66 max = 510.55 avg = 500.37
  216. alexnet min = 564.20 max = 604.87 avg = 581.30
  217. vgg16 min = 2425.03 max = 2447.25 avg = 2433.38
  218. squeezenet-ssd min = 298.26 max = 304.67 avg = 302.00
  219. mobilenet-ssd min = 465.65 max = 473.33 avg = 469.86
  220. mobilenet-yolo min = 997.95 max = 1012.45 avg = 1002.32
  221. ```
  222. HiSilicon Hi3519V101 (Cortex-A17 1.2GHz x 1)
  223. ```
  224. root@Hi3519:/ncnn-benchmark # taskset 2 ./benchncnn 8 1 0
  225. loop_count = 8
  226. num_threads = 1
  227. powersave = 0
  228. squeezenet min = 272.97 max = 275.84 avg = 274.85
  229. squeezenet-int8 min = 200.87 max = 202.47 avg = 201.74
  230. mobilenet min = 480.90 max = 482.16 avg = 481.64
  231. mobilenet_v2 min = 350.01 max = 352.39 avg = 350.81
  232. shufflenet min = 152.40 max = 153.17 avg = 152.80
  233. googlenet min = 1096.65 max = 1101.35 avg = 1099.21
  234. resnet18 min = 983.92 max = 987.00 avg = 985.25
  235. alexnet min = 1140.30 max = 1141.55 avg = 1140.92
  236. squeezenet-ssd min = 574.62 max = 580.12 avg = 577.23
  237. mobilenet-ssd min = 960.26 max = 969.13 avg = 965.93
  238. mobilenet-yolo min = 1867.78 max = 1880.08 avg = 1873.89
  239. ```
  240. iPhone 5S (Apple A7 1.3GHz x 2)
  241. ```
  242. iPhone:~ root# ./benchncnn 8 2 0
  243. loop_count = 8
  244. num_threads = 2
  245. powersave = 0
  246. squeezenet min = 70.94 max = 72.40 avg = 71.75
  247. mobilenet min = 89.24 max = 92.21 avg = 90.60
  248. mobilenet_v2 min = 71.70 max = 74.43 avg = 73.68
  249. shufflenet min = 35.48 max = 41.40 avg = 38.94
  250. googlenet min = 282.76 max = 295.00 avg = 289.64
  251. resnet18 min = 251.99 max = 260.40 avg = 255.23
  252. alexnet min = 329.07 max = 337.75 avg = 333.24
  253. vgg16 min = 4547.25 max = 4706.56 avg = 4647.60
  254. squeezenet-ssd min = 171.23 max = 180.49 avg = 175.54
  255. mobilenet-ssd min = 174.56 max = 192.69 avg = 179.60
  256. mobilenet-yolo min = 357.90 max = 363.93 avg = 360.97
  257. ```
  258. Freescale i.MX7 Dual (Cortex-A7 1.0GHz x 2)
  259. ```
  260. imx7d_pico:/data/local/tmp # ./benchncnn 8 2 0
  261. loop_count = 8
  262. num_threads = 2
  263. powersave = 0
  264. squeezenet min = 269.26 max = 278.84 avg = 273.10
  265. mobilenet min = 442.79 max = 445.82 avg = 444.46
  266. mobilenet_v2 min = 362.19 max = 364.58 avg = 363.33
  267. shufflenet min = 171.30 max = 190.63 avg = 177.52
  268. googlenet min = 975.95 max = 986.11 avg = 980.51
  269. resnet18 min = 1016.60 max = 1035.50 avg = 1021.75
  270. alexnet min = 1240.54 max = 1254.86 avg = 1247.18
  271. vgg16 min = 0.00 max = 0.00 avg = 0.00 (FAIL due to out of memory)
  272. squeezenet-ssd min = 614.93 max = 623.15 avg = 619.56
  273. mobilenet-ssd min = 842.83 max = 884.64 avg = 855.40
  274. mobilenet-yolo min = 1772.24 max = 1924.37 avg = 1805.75
  275. ```