You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

benchmark.py 5.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. # Copyright 2020 Tencent
  2. # SPDX-License-Identifier: BSD-3-Clause
  3. import sys
  4. import time
  5. import ncnn
  6. param_root = "../../benchmark"
  7. g_warmup_loop_count = 8
  8. g_loop_count = 4
  9. g_enable_cooling_down = True
  10. g_vkdev = None
  11. g_blob_vkallocator = None
  12. g_staging_vkallocator = None
  13. g_blob_pool_allocator = ncnn.UnlockedPoolAllocator()
  14. g_workspace_pool_allocator = ncnn.PoolAllocator()
  15. def benchmark(comment, _in, opt):
  16. _in.fill(0.01)
  17. g_blob_pool_allocator.clear()
  18. g_workspace_pool_allocator.clear()
  19. if opt.use_vulkan_compute:
  20. g_blob_vkallocator.clear()
  21. g_staging_vkallocator.clear()
  22. net = ncnn.Net()
  23. net.opt = opt
  24. if net.opt.use_vulkan_compute:
  25. net.set_vulkan_device(g_vkdev)
  26. net.load_param(param_root + comment + ".param")
  27. dr = ncnn.DataReaderFromEmpty()
  28. net.load_model(dr)
  29. input_names = net.input_names()
  30. output_names = net.output_names()
  31. if g_enable_cooling_down:
  32. time.sleep(10)
  33. # warm up
  34. for i in range(g_warmup_loop_count):
  35. # test with statement
  36. with net.create_extractor() as ex:
  37. ex.input(input_names[0], _in)
  38. ex.extract(output_names[0])
  39. time_min = sys.float_info.max
  40. time_max = -sys.float_info.max
  41. time_avg = 0.0
  42. for i in range(g_loop_count):
  43. start = time.time()
  44. # test net keep alive until ex freed
  45. ex = net.create_extractor()
  46. ex.input(input_names[0], _in)
  47. ex.extract(output_names[0])
  48. end = time.time()
  49. timespan = end - start
  50. time_min = timespan if timespan < time_min else time_min
  51. time_max = timespan if timespan > time_max else time_max
  52. time_avg += timespan
  53. time_avg /= g_loop_count
  54. print(
  55. "%20s min = %7.2f max = %7.2f avg = %7.2f"
  56. % (comment, time_min * 1000, time_max * 1000, time_avg * 1000)
  57. )
  58. if __name__ == "__main__":
  59. loop_count = 4
  60. num_threads = ncnn.get_cpu_count()
  61. powersave = 0
  62. gpu_device = -1
  63. cooling_down = 1
  64. argc = len(sys.argv)
  65. if argc >= 2:
  66. loop_count = int(sys.argv[1])
  67. if argc >= 3:
  68. num_threads = int(sys.argv[2])
  69. if argc >= 4:
  70. powersave = int(sys.argv[3])
  71. if argc >= 5:
  72. gpu_device = int(sys.argv[4])
  73. if argc >= 6:
  74. cooling_down = int(sys.argv[5])
  75. use_vulkan_compute = gpu_device != -1
  76. g_enable_cooling_down = cooling_down != 0
  77. g_loop_count = loop_count
  78. g_blob_pool_allocator.set_size_compare_ratio(0.0)
  79. g_workspace_pool_allocator.set_size_compare_ratio(0.5)
  80. if use_vulkan_compute:
  81. g_warmup_loop_count = 10
  82. g_vkdev = ncnn.get_gpu_device(gpu_device)
  83. g_blob_vkallocator = ncnn.VkBlobAllocator(g_vkdev)
  84. g_staging_vkallocator = ncnn.VkStagingAllocator(g_vkdev)
  85. opt = ncnn.Option()
  86. opt.lightmode = True
  87. opt.num_threads = num_threads
  88. opt.blob_allocator = g_blob_pool_allocator
  89. opt.workspace_allocator = g_workspace_pool_allocator
  90. if use_vulkan_compute:
  91. opt.blob_vkallocator = g_blob_vkallocator
  92. opt.workspace_vkallocator = g_blob_vkallocator
  93. opt.staging_vkallocator = g_staging_vkallocator
  94. opt.use_winograd_convolution = True
  95. opt.use_sgemm_convolution = True
  96. opt.use_int8_inference = True
  97. opt.use_vulkan_compute = use_vulkan_compute
  98. opt.use_fp16_packed = True
  99. opt.use_fp16_storage = True
  100. opt.use_fp16_arithmetic = True
  101. opt.use_int8_storage = True
  102. opt.use_int8_arithmetic = True
  103. opt.use_packing_layout = True
  104. opt.use_shader_pack8 = False
  105. ncnn.set_cpu_powersave(powersave)
  106. ncnn.set_omp_dynamic(0)
  107. ncnn.set_omp_num_threads(num_threads)
  108. print("loop_count =", loop_count)
  109. print("num_threads =", num_threads)
  110. print("powersave =", ncnn.get_cpu_powersave())
  111. print("gpu_device =", gpu_device)
  112. print("cooling_down =", g_enable_cooling_down)
  113. benchmark("squeezenet", ncnn.Mat((227, 227, 3)), opt)
  114. benchmark("squeezenet_int8", ncnn.Mat((227, 227, 3)), opt)
  115. benchmark("mobilenet", ncnn.Mat((224, 224, 3)), opt)
  116. benchmark("mobilenet_int8", ncnn.Mat((224, 224, 3)), opt)
  117. benchmark("mobilenet_v2", ncnn.Mat((224, 224, 3)), opt)
  118. # benchmark("mobilenet_v2_int8", ncnn.Mat(w=224, h=224, c=3), opt)
  119. benchmark("mobilenet_v3", ncnn.Mat((224, 224, 3)), opt)
  120. benchmark("shufflenet", ncnn.Mat((224, 224, 3)), opt)
  121. benchmark("shufflenet_v2", ncnn.Mat((224, 224, 3)), opt)
  122. benchmark("mnasnet", ncnn.Mat((224, 224, 3)), opt)
  123. benchmark("proxylessnasnet", ncnn.Mat((224, 224, 3)), opt)
  124. benchmark("efficientnet_b0", ncnn.Mat((224, 224, 3)), opt)
  125. benchmark("regnety_400m", ncnn.Mat((224, 224, 3)), opt)
  126. benchmark("blazeface", ncnn.Mat((128, 128, 3)), opt)
  127. benchmark("googlenet", ncnn.Mat((224, 224, 3)), opt)
  128. benchmark("googlenet_int8", ncnn.Mat((224, 224, 3)), opt)
  129. benchmark("resnet18", ncnn.Mat((224, 224, 3)), opt)
  130. benchmark("resnet18_int8", ncnn.Mat((224, 224, 3)), opt)
  131. benchmark("alexnet", ncnn.Mat((227, 227, 3)), opt)
  132. benchmark("vgg16", ncnn.Mat((224, 224, 3)), opt)
  133. benchmark("vgg16_int8", ncnn.Mat((224, 224, 3)), opt)
  134. benchmark("resnet50", ncnn.Mat((224, 224, 3)), opt)
  135. benchmark("resnet50_int8", ncnn.Mat((224, 224, 3)), opt)
  136. benchmark("squeezenet_ssd", ncnn.Mat((300, 300, 3)), opt)
  137. benchmark("squeezenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
  138. benchmark("mobilenet_ssd", ncnn.Mat((300, 300, 3)), opt)
  139. benchmark("mobilenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
  140. benchmark("mobilenet_yolo", ncnn.Mat((416, 416, 3)), opt)
  141. benchmark("mobilenetv2_yolov3", ncnn.Mat((352, 352, 3)), opt)
  142. benchmark("yolov4-tiny", ncnn.Mat((416, 416, 3)), opt)