You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

benchmark.py 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. # Tencent is pleased to support the open source community by making ncnn available.
  2. #
  3. # Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4. #
  5. # Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. # in compliance with the License. You may obtain a copy of the License at
  7. #
  8. # https://opensource.org/licenses/BSD-3-Clause
  9. #
  10. # Unless required by applicable law or agreed to in writing, software distributed
  11. # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. # CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. # specific language governing permissions and limitations under the License.
  14. import sys
  15. import time
  16. import ncnn
  17. param_root = "../benchmark/"
  18. g_warmup_loop_count = 8
  19. g_loop_count = 4
  20. g_enable_cooling_down = True
  21. g_vkdev = None
  22. g_blob_vkallocator = None
  23. g_staging_vkallocator = None
  24. g_blob_pool_allocator = ncnn.UnlockedPoolAllocator()
  25. g_workspace_pool_allocator = ncnn.PoolAllocator()
  26. def benchmark(comment, _in, opt):
  27. _in.fill(0.01)
  28. g_blob_pool_allocator.clear()
  29. g_workspace_pool_allocator.clear()
  30. if opt.use_vulkan_compute:
  31. g_blob_vkallocator.clear()
  32. g_staging_vkallocator.clear()
  33. net = ncnn.Net()
  34. net.opt = opt
  35. if net.opt.use_vulkan_compute:
  36. net.set_vulkan_device(g_vkdev)
  37. net.load_param(param_root + comment + ".param")
  38. dr = ncnn.DataReaderFromEmpty()
  39. net.load_model(dr)
  40. if g_enable_cooling_down:
  41. time.sleep(10)
  42. # warm up
  43. for i in range(g_warmup_loop_count):
  44. # test with statement
  45. with net.create_extractor() as ex:
  46. ex.input("data", _in)
  47. ex.extract("output")
  48. time_min = sys.float_info.max
  49. time_max = -sys.float_info.max
  50. time_avg = 0.0
  51. for i in range(g_loop_count):
  52. start = time.time()
  53. # test net keep alive until ex freed
  54. ex = net.create_extractor()
  55. ex.input("data", _in)
  56. ex.extract("output")
  57. end = time.time()
  58. timespan = end - start
  59. time_min = timespan if timespan < time_min else time_min
  60. time_max = timespan if timespan > time_max else time_max
  61. time_avg += timespan
  62. time_avg /= g_loop_count
  63. print(
  64. "%20s min = %7.2f max = %7.2f avg = %7.2f"
  65. % (comment, time_min * 1000, time_max * 1000, time_avg * 1000)
  66. )
  67. if __name__ == "__main__":
  68. loop_count = 4
  69. num_threads = ncnn.get_cpu_count()
  70. powersave = 0
  71. gpu_device = -1
  72. cooling_down = 1
  73. argc = len(sys.argv)
  74. if argc >= 2:
  75. loop_count = int(sys.argv[1])
  76. if argc >= 3:
  77. num_threads = int(sys.argv[2])
  78. if argc >= 4:
  79. powersave = int(sys.argv[3])
  80. if argc >= 5:
  81. gpu_device = int(sys.argv[4])
  82. if argc >= 6:
  83. cooling_down = int(sys.argv[5])
  84. use_vulkan_compute = gpu_device != -1
  85. g_enable_cooling_down = cooling_down != 0
  86. g_loop_count = loop_count
  87. g_blob_pool_allocator.set_size_compare_ratio(0.0)
  88. g_workspace_pool_allocator.set_size_compare_ratio(0.5)
  89. if use_vulkan_compute:
  90. g_warmup_loop_count = 10
  91. g_vkdev = ncnn.get_gpu_device(gpu_device)
  92. g_blob_vkallocator = ncnn.VkBlobAllocator(g_vkdev)
  93. g_staging_vkallocator = ncnn.VkStagingAllocator(g_vkdev)
  94. opt = ncnn.Option()
  95. opt.lightmode = True
  96. opt.num_threads = num_threads
  97. opt.blob_allocator = g_blob_pool_allocator
  98. opt.workspace_allocator = g_workspace_pool_allocator
  99. if use_vulkan_compute:
  100. opt.blob_vkallocator = g_blob_vkallocator
  101. opt.workspace_vkallocator = g_blob_vkallocator
  102. opt.staging_vkallocator = g_staging_vkallocator
  103. opt.use_winograd_convolution = True
  104. opt.use_sgemm_convolution = True
  105. opt.use_int8_inference = True
  106. opt.use_vulkan_compute = use_vulkan_compute
  107. opt.use_fp16_packed = True
  108. opt.use_fp16_storage = True
  109. opt.use_fp16_arithmetic = True
  110. opt.use_int8_storage = True
  111. opt.use_int8_arithmetic = True
  112. opt.use_packing_layout = True
  113. opt.use_shader_pack8 = False
  114. opt.use_image_storage = False
  115. ncnn.set_cpu_powersave(powersave)
  116. ncnn.set_omp_dynamic(0)
  117. ncnn.set_omp_num_threads(num_threads)
  118. print("loop_count =", loop_count)
  119. print("num_threads =", num_threads)
  120. print("powersave =", ncnn.get_cpu_powersave())
  121. print("gpu_device =", gpu_device)
  122. print("cooling_down =", g_enable_cooling_down)
  123. benchmark("squeezenet", ncnn.Mat((227, 227, 3)), opt)
  124. benchmark("squeezenet_int8", ncnn.Mat((227, 227, 3)), opt)
  125. benchmark("mobilenet", ncnn.Mat((224, 224, 3)), opt)
  126. benchmark("mobilenet_int8", ncnn.Mat((224, 224, 3)), opt)
  127. benchmark("mobilenet_v2", ncnn.Mat((224, 224, 3)), opt)
  128. # benchmark("mobilenet_v2_int8", ncnn.Mat(w=224, h=224, c=3), opt)
  129. benchmark("mobilenet_v3", ncnn.Mat((224, 224, 3)), opt)
  130. benchmark("shufflenet", ncnn.Mat((224, 224, 3)), opt)
  131. benchmark("shufflenet_v2", ncnn.Mat((224, 224, 3)), opt)
  132. benchmark("mnasnet", ncnn.Mat((224, 224, 3)), opt)
  133. benchmark("proxylessnasnet", ncnn.Mat((224, 224, 3)), opt)
  134. benchmark("efficientnet_b0", ncnn.Mat((224, 224, 3)), opt)
  135. benchmark("regnety_400m", ncnn.Mat((224, 224, 3)), opt)
  136. benchmark("blazeface", ncnn.Mat((128, 128, 3)), opt)
  137. benchmark("googlenet", ncnn.Mat((224, 224, 3)), opt)
  138. benchmark("googlenet_int8", ncnn.Mat((224, 224, 3)), opt)
  139. benchmark("resnet18", ncnn.Mat((224, 224, 3)), opt)
  140. benchmark("resnet18_int8", ncnn.Mat((224, 224, 3)), opt)
  141. benchmark("alexnet", ncnn.Mat((227, 227, 3)), opt)
  142. benchmark("vgg16", ncnn.Mat((224, 224, 3)), opt)
  143. benchmark("vgg16_int8", ncnn.Mat((224, 224, 3)), opt)
  144. benchmark("resnet50", ncnn.Mat((224, 224, 3)), opt)
  145. benchmark("resnet50_int8", ncnn.Mat((224, 224, 3)), opt)
  146. benchmark("squeezenet_ssd", ncnn.Mat((300, 300, 3)), opt)
  147. benchmark("squeezenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
  148. benchmark("mobilenet_ssd", ncnn.Mat((300, 300, 3)), opt)
  149. benchmark("mobilenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
  150. benchmark("mobilenet_yolo", ncnn.Mat((416, 416, 3)), opt)
  151. benchmark("mobilenetv2_yolov3", ncnn.Mat((352, 352, 3)), opt)
  152. benchmark("yolov4-tiny", ncnn.Mat((416, 416, 3)), opt)