You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

benchmark.py 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
import sys
import time

import ncnn
  17. param_root = "../benchmark/"
  18. g_warmup_loop_count = 8
  19. g_loop_count = 4
  20. g_enable_cooling_down = True
  21. g_vkdev = None
  22. g_blob_vkallocator = None
  23. g_staging_vkallocator = None
  24. g_blob_pool_allocator = ncnn.UnlockedPoolAllocator()
  25. g_workspace_pool_allocator = ncnn.PoolAllocator()
  26. def benchmark(comment, _in, opt):
  27. _in.fill(0.01)
  28. g_blob_pool_allocator.clear()
  29. g_workspace_pool_allocator.clear()
  30. if opt.use_vulkan_compute:
  31. g_blob_vkallocator.clear()
  32. g_staging_vkallocator.clear()
  33. net = ncnn.Net()
  34. net.opt = opt
  35. if net.opt.use_vulkan_compute:
  36. net.set_vulkan_device(g_vkdev)
  37. net.load_param(param_root + comment + ".param")
  38. dr = ncnn.DataReaderFromEmpty()
  39. net.load_model(dr)
  40. if g_enable_cooling_down:
  41. time.sleep(10)
  42. # warm up
  43. for i in range(g_warmup_loop_count):
  44. ex = net.create_extractor()
  45. ex.input("data", _in)
  46. ex.extract("output")
  47. time_min = sys.float_info.max
  48. time_max = -sys.float_info.max
  49. time_avg = 0.0
  50. for i in range(g_loop_count):
  51. start = time.time()
  52. ex = net.create_extractor()
  53. ex.input("data", _in)
  54. ex.extract("output")
  55. end = time.time()
  56. timespan = end - start
  57. time_min = timespan if timespan < time_min else time_min
  58. time_max = timespan if timespan > time_max else time_max
  59. time_avg += timespan
  60. # extractor need relese manually when build ncnn with vuklan,
  61. # due to python relese ex after net, but in extractor.destruction use net
  62. ex = None
  63. time_avg /= g_loop_count
  64. print(
  65. "%20s min = %7.2f max = %7.2f avg = %7.2f"
  66. % (comment, time_min * 1000, time_max * 1000, time_avg * 1000)
  67. )
  68. if __name__ == "__main__":
  69. loop_count = 4
  70. num_threads = ncnn.get_cpu_count()
  71. powersave = 0
  72. gpu_device = -1
  73. cooling_down = 1
  74. argc = len(sys.argv)
  75. if argc >= 2:
  76. loop_count = int(sys.argv[1])
  77. if argc >= 3:
  78. num_threads = int(sys.argv[2])
  79. if argc >= 4:
  80. powersave = int(sys.argv[3])
  81. if argc >= 5:
  82. gpu_device = int(sys.argv[4])
  83. if argc >= 6:
  84. cooling_down = int(sys.argv[5])
  85. use_vulkan_compute = gpu_device != -1
  86. g_enable_cooling_down = cooling_down != 0
  87. g_loop_count = loop_count
  88. g_blob_pool_allocator.set_size_compare_ratio(0.0)
  89. g_workspace_pool_allocator.set_size_compare_ratio(0.5)
  90. if use_vulkan_compute:
  91. g_warmup_loop_count = 10
  92. g_vkdev = ncnn.get_gpu_device(gpu_device)
  93. g_blob_vkallocator = ncnn.VkBlobAllocator(g_vkdev)
  94. g_staging_vkallocator = ncnn.VkStagingAllocator(g_vkdev)
  95. opt = ncnn.Option()
  96. opt.lightmode = True
  97. opt.num_threads = num_threads
  98. opt.blob_allocator = g_blob_pool_allocator
  99. opt.workspace_allocator = g_workspace_pool_allocator
  100. if use_vulkan_compute:
  101. opt.blob_vkallocator = g_blob_vkallocator
  102. opt.workspace_vkallocator = g_blob_vkallocator
  103. opt.staging_vkallocator = g_staging_vkallocator
  104. opt.use_winograd_convolution = True
  105. opt.use_sgemm_convolution = True
  106. opt.use_int8_inference = True
  107. opt.use_vulkan_compute = use_vulkan_compute
  108. opt.use_fp16_packed = True
  109. opt.use_fp16_storage = True
  110. opt.use_fp16_arithmetic = True
  111. opt.use_int8_storage = True
  112. opt.use_int8_arithmetic = True
  113. opt.use_packing_layout = True
  114. opt.use_shader_pack8 = False
  115. opt.use_image_storage = False
  116. ncnn.set_cpu_powersave(powersave)
  117. ncnn.set_omp_dynamic(0)
  118. ncnn.set_omp_num_threads(num_threads)
  119. print("loop_count =", loop_count)
  120. print("num_threads =", num_threads)
  121. print("powersave =", ncnn.get_cpu_powersave())
  122. print("gpu_device =", gpu_device)
  123. print("cooling_down =", g_enable_cooling_down)
  124. benchmark("squeezenet", ncnn.Mat((227, 227, 3)), opt)
  125. benchmark("squeezenet_int8", ncnn.Mat((227, 227, 3)), opt)
  126. benchmark("mobilenet", ncnn.Mat((224, 224, 3)), opt)
  127. benchmark("mobilenet_int8", ncnn.Mat((224, 224, 3)), opt)
  128. benchmark("mobilenet_v2", ncnn.Mat((224, 224, 3)), opt)
  129. # benchmark("mobilenet_v2_int8", ncnn.Mat(w=224, h=224, c=3), opt)
  130. benchmark("mobilenet_v3", ncnn.Mat((224, 224, 3)), opt)
  131. benchmark("shufflenet", ncnn.Mat((224, 224, 3)), opt)
  132. benchmark("shufflenet_v2", ncnn.Mat((224, 224, 3)), opt)
  133. benchmark("mnasnet", ncnn.Mat((224, 224, 3)), opt)
  134. benchmark("proxylessnasnet", ncnn.Mat((224, 224, 3)), opt)
  135. benchmark("efficientnet_b0", ncnn.Mat((224, 224, 3)), opt)
  136. benchmark("regnety_400m", ncnn.Mat((224, 224, 3)), opt)
  137. benchmark("blazeface", ncnn.Mat((128, 128, 3)), opt)
  138. benchmark("googlenet", ncnn.Mat((224, 224, 3)), opt)
  139. benchmark("googlenet_int8", ncnn.Mat((224, 224, 3)), opt)
  140. benchmark("resnet18", ncnn.Mat((224, 224, 3)), opt)
  141. benchmark("resnet18_int8", ncnn.Mat((224, 224, 3)), opt)
  142. benchmark("alexnet", ncnn.Mat((227, 227, 3)), opt)
  143. benchmark("vgg16", ncnn.Mat((224, 224, 3)), opt)
  144. benchmark("vgg16_int8", ncnn.Mat((224, 224, 3)), opt)
  145. benchmark("resnet50", ncnn.Mat((224, 224, 3)), opt)
  146. benchmark("resnet50_int8", ncnn.Mat((224, 224, 3)), opt)
  147. benchmark("squeezenet_ssd", ncnn.Mat((300, 300, 3)), opt)
  148. benchmark("squeezenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
  149. benchmark("mobilenet_ssd", ncnn.Mat((300, 300, 3)), opt)
  150. benchmark("mobilenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
  151. benchmark("mobilenet_yolo", ncnn.Mat((416, 416, 3)), opt)
  152. benchmark("mobilenetv2_yolov3", ncnn.Mat((352, 352, 3)), opt)
  153. benchmark("yolov4-tiny", ncnn.Mat((416, 416, 3)), opt)