diff --git a/benchmark/benchncnn.cpp b/benchmark/benchncnn.cpp index 1a31015e5..fc6d0ddc1 100644 --- a/benchmark/benchncnn.cpp +++ b/benchmark/benchncnn.cpp @@ -98,8 +98,6 @@ public: static int g_warmup_loop_count = 3; static int g_loop_count = 4; -static ncnn::Option g_default_option; - static ncnn::UnlockedPoolAllocator g_blob_pool_allocator; static ncnn::PoolAllocator g_workspace_pool_allocator; @@ -109,11 +107,11 @@ static ncnn::VkAllocator* g_blob_vkallocator = 0; static ncnn::VkAllocator* g_staging_vkallocator = 0; #endif // NCNN_VULKAN -void benchmark(const char* comment, const ncnn::Mat& in) +void benchmark(const char* comment, const ncnn::Mat& in, const ncnn::Option& opt) { ncnn::BenchNet net; - net.opt = g_default_option; + net.opt = opt; #if NCNN_VULKAN if (net.opt.use_vulkan_compute) @@ -228,24 +226,26 @@ int main(int argc, char** argv) #endif // NCNN_VULKAN // default option - g_default_option.lightmode = true; - g_default_option.num_threads = num_threads; - g_default_option.blob_allocator = &g_blob_pool_allocator; - g_default_option.workspace_allocator = &g_workspace_pool_allocator; + ncnn::Option opt; + opt.lightmode = true; + opt.num_threads = num_threads; + opt.blob_allocator = &g_blob_pool_allocator; + opt.workspace_allocator = &g_workspace_pool_allocator; #if NCNN_VULKAN - g_default_option.blob_vkallocator = g_blob_vkallocator; - g_default_option.workspace_vkallocator = g_blob_vkallocator; - g_default_option.staging_vkallocator = g_staging_vkallocator; + opt.blob_vkallocator = g_blob_vkallocator; + opt.workspace_vkallocator = g_blob_vkallocator; + opt.staging_vkallocator = g_staging_vkallocator; #endif // NCNN_VULKAN - g_default_option.use_winograd_convolution = true; - g_default_option.use_sgemm_convolution = true; - g_default_option.use_int8_inference = true; - g_default_option.use_vulkan_compute = use_vulkan_compute; - g_default_option.use_fp16_packed = true; - g_default_option.use_fp16_storage = true; - g_default_option.use_fp16_arithmetic = true; - g_default_option.use_int8_storage = true; - g_default_option.use_int8_arithmetic = true; + opt.use_winograd_convolution = true; + opt.use_sgemm_convolution = true; + opt.use_int8_inference = true; + opt.use_vulkan_compute = use_vulkan_compute; + opt.use_fp16_packed = true; + opt.use_fp16_storage = true; + opt.use_fp16_arithmetic = true; + opt.use_int8_storage = true; + opt.use_int8_arithmetic = true; + opt.use_packing_layout = true; ncnn::set_cpu_powersave(powersave); @@ -258,84 +258,116 @@ int main(int argc, char** argv) fprintf(stderr, "gpu_device = %d\n", gpu_device); // run - benchmark("squeezenet", ncnn::Mat(227, 227, 3)); + benchmark("squeezenet", ncnn::Mat(227, 227, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3)); + { + opt.use_packing_layout = false; + benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3), opt); + opt.use_packing_layout = true; + } - benchmark("mobilenet", ncnn::Mat(224, 224, 3)); + benchmark("mobilenet", ncnn::Mat(224, 224, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3)); + { + opt.use_packing_layout = false; + benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3), opt); + opt.use_packing_layout = true; + } - benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3)); + benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3), opt); // #if NCNN_VULKAN // if (!use_vulkan_compute) // #endif // NCNN_VULKAN -// benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3)); +// benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3), opt); - benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3)); + benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3), opt); - benchmark("shufflenet", ncnn::Mat(224, 224, 3)); + benchmark("shufflenet", ncnn::Mat(224, 224, 3), opt); - benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3)); + benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3), opt); - benchmark("mnasnet", ncnn::Mat(224, 224, 3)); + benchmark("mnasnet", ncnn::Mat(224, 224, 3), opt); - benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3)); + benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3), opt); - benchmark("googlenet", ncnn::Mat(224, 224, 3)); + benchmark("googlenet", ncnn::Mat(224, 224, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("googlenet_int8", ncnn::Mat(224, 224, 3)); + { + opt.use_packing_layout = false; + benchmark("googlenet_int8", ncnn::Mat(224, 224, 3), opt); + opt.use_packing_layout = true; + } - benchmark("resnet18", ncnn::Mat(224, 224, 3)); + benchmark("resnet18", ncnn::Mat(224, 224, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("resnet18_int8", ncnn::Mat(224, 224, 3)); + { + opt.use_packing_layout = false; + benchmark("resnet18_int8", ncnn::Mat(224, 224, 3), opt); + opt.use_packing_layout = true; + } - benchmark("alexnet", ncnn::Mat(227, 227, 3)); + benchmark("alexnet", ncnn::Mat(227, 227, 3), opt); - benchmark("vgg16", ncnn::Mat(224, 224, 3)); + benchmark("vgg16", ncnn::Mat(224, 224, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("vgg16_int8", ncnn::Mat(224, 224, 3)); + { + opt.use_packing_layout = false; + benchmark("vgg16_int8", ncnn::Mat(224, 224, 3), opt); + opt.use_packing_layout = true; + } - benchmark("resnet50", ncnn::Mat(224, 224, 3)); + benchmark("resnet50", ncnn::Mat(224, 224, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("resnet50_int8", ncnn::Mat(224, 224, 3)); + { + opt.use_packing_layout = false; + benchmark("resnet50_int8", ncnn::Mat(224, 224, 3), opt); + opt.use_packing_layout = true; + } - benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3)); + benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3)); + { + opt.use_packing_layout = false; + benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3), opt); + opt.use_packing_layout = true; + } - benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3)); + benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3), opt); #if NCNN_VULKAN if (!use_vulkan_compute) #endif // NCNN_VULKAN - benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3)); + { + opt.use_packing_layout = false; + benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3), opt); + opt.use_packing_layout = true; + } - benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3)); + benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3), opt); - benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3)); + benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3), opt); #if NCNN_VULKAN delete g_blob_vkallocator; diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp index 686013b8d..855c0f962 100644 --- a/src/layer/arm/convolution_arm.cpp +++ b/src/layer/arm/convolution_arm.cpp @@ -54,6 +54,7 @@ Convolution_arm::Convolution_arm() { #if __ARM_NEON support_packing = true; + use_fp32_packing_inference = false; #endif // __ARM_NEON activation = 0; @@ -102,7 +103,16 @@ int Convolution_arm::create_pipeline(const Option& opt) int num_input = weight_data_size / maxk / num_output; #if __ARM_NEON - if (opt.use_packing_layout) + bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u); + + use_fp32_packing_inference = opt.use_packing_layout && weight_data_is_float32 && !use_int8_inference; + + if (use_int8_inference) + { + support_packing = false; + } + + if (use_fp32_packing_inference) { // pack4 @@ -188,6 +198,8 @@ int Convolution_arm::create_pipeline(const Option& opt) } } } + + return 0; } // pack1to4 @@ -230,6 +242,8 @@ int Convolution_arm::create_pipeline(const Option& opt) } } } + + return 0; } // pack4to1 @@ -281,6 +295,8 @@ int Convolution_arm::create_pipeline(const Option& opt) } } } + + return 0; } } // opt.use_packing_layout @@ -525,7 +541,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option // value = value + bias #if __ARM_NEON - if (opt.use_packing_layout) + if (use_fp32_packing_inference) { int w = bottom_blob.w; diff --git a/src/layer/arm/convolution_arm.h b/src/layer/arm/convolution_arm.h index 49afd35cd..093b0ec02 100644 --- a/src/layer/arm/convolution_arm.h +++ b/src/layer/arm/convolution_arm.h @@ -46,6 +46,8 @@ public: Mat weight_sgemm_data; std::vector weight_3x3_winograd23_int8_data; + bool use_fp32_packing_inference; + // pack4 Mat weight_data_pack4; Mat weight_data_pack1to4; diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp index 159e07018..de25f645a 100644 --- a/src/layer/arm/convolutiondepthwise_arm.cpp +++ b/src/layer/arm/convolutiondepthwise_arm.cpp @@ -40,6 +40,7 @@ ConvolutionDepthWise_arm::ConvolutionDepthWise_arm() { #if __ARM_NEON support_packing = true; + use_fp32_packing_inference = false; #endif // __ARM_NEON activation = 0; @@ -89,7 +90,16 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) int channels = (weight_data_size / group) / maxk / (num_output / group) * group; #if __ARM_NEON - if (opt.use_packing_layout) + bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u); + + use_fp32_packing_inference = opt.use_packing_layout && weight_data_is_float32 && !use_int8_inference; + + if (use_int8_inference) + { + support_packing = false; + } + + if (use_fp32_packing_inference) { // depth-wise @@ -100,6 +110,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) { Mat weight_data_r2 = weight_data.reshape(maxk, group); convert_packing(weight_data_r2, weight_data_pack4, 4); + + return 0; } } @@ -298,7 +310,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con size_t out_elemsize = elemsize / elempack * out_elempack; #if __ARM_NEON - if (opt.use_packing_layout) + if (use_fp32_packing_inference) { const int maxk = kernel_w * kernel_h; diff --git a/src/layer/arm/convolutiondepthwise_arm.h b/src/layer/arm/convolutiondepthwise_arm.h index 3eff74a4f..0707b2112 100644 --- a/src/layer/arm/convolutiondepthwise_arm.h +++ b/src/layer/arm/convolutiondepthwise_arm.h @@ -33,6 +33,8 @@ public: Layer* activation; std::vector group_ops; + bool use_fp32_packing_inference; + // packing Mat weight_data_pack4; }; diff --git a/src/layer/arm/innerproduct_arm.cpp b/src/layer/arm/innerproduct_arm.cpp index 6721103d6..456ffc7e0 100644 --- a/src/layer/arm/innerproduct_arm.cpp +++ b/src/layer/arm/innerproduct_arm.cpp @@ -30,6 +30,7 @@ InnerProduct_arm::InnerProduct_arm() { #if __ARM_NEON support_packing = true; + use_fp32_packing_inference = false; #endif // __ARM_NEON flatten = 0; @@ -38,7 +39,16 @@ InnerProduct_arm::InnerProduct_arm() int InnerProduct_arm::create_pipeline(const Option& opt) { #if __ARM_NEON - if (opt.use_packing_layout) + bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u); + + use_fp32_packing_inference = opt.use_packing_layout && weight_data_is_float32 && !use_int8_inference; + + if (use_int8_inference) + { + support_packing = false; + } + + if (use_fp32_packing_inference) { { @@ -85,7 +95,7 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Optio int size = w * h; #if __ARM_NEON - if (opt.use_packing_layout) + if (use_fp32_packing_inference) { if (elempack == 4) diff --git a/src/layer/arm/innerproduct_arm.h b/src/layer/arm/innerproduct_arm.h index 0233ea506..dbced4604 100644 --- a/src/layer/arm/innerproduct_arm.h +++ b/src/layer/arm/innerproduct_arm.h @@ -30,6 +30,8 @@ public: virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; public: + bool use_fp32_packing_inference; + ncnn::Layer* flatten; }; diff --git a/src/layer/convolution.cpp b/src/layer/convolution.cpp index c96f27923..b0b6d3e72 100644 --- a/src/layer/convolution.cpp +++ b/src/layer/convolution.cpp @@ -77,20 +77,17 @@ int Convolution::load_model(const ModelBin& mb) int Convolution::create_pipeline(const Option& opt) { - use_int8_inference = opt.use_int8_inference; - - if (int8_scale_term == 0) - use_int8_inference = false; - bool weight_data_is_int8 = (weight_data.elemsize == (size_t)1u); bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u); - if (weight_data_is_int8 && !use_int8_inference) + if (weight_data_is_int8 && !opt.use_int8_inference) { fprintf(stderr, "quantized int8 weight loaded but use_int8_inference disabled\n"); return -1; } + use_int8_inference = opt.use_int8_inference && (weight_data_is_int8 || (weight_data_is_float32 && int8_scale_term)); + // runtime quantize the weight data if (weight_data_is_float32 && use_int8_inference) { diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp index 946ff83db..70cd80c8f 100644 --- a/src/layer/convolutiondepthwise.cpp +++ b/src/layer/convolutiondepthwise.cpp @@ -100,20 +100,17 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb) int ConvolutionDepthWise::create_pipeline(const Option& opt) { - use_int8_inference = opt.use_int8_inference; - - if (int8_scale_term == 0) - use_int8_inference = false; - bool weight_data_is_int8 = (weight_data.elemsize == (size_t)1u); bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u); - if (weight_data_is_int8 && !use_int8_inference) + if (weight_data_is_int8 && !opt.use_int8_inference) { fprintf(stderr, "quantized int8 weight loaded but use_int8_inference disabled\n"); return -1; } + use_int8_inference = opt.use_int8_inference && (weight_data_is_int8 || (weight_data_is_float32 && int8_scale_term)); + if (weight_data_is_float32 && use_int8_inference) { // quantize weight to int8 diff --git a/src/layer/innerproduct.cpp b/src/layer/innerproduct.cpp index f4b0e97f5..b27551500 100644 --- a/src/layer/innerproduct.cpp +++ b/src/layer/innerproduct.cpp @@ -64,20 +64,17 @@ int InnerProduct::load_model(const ModelBin& mb) int InnerProduct::create_pipeline(const Option& opt) { - use_int8_inference = opt.use_int8_inference; - - if (int8_scale_term == 0) - use_int8_inference = false; - bool weight_data_is_int8 = (weight_data.elemsize == (size_t)1u); bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u); - if (weight_data_is_int8 && !use_int8_inference) + if (weight_data_is_int8 && !opt.use_int8_inference) { fprintf(stderr, "quantized int8 weight loaded but use_int8_inference disabled\n"); return -1; } + use_int8_inference = opt.use_int8_inference && (weight_data_is_int8 || (weight_data_is_float32 && int8_scale_term)); + // initial the quantize,dequantize op layer if (use_int8_inference) {