From 2f8b31c3b455e2d2cf754a09e69c3edeef6a0fb0 Mon Sep 17 00:00:00 2001 From: nihui Date: Wed, 16 Oct 2019 21:50:44 +0800 Subject: [PATCH] unroll outch 2 for conv3x3s1 pack1to4 --- src/layer/arm/convolution_3x3_pack1to4.h | 353 ++++++++++++++++++++++- 1 file changed, 352 insertions(+), 1 deletion(-) diff --git a/src/layer/arm/convolution_3x3_pack1to4.h b/src/layer/arm/convolution_3x3_pack1to4.h index f515cc1d3..977766d87 100644 --- a/src/layer/arm/convolution_3x3_pack1to4.h +++ b/src/layer/arm/convolution_3x3_pack1to4.h @@ -21,8 +21,359 @@ static void conv3x3s1_pack1to4_neon(const Mat& bottom_blob, Mat& top_blob, const const float* bias = _bias; + int nn_outch = 0; + int remain_outch_start = 0; + +#if __ARM_NEON && __aarch64__ + nn_outch = outch >> 1; + remain_outch_start = nn_outch << 1; + #pragma omp parallel for num_threads(opt.num_threads) - for (int p=0; p