From 9280a068fef8f5b76a5b528afb7ea30edaba6f26 Mon Sep 17 00:00:00 2001 From: nihuini Date: Wed, 27 Dec 2017 19:14:34 +0800 Subject: [PATCH] unroll outch for convolution 3x3 winograd64, reduce memory usage --- src/layer/arm/convolution_3x3.h | 524 +++++++++++++++++++++++++++++++- 1 file changed, 523 insertions(+), 1 deletion(-) diff --git a/src/layer/arm/convolution_3x3.h b/src/layer/arm/convolution_3x3.h index afd938e6e..95f5b8fbd 100644 --- a/src/layer/arm/convolution_3x3.h +++ b/src/layer/arm/convolution_3x3.h @@ -683,6 +683,7 @@ static void conv3x3s1_winograd64_neon(const Mat& bottom_blob, Mat& top_blob, con } } + bottom_blob_bordered = Mat(); // END transform input // BEGIN dot @@ -692,8 +693,528 @@ static void conv3x3s1_winograd64_neon(const Mat& bottom_blob, Mat& top_blob, con int h_tm = outh / 6 * 8; top_blob_tm.create(8*8, w_tm/8 * h_tm/8, outch); + int nn_outch = outch >> 2; + int remain_outch_start = nn_outch << 2; + #pragma omp parallel for - for (int p = 0; p