diff --git a/src/layer/arm/convolution_3x3.h b/src/layer/arm/convolution_3x3.h index ac9ea9528..431d65e97 100644 --- a/src/layer/arm/convolution_3x3.h +++ b/src/layer/arm/convolution_3x3.h @@ -10923,7 +10923,14 @@ static void conv3x3s1_winograd64_neon5(const Mat& bottom_blob, Mat& top_blob, co // BEGIN transform output Mat top_blob_bordered; - top_blob_bordered.create(outw, outh, outch, 4u, opt.workspace_allocator); + if (outw == top_blob.w && outh == top_blob.h) + { + top_blob_bordered = top_blob; + } + else + { + top_blob_bordered.create(outw, outh, outch, 4u, opt.workspace_allocator); + } { // const float otm[6][8] = { // {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 32.0f, 32.0f, 0.0f}, @@ -11699,7 +11706,8 @@ static void conv3x3s1_winograd64_neon5(const Mat& bottom_blob, Mat& top_blob, co // END transform output // cut result pad - copy_cut_border(top_blob_bordered, top_blob, 0, top_blob_bordered.h - top_blob.h, 0, top_blob_bordered.w - top_blob.w, opt.blob_allocator, opt.num_threads); + if (top_blob_bordered.w != top_blob.w || top_blob_bordered.h != top_blob.h) + copy_cut_border(top_blob_bordered, top_blob, 0, top_blob_bordered.h - top_blob.h, 0, top_blob_bordered.w - top_blob.w, opt.blob_allocator, opt.num_threads); } static void conv3x3s2_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _kernel, const Mat& _bias, const Option& opt)