From d2f7fc5a76cfed3e033fc040429f2a2a1a58b9ef Mon Sep 17 00:00:00 2001 From: nihuini Date: Wed, 25 Mar 2020 10:47:58 +0800 Subject: [PATCH] fix dwconv5x5s1 pack4 bf16s on aarch64 --- src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h b/src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h index ebcc23a57..f3c7d4450 100644 --- a/src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h +++ b/src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h @@ -110,6 +110,7 @@ static void convdw5x5s1_pack4_bf16s_neon(const Mat& bottom_blob, Mat& top_blob, "fmla v24.4s, v14.4s, v16.4s \n" "shll v18.4s, v18.4h, #16 \n" "fmla v25.4s, v14.4s, v17.4s \n" + "shll v19.4s, v19.4h, #16 \n" "fmla v26.4s, v14.4s, v18.4s \n" "prfm pldl1keep, [%3, #256] \n" "ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%3] \n"// r14 r15 r16 r17 @@ -119,7 +120,6 @@ static void convdw5x5s1_pack4_bf16s_neon(const Mat& bottom_blob, Mat& top_blob, "fmla v28.4s, v15.4s, v16.4s \n" "fmla v29.4s, v15.4s, v17.4s \n" - "shll v19.4s, v19.4h, #16 \n" "fmla v30.4s, v15.4s, v18.4s \n" "fmla v31.4s, v15.4s, v19.4s \n"