Browse Source

fix dwconv5x5s1 pack4 bf16s on aarch64

tags/20200413
nihuini 6 years ago
parent
commit
d2f7fc5a76
1 changed file with 1 addition and 1 deletion
  1. +1
    -1
      src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h

+ 1
- 1
src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h View File

@@ -110,6 +110,7 @@ static void convdw5x5s1_pack4_bf16s_neon(const Mat& bottom_blob, Mat& top_blob,
 "fmla v24.4s, v14.4s, v16.4s \n"
 "shll v18.4s, v18.4h, #16 \n"
 "fmla v25.4s, v14.4s, v17.4s \n"
+"shll v19.4s, v19.4h, #16 \n"
 "fmla v26.4s, v14.4s, v18.4s \n"
 "prfm pldl1keep, [%3, #256] \n"
 "ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%3] \n"// r14 r15 r16 r17
@@ -119,7 +120,6 @@ static void convdw5x5s1_pack4_bf16s_neon(const Mat& bottom_blob, Mat& top_blob,


 "fmla v28.4s, v15.4s, v16.4s \n"
 "fmla v29.4s, v15.4s, v17.4s \n"
-"shll v19.4s, v19.4h, #16 \n"
 "fmla v30.4s, v15.4s, v18.4s \n"
 "fmla v31.4s, v15.4s, v19.4s \n"




Loading…
Cancel
Save