From 9376ba71c1251f82994de14cee0a662677037c0b Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 5 Jun 2022 14:17:28 +0800 Subject: [PATCH] less unroll for unaryop arm, fix padding arm warning --- src/layer/arm/padding_arm.cpp | 8 ++++++++ src/layer/arm/unaryop_arm.cpp | 18 ------------------ src/layer/arm/unaryop_arm_asimdhp.cpp | 16 ---------------- 3 files changed, 8 insertions(+), 34 deletions(-) diff --git a/src/layer/arm/padding_arm.cpp b/src/layer/arm/padding_arm.cpp index 9abeec395..3a2463911 100644 --- a/src/layer/arm/padding_arm.cpp +++ b/src/layer/arm/padding_arm.cpp @@ -419,6 +419,8 @@ int Padding_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blob, cons else #endif { + // shall never reach here + pad_value = vdup_n_u16(0); } // *INDENT-ON* // clang-format on @@ -464,6 +466,8 @@ int Padding_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blob, cons else #endif { + // shall never reach here + pad_value = vdup_n_u16(0); } // *INDENT-ON* // clang-format on @@ -516,6 +520,8 @@ int Padding_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blob, cons else #endif { + // shall never reach here + pad_value = vdup_n_u16(0); } // *INDENT-ON* // clang-format on @@ -574,6 +580,8 @@ int Padding_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blob, cons else #endif { + // shall never reach here + pad_value = vdup_n_u16(0); } // *INDENT-ON* // clang-format on diff --git a/src/layer/arm/unaryop_arm.cpp b/src/layer/arm/unaryop_arm.cpp index 7223f1504..b585f0d5f 100644 --- a/src/layer/arm/unaryop_arm.cpp +++ b/src/layer/arm/unaryop_arm.cpp @@ -58,24 +58,6 @@ static int unary_op_inplace(Mat& a, const Option& opt) int i = 0; #if __ARM_NEON -#if __aarch64__ - for (; i + 15 < size; i += 16) - { - float32x4_t _p0 = vld1q_f32(ptr); - float32x4_t _p1 = vld1q_f32(ptr + 4); - float32x4_t _p2 = vld1q_f32(ptr + 8); - float32x4_t _p3 = vld1q_f32(ptr + 12); - _p0 = op.func_pack4(_p0); - _p1 = op.func_pack4(_p1); - _p2 = op.func_pack4(_p2); - _p3 = op.func_pack4(_p3); - vst1q_f32(ptr, _p0); - vst1q_f32(ptr + 4, _p1); - vst1q_f32(ptr + 8, _p2); - vst1q_f32(ptr + 12, _p3); - ptr += 16; - } -#endif // __aarch64__ for (; i + 7 < size; i += 8) { float32x4_t _p0 = vld1q_f32(ptr); diff --git a/src/layer/arm/unaryop_arm_asimdhp.cpp b/src/layer/arm/unaryop_arm_asimdhp.cpp index bbf866ff0..2cd816290 100644 --- a/src/layer/arm/unaryop_arm_asimdhp.cpp +++ b/src/layer/arm/unaryop_arm_asimdhp.cpp @@ -44,22 +44,6 @@ static int unary_op_inplace_fp16s(Mat& a, const Option& opt) __fp16* ptr = a.channel(q); int i = 0; - for (; i + 31 < size; i += 32) - { - float16x8_t _p0 = vld1q_f16(ptr); - float16x8_t _p1 = vld1q_f16(ptr + 8); - float16x8_t _p2 = vld1q_f16(ptr + 16); - float16x8_t _p3 = vld1q_f16(ptr + 24); - _p0 = op.func_pack8(_p0); - _p1 = op.func_pack8(_p1); - _p2 = op.func_pack8(_p2); - _p3 = op.func_pack8(_p3); - vst1q_f16(ptr, _p0); - vst1q_f16(ptr + 8, _p1); - vst1q_f16(ptr + 16, _p2); - vst1q_f16(ptr + 24, _p3); - ptr += 32; - } for (; i + 15 < size; i += 16) { float16x8_t _p0 = vld1q_f16(ptr);