From 3ac6335ba30da9af5ef8232286a59cdbe3bd1bfd Mon Sep 17 00:00:00 2001 From: nihui Date: Sat, 5 Oct 2019 16:11:15 +0800 Subject: [PATCH] hardsigmoid and hardswish pack4 --- src/layer/arm/hardsigmoid_arm.cpp | 40 ++++++++++++++++++++++++++++++ src/layer/arm/hardsigmoid_arm.h | 2 ++ src/layer/arm/hardswish_arm.cpp | 41 +++++++++++++++++++++++++++++++ src/layer/arm/hardswish_arm.h | 2 ++ 4 files changed, 85 insertions(+) diff --git a/src/layer/arm/hardsigmoid_arm.cpp b/src/layer/arm/hardsigmoid_arm.cpp index 54a9e9843..6c1489b22 100644 --- a/src/layer/arm/hardsigmoid_arm.cpp +++ b/src/layer/arm/hardsigmoid_arm.cpp @@ -22,12 +22,52 @@ namespace ncnn { DEFINE_LAYER_CREATOR(HardSigmoid_arm) +HardSigmoid_arm::HardSigmoid_arm() +{ +#if __ARM_NEON + support_packing = true; +#endif // __ARM_NEON +} + int HardSigmoid_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const { int w = bottom_top_blob.w; int h = bottom_top_blob.h; int channels = bottom_top_blob.c; int size = w * h; + int elempack = bottom_top_blob.elempack; + +#if __ARM_NEON + if (opt.use_packing_layout) + { + + if (elempack == 4) + { + #pragma omp parallel for num_threads(opt.num_threads) + for (int q=0; q