| @@ -22,12 +22,52 @@ namespace ncnn { | |||||
| DEFINE_LAYER_CREATOR(HardSigmoid_arm) | DEFINE_LAYER_CREATOR(HardSigmoid_arm) | ||||
// Constructor for the ARM-specific HardSigmoid layer.
// When the translation unit is built with __ARM_NEON defined, it sets the
// support_packing member to true; on non-NEON builds the member is left at
// whatever value it already had.
// NOTE(review): presumably support_packing is the flag the framework reads to
// decide whether this layer may receive packed (elempack > 1) blobs - confirm
// against the base Layer definition, which is not visible here.
| HardSigmoid_arm::HardSigmoid_arm() | |||||
| { | |||||
| #if __ARM_NEON | |||||
| support_packing = true; | |||||
| #endif // __ARM_NEON | |||||
| } | |||||
| int HardSigmoid_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | int HardSigmoid_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | ||||
| { | { | ||||
| int w = bottom_top_blob.w; | int w = bottom_top_blob.w; | ||||
| int h = bottom_top_blob.h; | int h = bottom_top_blob.h; | ||||
| int channels = bottom_top_blob.c; | int channels = bottom_top_blob.c; | ||||
| int size = w * h; | int size = w * h; | ||||
| int elempack = bottom_top_blob.elempack; | |||||
| #if __ARM_NEON | |||||
| if (opt.use_packing_layout) | |||||
| { | |||||
| if (elempack == 4) | |||||
| { | |||||
| #pragma omp parallel for num_threads(opt.num_threads) | |||||
| for (int q=0; q<channels; q++) | |||||
| { | |||||
| float* ptr = bottom_top_blob.channel(q); | |||||
| float32x4_t _zero = vdupq_n_f32(0.f); | |||||
| float32x4_t _one = vdupq_n_f32(1.f); | |||||
| for (int i=0; i<size; i++) | |||||
| { | |||||
| float32x4_t _p = vld1q_f32(ptr); | |||||
| float32x4_t _ans = vdupq_n_f32(beta); | |||||
| _ans = vmlaq_n_f32(_ans, _p, alpha); | |||||
| _ans = vmaxq_f32(_ans, _zero); | |||||
| _ans = vminq_f32(_ans, _one); | |||||
| vst1q_f32(ptr, _ans); | |||||
| ptr += 4; | |||||
| } | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| } // opt.use_packing_layout | |||||
| #endif // __ARM_NEON | |||||
| #pragma omp parallel for num_threads(opt.num_threads) | #pragma omp parallel for num_threads(opt.num_threads) | ||||
| for (int q=0; q<channels; q++) | for (int q=0; q<channels; q++) | ||||
| @@ -22,6 +22,8 @@ namespace ncnn { | |||||
// ARM-specific variant of the HardSigmoid layer; derives from HardSigmoid via
// virtual public inheritance.
// Declares a default constructor and a const, virtual forward_inplace() that
// receives the blob by non-const reference together with an Option.
// NOTE(review): forward_inplace presumably overrides the base-class method of
// the same name - the base declaration is not visible in this hunk.
| class HardSigmoid_arm : virtual public HardSigmoid | class HardSigmoid_arm : virtual public HardSigmoid | ||||
| { | { | ||||
| public: | public: | ||||
| HardSigmoid_arm(); | |||||
| virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | ||||
| }; | }; | ||||
| @@ -22,12 +22,53 @@ namespace ncnn { | |||||
| DEFINE_LAYER_CREATOR(HardSwish_arm) | DEFINE_LAYER_CREATOR(HardSwish_arm) | ||||
// Constructor for the ARM-specific HardSwish layer.
// When the translation unit is built with __ARM_NEON defined, it sets the
// support_packing member to true; on non-NEON builds the member is left at
// whatever value it already had.
// NOTE(review): presumably support_packing is the flag the framework reads to
// decide whether this layer may receive packed (elempack > 1) blobs - confirm
// against the base Layer definition, which is not visible here.
| HardSwish_arm::HardSwish_arm() | |||||
| { | |||||
| #if __ARM_NEON | |||||
| support_packing = true; | |||||
| #endif // __ARM_NEON | |||||
| } | |||||
| int HardSwish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | int HardSwish_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | ||||
| { | { | ||||
| int w = bottom_top_blob.w; | int w = bottom_top_blob.w; | ||||
| int h = bottom_top_blob.h; | int h = bottom_top_blob.h; | ||||
| int channels = bottom_top_blob.c; | int channels = bottom_top_blob.c; | ||||
| int size = w * h; | int size = w * h; | ||||
| int elempack = bottom_top_blob.elempack; | |||||
| #if __ARM_NEON | |||||
| if (opt.use_packing_layout) | |||||
| { | |||||
| if (elempack == 4) | |||||
| { | |||||
| #pragma omp parallel for num_threads(opt.num_threads) | |||||
| for (int q=0; q<channels; q++) | |||||
| { | |||||
| float* ptr = bottom_top_blob.channel(q); | |||||
| float32x4_t _zero = vdupq_n_f32(0.f); | |||||
| float32x4_t _one = vdupq_n_f32(1.f); | |||||
| for (int i=0; i<size; i++) | |||||
| { | |||||
| float32x4_t _p = vld1q_f32(ptr); | |||||
| float32x4_t _ans = vdupq_n_f32(beta); | |||||
| _ans = vmlaq_n_f32(_ans, _p, alpha); | |||||
| _ans = vmaxq_f32(_ans, _zero); | |||||
| _ans = vminq_f32(_ans, _one); | |||||
| _ans = vmulq_f32(_ans, _p); | |||||
| vst1q_f32(ptr, _ans); | |||||
| ptr += 4; | |||||
| } | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| } // opt.use_packing_layout | |||||
| #endif // __ARM_NEON | |||||
| #pragma omp parallel for num_threads(opt.num_threads) | #pragma omp parallel for num_threads(opt.num_threads) | ||||
| for (int q=0; q<channels; q++) | for (int q=0; q<channels; q++) | ||||
| @@ -22,6 +22,8 @@ namespace ncnn { | |||||
// ARM-specific variant of the HardSwish layer; derives from HardSwish via
// virtual public inheritance.
// Declares a default constructor and a const, virtual forward_inplace() that
// receives the blob by non-const reference together with an Option.
// NOTE(review): forward_inplace presumably overrides the base-class method of
// the same name - the base declaration is not visible in this hunk.
| class HardSwish_arm : virtual public HardSwish | class HardSwish_arm : virtual public HardSwish | ||||
| { | { | ||||
| public: | public: | ||||
| HardSwish_arm(); | |||||
| virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | ||||
| }; | }; | ||||