From b4e3615ee4f601dabea323a4ea7f24c8579b0738 Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 21 Aug 2017 16:03:52 +0800 Subject: [PATCH] depth-wise optimize --- src/layer/arm/convolutiondepthwise_arm.cpp | 29 ++++++++++++++++- src/layer/convolutiondepthwise.cpp | 38 ++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp index 95557d377..c36453e03 100644 --- a/src/layer/arm/convolutiondepthwise_arm.cpp +++ b/src/layer/arm/convolutiondepthwise_arm.cpp @@ -14,6 +14,10 @@ #include "convolutiondepthwise_arm.h" +#ifdef _OPENMP +#include <omp.h> +#endif + namespace ncnn { #include "convolution_1x1.h" @@ -126,7 +130,30 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob) con const int maxk = kernel_size * kernel_size; - // TODO optimize for channels == group == num_output + // depth-wise + if (channels == group && group == num_output) + { +#ifdef _OPENMP + int nested_current = omp_get_nested(); + omp_set_nested(0); +#endif + + #pragma omp parallel for + for (int g=0; g