From 640a0372d50ee56a6877d458f456fed7940f8580 Mon Sep 17 00:00:00 2001 From: nihuini Date: Wed, 24 Jul 2019 18:40:52 +0800 Subject: [PATCH] pooling pack4 arm neon --- src/layer/arm/pooling_arm.cpp | 281 +++++++++++++++++++++++++++++++++- 1 file changed, 276 insertions(+), 5 deletions(-) diff --git a/src/layer/arm/pooling_arm.cpp b/src/layer/arm/pooling_arm.cpp index 3cd2cd21d..25c716052 100644 --- a/src/layer/arm/pooling_arm.cpp +++ b/src/layer/arm/pooling_arm.cpp @@ -31,6 +31,282 @@ int Pooling_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op // max value in NxN window // avg value in NxN window + int w = bottom_blob.w; + int h = bottom_blob.h; + int channels = bottom_blob.c; + size_t elemsize = bottom_blob.elemsize; + int elempack = bottom_blob.elempack; + + if (opt.use_packing_layout) + { + +// fprintf(stderr, "Pooling input %d x %d pad = %d %d %d %d ksize=%d %d stride=%d %d\n", w, h, pad_left, pad_right, pad_top, pad_bottom, kernel_w, kernel_h, stride_w, stride_h); + + if (elempack == 4) + { + if (global_pooling) + { + top_blob.create(channels, elemsize, elempack, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + int size = w * h; + + if (pooling_type == PoolMethod_MAX) + { + #pragma omp parallel for num_threads(opt.num_threads) + for (int q=0; q 0 || hpad > 0) + { + copy_make_border(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, pad_value, opt.workspace_allocator, opt.num_threads); + if (bottom_blob_bordered.empty()) + return -100; + } + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; + } + + int outw = (w - kernel_w) / stride_w + 1; + int outh = (h - kernel_h) / stride_h + 1; + + top_blob.create(outw, outh, channels, elemsize, elempack, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + const int maxk = kernel_w * kernel_h; + + // kernel offsets + std::vector _space_ofs(maxk); + int* space_ofs = &_space_ofs[0]; + { + int p1 = 0; + int p2 = 0; + int gap = w - kernel_w; + for (int i = 0; i < kernel_h; i++) + { + for (int j = 0; j < kernel_w; j++) + { + space_ofs[p1] = p2; + p1++; + p2++; + } + p2 += gap; + } + } + + if (pooling_type == PoolMethod_MAX) + { + #pragma omp parallel for num_threads(opt.num_threads) + for (int q=0; q