From 9f51c2132267edcdeefd0b1a4e67424b7b2b8096 Mon Sep 17 00:00:00 2001 From: nihuini Date: Fri, 18 Aug 2017 10:11:08 +0800 Subject: [PATCH] implement depth-wise convolution --- src/CMakeLists.txt | 1 + src/layer.h | 1 + src/layer/convolutiondepthwise.cpp | 196 +++++++++++++++++++++++++++++ src/layer/convolutiondepthwise.h | 45 +++++++ tools/caffe2ncnn.cpp | 19 ++- 5 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 src/layer/convolutiondepthwise.cpp create mode 100644 src/layer/convolutiondepthwise.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ad0eb8504..f1a55be47 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -121,6 +121,7 @@ ncnn_add_layer(RNN OFF) ncnn_add_layer(LSTM OFF) ncnn_add_layer(BinaryOp) ncnn_add_layer(UnaryOp) +ncnn_add_layer(ConvolutionDepthWise) add_library(ncnn STATIC ${ncnn_SRCS}) diff --git a/src/layer.h b/src/layer.h index ca3e1298d..d4b2def43 100644 --- a/src/layer.h +++ b/src/layer.h @@ -132,6 +132,7 @@ enum LSTM = 39, BinaryOp = 40, UnaryOp = 41, + ConvolutionDepthWise = 42, CustomBit = (1<<8), }; diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp new file mode 100644 index 000000000..fb2364832 --- /dev/null +++ b/src/layer/convolutiondepthwise.cpp @@ -0,0 +1,196 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "convolutiondepthwise.h" + +namespace ncnn { + +DEFINE_LAYER_CREATOR(ConvolutionDepthWise) + +ConvolutionDepthWise::ConvolutionDepthWise() +{ + one_blob_only = true; + support_inplace = false; +} + +ConvolutionDepthWise::~ConvolutionDepthWise() +{ +} + +#if NCNN_STDIO +#if NCNN_STRING +int ConvolutionDepthWise::load_param(FILE* paramfp) +{ + int clpr = Convolution::load_param(paramfp); + if (clpr != 0) + return clpr; + + int nscan = fscanf(paramfp, "%d", &group); + if (nscan != 1) + { + fprintf(stderr, "ConvolutionDepthWise load_param failed %d\n", nscan); + return -1; + } + + return 0; +} +#endif // NCNN_STRING +int ConvolutionDepthWise::load_param_bin(FILE* paramfp) +{ + int clpbr = Convolution::load_param_bin(paramfp); + if (clpbr != 0) + return clpbr; + + fread(&group, sizeof(int), 1, paramfp); + + return 0; +} + +#endif // NCNN_STDIO + +int ConvolutionDepthWise::load_param(const unsigned char*& mem) +{ + Convolution::load_param(mem); + + group = *(int*)(mem); + mem += 4; + + return 0; +} + +int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const +{ + if (group == 1) + { + return Convolution::forward(bottom_blob, top_blob); + } + + // convolv with NxN kernel + // value = value + bias + + int w = bottom_blob.w; + int h = bottom_blob.h; + int channels = bottom_blob.c; + + if (channels % group != 0 || num_output % group != 0) + { + // reject invalid group + return -100; + } + +// fprintf(stderr, "ConvolutionDepthWise input %d x %d pad = %d ksize=%d stride=%d\n", w, h, pad, kernel_size, stride); + + const int kernel_extent = dilation * (kernel_size - 1) + 1; + + Mat bottom_blob_bordered = bottom_blob; + if (pad > 0) + { + copy_make_border(bottom_blob, bottom_blob_bordered, pad, pad, pad, pad, BORDER_CONSTANT, 0.f); + if (bottom_blob_bordered.empty()) + return -100; + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; + } + else if (pad == -233) + { + int wpad = kernel_extent + (w - 1) / stride * stride - w; + int hpad = kernel_extent + (h - 1) / stride * stride - h; + + copy_make_border(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f); + if (bottom_blob_bordered.empty()) + return -100; + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; + } + + int outw = (w - kernel_extent) / stride + 1; + int outh = (h - kernel_extent) / stride + 1; + + top_blob.create(outw, outh, num_output); + if (top_blob.empty()) + return -100; + + const int maxk = kernel_size * kernel_size; + + // kernel offsets + std::vector _space_ofs(maxk); + int* space_ofs = &_space_ofs[0]; + { + int p1 = 0; + int p2 = 0; + int gap = w * dilation - kernel_size * dilation; + for (int i = 0; i < kernel_size; i++) + { + for (int j = 0; j < kernel_size; j++) + { + space_ofs[p1] = p2; + p1++; + p2 += dilation; + } + p2 += gap; + } + } + + const int channels_g = channels / group; + const int num_output_g = num_output / group; + + #pragma omp parallel for collapse(2) + for (int g=0; g