diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 42ab68032..75ac4e56d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -145,6 +145,7 @@ ncnn_add_layer(MultiHeadAttention) ncnn_add_layer(GELU) ncnn_add_layer(Convolution1D) ncnn_add_layer(Pooling1D) +ncnn_add_layer(ConvolutionDepthWise1D) if(NCNN_VULKAN) ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp) diff --git a/src/layer/convolution1d.cpp b/src/layer/convolution1d.cpp index 2c11fe913..49bc015cc 100644 --- a/src/layer/convolution1d.cpp +++ b/src/layer/convolution1d.cpp @@ -16,6 +16,8 @@ #include "layer_type.h" +#include "fused_activation.h" + namespace ncnn { Convolution1D::Convolution1D() @@ -118,40 +120,7 @@ int Convolution1D::forward(const Mat& bottom_blob, Mat& top_blob, const Option& kptr += kernel_w; } - if (activation_type == 1) - { - sum = std::max(sum, 0.f); - } - else if (activation_type == 2) - { - float slope = activation_params[0]; - sum = sum > 0.f ? sum : sum * slope; - } - else if (activation_type == 3) - { - float min = activation_params[0]; - float max = activation_params[1]; - if (sum < min) - sum = min; - if (sum > max) - sum = max; - } - else if (activation_type == 4) - { - sum = static_cast(1.f / (1.f + exp(-sum))); - } - else if (activation_type == 5) - { - const float MISH_THRESHOLD = 20; - float x = sum, y; - if (x > MISH_THRESHOLD) - y = x; - else if (x < -MISH_THRESHOLD) - y = expf(x); - else - y = logf(expf(x) + 1); - sum = static_cast(x * tanh(y)); - } + sum = activation_ss(sum, activation_type, activation_params); outptr[j] = sum; } diff --git a/src/layer/convolutiondepthwise1d.cpp b/src/layer/convolutiondepthwise1d.cpp new file mode 100644 index 000000000..66657fe88 --- /dev/null +++ b/src/layer/convolutiondepthwise1d.cpp @@ -0,0 +1,229 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "convolutiondepthwise1d.h"
+
+#include "layer_type.h"
+
+#include "fused_activation.h"
+
+namespace ncnn {
+
+ConvolutionDepthWise1D::ConvolutionDepthWise1D()
+{
+    one_blob_only = true;
+    support_inplace = false;
+}
+// Read layer hyper-parameters from pd; returns 0, or -100 when group / stride_w are unusable.
+int ConvolutionDepthWise1D::load_param(const ParamDict& pd)
+{
+    num_output = pd.get(0, 0);
+    kernel_w = pd.get(1, 0);
+    dilation_w = pd.get(2, 1);
+    stride_w = pd.get(3, 1);
+    pad_left = pd.get(4, 0);
+    pad_right = pd.get(15, pad_left);
+    pad_value = pd.get(18, 0.f);
+    bias_term = pd.get(5, 0);
+    weight_data_size = pd.get(6, 0);
+    group = pd.get(7, 1);
+    activation_type = pd.get(9, 0);
+    activation_params = pd.get(10, Mat());
+
+    if (group <= 0 || stride_w <= 0 || num_output % group != 0)
+    {
+        // reject non-positive group / stride (both are used as divisors) and groups that do not divide num_output
+        return -100;
+    }
+
+    return 0;
+}
+// Load weight_data (weight_data_size values) and, when bias_term is set, bias_data (num_output values); -100 if a blob is empty.
+int ConvolutionDepthWise1D::load_model(const ModelBin& mb)
+{
+    weight_data = mb.load(weight_data_size, 0);
+    if (weight_data.empty())
+        return -100;
+
+    if (bias_term)
+    {
+        bias_data = mb.load(num_output, 1);
+        if (bias_data.empty())
+            return -100;
+    }
+
+    return 0;
+}
+// No pipeline state is built by this implementation; always returns 0.
+int ConvolutionDepthWise1D::create_pipeline(const Option& /*opt*/)
+{
+    return 0;
+}
+// Convolve bottom_blob along w into top_blob (outw x num_output); returns 0, or -100 on invalid shapes or an empty blob.
+int ConvolutionDepthWise1D::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    // 1D convolution along w; every row of the input blob is treated as one channel
+    // out[p][j] = activation(bias[p] + sum over the group's rows q and taps k of in[q][j * stride_w + k * dilation_w] * weight)
+
+    int w = bottom_blob.w;
+    int h = bottom_blob.h;
+    size_t elemsize = bottom_blob.elemsize;
+
+    if (h % group != 0 || num_output % group != 0 || weight_data_size < kernel_w * (h / group) * num_output)
+    {
+        // reject invalid group, or a weight blob too small for the indexing done below
+        return -100;
+    }
+
+    // NCNN_LOGE("ConvolutionDepthWise1D input %d x %d pad = %d ksize=%d stride=%d", w, h, pad_left, kernel_w, stride_w);
+
+    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
+
+    Mat bottom_blob_bordered;
+    make_padding(bottom_blob, bottom_blob_bordered, opt);
+    if (bottom_blob_bordered.empty())
+        return -100;
+
+    w = bottom_blob_bordered.w;
+    h = bottom_blob_bordered.h;
+
+    int outw = (w - kernel_extent_w) / stride_w + 1;
+
+    // output reuses the input element size; the loops below access the data as float
+    top_blob.create(outw, num_output, elemsize, opt.blob_allocator);
+    if (top_blob.empty())
+        return -100;
+
+    // depth-wise: one input row, one kernel and one output row per group
+    if (h == group && group == num_output)
+    {
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int g = 0; g < group; g++)
+        {
+            float* outptr = top_blob.row(g);
+            const float* kptr = (const float*)weight_data + kernel_w * g;
+
+            for (int j = 0; j < outw; j++)
+            {
+                float sum = 0.f;
+
+                if (bias_term)
+                    sum = bias_data[g];
+
+                const float* sptr = bottom_blob_bordered.row(g) + j * stride_w;
+
+                for (int k = 0; k < kernel_w; k++)
+                {
+                    float val = *sptr;
+                    float wt = kptr[k]; // named wt so it does not shadow the blob width w
+                    sum += val * wt;
+
+                    sptr += dilation_w;
+                }
+
+                outptr[j] = activation_ss(sum, activation_type, activation_params);
+            }
+        }
+    }
+    else
+    {
+        // group convolution: each group maps h_g input rows to num_output_g output rows
+        const int h_g = h / group;
+        const int num_output_g = num_output / group;
+
+#ifdef _WIN32
+        #pragma omp parallel for num_threads(opt.num_threads)
+#else // _WIN32
+        #pragma omp parallel for collapse(2) num_threads(opt.num_threads)
+#endif // _WIN32
+        for (int g = 0; g < group; g++)
+        {
+            for (int p = 0; p < num_output_g; p++)
+            {
+                float* outptr = top_blob.row(g * num_output_g + p);
+                const float* weight_data_ptr = (const float*)weight_data + kernel_w * h_g * num_output_g * g;
+
+                for (int j = 0; j < outw; j++)
+                {
+                    float sum = 0.f;
+
+                    if (bias_term)
+                        sum = bias_data[num_output_g * g + p];
+
+                    const float* kptr = weight_data_ptr + kernel_w * h_g * p;
+
+                    // accumulate over the h_g input rows of this group
+                    for (int q = 0; q < h_g; q++)
+                    {
+                        const float* sptr = bottom_blob_bordered.row(h_g * g + q) + j * stride_w;
+
+                        for (int k = 0; k < kernel_w; k++)
+                        {
+                            float val = *sptr;
+                            float wt = kptr[k]; // named wt so it does not shadow the blob width w
+                            sum += val * wt;
+
+                            sptr += dilation_w;
+                        }
+
+                        kptr += kernel_w;
+                    }
+
+                    outptr[j] = activation_ss(sum, activation_type, activation_params);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+// Produce bottom_blob_bordered: bottom_blob padded along w per pad_left/pad_right, or bottom_blob itself when no padding applies.
+void ConvolutionDepthWise1D::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const
+{
+    int w = bottom_blob.w;
+
+    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
+
+    bottom_blob_bordered = bottom_blob; // default result when none of the branches below pads
+    if (pad_left > 0 || pad_right > 0)
+    {
+        Option opt_b = opt;
+        opt_b.blob_allocator = opt.workspace_allocator; // padded copy is allocated from the workspace allocator
+        copy_make_border(bottom_blob, bottom_blob_bordered, 0, 0, pad_left, pad_right, BORDER_CONSTANT, pad_value, opt_b);
+    }
+    else if (pad_left == -233 && pad_right == -233)
+    {
+        // tensorflow padding=SAME or onnx padding=SAME_UPPER
+        int wpad = kernel_extent_w + (w - 1) / stride_w * stride_w - w;
+        if (wpad > 0)
+        {
+            Option opt_b = opt;
+            opt_b.blob_allocator = opt.workspace_allocator;
+            copy_make_border(bottom_blob, bottom_blob_bordered, 0, 0, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, pad_value, opt_b);
+        }
+    }
+    else if (pad_left == -234 && pad_right == -234)
+    {
+        // onnx padding=SAME_LOWER
+        int wpad = kernel_extent_w + (w - 1) / stride_w * stride_w - w;
+        if (wpad > 0)
+        {
+            Option opt_b = opt;
+            opt_b.blob_allocator = opt.workspace_allocator;
+            copy_make_border(bottom_blob, bottom_blob_bordered, 0, 0, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, pad_value, opt_b);
+        }
+    }
+}
+
+} // namespace ncnn
diff --git a/src/layer/convolutiondepthwise1d.h b/src/layer/convolutiondepthwise1d.h
new file mode 100644
index 000000000..3af0f951c
--- /dev/null
+++ b/src/layer/convolutiondepthwise1d.h
@@ -0,0 +1,63 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef LAYER_CONVOLUTIONDEPTHWISE1D_H
+#define LAYER_CONVOLUTIONDEPTHWISE1D_H
+
+#include "layer.h"
+
+namespace ncnn {
+// Depth-wise / grouped convolution layer that slides a kernel_w-wide kernel along the w axis of a 2-D blob.
+class ConvolutionDepthWise1D : public Layer
+{
+public:
+    ConvolutionDepthWise1D();
+
+    virtual int load_param(const ParamDict& pd);
+
+    virtual int load_model(const ModelBin& mb);
+
+    virtual int create_pipeline(const Option& opt);
+
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+
+protected:
+    void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const;
+
+public:
+    // param (the ids below are the ParamDict keys read in load_param)
+    int num_output; // id 0: number of output rows
+    int kernel_w;   // id 1
+    int dilation_w; // id 2, defaults to 1
+    int stride_w;   // id 3, defaults to 1
+    int pad_left;   // id 4: -233=SAME_UPPER -234=SAME_LOWER
+    int pad_right;  // id 15, defaults to pad_left
+    float pad_value; // id 18: constant written into the padded border
+    int bias_term;   // id 5: non-zero when bias_data is loaded and added
+
+    int weight_data_size; // id 6: number of values loaded into weight_data
+    int group;            // id 7, defaults to 1
+
+    // 0=none 1=relu 2=leakyrelu 3=clip 4=sigmoid 5=mish; the unit test also feeds 6 (TODO confirm its meaning in fused_activation.h)
+    int activation_type;   // id 9
+    Mat activation_params; // id 10
+
+    // model
+    Mat weight_data;
+    Mat bias_data;
+};
+
+} // namespace ncnn
+
+#endif // LAYER_CONVOLUTIONDEPTHWISE1D_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d228621a0..253e4c967 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -64,6 +64,7 @@ ncnn_add_layer_test(Concat)
 ncnn_add_layer_test(Convolution)
 ncnn_add_layer_test(Convolution1D)
 ncnn_add_layer_test(ConvolutionDepthWise)
+ncnn_add_layer_test(ConvolutionDepthWise1D)
 ncnn_add_layer_test(Crop)
 ncnn_add_layer_test(Deconvolution)
ncnn_add_layer_test(DeconvolutionDepthWise)
diff --git a/tests/test_convolutiondepthwise1d.cpp b/tests/test_convolutiondepthwise1d.cpp
new file mode 100644
index 000000000..57236e7d3
--- /dev/null
+++ b/tests/test_convolutiondepthwise1d.cpp
@@ -0,0 +1,140 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "layer/convolutiondepthwise1d.h"
+#include "testutil.h"
+
+// Build a ConvolutionDepthWise1D layer for a w x h input with the given hyper-parameters and a
+// randomly chosen fused activation, run it through test_layer, and return its result (0 = pass).
+static int test_convolutiondepthwise1d(int w, int h, int outh, int kernel, int dilation, int stride, int pad, int bias, int group)
+{
+    ncnn::Mat a = RandomMat(w, h);
+
+    // a 1D kernel has a single spatial dimension, so each (output row, input row) pair owns `kernel` weights
+    const int weight_data_size = outh / group * h / group * kernel * group;
+
+    ncnn::ParamDict pd;
+    pd.set(0, outh);     // num_output
+    pd.set(1, kernel);   // kernel_w
+    pd.set(2, dilation); // dilation_w
+    pd.set(3, stride);   // stride_w
+    pd.set(4, pad);      // pad_w
+    pd.set(5, bias);     // bias_term
+    pd.set(6, weight_data_size);
+    pd.set(7, group);
+
+    int activation_type = RAND() % 7; // 0 1 2 3 4 5 6
+    ncnn::Mat activation_params(2);
+    activation_params[0] = (activation_type == 6) ? RandomFloat(0, 1) : RandomFloat(-1, 0); // alpha
+    activation_params[1] = RandomFloat(0, 1);                                               // beta
+    pd.set(9, activation_type);
+    pd.set(10, activation_params);
+
+    std::vector<ncnn::Mat> weights(2);
+    weights[0] = RandomMat(weight_data_size);
+    weights[1] = RandomMat(outh);
+
+    int ret = test_layer<ncnn::ConvolutionDepthWise1D>("ConvolutionDepthWise1D", pd, weights, a); // NOTE(review): template argument assumed from the included layer header — confirm against testutil.h
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_convolutiondepthwise1d failed w=%d h=%d outh=%d kernel=%d dilation=%d stride=%d pad=%d bias=%d group=%d act=%d actparams=[%f,%f]\n", w, h, outh, kernel, dilation, stride, pad, bias, group, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+// Sweep 16 kernel/dilation/stride/pad combinations over three input widths and thirteen
+// channel/group layouts; returns -1 as soon as any case fails, 0 otherwise.
+static int test_convolutiondepthwise1d_0()
+{
+    static const int kdsp[16][4] = { // {kernel, dilation, stride, pad}; pad -233 / -234 select the SAME_UPPER / SAME_LOWER modes
+        {1, 1, 1, 0},
+        {1, 1, 2, 0},
+        {2, 1, 1, 1},
+        {2, 1, 2, -233},
+        {3, 1, 1, 1},
+        {3, 1, 2, 1},
+        {3, 2, 1, 1},
+        {4, 1, 1, 2},
+        {4, 1, 2, -233},
+        {4, 2, 1, -234},
+        {5, 1, 1, -234},
+        {5, 1, 2, 2},
+        {5, 2, 2, 2},
+        {7, 1, 1, 3},
+        {7, 1, 2, 3},
+        {7, 2, 1, -233},
+    };
+
+    for (int i = 0; i < 16; i++)
+    {
+        const int k = kdsp[i][0];
+        const int d = kdsp[i][1];
+        const int s = kdsp[i][2];
+        const int p = kdsp[i][3];
+
+        int ret = 0
+                  || test_convolutiondepthwise1d(15, 1, 1, k, d, s, p, 1, 1)
+                  || test_convolutiondepthwise1d(15, 2, 2, k, d, s, p, 0, 1)
+                  || test_convolutiondepthwise1d(15, 2, 2, k, d, s, p, 1, 2)
+                  || test_convolutiondepthwise1d(15, 3, 3, k, d, s, p, 0, 3)
+                  || test_convolutiondepthwise1d(15, 4, 2, k, d, s, p, 1, 2)
+                  || test_convolutiondepthwise1d(15, 4, 4, k, d, s, p, 0, 4)
+                  || test_convolutiondepthwise1d(15, 7, 7, k, d, s, p, 1, 7)
+                  || test_convolutiondepthwise1d(15, 8, 8, k, d, s, p, 0, 2)
+                  || test_convolutiondepthwise1d(15, 8, 8, k, d, s, p, 1, 8)
+                  || test_convolutiondepthwise1d(15, 12, 12, k, d, s, p, 0, 4)
+                  || test_convolutiondepthwise1d(15, 15, 15, k, d, s, p, 1, 15)
+                  || test_convolutiondepthwise1d(15, 16, 8, k, d, s, p, 0, 2)
+                  || test_convolutiondepthwise1d(15, 16, 16, k, d, s, p, 1, 16)
+                  || test_convolutiondepthwise1d(18, 1, 1, k, d, s, p, 1, 1)
+                  || test_convolutiondepthwise1d(18, 2, 2, k, d, s, p, 0, 1)
+                  || test_convolutiondepthwise1d(18, 2, 2, k, d, s, p, 1, 2)
+                  || test_convolutiondepthwise1d(18, 3, 3, k, d, s, p, 0, 3)
+                  || test_convolutiondepthwise1d(18, 4, 2, k, d, s, p, 1, 2)
+                  || test_convolutiondepthwise1d(18, 4, 4, k, d, s, p, 0, 4)
+                  || test_convolutiondepthwise1d(18, 7, 7, k, d, s, p, 1, 7)
+                  || test_convolutiondepthwise1d(18, 8, 8, k, d, s, p, 0, 2)
+                  || test_convolutiondepthwise1d(18, 8, 8, k, d, s, p, 1, 8)
+                  || test_convolutiondepthwise1d(18, 12, 12, k, d, s, p, 0, 4)
+                  || test_convolutiondepthwise1d(18, 15, 15, k, d, s, p, 1, 15)
+                  || test_convolutiondepthwise1d(18, 16, 8, k, d, s, p, 0, 2)
+                  || test_convolutiondepthwise1d(18, 16, 16, k, d, s, p, 1, 16)
+                  || test_convolutiondepthwise1d(25, 1, 1, k, d, s, p, 1, 1)
+                  || test_convolutiondepthwise1d(25, 2, 2, k, d, s, p, 0, 1)
+                  || test_convolutiondepthwise1d(25, 2, 2, k, d, s, p, 1, 2)
+                  || test_convolutiondepthwise1d(25, 3, 3, k, d, s, p, 0, 3)
+                  || test_convolutiondepthwise1d(25, 4, 2, k, d, s, p, 1, 2)
+                  || test_convolutiondepthwise1d(25, 4, 4, k, d, s, p, 0, 4)
+                  || test_convolutiondepthwise1d(25, 7, 7, k, d, s, p, 1, 7)
+                  || test_convolutiondepthwise1d(25, 8, 8, k, d, s, p, 0, 2)
+                  || test_convolutiondepthwise1d(25, 8, 8, k, d, s, p, 1, 8)
+                  || test_convolutiondepthwise1d(25, 12, 12, k, d, s, p, 0, 4)
+                  || test_convolutiondepthwise1d(25, 15, 15, k, d, s, p, 1, 15)
+                  || test_convolutiondepthwise1d(25, 16, 8, k, d, s, p, 0, 2)
+                  || test_convolutiondepthwise1d(25, 16, 16, k, d, s, p, 1, 16);
+
+        if (ret != 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return test_convolutiondepthwise1d_0();
+}