diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md index 9a49070a1..9ee694677 100644 --- a/docs/developer-guide/operators.md +++ b/docs/developer-guide/operators.md @@ -15,6 +15,7 @@ * [ConvolutionDepthWise1D](#convolutiondepthwise1d) * [ConvolutionDepthWise3D](#convolutiondepthwise3d) * [Crop](#crop) +* [CumulativeSum](#cumulativesum) * [Deconvolution](#deconvolution) * [Deconvolution1D](#deconvolution1d) * [Deconvolution3D](#deconvolution3d) @@ -449,6 +450,20 @@ y = crop(x) | 10 | ends | array | [ ] | | | 11 | axes | array | [ ] | | +# CumulativeSum + +If axis < 0, we use axis = x.dims + axis + +It implements https://pytorch.org/docs/stable/generated/torch.cumsum.html + +* one_blob_only +* support_inplace + +| param id | name | type | default | description | +| --------- | ------------- | ----- | --------- | ----------------- | +| 0 | axis | int | 0 | | + + # Deconvolution ``` x2 = deconv(x, weight, kernel, stride, dilation) + bias diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 61b495b81..080ca9247 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -160,6 +160,7 @@ ncnn_add_layer(GLU) ncnn_add_layer(Fold) ncnn_add_layer(Unfold) ncnn_add_layer(GridSample) +ncnn_add_layer(CumulativeSum) if(NCNN_VULKAN) ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp) diff --git a/src/layer/cumulativesum.cpp b/src/layer/cumulativesum.cpp new file mode 100644 index 000000000..7cff0f518 --- /dev/null +++ b/src/layer/cumulativesum.cpp @@ -0,0 +1,171 @@ +// Copyright (c) 2023 Xiaomi Corp. (author: Fangjun Kuang) +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this +// file except in compliance with the License. 
You may obtain a copy of the
+// License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include "cumulativesum.h"
+
+namespace ncnn {
+
+CumulativeSum::CumulativeSum() // flags the layer as single-blob and in-place
+{
+    one_blob_only = true;
+    support_inplace = true;
+}
+
+int CumulativeSum::load_param(const ParamDict& pd) // reads param id 0 into axis; always returns 0
+{
+    axis = pd.get(0, 0); // second argument is the default (0), matching the operator docs
+
+    return 0;
+}
+
+int CumulativeSum::forward_inplace(Mat& bottom_top_blob, const Option& opt) const // returns 0, or -100 when no branch below handles the dims/axis pair
+{
+    int dims = bottom_top_blob.dims;
+    int positive_axis = axis < 0 ? dims + axis : axis; // a negative axis counts back from dims
+
+    if (dims == 1)
+    { // 1-D blob: positive_axis is not consulted, every element accumulates its predecessor
+        int w = bottom_top_blob.w;
+
+        float* ptr = bottom_top_blob; // NOTE(review): data is read as plain float - confirm no other element type reaches this layer
+
+        for (int i = 1; i < w; ++i)
+        {
+            ptr[i] = ptr[i] + ptr[i - 1];
+        }
+
+        return 0;
+    } // if (dims == 1)
+
+    if (dims == 2 && positive_axis == 0)
+    {
+        // axis 0 of a 2-D blob: row i accumulates row i - 1 element by element (serial, each row needs the previous one)
+        int w = bottom_top_blob.w;
+        int h = bottom_top_blob.h;
+
+        for (int i = 1; i < h; ++i)
+        {
+            const float* prev_row = bottom_top_blob.row(i - 1);
+            float* this_row = bottom_top_blob.row(i);
+
+            for (int k = 0; k < w; ++k)
+            {
+                this_row[k] = this_row[k] + prev_row[k];
+            }
+        }
+
+        return 0;
+    } // if (dims == 2 && positive_axis == 0)
+
+    if (dims == 2 && positive_axis == 1)
+    {
+        // axis 1 of a 2-D blob: running sum inside each row; rows do not depend on each other, so they are split across threads
+        int w = bottom_top_blob.w;
+        int h = bottom_top_blob.h;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int i = 0; i < h; ++i)
+        {
+            float* ptr = bottom_top_blob.row(i);
+
+            for (int k = 1; k < w; ++k)
+            {
+                ptr[k] = ptr[k] + ptr[k - 1];
+            }
+        }
+
+        return 0;
+    } // if (dims == 2 && positive_axis == 1)
+
+    if (dims == 3 && positive_axis == 0)
+    {
+        // axis 0 of a 3-D blob: channel i accumulates channel i - 1 over its w * h elements
+        int w = bottom_top_blob.w;
+        int h = bottom_top_blob.h;
+        int c = bottom_top_blob.c;
+
+        int size = w * h; // element count of one channel
+
+        for (int i = 1; i < c; ++i)
+        {
+            const float* prev = bottom_top_blob.channel(i - 1);
+            float* cur = bottom_top_blob.channel(i);
+
+            for (int k = 0; k < size; ++k)
+            {
+                cur[k] = cur[k] + prev[k];
+            }
+        }
+
+        return 0;
+    } // if (dims == 3 && positive_axis == 0)
+
+    if (dims == 3 && positive_axis == 1)
+    {
+        // axis 1 of a 3-D blob: within each channel, row i accumulates row i - 1; channels are split across threads
+
+        int w = bottom_top_blob.w;
+        int h = bottom_top_blob.h;
+        int c = bottom_top_blob.c;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int q = 0; q < c; ++q)
+        {
+            Mat this_channel = bottom_top_blob.channel(q);
+
+            for (int i = 1; i < h; ++i)
+            {
+                const float* prev_row = this_channel.row(i - 1);
+                float* this_row = this_channel.row(i);
+
+                for (int k = 0; k < w; ++k)
+                {
+                    this_row[k] = this_row[k] + prev_row[k];
+                }
+            }
+        }
+
+        return 0;
+    } // if (dims == 3 && positive_axis == 1)
+
+    if (dims == 3 && positive_axis == 2)
+    {
+        // axis 2 of a 3-D blob: running sum inside every row of every channel; channels are split across threads
+
+        int w = bottom_top_blob.w;
+        int h = bottom_top_blob.h;
+        int c = bottom_top_blob.c;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int q = 0; q < c; ++q)
+        {
+            Mat this_channel = bottom_top_blob.channel(q);
+
+            for (int i = 0; i < h; ++i)
+            {
+                float* ptr = this_channel.row(i);
+                for (int k = 1; k < w; ++k)
+                {
+                    ptr[k] = ptr[k] + ptr[k - 1];
+                }
+            }
+        }
+
+        return 0;
+    } // if (dims == 3 && positive_axis == 2)
+
+    return -100; // reached for any dims/axis pair not matched above (e.g. dims == 4 or an out-of-range axis)
+}
+
+} // namespace ncnn
diff --git a/src/layer/cumulativesum.h b/src/layer/cumulativesum.h
new file mode 100644
index 000000000..e50f370b7
--- /dev/null
+++ b/src/layer/cumulativesum.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2023 Xiaomi Corp. (author: Fangjun Kuang)
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this
+// file except in compliance with the License.
You may obtain a copy of the
+// License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#ifndef LAYER_CUMULATIVESUM_H
+#define LAYER_CUMULATIVESUM_H
+
+#include "layer.h"
+
+namespace ncnn {
+
+class CumulativeSum : public Layer // layer that replaces a blob with its running sum along one axis
+{
+public:
+    CumulativeSum();
+
+    virtual int load_param(const ParamDict& pd); // loads axis from param id 0
+
+    virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; // overwrites bottom_top_blob with its cumulative sum along axis
+
+public:
+    int axis; // param id 0; forward_inplace offsets a negative value by the blob's dims
+};
+
+} // namespace ncnn
+
+#endif // LAYER_CUMULATIVESUM_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 967fbd72b..cf7c87bab 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -81,6 +81,7 @@ ncnn_add_layer_test(ConvolutionDepthWise)
 ncnn_add_layer_test(ConvolutionDepthWise1D)
 ncnn_add_layer_test(ConvolutionDepthWise3D)
 ncnn_add_layer_test(Crop)
+ncnn_add_layer_test(CumulativeSum)
 ncnn_add_layer_test(Deconvolution)
 ncnn_add_layer_test(Deconvolution1D)
 ncnn_add_layer_test(Deconvolution3D)
diff --git a/tests/test_cumulativesum.cpp b/tests/test_cumulativesum.cpp
new file mode 100644
index 000000000..de38ab44d
--- /dev/null
+++ b/tests/test_cumulativesum.cpp
@@ -0,0 +1,70 @@
+// Copyright (c) 2023 Xiaomi Corp. (author: Fangjun Kuang)
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer/cumulativesum.h" +#include "testutil.h" + +static int test_cumulativesum(const ncnn::Mat& a, int axis) +{ + ncnn::ParamDict pd; + pd.set(0, axis); + + std::vector weights(0); + + int ret = test_layer("CumulativeSum", pd, weights, a); + if (ret != 0) + { + fprintf(stderr, "test_cumulativesum failed a.dims=%d a=(%d %d %d) axis=%d\n", a.dims, a.w, a.h, a.c, axis); + } + + return ret; +} + +static int test_cumulativesum_1d() +{ + return 0 + || test_cumulativesum(RandomMat(6), 0) + || test_cumulativesum(RandomMat(10), 0) + || test_cumulativesum(RandomMat(10), -1) + || test_cumulativesum(RandomMat(10), -2) + || test_cumulativesum(RandomMat(101), 0); +} + +static int test_cumulativesum_2d() +{ + return 0 + || test_cumulativesum(RandomMat(6, 8), 0) + || test_cumulativesum(RandomMat(20, 103), 1) + || test_cumulativesum(RandomMat(106, 50), -1) + || test_cumulativesum(RandomMat(106, 50), -2); +} + +static int test_cumulativesum_3d() +{ + return 0 + || test_cumulativesum(RandomMat(10, 6, 8), 0) + || test_cumulativesum(RandomMat(303, 20, 103), 1) + || test_cumulativesum(RandomMat(106, 50, 99), 2) + || test_cumulativesum(RandomMat(303, 200, 103), -1) + || test_cumulativesum(RandomMat(303, 200, 103), -2) + || test_cumulativesum(RandomMat(303, 200, 103), -2); +} + +int main() +{ + SRAND(7767517); + + return 0 + || test_cumulativesum_1d() + || test_cumulativesum_2d() + || test_cumulativesum_3d(); +} diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt index 7f141bb16..8e0c72ee6 100644 --- a/tools/pnnx/src/CMakeLists.txt +++ b/tools/pnnx/src/CMakeLists.txt @@ -206,6 +206,7 @@ set(pnnx_pass_level2_SRCS pass_level2/torch_clamp.cpp pass_level2/torch_clone.cpp pass_level2/torch_complex.cpp + pass_level2/torch_cumsum.cpp pass_level2/torch_dequantize.cpp pass_level2/torch_einsum.cpp pass_level2/torch_empty.cpp @@ -497,6 +498,7 @@ 
set(pnnx_pass_ncnn_SRCS pass_ncnn/torch_bmm.cpp pass_ncnn/torch_clamp.cpp pass_ncnn/torch_clone.cpp + pass_ncnn/torch_cumsum.cpp pass_ncnn/torch_flatten.cpp pass_ncnn/torch_logsumexp.cpp pass_ncnn/torch_matmul.cpp diff --git a/tools/pnnx/src/pass_level2/torch_cumsum.cpp b/tools/pnnx/src/pass_level2/torch_cumsum.cpp new file mode 100644 index 000000000..b669fa4d8 --- /dev/null +++ b/tools/pnnx/src/pass_level2/torch_cumsum.cpp @@ -0,0 +1,43 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +// 2023 Xiaomi Corp. (author: Fangjun Kuang) +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "pass_level2.h" + +namespace pnnx { + +class torch_cumsum : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +5 5 +pnnx.Input input_0 0 1 input +pnnx.Input input_1 0 1 dim +prim::Constant op_1 0 1 dtype value=* +aten::cumsum op_2 3 1 input dim dtype out +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "torch.cumsum"; + } +}; + +REGISTER_GLOBAL_PNNX_GRAPH_REWRITER_PASS(torch_cumsum, 20) + +} // namespace pnnx diff --git a/tools/pnnx/src/pass_ncnn/torch_cumsum.cpp b/tools/pnnx/src/pass_ncnn/torch_cumsum.cpp new file mode 100644 index 000000000..5906d8c88 --- /dev/null +++ b/tools/pnnx/src/pass_ncnn/torch_cumsum.cpp @@ -0,0 +1,57 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +// 2023 Xiaomi Corp. (author: Fangjun Kuang) +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#include "pass_ncnn.h" + +namespace pnnx { + +namespace ncnn { + +class torch_cumsum : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +3 2 +pnnx.Input input 0 1 input +torch.cumsum op_0 1 1 input out dim=%dim +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "CumulativeSum"; + } + + const char* name_str() const + { + return "cumsum"; + } + + void write(Operator* op, const std::map& captured_params) const + { + const int dim = captured_params.at("dim").i; + + op->params["0"] = dim; + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(torch_cumsum, 20) + +} // namespace ncnn + +} // namespace pnnx diff --git a/tools/pnnx/tests/CMakeLists.txt b/tools/pnnx/tests/CMakeLists.txt index 43fe2974f..9d2177790 100644 --- a/tools/pnnx/tests/CMakeLists.txt +++ b/tools/pnnx/tests/CMakeLists.txt @@ -188,6 +188,7 @@ pnnx_add_test(torch_cat) pnnx_add_test(torch_chunk) pnnx_add_test(torch_clone) pnnx_add_test(torch_complex) +pnnx_add_test(torch_cumsum) pnnx_add_test(torch_einsum) pnnx_add_test(torch_eq) pnnx_add_test(torch_flatten) diff --git a/tools/pnnx/tests/ncnn/CMakeLists.txt b/tools/pnnx/tests/ncnn/CMakeLists.txt index b3aec911c..caeb42c89 100644 --- a/tools/pnnx/tests/ncnn/CMakeLists.txt +++ b/tools/pnnx/tests/ncnn/CMakeLists.txt @@ -140,6 +140,7 @@ pnnx_ncnn_add_test(torch_bmm) pnnx_ncnn_add_test(torch_cat) pnnx_ncnn_add_test(torch_chunk) pnnx_ncnn_add_test(torch_clone) +pnnx_ncnn_add_test(torch_cumsum) pnnx_ncnn_add_test(torch_einsum) pnnx_ncnn_add_test(torch_logsumexp) pnnx_ncnn_add_test(torch_matmul) diff --git a/tools/pnnx/tests/ncnn/test_torch_cumsum.py b/tools/pnnx/tests/ncnn/test_torch_cumsum.py new file mode 100644 index 000000000..27b97b7b1 --- /dev/null +++ b/tools/pnnx/tests/ncnn/test_torch_cumsum.py @@ -0,0 +1,70 @@ +# Tencent is pleased to support the open source community by making ncnn available. 
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+# 2023 Xiaomi Corp. (author: Fangjun Kuang)
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, x, y, z):
+        # x - 3d input, cumsum taken along dims 0, 1 and 2
+        # y - 2d input, cumsum taken along dims 0 and 1
+        # z - 1d input, cumsum taken along dim 0
+        x0 = torch.cumsum(x, dim=0)
+        x1 = torch.cumsum(x, dim=1)
+        x2 = torch.cumsum(x, dim=2)
+
+        y0 = torch.cumsum(y, dim=0)
+        y1 = torch.cumsum(y, dim=1)
+
+        z0 = torch.cumsum(z, dim=0)
+        return x0, x1, x2, y0, y1, z0
+
+def test():
+    net = Model()
+    net.eval()
+
+    torch.manual_seed(0)
+    x = torch.rand(2, 3, 16)
+    y = torch.rand(5, 9)
+    z = torch.rand(3)
+
+    a = net(x, y, z)  # reference outputs from eager PyTorch
+
+    # export torchscript by tracing the model on the sample inputs
+    mod = torch.jit.trace(net, (x, y, z))
+    mod.save("test_torch_cumsum.pt")
+
+    # torchscript to pnnx; inputshape repeats the shapes of x, y and z above
+    import os
+    os.system("../../src/pnnx test_torch_cumsum.pt inputshape=[2,3,16],[5,9],[3]")
+
+    # ncnn inference through test_torch_cumsum_ncnn (presumably generated by the pnnx run above - confirm)
+    import test_torch_cumsum_ncnn
+    b = test_torch_cumsum_ncnn.test_inference()
+
+    for a0, b0 in zip(a, b):
+        if not torch.allclose(a0, b0, 1e-4, 1e-4):  # positional rtol=1e-4, atol=1e-4
+            return False
+    return True
+
+if __name__ == "__main__":
+    if test():
+        exit(0)
+    else:
+        exit(1)
diff --git a/tools/pnnx/tests/test_torch_cumsum.py b/tools/pnnx/tests/test_torch_cumsum.py
new file mode 100644
index 000000000..5ae6e874f
--- /dev/null
+++ b/tools/pnnx/tests/test_torch_cumsum.py
@@ -0,0 +1,70 @@
+# Tencent is pleased to
support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+# 2023 Xiaomi Corp. (author: Fangjun Kuang)
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, x, y, z):
+        # x - 3d input, cumsum taken along dims 0, 1 and 2
+        # y - 2d input, cumsum taken along dims 0 and 1
+        # z - 1d input, cumsum taken along dim 0
+        x0 = torch.cumsum(x, dim=0)
+        x1 = torch.cumsum(x, dim=1)
+        x2 = torch.cumsum(x, dim=2)
+
+        y0 = torch.cumsum(y, dim=0)
+        y1 = torch.cumsum(y, dim=1)
+
+        z0 = torch.cumsum(z, dim=0)
+        return x0, x1, x2, y0, y1, z0
+
+def test():
+    net = Model()
+    net.eval()
+
+    torch.manual_seed(0)
+    x = torch.rand(2, 3, 16)
+    y = torch.rand(5, 9)
+    z = torch.rand(14)
+
+    a = net(x, y, z)  # reference outputs from eager PyTorch
+
+    # export torchscript by tracing the model on the sample inputs
+    mod = torch.jit.trace(net, (x, y, z))
+    mod.save("test_torch_cumsum.pt")
+
+    # torchscript to pnnx; inputshape repeats the shapes of x, y and z above
+    import os
+    os.system("../src/pnnx test_torch_cumsum.pt inputshape=[2,3,16],[5,9],[14]")
+
+    # pnnx inference through test_torch_cumsum_pnnx (presumably generated by the pnnx run above - confirm)
+    import test_torch_cumsum_pnnx
+    b = test_torch_cumsum_pnnx.test_inference()
+
+    for a0, b0 in zip(a, b):
+        if not torch.equal(a0, b0):  # exact match required here, no tolerance
+            return False
+    return True
+
+if __name__ == "__main__":
+    if test():
+        exit(0)
+    else:
+        exit(1)