From 296e0022df6021cd404e43f74b8f8744fcb3f35d Mon Sep 17 00:00:00 2001 From: nihuini Date: Wed, 21 Aug 2019 15:00:35 +0800 Subject: [PATCH] deconvolution output adj and output shape --- src/layer/arm/convolution_arm.cpp | 40 +-- src/layer/arm/convolutiondepthwise_arm.cpp | 20 +- src/layer/arm/deconvolution_arm.cpp | 132 ++++++++- src/layer/arm/deconvolutiondepthwise_arm.cpp | 185 ++++++++++-- src/layer/convolution.cpp | 20 +- src/layer/convolutiondepthwise.cpp | 20 +- src/layer/deconvolution.cpp | 70 ++++- src/layer/deconvolution.h | 4 + src/layer/deconvolutiondepthwise.cpp | 70 ++++- src/layer/deconvolutiondepthwise.h | 4 + src/layer/vulkan/convolution_vulkan.cpp | 12 +- .../vulkan/convolutiondepthwise_vulkan.cpp | 12 +- src/layer/vulkan/deconvolution_vulkan.cpp | 166 +++++++++-- src/layer/vulkan/deconvolution_vulkan.h | 2 + .../vulkan/deconvolutiondepthwise_vulkan.cpp | 271 ++++++++++++++++-- .../vulkan/deconvolutiondepthwise_vulkan.h | 2 + tools/mxnet/mxnet2ncnn.cpp | 33 ++- tools/onnx/onnx2ncnn.cpp | 24 +- 18 files changed, 897 insertions(+), 190 deletions(-) diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp index b3af11d7f..1c72603c7 100644 --- a/src/layer/arm/convolution_arm.cpp +++ b/src/layer/arm/convolution_arm.cpp @@ -512,11 +512,6 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) { @@ -527,12 +522,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) { @@ -543,13 +533,13 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } + if (bottom_blob_bordered.empty()) + return -100; + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; int outw = (w - kernel_extent_w) / stride_w + 1; int outh = (h - kernel_extent_h) / stride_h + 1; @@ -1010,11 +1000,6 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) { @@ -1025,12 +1010,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) { @@ -1041,13 +1021,13 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } + if (bottom_blob_bordered.empty()) + return -100; + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; int outw = (w - kernel_size) / stride + 1; int outh = (h - kernel_size) / stride + 1; diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp index 97bd7d79a..22b1b0d02 100644 --- a/src/layer/arm/convolutiondepthwise_arm.cpp +++ b/src/layer/arm/convolutiondepthwise_arm.cpp @@ -440,11 +440,6 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) { @@ -455,12 +450,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) { @@ -471,13 +461,13 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } + if (bottom_blob_bordered.empty()) + return -100; + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; int outw = (w - kernel_extent_w) / stride_w + 1; int outh = (h - kernel_extent_h) / stride_h + 1; diff --git a/src/layer/arm/deconvolution_arm.cpp b/src/layer/arm/deconvolution_arm.cpp index 6f481c6b2..80b19a001 100644 --- a/src/layer/arm/deconvolution_arm.cpp +++ b/src/layer/arm/deconvolution_arm.cpp @@ -299,19 +299,22 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti size_t out_elemsize = elemsize / elempack * out_elempack; Mat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered = top_blob; + top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered = top_blob; top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; const int maxk = kernel_w * kernel_h; @@ -629,9 +632,58 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti } } - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Mat top_blob_unbordered; + Option opt_ub = opt; + opt_ub.blob_allocator = opt.workspace_allocator; + copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); + if (top_blob_unbordered.empty()) + return -100; + + copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); + } + else + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + Mat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); + } if (top_blob.empty()) return -100; @@ -690,25 +742,77 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti int outh = (h - 1) * stride + kernel_size; Mat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered = top_blob; + top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered = top_blob; top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; deconv(bottom_blob, top_blob_bordered, weight_data, bias_data, opt); - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Mat top_blob_unbordered; + Option opt_ub = opt; + opt_ub.blob_allocator = opt.workspace_allocator; + copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); + if (top_blob_unbordered.empty()) + return -100; + + copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); + } + else + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + Mat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); + } if (top_blob.empty()) return -100; diff --git a/src/layer/arm/deconvolutiondepthwise_arm.cpp b/src/layer/arm/deconvolutiondepthwise_arm.cpp index cc5b67949..2bae9f13f 100644 --- a/src/layer/arm/deconvolutiondepthwise_arm.cpp +++ b/src/layer/arm/deconvolutiondepthwise_arm.cpp @@ -397,19 +397,22 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c { Mat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered = top_blob; + top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered = top_blob; top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; const int maxk = kernel_w * kernel_h; @@ -507,9 +510,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c } } - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Mat top_blob_unbordered; + Option opt_ub = opt; + opt_ub.blob_allocator = opt.workspace_allocator; + copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); + if (top_blob_unbordered.empty()) + return -100; + + copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); + } + else + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) + { + Mat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); + } if (top_blob.empty()) return -100; @@ -888,9 +940,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c top_blob_bordered = top_blob_bordered_unpacked; } - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Mat top_blob_unbordered; + Option opt_ub = opt; + opt_ub.blob_allocator = opt.workspace_allocator; + copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); + if (top_blob_unbordered.empty()) + return -100; + + copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); + } + else + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) + { + Mat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); + } if (top_blob.empty()) return -100; @@ -908,19 +1009,22 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c #endif // __ARM_NEON Mat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered = top_blob; + top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered = top_blob; top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; // depth-wise if (channels == group && group == num_output) @@ -961,9 +1065,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c } } - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Mat top_blob_unbordered; + Option opt_ub = opt; + opt_ub.blob_allocator = opt.workspace_allocator; + copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); + if (top_blob_unbordered.empty()) + return -100; + + copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); + } + else + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + Mat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); + } if (top_blob.empty()) return -100; diff --git a/src/layer/convolution.cpp b/src/layer/convolution.cpp index ec153bae3..e89ef48ff 100644 --- a/src/layer/convolution.cpp +++ b/src/layer/convolution.cpp @@ -336,11 +336,6 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) { @@ -352,12 +347,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) { @@ -369,13 +359,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } + if (bottom_blob_bordered.empty()) + return -100; + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; int outw = (w - kernel_extent_w) / stride_w + 1; int outh = (h - kernel_extent_h) / stride_h + 1; diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp index 4fe27a2a1..d993d6a36 100644 --- a/src/layer/convolutiondepthwise.cpp +++ b/src/layer/convolutiondepthwise.cpp @@ -329,11 +329,6 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) { @@ -345,12 +340,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) { @@ -362,13 +352,13 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O Option opt_b = opt; opt_b.blob_allocator = opt.workspace_allocator; copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); - if (bottom_blob_bordered.empty()) - return -100; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } + if (bottom_blob_bordered.empty()) + return -100; + + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; int outw = (w - kernel_extent_w) / stride_w + 1; int outh = (h - kernel_extent_h) / stride_h + 1; diff --git a/src/layer/deconvolution.cpp b/src/layer/deconvolution.cpp index cdbe2651a..adf5db585 100644 --- a/src/layer/deconvolution.cpp +++ b/src/layer/deconvolution.cpp @@ -39,6 +39,10 @@ int Deconvolution::load_param(const ParamDict& pd) pad_right = pd.get(15, pad_left); pad_top = pd.get(14, pad_left); pad_bottom = pd.get(16, pad_top); + output_pad_right = pd.get(18, 0); + output_pad_bottom = pd.get(19, output_pad_right); + output_w = pd.get(20, 0); + output_h = pd.get(21, output_w); bias_term = pd.get(5, 0); weight_data_size = pd.get(6, 0); activation_type = pd.get(9, 0); @@ -82,19 +86,22 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& int outh = (h - 1) * stride_h + kernel_extent_h; Mat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered = top_blob; + top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered = top_blob; top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; const int maxk = kernel_w * kernel_h; @@ -200,9 +207,58 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& } } - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Mat top_blob_unbordered; + Option opt_ub = opt; + opt_ub.blob_allocator = opt.workspace_allocator; + copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); + if (top_blob_unbordered.empty()) + return -100; + + copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); + } + else + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) + { + Mat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); + } if (top_blob.empty()) return -100; diff --git a/src/layer/deconvolution.h b/src/layer/deconvolution.h index 75b2039c7..fb67307b8 100644 --- a/src/layer/deconvolution.h +++ b/src/layer/deconvolution.h @@ -43,6 +43,10 @@ public: int pad_right; int pad_top; int pad_bottom; + int output_pad_right; + int output_pad_bottom; + int output_w; + int output_h; int bias_term; int weight_data_size; diff --git a/src/layer/deconvolutiondepthwise.cpp b/src/layer/deconvolutiondepthwise.cpp index cfd86eaf4..095881982 100644 --- a/src/layer/deconvolutiondepthwise.cpp +++ b/src/layer/deconvolutiondepthwise.cpp @@ -39,6 +39,10 @@ int DeconvolutionDepthWise::load_param(const ParamDict& pd) pad_right = pd.get(15, pad_left); pad_top = pd.get(14, pad_left); pad_bottom = pd.get(16, pad_top); + output_pad_right = pd.get(18, 0); + output_pad_bottom = pd.get(19, output_pad_right); + output_w = pd.get(20, 0); + output_h = pd.get(21, output_w); bias_term = pd.get(5, 0); weight_data_size = pd.get(6, 0); group = pd.get(7, 1); @@ -87,19 +91,22 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const int outh = (h - 1) * stride_h + kernel_extent_h; Mat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered = top_blob; + top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered = top_blob; top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; const int maxk = kernel_w * kernel_h; @@ -296,9 +303,58 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const } } - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Mat top_blob_unbordered; + Option opt_ub = opt; + opt_ub.blob_allocator = opt.workspace_allocator; + copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); + if (top_blob_unbordered.empty()) + return -100; + + copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); + } + else + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) + { + Mat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); + } if (top_blob.empty()) return -100; diff --git a/src/layer/deconvolutiondepthwise.h b/src/layer/deconvolutiondepthwise.h index 609d05345..71a59a640 100644 --- a/src/layer/deconvolutiondepthwise.h +++ b/src/layer/deconvolutiondepthwise.h @@ -43,6 +43,10 @@ public: int pad_right; int pad_top; int pad_bottom; + int output_pad_right; + int output_pad_bottom; + int output_w; + int output_h; int bias_term; int weight_data_size; diff --git a/src/layer/vulkan/convolution_vulkan.cpp b/src/layer/vulkan/convolution_vulkan.cpp index ec3617290..53fae99fa 100644 --- a/src/layer/vulkan/convolution_vulkan.cpp +++ b/src/layer/vulkan/convolution_vulkan.cpp @@ -868,9 +868,6 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom opt_pad.blob_vkallocator = opt.workspace_vkallocator; padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad); - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) { @@ -898,9 +895,6 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); bottom_blob_bordered = padding_outputs[0]; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) { @@ -928,11 +922,11 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); bottom_blob_bordered = padding_outputs[0]; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; + int outw = (w - kernel_extent_w) / stride_w + 1; int outh = (h - kernel_extent_h) / stride_h + 1; int out_elempack = num_output % 4 == 0 ? 4 : 1; diff --git a/src/layer/vulkan/convolutiondepthwise_vulkan.cpp b/src/layer/vulkan/convolutiondepthwise_vulkan.cpp index 51af7de02..baa93ca63 100644 --- a/src/layer/vulkan/convolutiondepthwise_vulkan.cpp +++ b/src/layer/vulkan/convolutiondepthwise_vulkan.cpp @@ -473,9 +473,6 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl opt_pad.blob_vkallocator = opt.workspace_vkallocator; padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad); - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) { @@ -503,9 +500,6 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); bottom_blob_bordered = padding_outputs[0]; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) { @@ -533,11 +527,11 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); bottom_blob_bordered = padding_outputs[0]; } - - w = bottom_blob_bordered.w; - h = bottom_blob_bordered.h; } + w = bottom_blob_bordered.w; + h = bottom_blob_bordered.h; + int outw = (w - kernel_extent_w) / stride_w + 1; int outh = (h - kernel_extent_h) / stride_h + 1; int out_elempack = num_output % 4 == 0 ? 4 : 1; diff --git a/src/layer/vulkan/deconvolution_vulkan.cpp b/src/layer/vulkan/deconvolution_vulkan.cpp index eae711b53..b341bf5cd 100644 --- a/src/layer/vulkan/deconvolution_vulkan.cpp +++ b/src/layer/vulkan/deconvolution_vulkan.cpp @@ -25,6 +25,9 @@ Deconvolution_vulkan::Deconvolution_vulkan() support_vulkan = true; crop = 0; + output_pad = 0; + output_crop = 0; + pipeline_deconvolution = 0; pipeline_deconvolution_pack4 = 0; pipeline_deconvolution_pack1to4 = 0; @@ -47,6 +50,37 @@ int Deconvolution_vulkan::create_pipeline(const Option& opt) crop->create_pipeline(opt); } + { + output_pad = ncnn::create_layer(ncnn::LayerType::Padding); + output_pad->vkdev = vkdev; + + ncnn::ParamDict pd; + pd.set(0, 0); + pd.set(1, output_pad_bottom); + pd.set(2, 0); + pd.set(3, output_pad_right); + pd.set(4, 0); + pd.set(5, 0.f); + + output_pad->load_param(pd); + + output_pad->create_pipeline(opt); + } + + { + output_crop = ncnn::create_layer(ncnn::LayerType::Crop); + output_crop->vkdev = vkdev; + + ncnn::ParamDict pd; + pd.set(0, -233); + pd.set(1, -233); + pd.set(2, -233); + + output_crop->load_param(pd); + + output_crop->create_pipeline(opt); + } + const int maxk = kernel_w * kernel_h; int num_input = weight_data_size / maxk / num_output; @@ -106,6 +140,20 @@ int Deconvolution_vulkan::destroy_pipeline(const Option& opt) crop = 0; } + if (output_pad) + { + output_pad->destroy_pipeline(opt); + delete output_pad; + output_pad = 0; + } + + if (output_crop) + { + output_crop->destroy_pipeline(opt); + delete output_crop; + output_crop = 0; + } + delete pipeline_deconvolution; pipeline_deconvolution = 0; @@ -351,18 +399,20 @@ int Deconvolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkC } VkMat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_vkallocator, opt.staging_vkallocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; std::vector bindings(4); bindings[0] = bottom_blob; @@ -420,19 +470,103 @@ int Deconvolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkC cmd.record_pipeline(pipeline, bindings, constants, top_blob_bordered); - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) { - VkMat reference_blob; - reference_blob.dims = 2; - reference_blob.w = top_blob_bordered.w - pad_left - pad_right; - reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + VkMat top_blob_unbordered; + { + ncnn::Option opt_ub = opt; + opt_ub.blob_vkallocator = opt.workspace_vkallocator; + + VkMat reference_blob; + reference_blob.dims = 2; + reference_blob.w = top_blob_bordered.w - pad_left - pad_right; + reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + + std::vector crop_bottom_blobs(2); + crop_bottom_blobs[0] = top_blob_bordered; + crop_bottom_blobs[1] = reference_blob; + std::vector crop_top_blobs(1); + crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub); + top_blob_unbordered = crop_top_blobs[0]; + } + + output_pad->forward(top_blob_unbordered, top_blob, cmd, opt); + } + else + { + VkMat reference_blob; + reference_blob.dims = 2; + reference_blob.w = top_blob_bordered.w - pad_left - pad_right; + reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + + std::vector crop_bottom_blobs(2); + crop_bottom_blobs[0] = top_blob_bordered; + crop_bottom_blobs[1] = reference_blob; + std::vector crop_top_blobs(1); + crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); + top_blob = crop_top_blobs[0]; + } + if (top_blob.empty()) + return -100; - std::vector crop_bottom_blobs(2); - crop_bottom_blobs[0] = top_blob_bordered; - crop_bottom_blobs[1] = reference_blob; - std::vector crop_top_blobs(1); - crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); - top_blob = crop_top_blobs[0]; + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) + { + VkMat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + ncnn::Option opt_pad = opt; + opt_pad.blob_vkallocator = opt.workspace_vkallocator; + output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator); + crop_param_blob.prepare_staging_buffer(); + int* crop_params = crop_param_blob.mapped(); + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + crop_params[0] = wcut / 2; + crop_params[1] = hcut / 2; + crop_params[2] = 0; + crop_params[3] = top_blob_bordered_adj.w - wcut; + crop_params[4] = top_blob_bordered_adj.h - hcut; + crop_params[5] = top_blob_bordered_adj.c; + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + crop_params[0] = wcut - wcut / 2; + crop_params[1] = hcut - hcut / 2; + crop_params[2] = 0; + crop_params[3] = top_blob_bordered_adj.w - wcut; + crop_params[4] = top_blob_bordered_adj.h - hcut; + crop_params[5] = top_blob_bordered_adj.c; + } + + std::vector crop_inputs(2); + crop_inputs[0] = top_blob_bordered_adj; + crop_inputs[1] = crop_param_blob; + + std::vector crop_outputs(1); + output_crop->forward(crop_inputs, crop_outputs, cmd, opt); + top_blob = crop_outputs[0]; + if (top_blob.empty()) + return -100; outw = top_blob.w; outh = top_blob.h; diff --git a/src/layer/vulkan/deconvolution_vulkan.h b/src/layer/vulkan/deconvolution_vulkan.h index 9b7d30f36..b7bfc3c3d 100644 --- a/src/layer/vulkan/deconvolution_vulkan.h +++ b/src/layer/vulkan/deconvolution_vulkan.h @@ -36,6 +36,8 @@ public: VkMat bias_data_gpu; ncnn::Layer* crop; + ncnn::Layer* output_pad; + ncnn::Layer* output_crop; Pipeline* pipeline_deconvolution; diff --git a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp index d0ead874b..852859b6a 100644 --- a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp +++ b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp @@ -25,6 +25,8 @@ DeconvolutionDepthWise_vulkan::DeconvolutionDepthWise_vulkan() support_vulkan = true; crop = 0; + output_pad = 0; + output_crop = 0; packing_pack1 = 0; packing_pack4 = 0; @@ -53,6 +55,37 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& opt) crop->create_pipeline(opt); } + { + output_pad = ncnn::create_layer(ncnn::LayerType::Padding); + output_pad->vkdev = vkdev; + + ncnn::ParamDict pd; + pd.set(0, 0); + pd.set(1, output_pad_bottom); + pd.set(2, 0); + pd.set(3, output_pad_right); + pd.set(4, 0); + pd.set(5, 0.f); + + output_pad->load_param(pd); + + output_pad->create_pipeline(opt); + } + + { + output_crop = ncnn::create_layer(ncnn::LayerType::Crop); + output_crop->vkdev = vkdev; + + ncnn::ParamDict pd; + pd.set(0, -233); + pd.set(1, -233); + pd.set(2, -233); + + output_crop->load_param(pd); + + output_crop->create_pipeline(opt); + } + std::vector specializations(11); specializations[0].i = kernel_w; specializations[1].i = kernel_h; @@ -165,6 +198,20 @@ int DeconvolutionDepthWise_vulkan::destroy_pipeline(const Option& opt) crop = 0; } + if (output_pad) + { + output_pad->destroy_pipeline(opt); + delete output_pad; + output_pad = 0; + } + + if (output_crop) + { + output_crop->destroy_pipeline(opt); + delete output_crop; + output_crop = 0; + } + if (packing_pack1) { packing_pack1->destroy_pipeline(opt); @@ -495,18 +542,20 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_ } VkMat top_blob_bordered; - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) { top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_vkallocator, opt.staging_vkallocator); - if (top_blob_bordered.empty()) - return -100; } else { top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); - if (top_blob_bordered.empty()) - return -100; } + if (top_blob_bordered.empty()) + return -100; // depth-wise if (channels == group / elempack && group / elempack == num_output / elempack) @@ -534,19 +583,103 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_ // record cmd.record_pipeline(pipeline, bindings, constants, top_blob_bordered); - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) { - VkMat reference_blob; - reference_blob.dims = 2; - reference_blob.w = top_blob_bordered.w - pad_left - pad_right; - reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + VkMat top_blob_unbordered; + { + ncnn::Option opt_ub = opt; + opt_ub.blob_vkallocator = opt.workspace_vkallocator; + + VkMat reference_blob; + reference_blob.dims = 2; + reference_blob.w = top_blob_bordered.w - pad_left - pad_right; + reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + + std::vector crop_bottom_blobs(2); + crop_bottom_blobs[0] = top_blob_bordered; + crop_bottom_blobs[1] = reference_blob; + std::vector crop_top_blobs(1); + crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub); + top_blob_unbordered = crop_top_blobs[0]; + } - std::vector crop_bottom_blobs(2); - crop_bottom_blobs[0] = top_blob_bordered; - crop_bottom_blobs[1] = reference_blob; - std::vector crop_top_blobs(1); - crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); - top_blob = crop_top_blobs[0]; + output_pad->forward(top_blob_unbordered, top_blob, cmd, opt); + } + else + { + VkMat reference_blob; + reference_blob.dims = 2; + reference_blob.w = top_blob_bordered.w - pad_left - pad_right; + reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + + std::vector crop_bottom_blobs(2); + crop_bottom_blobs[0] = top_blob_bordered; + crop_bottom_blobs[1] = reference_blob; + std::vector crop_top_blobs(1); + crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); + top_blob = crop_top_blobs[0]; + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) + { + VkMat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + ncnn::Option opt_pad = opt; + opt_pad.blob_vkallocator = opt.workspace_vkallocator; + output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator); + crop_param_blob.prepare_staging_buffer(); + int* crop_params = crop_param_blob.mapped(); + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + crop_params[0] = wcut / 2; + crop_params[1] = hcut / 2; + crop_params[2] = 0; + crop_params[3] = top_blob_bordered_adj.w - wcut; + crop_params[4] = top_blob_bordered_adj.h - hcut; + crop_params[5] = top_blob_bordered_adj.c; + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + crop_params[0] = wcut - wcut / 2; + crop_params[1] = hcut - hcut / 2; + crop_params[2] = 0; + crop_params[3] = top_blob_bordered_adj.w - wcut; + crop_params[4] = top_blob_bordered_adj.h - hcut; + crop_params[5] = top_blob_bordered_adj.c; + } + + std::vector crop_inputs(2); + crop_inputs[0] = top_blob_bordered_adj; + crop_inputs[1] = crop_param_blob; + + std::vector crop_outputs(1); + output_crop->forward(crop_inputs, crop_outputs, cmd, opt); + top_blob = crop_outputs[0]; + if (top_blob.empty()) + return -100; outw = top_blob.w; outh = top_blob.h; @@ -646,19 +779,103 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_ top_blob_bordered = top_blob_unpacked; } - if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) + { + top_blob = top_blob_bordered; + } + else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + if (output_pad_right > 0 || output_pad_bottom > 0) + { + VkMat top_blob_unbordered; + { + ncnn::Option opt_ub = opt; + opt_ub.blob_vkallocator = opt.workspace_vkallocator; + + VkMat reference_blob; + reference_blob.dims = 2; + reference_blob.w = top_blob_bordered.w - pad_left - pad_right; + reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + + std::vector crop_bottom_blobs(2); + crop_bottom_blobs[0] = top_blob_bordered; + crop_bottom_blobs[1] = reference_blob; + std::vector crop_top_blobs(1); + crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub); + top_blob_unbordered = crop_top_blobs[0]; + } + + output_pad->forward(top_blob_unbordered, top_blob, cmd, opt); + } + else + { + VkMat reference_blob; + reference_blob.dims = 2; + reference_blob.w = top_blob_bordered.w - pad_left - pad_right; + reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + + std::vector crop_bottom_blobs(2); + crop_bottom_blobs[0] = top_blob_bordered; + crop_bottom_blobs[1] = reference_blob; + std::vector crop_top_blobs(1); + crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); + top_blob = crop_top_blobs[0]; + } + if (top_blob.empty()) + return -100; + + outw = top_blob.w; + outh = top_blob.h; + } + else if (output_w > 0 && output_h > 0) { - VkMat reference_blob; - reference_blob.dims = 2; - reference_blob.w = top_blob_bordered.w - pad_left - pad_right; - reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; + VkMat top_blob_bordered_adj = top_blob_bordered; + if (output_pad_right > 0 || output_pad_bottom > 0) + { + ncnn::Option opt_pad = opt; + opt_pad.blob_vkallocator = opt.workspace_vkallocator; + output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad); + if (top_blob_bordered_adj.empty()) + return -100; + } + + int wcut = top_blob_bordered_adj.w - output_w; + int hcut = top_blob_bordered_adj.h - output_h; + + VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator); + crop_param_blob.prepare_staging_buffer(); + int* crop_params = crop_param_blob.mapped(); + + if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) + { + // onnx padding=SAME_UPPER + crop_params[0] = wcut / 2; + crop_params[1] = hcut / 2; + crop_params[2] = 0; + crop_params[3] = top_blob_bordered_adj.w - wcut; + crop_params[4] = top_blob_bordered_adj.h - hcut; + crop_params[5] = top_blob_bordered_adj.c; + } + else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) + { + // onnx padding=SAME_LOWER + crop_params[0] = wcut - wcut / 2; + crop_params[1] = hcut - hcut / 2; + crop_params[2] = 0; + crop_params[3] = top_blob_bordered_adj.w - wcut; + crop_params[4] = top_blob_bordered_adj.h - hcut; + crop_params[5] = top_blob_bordered_adj.c; + } + + std::vector crop_inputs(2); + crop_inputs[0] = top_blob_bordered_adj; + crop_inputs[1] = crop_param_blob; - std::vector crop_bottom_blobs(2); - crop_bottom_blobs[0] = top_blob_bordered; - crop_bottom_blobs[1] = reference_blob; - std::vector crop_top_blobs(1); - crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); - top_blob = crop_top_blobs[0]; + std::vector crop_outputs(1); + output_crop->forward(crop_inputs, crop_outputs, cmd, opt); + top_blob = crop_outputs[0]; + if (top_blob.empty()) + return -100; outw = top_blob.w; outh = top_blob.h; diff --git a/src/layer/vulkan/deconvolutiondepthwise_vulkan.h b/src/layer/vulkan/deconvolutiondepthwise_vulkan.h index e6ca27997..827043d6b 100644 --- a/src/layer/vulkan/deconvolutiondepthwise_vulkan.h +++ b/src/layer/vulkan/deconvolutiondepthwise_vulkan.h @@ -36,6 +36,8 @@ public: VkMat bias_data_gpu; ncnn::Layer* crop; + ncnn::Layer* output_pad; + ncnn::Layer* output_crop; ncnn::Layer* packing_pack1; ncnn::Layer* packing_pack4; diff --git a/tools/mxnet/mxnet2ncnn.cpp b/tools/mxnet/mxnet2ncnn.cpp index 027147dff..577651138 100644 --- a/tools/mxnet/mxnet2ncnn.cpp +++ b/tools/mxnet/mxnet2ncnn.cpp @@ -1761,6 +1761,8 @@ int main(int argc, char** argv) std::vector dilate = n.attr("dilate"); std::vector stride = n.attr("stride"); std::vector pad = n.attr("pad"); + std::vector adj = n.attr("adj"); + std::vector target_shape = n.attr("target_shape"); int no_bias = n.attr("no_bias"); int num_group = n.attr("num_group"); @@ -1789,11 +1791,32 @@ int main(int argc, char** argv) fprintf(pp, " 13=%d", stride[0]); } - if (pad.size() == 1) { - fprintf(pp, " 4=%d", pad[0]); - } else if (pad.size() == 2) { - fprintf(pp, " 4=%d", pad[1]); - fprintf(pp, " 14=%d", pad[0]); + if (target_shape.size() == 0) + { + if (pad.size() == 1) { + fprintf(pp, " 4=%d", pad[0]); + } else if (pad.size() == 2) { + fprintf(pp, " 4=%d", pad[1]); + fprintf(pp, " 14=%d", pad[0]); + } + + if (adj.size() == 1) { + fprintf(pp, " 18=%d", adj[0]); + } else if (adj.size() == 2) { + fprintf(pp, " 18=%d", adj[1]); + fprintf(pp, " 19=%d", adj[0]); + } + } + else + { + fprintf(pp, " 4=-233"); + + if (target_shape.size() == 1) { + fprintf(pp, " 20=%d", target_shape[0]); + } else if (target_shape.size() == 2) { + fprintf(pp, " 20=%d", target_shape[1]); + fprintf(pp, " 21=%d", target_shape[0]); + } } fprintf(pp, " 5=%d", no_bias == 1 ? 0 : 1); diff --git a/tools/onnx/onnx2ncnn.cpp b/tools/onnx/onnx2ncnn.cpp index e2c08e5fe..2d4a29ef1 100644 --- a/tools/onnx/onnx2ncnn.cpp +++ b/tools/onnx/onnx2ncnn.cpp @@ -1257,7 +1257,7 @@ int main(int argc, char** argv) } else if (op == "AveragePool" || op == "MaxPool") { - std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO + std::string auto_pad = get_node_attr_s(node, "auto_pad"); std::vector kernel_shape = get_node_attr_ai(node, "kernel_shape"); std::vector strides = get_node_attr_ai(node, "strides"); std::vector pads = get_node_attr_ai(node, "pads"); @@ -1386,7 +1386,7 @@ int main(int argc, char** argv) int num_filter = W.dims(0); int has_bias = node.input_size() == 3 ? 1 : 0; - std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO + std::string auto_pad = get_node_attr_s(node, "auto_pad"); std::vector kernel_shape = get_node_attr_ai(node, "kernel_shape"); std::vector dilations = get_node_attr_ai(node, "dilations"); std::vector strides = get_node_attr_ai(node, "strides"); @@ -1466,12 +1466,12 @@ int main(int argc, char** argv) int has_bias = node.input_size() == 3 ? 1 : 0; - std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO + std::string auto_pad = get_node_attr_s(node, "auto_pad"); std::vector kernel_shape = get_node_attr_ai(node, "kernel_shape"); std::vector dilations = get_node_attr_ai(node, "dilations"); std::vector strides = get_node_attr_ai(node, "strides"); - std::vector output_padding = get_node_attr_ai(node, "output_padding");//TODO implement adj - std::vector output_shape = get_node_attr_ai(node, "output_shape");//TODO + std::vector output_padding = get_node_attr_ai(node, "output_padding"); + std::vector output_shape = get_node_attr_ai(node, "output_shape"); std::vector pads = get_node_attr_ai(node, "pads"); int group = get_node_attr_i(node, "group", 1); int num_filter = W.dims(1) * group; @@ -1524,6 +1524,20 @@ int main(int argc, char** argv) } + if (output_padding.size() == 1) { + fprintf(pp, " 18=%d", output_padding[0]); + } else if (output_padding.size() == 2) { + fprintf(pp, " 18=%d", output_padding[1]); + fprintf(pp, " 19=%d", output_padding[0]); + } + + if (output_shape.size() == 1) { + fprintf(pp, " 20=%d", output_shape[0]); + } else if (output_shape.size() == 2) { + fprintf(pp, " 20=%d", output_shape[1]); + fprintf(pp, " 21=%d", output_shape[0]); + } + fprintf(pp, " 5=%d", has_bias); fprintf(pp, " 6=%d", get_tensor_proto_data_size(W));