Browse Source

deconvolution output adj and output shape

tags/20190908
nihuini 6 years ago
parent
commit
296e0022df
18 changed files with 897 additions and 190 deletions
  1. +10
    -30
      src/layer/arm/convolution_arm.cpp
  2. +5
    -15
      src/layer/arm/convolutiondepthwise_arm.cpp
  3. +118
    -14
      src/layer/arm/deconvolution_arm.cpp
  4. +169
    -16
      src/layer/arm/deconvolutiondepthwise_arm.cpp
  5. +5
    -15
      src/layer/convolution.cpp
  6. +5
    -15
      src/layer/convolutiondepthwise.cpp
  7. +63
    -7
      src/layer/deconvolution.cpp
  8. +4
    -0
      src/layer/deconvolution.h
  9. +63
    -7
      src/layer/deconvolutiondepthwise.cpp
  10. +4
    -0
      src/layer/deconvolutiondepthwise.h
  11. +3
    -9
      src/layer/vulkan/convolution_vulkan.cpp
  12. +3
    -9
      src/layer/vulkan/convolutiondepthwise_vulkan.cpp
  13. +150
    -16
      src/layer/vulkan/deconvolution_vulkan.cpp
  14. +2
    -0
      src/layer/vulkan/deconvolution_vulkan.h
  15. +244
    -27
      src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
  16. +2
    -0
      src/layer/vulkan/deconvolutiondepthwise_vulkan.h
  17. +28
    -5
      tools/mxnet/mxnet2ncnn.cpp
  18. +19
    -5
      tools/onnx/onnx2ncnn.cpp

+ 10
- 30
src/layer/arm/convolution_arm.cpp View File

@@ -512,11 +512,6 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
{
@@ -527,12 +522,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
{
@@ -543,13 +533,13 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;

int outw = (w - kernel_extent_w) / stride_w + 1;
int outh = (h - kernel_extent_h) / stride_h + 1;
@@ -1010,11 +1000,6 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
{
@@ -1025,12 +1010,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
{
@@ -1041,13 +1021,13 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;

int outw = (w - kernel_size) / stride + 1;
int outh = (h - kernel_size) / stride + 1;


+ 5
- 15
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -440,11 +440,6 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
{
@@ -455,12 +450,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
{
@@ -471,13 +461,13 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;

int outw = (w - kernel_extent_w) / stride_w + 1;
int outh = (h - kernel_extent_h) / stride_h + 1;


+ 118
- 14
src/layer/arm/deconvolution_arm.cpp View File

@@ -299,19 +299,22 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti
size_t out_elemsize = elemsize / elempack * out_elempack;

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

const int maxk = kernel_w * kernel_h;

@@ -629,9 +632,58 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti
}
}

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Mat top_blob_unbordered;
Option opt_ub = opt;
opt_ub.blob_allocator = opt.workspace_allocator;
copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub);
if (top_blob_unbordered.empty())
return -100;

copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt);
}
else
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
Mat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt);
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt);
}
if (top_blob.empty())
return -100;

@@ -690,25 +742,77 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti
int outh = (h - 1) * stride + kernel_size;

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

deconv(bottom_blob, top_blob_bordered, weight_data, bias_data, opt);

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Mat top_blob_unbordered;
Option opt_ub = opt;
opt_ub.blob_allocator = opt.workspace_allocator;
copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub);
if (top_blob_unbordered.empty())
return -100;

copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt);
}
else
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
Mat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt);
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt);
}
if (top_blob.empty())
return -100;



+ 169
- 16
src/layer/arm/deconvolutiondepthwise_arm.cpp View File

@@ -397,19 +397,22 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
{

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

const int maxk = kernel_w * kernel_h;

@@ -507,9 +510,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
}
}

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Mat top_blob_unbordered;
Option opt_ub = opt;
opt_ub.blob_allocator = opt.workspace_allocator;
copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub);
if (top_blob_unbordered.empty())
return -100;

copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt);
}
else
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
Mat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt);
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt);
}
if (top_blob.empty())
return -100;

@@ -888,9 +940,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
top_blob_bordered = top_blob_bordered_unpacked;
}

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Mat top_blob_unbordered;
Option opt_ub = opt;
opt_ub.blob_allocator = opt.workspace_allocator;
copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub);
if (top_blob_unbordered.empty())
return -100;

copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt);
}
else
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
Mat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt);
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt);
}
if (top_blob.empty())
return -100;

@@ -908,19 +1009,22 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
#endif // __ARM_NEON

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

// depth-wise
if (channels == group && group == num_output)
@@ -961,9 +1065,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c
}
}

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Mat top_blob_unbordered;
Option opt_ub = opt;
opt_ub.blob_allocator = opt.workspace_allocator;
copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub);
if (top_blob_unbordered.empty())
return -100;

copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt);
}
else
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
Mat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt);
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt);
}
if (top_blob.empty())
return -100;



+ 5
- 15
src/layer/convolution.cpp View File

@@ -336,11 +336,6 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
{
@@ -352,12 +347,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
{
@@ -369,13 +359,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;

int outw = (w - kernel_extent_w) / stride_w + 1;
int outh = (h - kernel_extent_h) / stride_h + 1;


+ 5
- 15
src/layer/convolutiondepthwise.cpp View File

@@ -329,11 +329,6 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
{
@@ -345,12 +340,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
{
@@ -362,13 +352,13 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b);
if (bottom_blob_bordered.empty())
return -100;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
if (bottom_blob_bordered.empty())
return -100;

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;

int outw = (w - kernel_extent_w) / stride_w + 1;
int outh = (h - kernel_extent_h) / stride_h + 1;


+ 63
- 7
src/layer/deconvolution.cpp View File

@@ -39,6 +39,10 @@ int Deconvolution::load_param(const ParamDict& pd)
pad_right = pd.get(15, pad_left);
pad_top = pd.get(14, pad_left);
pad_bottom = pd.get(16, pad_top);
output_pad_right = pd.get(18, 0);
output_pad_bottom = pd.get(19, output_pad_right);
output_w = pd.get(20, 0);
output_h = pd.get(21, output_w);
bias_term = pd.get(5, 0);
weight_data_size = pd.get(6, 0);
activation_type = pd.get(9, 0);
@@ -82,19 +86,22 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
int outh = (h - 1) * stride_h + kernel_extent_h;

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

const int maxk = kernel_w * kernel_h;

@@ -200,9 +207,58 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
}
}

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Mat top_blob_unbordered;
Option opt_ub = opt;
opt_ub.blob_allocator = opt.workspace_allocator;
copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub);
if (top_blob_unbordered.empty())
return -100;

copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt);
}
else
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
Mat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt);
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt);
}
if (top_blob.empty())
return -100;



+ 4
- 0
src/layer/deconvolution.h View File

@@ -43,6 +43,10 @@ public:
int pad_right;
int pad_top;
int pad_bottom;
int output_pad_right;
int output_pad_bottom;
int output_w;
int output_h;
int bias_term;

int weight_data_size;


+ 63
- 7
src/layer/deconvolutiondepthwise.cpp View File

@@ -39,6 +39,10 @@ int DeconvolutionDepthWise::load_param(const ParamDict& pd)
pad_right = pd.get(15, pad_left);
pad_top = pd.get(14, pad_left);
pad_bottom = pd.get(16, pad_top);
output_pad_right = pd.get(18, 0);
output_pad_bottom = pd.get(19, output_pad_right);
output_w = pd.get(20, 0);
output_h = pd.get(21, output_w);
bias_term = pd.get(5, 0);
weight_data_size = pd.get(6, 0);
group = pd.get(7, 1);
@@ -87,19 +91,22 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const
int outh = (h - 1) * stride_h + kernel_extent_h;

Mat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered = top_blob;
top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

const int maxk = kernel_w * kernel_h;

@@ -296,9 +303,58 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const
}
}

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Mat top_blob_unbordered;
Option opt_ub = opt;
opt_ub.blob_allocator = opt.workspace_allocator;
copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub);
if (top_blob_unbordered.empty())
return -100;

copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt);
}
else
{
copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
Mat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt);
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt);
}
if (top_blob.empty())
return -100;



+ 4
- 0
src/layer/deconvolutiondepthwise.h View File

@@ -43,6 +43,10 @@ public:
int pad_right;
int pad_top;
int pad_bottom;
int output_pad_right;
int output_pad_bottom;
int output_w;
int output_h;
int bias_term;

int weight_data_size;


+ 3
- 9
src/layer/vulkan/convolution_vulkan.cpp View File

@@ -868,9 +868,6 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
opt_pad.blob_vkallocator = opt.workspace_vkallocator;

padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad);

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
{
@@ -898,9 +895,6 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
padding->forward(padding_inputs, padding_outputs, cmd, opt_pad);
bottom_blob_bordered = padding_outputs[0];
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
{
@@ -928,11 +922,11 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
padding->forward(padding_inputs, padding_outputs, cmd, opt_pad);
bottom_blob_bordered = padding_outputs[0];
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;

int outw = (w - kernel_extent_w) / stride_w + 1;
int outh = (h - kernel_extent_h) / stride_h + 1;
int out_elempack = num_output % 4 == 0 ? 4 : 1;


+ 3
- 9
src/layer/vulkan/convolutiondepthwise_vulkan.cpp View File

@@ -473,9 +473,6 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl
opt_pad.blob_vkallocator = opt.workspace_vkallocator;

padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad);

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
{
@@ -503,9 +500,6 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl
padding->forward(padding_inputs, padding_outputs, cmd, opt_pad);
bottom_blob_bordered = padding_outputs[0];
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}
else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
{
@@ -533,11 +527,11 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl
padding->forward(padding_inputs, padding_outputs, cmd, opt_pad);
bottom_blob_bordered = padding_outputs[0];
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;
}

w = bottom_blob_bordered.w;
h = bottom_blob_bordered.h;

int outw = (w - kernel_extent_w) / stride_w + 1;
int outh = (h - kernel_extent_h) / stride_h + 1;
int out_elempack = num_output % 4 == 0 ? 4 : 1;


+ 150
- 16
src/layer/vulkan/deconvolution_vulkan.cpp View File

@@ -25,6 +25,9 @@ Deconvolution_vulkan::Deconvolution_vulkan()
support_vulkan = true;

crop = 0;
output_pad = 0;
output_crop = 0;

pipeline_deconvolution = 0;
pipeline_deconvolution_pack4 = 0;
pipeline_deconvolution_pack1to4 = 0;
@@ -47,6 +50,37 @@ int Deconvolution_vulkan::create_pipeline(const Option& opt)
crop->create_pipeline(opt);
}

{
output_pad = ncnn::create_layer(ncnn::LayerType::Padding);
output_pad->vkdev = vkdev;

ncnn::ParamDict pd;
pd.set(0, 0);
pd.set(1, output_pad_bottom);
pd.set(2, 0);
pd.set(3, output_pad_right);
pd.set(4, 0);
pd.set(5, 0.f);

output_pad->load_param(pd);

output_pad->create_pipeline(opt);
}

{
output_crop = ncnn::create_layer(ncnn::LayerType::Crop);
output_crop->vkdev = vkdev;

ncnn::ParamDict pd;
pd.set(0, -233);
pd.set(1, -233);
pd.set(2, -233);

output_crop->load_param(pd);

output_crop->create_pipeline(opt);
}

const int maxk = kernel_w * kernel_h;
int num_input = weight_data_size / maxk / num_output;

@@ -106,6 +140,20 @@ int Deconvolution_vulkan::destroy_pipeline(const Option& opt)
crop = 0;
}

if (output_pad)
{
output_pad->destroy_pipeline(opt);
delete output_pad;
output_pad = 0;
}

if (output_crop)
{
output_crop->destroy_pipeline(opt);
delete output_crop;
output_crop = 0;
}

delete pipeline_deconvolution;
pipeline_deconvolution = 0;

@@ -351,18 +399,20 @@ int Deconvolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkC
}

VkMat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

std::vector<VkMat> bindings(4);
bindings[0] = bottom_blob;
@@ -420,19 +470,103 @@ int Deconvolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkC

cmd.record_pipeline(pipeline, bindings, constants, top_blob_bordered);

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
VkMat top_blob_unbordered;
{
ncnn::Option opt_ub = opt;
opt_ub.blob_vkallocator = opt.workspace_vkallocator;

VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub);
top_blob_unbordered = crop_top_blobs[0];
}

output_pad->forward(top_blob_unbordered, top_blob, cmd, opt);
}
else
{
VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt);
top_blob = crop_top_blobs[0];
}
if (top_blob.empty())
return -100;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt);
top_blob = crop_top_blobs[0];
outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
VkMat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
ncnn::Option opt_pad = opt;
opt_pad.blob_vkallocator = opt.workspace_vkallocator;
output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator);
crop_param_blob.prepare_staging_buffer();
int* crop_params = crop_param_blob.mapped();

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
crop_params[0] = wcut / 2;
crop_params[1] = hcut / 2;
crop_params[2] = 0;
crop_params[3] = top_blob_bordered_adj.w - wcut;
crop_params[4] = top_blob_bordered_adj.h - hcut;
crop_params[5] = top_blob_bordered_adj.c;
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
crop_params[0] = wcut - wcut / 2;
crop_params[1] = hcut - hcut / 2;
crop_params[2] = 0;
crop_params[3] = top_blob_bordered_adj.w - wcut;
crop_params[4] = top_blob_bordered_adj.h - hcut;
crop_params[5] = top_blob_bordered_adj.c;
}

std::vector<VkMat> crop_inputs(2);
crop_inputs[0] = top_blob_bordered_adj;
crop_inputs[1] = crop_param_blob;

std::vector<VkMat> crop_outputs(1);
output_crop->forward(crop_inputs, crop_outputs, cmd, opt);
top_blob = crop_outputs[0];
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;


+ 2
- 0
src/layer/vulkan/deconvolution_vulkan.h View File

@@ -36,6 +36,8 @@ public:
VkMat bias_data_gpu;

ncnn::Layer* crop;
ncnn::Layer* output_pad;
ncnn::Layer* output_crop;

Pipeline* pipeline_deconvolution;



+ 244
- 27
src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp View File

@@ -25,6 +25,8 @@ DeconvolutionDepthWise_vulkan::DeconvolutionDepthWise_vulkan()
support_vulkan = true;

crop = 0;
output_pad = 0;
output_crop = 0;
packing_pack1 = 0;
packing_pack4 = 0;

@@ -53,6 +55,37 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& opt)
crop->create_pipeline(opt);
}

{
output_pad = ncnn::create_layer(ncnn::LayerType::Padding);
output_pad->vkdev = vkdev;

ncnn::ParamDict pd;
pd.set(0, 0);
pd.set(1, output_pad_bottom);
pd.set(2, 0);
pd.set(3, output_pad_right);
pd.set(4, 0);
pd.set(5, 0.f);

output_pad->load_param(pd);

output_pad->create_pipeline(opt);
}

{
output_crop = ncnn::create_layer(ncnn::LayerType::Crop);
output_crop->vkdev = vkdev;

ncnn::ParamDict pd;
pd.set(0, -233);
pd.set(1, -233);
pd.set(2, -233);

output_crop->load_param(pd);

output_crop->create_pipeline(opt);
}

std::vector<vk_specialization_type> specializations(11);
specializations[0].i = kernel_w;
specializations[1].i = kernel_h;
@@ -165,6 +198,20 @@ int DeconvolutionDepthWise_vulkan::destroy_pipeline(const Option& opt)
crop = 0;
}

if (output_pad)
{
output_pad->destroy_pipeline(opt);
delete output_pad;
output_pad = 0;
}

if (output_crop)
{
output_crop->destroy_pipeline(opt);
delete output_crop;
output_crop = 0;
}

if (packing_pack1)
{
packing_pack1->destroy_pipeline(opt);
@@ -495,18 +542,20 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_
}

VkMat top_blob_bordered;
if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator);
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0))
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_vkallocator, opt.staging_vkallocator);
if (top_blob_bordered.empty())
return -100;
}
else
{
top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator);
if (top_blob_bordered.empty())
return -100;
}
if (top_blob_bordered.empty())
return -100;

// depth-wise
if (channels == group / elempack && group / elempack == num_output / elempack)
@@ -534,19 +583,103 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_
// record
cmd.record_pipeline(pipeline, bindings, constants, top_blob_bordered);

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
VkMat top_blob_unbordered;
{
ncnn::Option opt_ub = opt;
opt_ub.blob_vkallocator = opt.workspace_vkallocator;

VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub);
top_blob_unbordered = crop_top_blobs[0];
}

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt);
top_blob = crop_top_blobs[0];
output_pad->forward(top_blob_unbordered, top_blob, cmd, opt);
}
else
{
VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt);
top_blob = crop_top_blobs[0];
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
VkMat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
ncnn::Option opt_pad = opt;
opt_pad.blob_vkallocator = opt.workspace_vkallocator;
output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator);
crop_param_blob.prepare_staging_buffer();
int* crop_params = crop_param_blob.mapped();

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
crop_params[0] = wcut / 2;
crop_params[1] = hcut / 2;
crop_params[2] = 0;
crop_params[3] = top_blob_bordered_adj.w - wcut;
crop_params[4] = top_blob_bordered_adj.h - hcut;
crop_params[5] = top_blob_bordered_adj.c;
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
crop_params[0] = wcut - wcut / 2;
crop_params[1] = hcut - hcut / 2;
crop_params[2] = 0;
crop_params[3] = top_blob_bordered_adj.w - wcut;
crop_params[4] = top_blob_bordered_adj.h - hcut;
crop_params[5] = top_blob_bordered_adj.c;
}

std::vector<VkMat> crop_inputs(2);
crop_inputs[0] = top_blob_bordered_adj;
crop_inputs[1] = crop_param_blob;

std::vector<VkMat> crop_outputs(1);
output_crop->forward(crop_inputs, crop_outputs, cmd, opt);
top_blob = crop_outputs[0];
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
@@ -646,19 +779,103 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_
top_blob_bordered = top_blob_unpacked;
}

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0)
{
top_blob = top_blob_bordered;
}
else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
{
if (output_pad_right > 0 || output_pad_bottom > 0)
{
VkMat top_blob_unbordered;
{
ncnn::Option opt_ub = opt;
opt_ub.blob_vkallocator = opt.workspace_vkallocator;

VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub);
top_blob_unbordered = crop_top_blobs[0];
}

output_pad->forward(top_blob_unbordered, top_blob, cmd, opt);
}
else
{
VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt);
top_blob = crop_top_blobs[0];
}
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;
}
else if (output_w > 0 && output_h > 0)
{
VkMat reference_blob;
reference_blob.dims = 2;
reference_blob.w = top_blob_bordered.w - pad_left - pad_right;
reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom;
VkMat top_blob_bordered_adj = top_blob_bordered;
if (output_pad_right > 0 || output_pad_bottom > 0)
{
ncnn::Option opt_pad = opt;
opt_pad.blob_vkallocator = opt.workspace_vkallocator;
output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad);
if (top_blob_bordered_adj.empty())
return -100;
}

int wcut = top_blob_bordered_adj.w - output_w;
int hcut = top_blob_bordered_adj.h - output_h;

VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator);
crop_param_blob.prepare_staging_buffer();
int* crop_params = crop_param_blob.mapped();

if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233)
{
// onnx padding=SAME_UPPER
crop_params[0] = wcut / 2;
crop_params[1] = hcut / 2;
crop_params[2] = 0;
crop_params[3] = top_blob_bordered_adj.w - wcut;
crop_params[4] = top_blob_bordered_adj.h - hcut;
crop_params[5] = top_blob_bordered_adj.c;
}
else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234)
{
// onnx padding=SAME_LOWER
crop_params[0] = wcut - wcut / 2;
crop_params[1] = hcut - hcut / 2;
crop_params[2] = 0;
crop_params[3] = top_blob_bordered_adj.w - wcut;
crop_params[4] = top_blob_bordered_adj.h - hcut;
crop_params[5] = top_blob_bordered_adj.c;
}

std::vector<VkMat> crop_inputs(2);
crop_inputs[0] = top_blob_bordered_adj;
crop_inputs[1] = crop_param_blob;

std::vector<VkMat> crop_bottom_blobs(2);
crop_bottom_blobs[0] = top_blob_bordered;
crop_bottom_blobs[1] = reference_blob;
std::vector<VkMat> crop_top_blobs(1);
crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt);
top_blob = crop_top_blobs[0];
std::vector<VkMat> crop_outputs(1);
output_crop->forward(crop_inputs, crop_outputs, cmd, opt);
top_blob = crop_outputs[0];
if (top_blob.empty())
return -100;

outw = top_blob.w;
outh = top_blob.h;


+ 2
- 0
src/layer/vulkan/deconvolutiondepthwise_vulkan.h View File

@@ -36,6 +36,8 @@ public:
VkMat bias_data_gpu;

ncnn::Layer* crop;
ncnn::Layer* output_pad;
ncnn::Layer* output_crop;
ncnn::Layer* packing_pack1;
ncnn::Layer* packing_pack4;



+ 28
- 5
tools/mxnet/mxnet2ncnn.cpp View File

@@ -1761,6 +1761,8 @@ int main(int argc, char** argv)
std::vector<int> dilate = n.attr("dilate");
std::vector<int> stride = n.attr("stride");
std::vector<int> pad = n.attr("pad");
std::vector<int> adj = n.attr("adj");
std::vector<int> target_shape = n.attr("target_shape");
int no_bias = n.attr("no_bias");
int num_group = n.attr("num_group");

@@ -1789,11 +1791,32 @@ int main(int argc, char** argv)
fprintf(pp, " 13=%d", stride[0]);
}

if (pad.size() == 1) {
fprintf(pp, " 4=%d", pad[0]);
} else if (pad.size() == 2) {
fprintf(pp, " 4=%d", pad[1]);
fprintf(pp, " 14=%d", pad[0]);
if (target_shape.size() == 0)
{
if (pad.size() == 1) {
fprintf(pp, " 4=%d", pad[0]);
} else if (pad.size() == 2) {
fprintf(pp, " 4=%d", pad[1]);
fprintf(pp, " 14=%d", pad[0]);
}

if (adj.size() == 1) {
fprintf(pp, " 18=%d", adj[0]);
} else if (adj.size() == 2) {
fprintf(pp, " 18=%d", adj[1]);
fprintf(pp, " 19=%d", adj[0]);
}
}
else
{
fprintf(pp, " 4=-233");

if (target_shape.size() == 1) {
fprintf(pp, " 20=%d", target_shape[0]);
} else if (target_shape.size() == 2) {
fprintf(pp, " 20=%d", target_shape[1]);
fprintf(pp, " 21=%d", target_shape[0]);
}
}

fprintf(pp, " 5=%d", no_bias == 1 ? 0 : 1);


+ 19
- 5
tools/onnx/onnx2ncnn.cpp View File

@@ -1257,7 +1257,7 @@ int main(int argc, char** argv)
}
else if (op == "AveragePool" || op == "MaxPool")
{
std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO
std::string auto_pad = get_node_attr_s(node, "auto_pad");
std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape");
std::vector<int> strides = get_node_attr_ai(node, "strides");
std::vector<int> pads = get_node_attr_ai(node, "pads");
@@ -1386,7 +1386,7 @@ int main(int argc, char** argv)
int num_filter = W.dims(0);
int has_bias = node.input_size() == 3 ? 1 : 0;

std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO
std::string auto_pad = get_node_attr_s(node, "auto_pad");
std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape");
std::vector<int> dilations = get_node_attr_ai(node, "dilations");
std::vector<int> strides = get_node_attr_ai(node, "strides");
@@ -1466,12 +1466,12 @@ int main(int argc, char** argv)

int has_bias = node.input_size() == 3 ? 1 : 0;

std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO
std::string auto_pad = get_node_attr_s(node, "auto_pad");
std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape");
std::vector<int> dilations = get_node_attr_ai(node, "dilations");
std::vector<int> strides = get_node_attr_ai(node, "strides");
std::vector<int> output_padding = get_node_attr_ai(node, "output_padding");//TODO implement adj
std::vector<int> output_shape = get_node_attr_ai(node, "output_shape");//TODO
std::vector<int> output_padding = get_node_attr_ai(node, "output_padding");
std::vector<int> output_shape = get_node_attr_ai(node, "output_shape");
std::vector<int> pads = get_node_attr_ai(node, "pads");
int group = get_node_attr_i(node, "group", 1);
int num_filter = W.dims(1) * group;
@@ -1524,6 +1524,20 @@ int main(int argc, char** argv)

}

if (output_padding.size() == 1) {
fprintf(pp, " 18=%d", output_padding[0]);
} else if (output_padding.size() == 2) {
fprintf(pp, " 18=%d", output_padding[1]);
fprintf(pp, " 19=%d", output_padding[0]);
}

if (output_shape.size() == 1) {
fprintf(pp, " 20=%d", output_shape[0]);
} else if (output_shape.size() == 2) {
fprintf(pp, " 20=%d", output_shape[1]);
fprintf(pp, " 21=%d", output_shape[0]);
}

fprintf(pp, " 5=%d", has_bias);

fprintf(pp, " 6=%d", get_tensor_proto_data_size(W));


Loading…
Cancel
Save