| @@ -512,11 +512,6 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) | |||
| { | |||
| @@ -527,12 +522,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) | |||
| { | |||
| @@ -543,13 +533,13 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| int outw = (w - kernel_extent_w) / stride_w + 1; | |||
| int outh = (h - kernel_extent_h) / stride_h + 1; | |||
| @@ -1010,11 +1000,6 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) | |||
| { | |||
| @@ -1025,12 +1010,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) | |||
| { | |||
| @@ -1041,13 +1021,13 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| int outw = (w - kernel_size) / stride + 1; | |||
| int outh = (h - kernel_size) / stride + 1; | |||
| @@ -440,11 +440,6 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) | |||
| { | |||
| @@ -455,12 +450,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) | |||
| { | |||
| @@ -471,13 +461,13 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| int outw = (w - kernel_extent_w) / stride_w + 1; | |||
| int outh = (h - kernel_extent_h) / stride_h + 1; | |||
| @@ -299,19 +299,22 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti | |||
| size_t out_elemsize = elemsize / elempack * out_elempack; | |||
| Mat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -629,9 +632,58 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti | |||
| } | |||
| } | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Mat top_blob_unbordered; | |||
| Option opt_ub = opt; | |||
| opt_ub.blob_allocator = opt.workspace_allocator; | |||
| copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); | |||
| if (top_blob_unbordered.empty()) | |||
| return -100; | |||
| copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); | |||
| } | |||
| else | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| Mat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| @@ -690,25 +742,77 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti | |||
| int outh = (h - 1) * stride + kernel_size; | |||
| Mat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| deconv(bottom_blob, top_blob_bordered, weight_data, bias_data, opt); | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Mat top_blob_unbordered; | |||
| Option opt_ub = opt; | |||
| opt_ub.blob_allocator = opt.workspace_allocator; | |||
| copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); | |||
| if (top_blob_unbordered.empty()) | |||
| return -100; | |||
| copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); | |||
| } | |||
| else | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| Mat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| @@ -397,19 +397,22 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c | |||
| { | |||
| Mat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -507,9 +510,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c | |||
| } | |||
| } | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Mat top_blob_unbordered; | |||
| Option opt_ub = opt; | |||
| opt_ub.blob_allocator = opt.workspace_allocator; | |||
| copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); | |||
| if (top_blob_unbordered.empty()) | |||
| return -100; | |||
| copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); | |||
| } | |||
| else | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| Mat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| @@ -888,9 +940,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c | |||
| top_blob_bordered = top_blob_bordered_unpacked; | |||
| } | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Mat top_blob_unbordered; | |||
| Option opt_ub = opt; | |||
| opt_ub.blob_allocator = opt.workspace_allocator; | |||
| copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); | |||
| if (top_blob_unbordered.empty()) | |||
| return -100; | |||
| copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); | |||
| } | |||
| else | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| Mat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| @@ -908,19 +1009,22 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c | |||
| #endif // __ARM_NEON | |||
| Mat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| // depth-wise | |||
| if (channels == group && group == num_output) | |||
| @@ -961,9 +1065,58 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, c | |||
| } | |||
| } | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Mat top_blob_unbordered; | |||
| Option opt_ub = opt; | |||
| opt_ub.blob_allocator = opt.workspace_allocator; | |||
| copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); | |||
| if (top_blob_unbordered.empty()) | |||
| return -100; | |||
| copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); | |||
| } | |||
| else | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| Mat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| @@ -336,11 +336,6 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) | |||
| { | |||
| @@ -352,12 +347,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) | |||
| { | |||
| @@ -369,13 +359,13 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| int outw = (w - kernel_extent_w) / stride_w + 1; | |||
| int outh = (h - kernel_extent_h) / stride_h + 1; | |||
| @@ -329,11 +329,6 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) | |||
| { | |||
| @@ -345,12 +340,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) | |||
| { | |||
| @@ -362,13 +352,13 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(bottom_blob_unbordered, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| if (bottom_blob_bordered.empty()) | |||
| return -100; | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| int outw = (w - kernel_extent_w) / stride_w + 1; | |||
| int outh = (h - kernel_extent_h) / stride_h + 1; | |||
| @@ -39,6 +39,10 @@ int Deconvolution::load_param(const ParamDict& pd) | |||
| pad_right = pd.get(15, pad_left); | |||
| pad_top = pd.get(14, pad_left); | |||
| pad_bottom = pd.get(16, pad_top); | |||
| output_pad_right = pd.get(18, 0); | |||
| output_pad_bottom = pd.get(19, output_pad_right); | |||
| output_w = pd.get(20, 0); | |||
| output_h = pd.get(21, output_w); | |||
| bias_term = pd.get(5, 0); | |||
| weight_data_size = pd.get(6, 0); | |||
| activation_type = pd.get(9, 0); | |||
| @@ -82,19 +86,22 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| int outh = (h - 1) * stride_h + kernel_extent_h; | |||
| Mat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -200,9 +207,58 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& | |||
| } | |||
| } | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Mat top_blob_unbordered; | |||
| Option opt_ub = opt; | |||
| opt_ub.blob_allocator = opt.workspace_allocator; | |||
| copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); | |||
| if (top_blob_unbordered.empty()) | |||
| return -100; | |||
| copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); | |||
| } | |||
| else | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| Mat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| @@ -43,6 +43,10 @@ public: | |||
| int pad_right; | |||
| int pad_top; | |||
| int pad_bottom; | |||
| int output_pad_right; | |||
| int output_pad_bottom; | |||
| int output_w; | |||
| int output_h; | |||
| int bias_term; | |||
| int weight_data_size; | |||
| @@ -39,6 +39,10 @@ int DeconvolutionDepthWise::load_param(const ParamDict& pd) | |||
| pad_right = pd.get(15, pad_left); | |||
| pad_top = pd.get(14, pad_left); | |||
| pad_bottom = pd.get(16, pad_top); | |||
| output_pad_right = pd.get(18, 0); | |||
| output_pad_bottom = pd.get(19, output_pad_right); | |||
| output_w = pd.get(20, 0); | |||
| output_h = pd.get(21, output_w); | |||
| bias_term = pd.get(5, 0); | |||
| weight_data_size = pd.get(6, 0); | |||
| group = pd.get(7, 1); | |||
| @@ -87,19 +91,22 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const | |||
| int outh = (h - 1) * stride_h + kernel_extent_h; | |||
| Mat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.workspace_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered = top_blob; | |||
| top_blob_bordered.create(outw, outh, num_output, elemsize, opt.blob_allocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| const int maxk = kernel_w * kernel_h; | |||
| @@ -296,9 +303,58 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const | |||
| } | |||
| } | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Mat top_blob_unbordered; | |||
| Option opt_ub = opt; | |||
| opt_ub.blob_allocator = opt.workspace_allocator; | |||
| copy_cut_border(top_blob_bordered, top_blob_unbordered, pad_top, pad_bottom, pad_left, pad_right, opt_ub); | |||
| if (top_blob_unbordered.empty()) | |||
| return -100; | |||
| copy_make_border(top_blob_unbordered, top_blob, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt); | |||
| } | |||
| else | |||
| { | |||
| copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| Mat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| Option opt_b = opt; | |||
| opt_b.blob_allocator = opt.workspace_allocator; | |||
| copy_make_border(top_blob_bordered, top_blob_bordered_adj, 0, output_pad_bottom, 0, output_pad_right, BORDER_CONSTANT, 0.f, opt_b); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut / 2, hcut - hcut / 2, wcut / 2, wcut - wcut / 2, opt); | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| copy_cut_border(top_blob_bordered_adj, top_blob, hcut - hcut / 2, hcut / 2, wcut - wcut / 2, wcut / 2, opt); | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| @@ -43,6 +43,10 @@ public: | |||
| int pad_right; | |||
| int pad_top; | |||
| int pad_bottom; | |||
| int output_pad_right; | |||
| int output_pad_bottom; | |||
| int output_w; | |||
| int output_h; | |||
| int bias_term; | |||
| int weight_data_size; | |||
| @@ -868,9 +868,6 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom | |||
| opt_pad.blob_vkallocator = opt.workspace_vkallocator; | |||
| padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad); | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) | |||
| { | |||
| @@ -898,9 +895,6 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom | |||
| padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); | |||
| bottom_blob_bordered = padding_outputs[0]; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) | |||
| { | |||
| @@ -928,11 +922,11 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom | |||
| padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); | |||
| bottom_blob_bordered = padding_outputs[0]; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| int outw = (w - kernel_extent_w) / stride_w + 1; | |||
| int outh = (h - kernel_extent_h) / stride_h + 1; | |||
| int out_elempack = num_output % 4 == 0 ? 4 : 1; | |||
| @@ -473,9 +473,6 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl | |||
| opt_pad.blob_vkallocator = opt.workspace_vkallocator; | |||
| padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad); | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) | |||
| { | |||
| @@ -503,9 +500,6 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl | |||
| padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); | |||
| bottom_blob_bordered = padding_outputs[0]; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) | |||
| { | |||
| @@ -533,11 +527,11 @@ int ConvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_bl | |||
| padding->forward(padding_inputs, padding_outputs, cmd, opt_pad); | |||
| bottom_blob_bordered = padding_outputs[0]; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| } | |||
| w = bottom_blob_bordered.w; | |||
| h = bottom_blob_bordered.h; | |||
| int outw = (w - kernel_extent_w) / stride_w + 1; | |||
| int outh = (h - kernel_extent_h) / stride_h + 1; | |||
| int out_elempack = num_output % 4 == 0 ? 4 : 1; | |||
| @@ -25,6 +25,9 @@ Deconvolution_vulkan::Deconvolution_vulkan() | |||
| support_vulkan = true; | |||
| crop = 0; | |||
| output_pad = 0; | |||
| output_crop = 0; | |||
| pipeline_deconvolution = 0; | |||
| pipeline_deconvolution_pack4 = 0; | |||
| pipeline_deconvolution_pack1to4 = 0; | |||
| @@ -47,6 +50,37 @@ int Deconvolution_vulkan::create_pipeline(const Option& opt) | |||
| crop->create_pipeline(opt); | |||
| } | |||
| { | |||
| output_pad = ncnn::create_layer(ncnn::LayerType::Padding); | |||
| output_pad->vkdev = vkdev; | |||
| ncnn::ParamDict pd; | |||
| pd.set(0, 0); | |||
| pd.set(1, output_pad_bottom); | |||
| pd.set(2, 0); | |||
| pd.set(3, output_pad_right); | |||
| pd.set(4, 0); | |||
| pd.set(5, 0.f); | |||
| output_pad->load_param(pd); | |||
| output_pad->create_pipeline(opt); | |||
| } | |||
| { | |||
| output_crop = ncnn::create_layer(ncnn::LayerType::Crop); | |||
| output_crop->vkdev = vkdev; | |||
| ncnn::ParamDict pd; | |||
| pd.set(0, -233); | |||
| pd.set(1, -233); | |||
| pd.set(2, -233); | |||
| output_crop->load_param(pd); | |||
| output_crop->create_pipeline(opt); | |||
| } | |||
| const int maxk = kernel_w * kernel_h; | |||
| int num_input = weight_data_size / maxk / num_output; | |||
| @@ -106,6 +140,20 @@ int Deconvolution_vulkan::destroy_pipeline(const Option& opt) | |||
| crop = 0; | |||
| } | |||
| if (output_pad) | |||
| { | |||
| output_pad->destroy_pipeline(opt); | |||
| delete output_pad; | |||
| output_pad = 0; | |||
| } | |||
| if (output_crop) | |||
| { | |||
| output_crop->destroy_pipeline(opt); | |||
| delete output_crop; | |||
| output_crop = 0; | |||
| } | |||
| delete pipeline_deconvolution; | |||
| pipeline_deconvolution = 0; | |||
| @@ -351,18 +399,20 @@ int Deconvolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkC | |||
| } | |||
| VkMat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| std::vector<VkMat> bindings(4); | |||
| bindings[0] = bottom_blob; | |||
| @@ -420,19 +470,103 @@ int Deconvolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkC | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob_bordered); | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| VkMat top_blob_unbordered; | |||
| { | |||
| ncnn::Option opt_ub = opt; | |||
| opt_ub.blob_vkallocator = opt.workspace_vkallocator; | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub); | |||
| top_blob_unbordered = crop_top_blobs[0]; | |||
| } | |||
| output_pad->forward(top_blob_unbordered, top_blob, cmd, opt); | |||
| } | |||
| else | |||
| { | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); | |||
| top_blob = crop_top_blobs[0]; | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); | |||
| top_blob = crop_top_blobs[0]; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| VkMat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| ncnn::Option opt_pad = opt; | |||
| opt_pad.blob_vkallocator = opt.workspace_vkallocator; | |||
| output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator); | |||
| crop_param_blob.prepare_staging_buffer(); | |||
| int* crop_params = crop_param_blob.mapped(); | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| crop_params[0] = wcut / 2; | |||
| crop_params[1] = hcut / 2; | |||
| crop_params[2] = 0; | |||
| crop_params[3] = top_blob_bordered_adj.w - wcut; | |||
| crop_params[4] = top_blob_bordered_adj.h - hcut; | |||
| crop_params[5] = top_blob_bordered_adj.c; | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| crop_params[0] = wcut - wcut / 2; | |||
| crop_params[1] = hcut - hcut / 2; | |||
| crop_params[2] = 0; | |||
| crop_params[3] = top_blob_bordered_adj.w - wcut; | |||
| crop_params[4] = top_blob_bordered_adj.h - hcut; | |||
| crop_params[5] = top_blob_bordered_adj.c; | |||
| } | |||
| std::vector<VkMat> crop_inputs(2); | |||
| crop_inputs[0] = top_blob_bordered_adj; | |||
| crop_inputs[1] = crop_param_blob; | |||
| std::vector<VkMat> crop_outputs(1); | |||
| output_crop->forward(crop_inputs, crop_outputs, cmd, opt); | |||
| top_blob = crop_outputs[0]; | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| @@ -36,6 +36,8 @@ public: | |||
| VkMat bias_data_gpu; | |||
| ncnn::Layer* crop; | |||
| ncnn::Layer* output_pad; | |||
| ncnn::Layer* output_crop; | |||
| Pipeline* pipeline_deconvolution; | |||
| @@ -25,6 +25,8 @@ DeconvolutionDepthWise_vulkan::DeconvolutionDepthWise_vulkan() | |||
| support_vulkan = true; | |||
| crop = 0; | |||
| output_pad = 0; | |||
| output_crop = 0; | |||
| packing_pack1 = 0; | |||
| packing_pack4 = 0; | |||
| @@ -53,6 +55,37 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& opt) | |||
| crop->create_pipeline(opt); | |||
| } | |||
| { | |||
| output_pad = ncnn::create_layer(ncnn::LayerType::Padding); | |||
| output_pad->vkdev = vkdev; | |||
| ncnn::ParamDict pd; | |||
| pd.set(0, 0); | |||
| pd.set(1, output_pad_bottom); | |||
| pd.set(2, 0); | |||
| pd.set(3, output_pad_right); | |||
| pd.set(4, 0); | |||
| pd.set(5, 0.f); | |||
| output_pad->load_param(pd); | |||
| output_pad->create_pipeline(opt); | |||
| } | |||
| { | |||
| output_crop = ncnn::create_layer(ncnn::LayerType::Crop); | |||
| output_crop->vkdev = vkdev; | |||
| ncnn::ParamDict pd; | |||
| pd.set(0, -233); | |||
| pd.set(1, -233); | |||
| pd.set(2, -233); | |||
| output_crop->load_param(pd); | |||
| output_crop->create_pipeline(opt); | |||
| } | |||
| std::vector<vk_specialization_type> specializations(11); | |||
| specializations[0].i = kernel_w; | |||
| specializations[1].i = kernel_h; | |||
| @@ -165,6 +198,20 @@ int DeconvolutionDepthWise_vulkan::destroy_pipeline(const Option& opt) | |||
| crop = 0; | |||
| } | |||
| if (output_pad) | |||
| { | |||
| output_pad->destroy_pipeline(opt); | |||
| delete output_pad; | |||
| output_pad = 0; | |||
| } | |||
| if (output_crop) | |||
| { | |||
| output_crop->destroy_pipeline(opt); | |||
| delete output_crop; | |||
| output_crop = 0; | |||
| } | |||
| if (packing_pack1) | |||
| { | |||
| packing_pack1->destroy_pipeline(opt); | |||
| @@ -495,18 +542,20 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_ | |||
| } | |||
| VkMat top_blob_bordered; | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || output_pad_right > 0 || output_pad_bottom > 0 || (output_w > 0 && output_h > 0)) | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| else | |||
| { | |||
| top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator, opt.staging_vkallocator); | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| } | |||
| if (top_blob_bordered.empty()) | |||
| return -100; | |||
| // depth-wise | |||
| if (channels == group / elempack && group / elempack == num_output / elempack) | |||
| @@ -534,19 +583,103 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_ | |||
| // record | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob_bordered); | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| VkMat top_blob_unbordered; | |||
| { | |||
| ncnn::Option opt_ub = opt; | |||
| opt_ub.blob_vkallocator = opt.workspace_vkallocator; | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub); | |||
| top_blob_unbordered = crop_top_blobs[0]; | |||
| } | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); | |||
| top_blob = crop_top_blobs[0]; | |||
| output_pad->forward(top_blob_unbordered, top_blob, cmd, opt); | |||
| } | |||
| else | |||
| { | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); | |||
| top_blob = crop_top_blobs[0]; | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| VkMat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| ncnn::Option opt_pad = opt; | |||
| opt_pad.blob_vkallocator = opt.workspace_vkallocator; | |||
| output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator); | |||
| crop_param_blob.prepare_staging_buffer(); | |||
| int* crop_params = crop_param_blob.mapped(); | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| crop_params[0] = wcut / 2; | |||
| crop_params[1] = hcut / 2; | |||
| crop_params[2] = 0; | |||
| crop_params[3] = top_blob_bordered_adj.w - wcut; | |||
| crop_params[4] = top_blob_bordered_adj.h - hcut; | |||
| crop_params[5] = top_blob_bordered_adj.c; | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| crop_params[0] = wcut - wcut / 2; | |||
| crop_params[1] = hcut - hcut / 2; | |||
| crop_params[2] = 0; | |||
| crop_params[3] = top_blob_bordered_adj.w - wcut; | |||
| crop_params[4] = top_blob_bordered_adj.h - hcut; | |||
| crop_params[5] = top_blob_bordered_adj.c; | |||
| } | |||
| std::vector<VkMat> crop_inputs(2); | |||
| crop_inputs[0] = top_blob_bordered_adj; | |||
| crop_inputs[1] = crop_param_blob; | |||
| std::vector<VkMat> crop_outputs(1); | |||
| output_crop->forward(crop_inputs, crop_outputs, cmd, opt); | |||
| top_blob = crop_outputs[0]; | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| @@ -646,19 +779,103 @@ int DeconvolutionDepthWise_vulkan::forward(const VkMat& bottom_blob, VkMat& top_ | |||
| top_blob_bordered = top_blob_unpacked; | |||
| } | |||
| if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| if (output_w == outw && output_h == outh && output_pad_right == 0 && output_pad_bottom == 0) | |||
| { | |||
| top_blob = top_blob_bordered; | |||
| } | |||
| else if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) | |||
| { | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| VkMat top_blob_unbordered; | |||
| { | |||
| ncnn::Option opt_ub = opt; | |||
| opt_ub.blob_vkallocator = opt.workspace_vkallocator; | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt_ub); | |||
| top_blob_unbordered = crop_top_blobs[0]; | |||
| } | |||
| output_pad->forward(top_blob_unbordered, top_blob, cmd, opt); | |||
| } | |||
| else | |||
| { | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); | |||
| top_blob = crop_top_blobs[0]; | |||
| } | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| } | |||
| else if (output_w > 0 && output_h > 0) | |||
| { | |||
| VkMat reference_blob; | |||
| reference_blob.dims = 2; | |||
| reference_blob.w = top_blob_bordered.w - pad_left - pad_right; | |||
| reference_blob.h = top_blob_bordered.h - pad_top - pad_bottom; | |||
| VkMat top_blob_bordered_adj = top_blob_bordered; | |||
| if (output_pad_right > 0 || output_pad_bottom > 0) | |||
| { | |||
| ncnn::Option opt_pad = opt; | |||
| opt_pad.blob_vkallocator = opt.workspace_vkallocator; | |||
| output_pad->forward(top_blob_bordered, top_blob_bordered_adj, cmd, opt_pad); | |||
| if (top_blob_bordered_adj.empty()) | |||
| return -100; | |||
| } | |||
| int wcut = top_blob_bordered_adj.w - output_w; | |||
| int hcut = top_blob_bordered_adj.h - output_h; | |||
| VkMat crop_param_blob(4, (size_t)4u, 1, opt.staging_vkallocator, opt.staging_vkallocator); | |||
| crop_param_blob.prepare_staging_buffer(); | |||
| int* crop_params = crop_param_blob.mapped(); | |||
| if (pad_left == -233 || pad_right == -233 || pad_top == -233 || pad_bottom == -233) | |||
| { | |||
| // onnx padding=SAME_UPPER | |||
| crop_params[0] = wcut / 2; | |||
| crop_params[1] = hcut / 2; | |||
| crop_params[2] = 0; | |||
| crop_params[3] = top_blob_bordered_adj.w - wcut; | |||
| crop_params[4] = top_blob_bordered_adj.h - hcut; | |||
| crop_params[5] = top_blob_bordered_adj.c; | |||
| } | |||
| else if (pad_left == -234 || pad_right == -234 || pad_top == -234 || pad_bottom == -234) | |||
| { | |||
| // onnx padding=SAME_LOWER | |||
| crop_params[0] = wcut - wcut / 2; | |||
| crop_params[1] = hcut - hcut / 2; | |||
| crop_params[2] = 0; | |||
| crop_params[3] = top_blob_bordered_adj.w - wcut; | |||
| crop_params[4] = top_blob_bordered_adj.h - hcut; | |||
| crop_params[5] = top_blob_bordered_adj.c; | |||
| } | |||
| std::vector<VkMat> crop_inputs(2); | |||
| crop_inputs[0] = top_blob_bordered_adj; | |||
| crop_inputs[1] = crop_param_blob; | |||
| std::vector<VkMat> crop_bottom_blobs(2); | |||
| crop_bottom_blobs[0] = top_blob_bordered; | |||
| crop_bottom_blobs[1] = reference_blob; | |||
| std::vector<VkMat> crop_top_blobs(1); | |||
| crop->forward(crop_bottom_blobs, crop_top_blobs, cmd, opt); | |||
| top_blob = crop_top_blobs[0]; | |||
| std::vector<VkMat> crop_outputs(1); | |||
| output_crop->forward(crop_inputs, crop_outputs, cmd, opt); | |||
| top_blob = crop_outputs[0]; | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| outw = top_blob.w; | |||
| outh = top_blob.h; | |||
| @@ -36,6 +36,8 @@ public: | |||
| VkMat bias_data_gpu; | |||
| ncnn::Layer* crop; | |||
| ncnn::Layer* output_pad; | |||
| ncnn::Layer* output_crop; | |||
| ncnn::Layer* packing_pack1; | |||
| ncnn::Layer* packing_pack4; | |||
| @@ -1761,6 +1761,8 @@ int main(int argc, char** argv) | |||
| std::vector<int> dilate = n.attr("dilate"); | |||
| std::vector<int> stride = n.attr("stride"); | |||
| std::vector<int> pad = n.attr("pad"); | |||
| std::vector<int> adj = n.attr("adj"); | |||
| std::vector<int> target_shape = n.attr("target_shape"); | |||
| int no_bias = n.attr("no_bias"); | |||
| int num_group = n.attr("num_group"); | |||
| @@ -1789,11 +1791,32 @@ int main(int argc, char** argv) | |||
| fprintf(pp, " 13=%d", stride[0]); | |||
| } | |||
| if (pad.size() == 1) { | |||
| fprintf(pp, " 4=%d", pad[0]); | |||
| } else if (pad.size() == 2) { | |||
| fprintf(pp, " 4=%d", pad[1]); | |||
| fprintf(pp, " 14=%d", pad[0]); | |||
| if (target_shape.size() == 0) | |||
| { | |||
| if (pad.size() == 1) { | |||
| fprintf(pp, " 4=%d", pad[0]); | |||
| } else if (pad.size() == 2) { | |||
| fprintf(pp, " 4=%d", pad[1]); | |||
| fprintf(pp, " 14=%d", pad[0]); | |||
| } | |||
| if (adj.size() == 1) { | |||
| fprintf(pp, " 18=%d", adj[0]); | |||
| } else if (adj.size() == 2) { | |||
| fprintf(pp, " 18=%d", adj[1]); | |||
| fprintf(pp, " 19=%d", adj[0]); | |||
| } | |||
| } | |||
| else | |||
| { | |||
| fprintf(pp, " 4=-233"); | |||
| if (target_shape.size() == 1) { | |||
| fprintf(pp, " 20=%d", target_shape[0]); | |||
| } else if (target_shape.size() == 2) { | |||
| fprintf(pp, " 20=%d", target_shape[1]); | |||
| fprintf(pp, " 21=%d", target_shape[0]); | |||
| } | |||
| } | |||
| fprintf(pp, " 5=%d", no_bias == 1 ? 0 : 1); | |||
| @@ -1257,7 +1257,7 @@ int main(int argc, char** argv) | |||
| } | |||
| else if (op == "AveragePool" || op == "MaxPool") | |||
| { | |||
| std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO | |||
| std::string auto_pad = get_node_attr_s(node, "auto_pad"); | |||
| std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); | |||
| std::vector<int> strides = get_node_attr_ai(node, "strides"); | |||
| std::vector<int> pads = get_node_attr_ai(node, "pads"); | |||
| @@ -1386,7 +1386,7 @@ int main(int argc, char** argv) | |||
| int num_filter = W.dims(0); | |||
| int has_bias = node.input_size() == 3 ? 1 : 0; | |||
| std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO | |||
| std::string auto_pad = get_node_attr_s(node, "auto_pad"); | |||
| std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); | |||
| std::vector<int> dilations = get_node_attr_ai(node, "dilations"); | |||
| std::vector<int> strides = get_node_attr_ai(node, "strides"); | |||
| @@ -1466,12 +1466,12 @@ int main(int argc, char** argv) | |||
| int has_bias = node.input_size() == 3 ? 1 : 0; | |||
| std::string auto_pad = get_node_attr_s(node, "auto_pad");//TODO | |||
| std::string auto_pad = get_node_attr_s(node, "auto_pad"); | |||
| std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); | |||
| std::vector<int> dilations = get_node_attr_ai(node, "dilations"); | |||
| std::vector<int> strides = get_node_attr_ai(node, "strides"); | |||
| std::vector<int> output_padding = get_node_attr_ai(node, "output_padding");//TODO implement adj | |||
| std::vector<int> output_shape = get_node_attr_ai(node, "output_shape");//TODO | |||
| std::vector<int> output_padding = get_node_attr_ai(node, "output_padding"); | |||
| std::vector<int> output_shape = get_node_attr_ai(node, "output_shape"); | |||
| std::vector<int> pads = get_node_attr_ai(node, "pads"); | |||
| int group = get_node_attr_i(node, "group", 1); | |||
| int num_filter = W.dims(1) * group; | |||
| @@ -1524,6 +1524,20 @@ int main(int argc, char** argv) | |||
| } | |||
| if (output_padding.size() == 1) { | |||
| fprintf(pp, " 18=%d", output_padding[0]); | |||
| } else if (output_padding.size() == 2) { | |||
| fprintf(pp, " 18=%d", output_padding[1]); | |||
| fprintf(pp, " 19=%d", output_padding[0]); | |||
| } | |||
| if (output_shape.size() == 1) { | |||
| fprintf(pp, " 20=%d", output_shape[0]); | |||
| } else if (output_shape.size() == 2) { | |||
| fprintf(pp, " 20=%d", output_shape[1]); | |||
| fprintf(pp, " 21=%d", output_shape[0]); | |||
| } | |||
| fprintf(pp, " 5=%d", has_bias); | |||
| fprintf(pp, " 6=%d", get_tensor_proto_data_size(W)); | |||