|
|
|
@@ -10923,7 +10923,14 @@ static void conv3x3s1_winograd64_neon5(const Mat& bottom_blob, Mat& top_blob, co |
|
|
|
|
|
|
|
// BEGIN transform output |
|
|
|
Mat top_blob_bordered; |
|
|
|
top_blob_bordered.create(outw, outh, outch, 4u, opt.workspace_allocator); |
|
|
|
if (outw == top_blob.w && outh == top_blob.h) |
|
|
|
{ |
|
|
|
top_blob_bordered = top_blob; |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
top_blob_bordered.create(outw, outh, outch, 4u, opt.workspace_allocator); |
|
|
|
} |
|
|
|
{ |
|
|
|
// const float otm[6][8] = { |
|
|
|
// {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 32.0f, 32.0f, 0.0f}, |
|
|
|
@@ -11699,7 +11706,8 @@ static void conv3x3s1_winograd64_neon5(const Mat& bottom_blob, Mat& top_blob, co |
|
|
|
// END transform output |
|
|
|
|
|
|
|
// cut result pad |
|
|
|
copy_cut_border(top_blob_bordered, top_blob, 0, top_blob_bordered.h - top_blob.h, 0, top_blob_bordered.w - top_blob.w, opt.blob_allocator, opt.num_threads); |
|
|
|
if (top_blob_bordered.w != top_blob.w || top_blob_bordered.h != top_blob.h) |
|
|
|
copy_cut_border(top_blob_bordered, top_blob, 0, top_blob_bordered.h - top_blob.h, 0, top_blob_bordered.w - top_blob.w, opt.blob_allocator, opt.num_threads); |
|
|
|
} |
|
|
|
|
|
|
|
static void conv3x3s2_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _kernel, const Mat& _bias, const Option& opt) |
|
|
|
|