switch to winograd4

8 years ago · 03c1f63c2e
--- a/src/layer/arm/convolution_arm.cpp
+++ b/src/layer/arm/convolution_arm.cpp
@@ -14,8 +14,6 @@

 #include "convolution_arm.h"

 #include "cpu.h"

 namespace ncnn {

 #include "convolution_1x1.h"
@@ -170,30 +168,9 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob) const
    if (top_blob.empty())
        return -100;

    if (use_winograd3x3 && w <= 80 && h <= 80)
    if (use_winograd3x3 && w <= 120 && h <= 120)
    {
        int num_threads = get_omp_num_threads();
        if (num_threads == 1 || (channels >= 64 && num_output >= 64))
        {
 #if __aarch64__
            // always faster than the default
            conv3x3s1_winograd64_neon2(bottom_blob_bordered, top_blob, weight_3x3_winograd64_data, bias_data);
 #else
            if (w <= 50 && h <= 50)
            {
                // another path for small image
                conv3x3s1_winograd64_neon2(bottom_blob_bordered, top_blob, weight_3x3_winograd64_data, bias_data);
            }
            else
            {
                conv3x3s1_winograd64_neon(bottom_blob_bordered, top_blob, weight_3x3_winograd64_data, bias_data);
            }
 #endif // __aarch64__
        }
        else
        {
            conv(bottom_blob_bordered, top_blob, weight_data, bias_data);
        }
        conv3x3s1_winograd64_neon4(bottom_blob_bordered, top_blob, weight_3x3_winograd64_data, bias_data);
    }
    else
        conv(bottom_blob_bordered, top_blob, weight_data, bias_data);