Browse Source

NEON optimization for 5x5 depthwise convolution :P

tags/20181228
nihuini 7 years ago
parent
commit
8fda293f91
2 changed files with 2288 additions and 0 deletions
  1. +2267
    -0
      src/layer/arm/convolutiondepthwise_5x5.h
  2. +21
    -0
      src/layer/arm/convolutiondepthwise_arm.cpp

+ 2267
- 0
src/layer/arm/convolutiondepthwise_5x5.h
File diff suppressed because it is too large
View File


+ 21
- 0
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -23,6 +23,7 @@
namespace ncnn {

#include "convolutiondepthwise_3x3.h"
#include "convolutiondepthwise_5x5.h"

#include "convolutiondepthwise_3x3_int8.h"

@@ -65,6 +66,13 @@ int ConvolutionDepthWise_arm::load_model(const ModelBin& mb)
return 0;
}
}
if (kernel_w == 5 && kernel_h == 5 && dilation_w == 1 && dilation_h == 1)
{
if ((stride_w == 1 && stride_h == 1) || (stride_w == 2 && stride_h == 2))
{
return 0;
}
}
}

const int channels_g = channels / group;
@@ -243,6 +251,19 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
return 0;
}
}
if (kernel_w == 5 && kernel_h == 5 && dilation_w == 1 && dilation_h == 1)
{
if (stride_w == 1 && stride_h == 1)
{
convdw5x5s1_neon(bottom_blob_bordered, top_blob, weight_data, bias_data, opt);
return 0;
}
else if (stride_w == 2 && stride_h == 2)
{
convdw5x5s2_neon(bottom_blob_bordered, top_blob, weight_data, bias_data, opt);
return 0;
}
}
}

#pragma omp parallel for num_threads(opt.num_threads)


Loading…
Cancel
Save