Browse Source

conv1x1s1 bf16s neon kernel

tags/20200413
nihui 6 years ago
parent
commit
90e6be457b
2 changed files with 2342 additions and 1 deletion
  1. +2322
    -0
      src/layer/arm/convolution_1x1_bf16s.h
  2. +20
    -1
      src/layer/arm/convolution_arm.cpp

+ 2322
- 0
src/layer/arm/convolution_1x1_bf16s.h
File diff suppressed because it is too large
View File


+ 20
- 1
src/layer/arm/convolution_arm.cpp View File

@@ -37,6 +37,8 @@ namespace ncnn {
#include "convolution_1x1_int8.h"
#include "convolution_3x3_int8.h"

+#include "convolution_1x1_bf16s.h"

#if __ARM_NEON
#include "convolution_1x1_pack4.h"
#include "convolution_1x1_pack4to1.h"
@@ -1192,7 +1194,14 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt)
// pack1
if (elempack == 1 && out_elempack == 1)
{
-ncnn::cast_float32_to_bfloat16(weight_data, weight_data_bf16, opt);
+if (kernel_w == 1 && kernel_h == 1 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
+{
+conv1x1s1_sgemm_transform_kernel_bf16s_neon(weight_data, weight_data_bf16, num_input, num_output);
+}
+else
+{
+ncnn::cast_float32_to_bfloat16(weight_data, weight_data_bf16, opt);
+}
}

return 0;
@@ -1542,6 +1551,16 @@ int Convolution_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const

if (elempack == 1 && out_elempack == 1)
{
+if (kernel_w == 1 && kernel_h == 1 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
+{
+conv1x1s1_sgemm_bf16s_neon(bottom_blob_bordered, top_blob, weight_data_bf16, bias_data, opt);
+
+if (activation)
+{
+activation->forward_inplace(top_blob, opt);
+}
+}
+else
{
// num_output
#pragma omp parallel for num_threads(opt.num_threads)


Loading…
Cancel
Save