Browse Source

use sgemm fp32 on arm platform, optimize conv1x1s2 (#1031)

tags/20190611
BUG1989 nihui 7 years ago
parent
commit
d9f269fa3d
8 changed files with 1587 additions and 13 deletions
  1. +19
    -4
      src/layer/arm/convolution_arm.cpp
  2. +1
    -0
      src/layer/arm/convolution_arm.h
  3. +1532
    -0
      src/layer/arm/convolution_sgemm.h
  4. +2
    -2
      src/layer/arm/convolutiondepthwise_arm.cpp
  5. +2
    -2
      src/layer/arm/deconvolution_arm.cpp
  6. +2
    -2
      src/layer/arm/deconvolutiondepthwise_arm.cpp
  7. +28
    -2
      src/net.cpp
  8. +1
    -1
      src/net.h

+ 19
- 4
src/layer/arm/convolution_arm.cpp View File

@@ -29,6 +29,7 @@ namespace ncnn {
#include "convolution_4x4.h"
#include "convolution_5x5.h"
#include "convolution_7x7.h"
#include "convolution_sgemm.h"
#include "convolution_sgemm_int8.h"
#include "convolution_1x1_int8.h"
#include "convolution_3x3_int8.h"
@@ -79,7 +80,7 @@ int Convolution_arm::create_pipeline(const Option& opt)
if (activation)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
activation->create_pipeline(opt_cpu);
}

@@ -155,6 +156,13 @@ int Convolution_arm::create_pipeline(const Option& opt)
conv3x3s2_transform_kernel_neon(weight_data, weight_3x3s2_data, num_input, num_output);
}

{
int kernel_size = kernel_w * kernel_h;
int num_input = weight_data_size / kernel_size / num_output;

conv_im2col_sgemm_transform_kernel_neon(weight_data, weight_sgemm_data, num_input, num_output, kernel_size);
}

return 0;
}

@@ -163,7 +171,7 @@ int Convolution_arm::destroy_pipeline(const Option& opt)
if (activation)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
activation->destroy_pipeline(opt_cpu);
delete activation;
activation = 0;
@@ -581,10 +589,17 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
{
conv1x1s1_sgemm_neon(bottom_blob_bordered, top_blob, weight_1x1_sgemm_data, bias_data, opt);
}
else if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 2 && stride_h == 2)
else if (kernel_w == 1 && kernel_h == 1 && dilation_w == 1 && dilation_h == 1 && stride_w == 2 && stride_h == 2)
{
conv3x3s2_packed_neon(bottom_blob_bordered, top_blob, weight_3x3s2_data, bias_data, opt);
conv_im2col_sgemm_neon(bottom_blob_bordered, top_blob, weight_sgemm_data, bias_data, kernel_w, kernel_h, stride_w, stride_h, opt);
}
else if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 2 && stride_h == 2)
{
if (outw >=8 && outh >=8)
conv3x3s2_packed_neon(bottom_blob_bordered, top_blob, weight_3x3s2_data, bias_data, opt);
else
conv_im2col_sgemm_neon(bottom_blob_bordered, top_blob, weight_sgemm_data, bias_data, kernel_w, kernel_h, stride_w, stride_h, opt);
}
else
conv(bottom_blob_bordered, top_blob, weight_data, bias_data, opt);



+ 1
- 0
src/layer/arm/convolution_arm.h View File

@@ -43,6 +43,7 @@ public:
Mat weight_1x1s1_sgemm_int8_data;
Mat weight_3x3_winograd23_data;
Mat weight_sgemm_int8_data;
Mat weight_sgemm_data;
std::vector<Mat> weight_3x3_winograd23_int8_data;
};



+ 1532
- 0
src/layer/arm/convolution_sgemm.h
File diff suppressed because it is too large
View File


+ 2
- 2
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -37,7 +37,7 @@ ConvolutionDepthWise_arm::ConvolutionDepthWise_arm()
int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (activation_type == 1)
{
@@ -175,7 +175,7 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
int ConvolutionDepthWise_arm::destroy_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (activation)
{


+ 2
- 2
src/layer/arm/deconvolution_arm.cpp View File

@@ -68,7 +68,7 @@ int Deconvolution_arm::create_pipeline(const Option& opt)
if (activation)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
activation->create_pipeline(opt_cpu);
}

@@ -80,7 +80,7 @@ int Deconvolution_arm::destroy_pipeline(const Option& opt)
if (activation)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
activation->destroy_pipeline(opt_cpu);
delete activation;
activation = 0;


+ 2
- 2
src/layer/arm/deconvolutiondepthwise_arm.cpp View File

@@ -65,7 +65,7 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
if (activation)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
activation->create_pipeline(opt_cpu);
}

@@ -77,7 +77,7 @@ int DeconvolutionDepthWise_arm::destroy_pipeline(const Option& opt)
if (activation)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
activation->destroy_pipeline(opt_cpu);
delete activation;
activation = 0;


+ 28
- 2
src/net.cpp View File

@@ -832,18 +832,38 @@ int Net::load_model(const unsigned char* _mem)
return mem - _mem;
}

void Net::fuse_network()
int Net::fuse_network()
{
// set the int8 op fusion:requantize
#if NCNN_STRING && NCNN_REQUANT
// fprintf(stderr, "Test op fusion to int8 implement:\n");
// check whether the network is a quantized model
bool net_quantized = false;
for (size_t i=0; i<layers.size(); i++)
{
Layer* layer = layers[i];
if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise")
{
if (layer->type == "Convolution" && (((Convolution*)layer)->use_int8_inference == false))
continue;
if (layer->type == "ConvolutionDepthWise" && (((ConvolutionDepthWise*)layer)->use_int8_inference == false))
continue;
net_quantized = true;
}
}

if (net_quantized == false)
return 0;

for (size_t i=0; i<layers.size(); i++)
{
Layer* layer = layers[i];

if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise")
{
if (((Convolution*)layer)->use_int8_inference == false)
if (layer->type == "Convolution" && (((Convolution*)layer)->use_int8_inference == false))
continue;
if (layer->type == "ConvolutionDepthWise" && (((ConvolutionDepthWise*)layer)->use_int8_inference == false))
continue;

for (size_t n=0; n<blobs[layer->tops[0]].consumers.size(); n++)
@@ -858,6 +878,11 @@ void Net::fuse_network()

if (layer_next_2->type == "Convolution" || layer_next_2->type == "ConvolutionDepthWise")
{
if (layer_next_2->type == "Convolution" && ((Convolution*)layer_next_2)->use_int8_inference == false)
continue;
if (layer_next_2->type == "ConvolutionDepthWise" && ((ConvolutionDepthWise*)layer_next_2)->use_int8_inference == false)
continue;

// fprintf(stderr, "%s, %s, %s\n", layer->name.c_str(), layer_next->name.c_str(), layer_next_2->name.c_str());
if (layer->type == "Convolution" && layer_next_2->type == "Convolution")
{
@@ -934,6 +959,7 @@ void Net::fuse_network()
}
}
#endif
return 0;
}

void Net::clear()


+ 1
- 1
src/net.h View File

@@ -96,7 +96,7 @@ public:
protected:
// parse the structure of network
// fuse int8 op dequantize and quantize by requantize
void fuse_network();
int fuse_network();

#if NCNN_VULKAN



Loading…
Cancel
Save