| @@ -29,6 +29,7 @@ namespace ncnn { | |||
| #include "convolution_4x4.h" | |||
| #include "convolution_5x5.h" | |||
| #include "convolution_7x7.h" | |||
| #include "convolution_sgemm.h" | |||
| #include "convolution_sgemm_int8.h" | |||
| #include "convolution_1x1_int8.h" | |||
| #include "convolution_3x3_int8.h" | |||
| @@ -79,7 +80,7 @@ int Convolution_arm::create_pipeline(const Option& opt) | |||
| if (activation) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| activation->create_pipeline(opt_cpu); | |||
| } | |||
| @@ -155,6 +156,13 @@ int Convolution_arm::create_pipeline(const Option& opt) | |||
| conv3x3s2_transform_kernel_neon(weight_data, weight_3x3s2_data, num_input, num_output); | |||
| } | |||
| { | |||
| int kernel_size = kernel_w * kernel_h; | |||
| int num_input = weight_data_size / kernel_size / num_output; | |||
| conv_im2col_sgemm_transform_kernel_neon(weight_data, weight_sgemm_data, num_input, num_output, kernel_size); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -163,7 +171,7 @@ int Convolution_arm::destroy_pipeline(const Option& opt) | |||
| if (activation) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| activation->destroy_pipeline(opt_cpu); | |||
| delete activation; | |||
| activation = 0; | |||
| @@ -581,10 +589,17 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option | |||
| { | |||
| conv1x1s1_sgemm_neon(bottom_blob_bordered, top_blob, weight_1x1_sgemm_data, bias_data, opt); | |||
| } | |||
| else if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 2 && stride_h == 2) | |||
| else if (kernel_w == 1 && kernel_h == 1 && dilation_w == 1 && dilation_h == 1 && stride_w == 2 && stride_h == 2) | |||
| { | |||
| conv3x3s2_packed_neon(bottom_blob_bordered, top_blob, weight_3x3s2_data, bias_data, opt); | |||
| conv_im2col_sgemm_neon(bottom_blob_bordered, top_blob, weight_sgemm_data, bias_data, kernel_w, kernel_h, stride_w, stride_h, opt); | |||
| } | |||
| else if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 2 && stride_h == 2) | |||
| { | |||
| if (outw >=8 && outh >=8) | |||
| conv3x3s2_packed_neon(bottom_blob_bordered, top_blob, weight_3x3s2_data, bias_data, opt); | |||
| else | |||
| conv_im2col_sgemm_neon(bottom_blob_bordered, top_blob, weight_sgemm_data, bias_data, kernel_w, kernel_h, stride_w, stride_h, opt); | |||
| } | |||
| else | |||
| conv(bottom_blob_bordered, top_blob, weight_data, bias_data, opt); | |||
| @@ -43,6 +43,7 @@ public: | |||
| Mat weight_1x1s1_sgemm_int8_data; | |||
| Mat weight_3x3_winograd23_data; | |||
| Mat weight_sgemm_int8_data; | |||
| Mat weight_sgemm_data; | |||
| std::vector<Mat> weight_3x3_winograd23_int8_data; | |||
| }; | |||
| @@ -37,7 +37,7 @@ ConvolutionDepthWise_arm::ConvolutionDepthWise_arm() | |||
| int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| if (activation_type == 1) | |||
| { | |||
| @@ -175,7 +175,7 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| int ConvolutionDepthWise_arm::destroy_pipeline(const Option& opt) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| if (activation) | |||
| { | |||
| @@ -68,7 +68,7 @@ int Deconvolution_arm::create_pipeline(const Option& opt) | |||
| if (activation) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| activation->create_pipeline(opt_cpu); | |||
| } | |||
| @@ -80,7 +80,7 @@ int Deconvolution_arm::destroy_pipeline(const Option& opt) | |||
| if (activation) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| activation->destroy_pipeline(opt_cpu); | |||
| delete activation; | |||
| activation = 0; | |||
| @@ -65,7 +65,7 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| if (activation) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| activation->create_pipeline(opt_cpu); | |||
| } | |||
| @@ -77,7 +77,7 @@ int DeconvolutionDepthWise_arm::destroy_pipeline(const Option& opt) | |||
| if (activation) | |||
| { | |||
| Option opt_cpu = opt; | |||
| opt_cpu.vulkan_compute = false; | |||
| opt_cpu.use_vulkan_compute = false; | |||
| activation->destroy_pipeline(opt_cpu); | |||
| delete activation; | |||
| activation = 0; | |||
| @@ -832,18 +832,38 @@ int Net::load_model(const unsigned char* _mem) | |||
| return mem - _mem; | |||
| } | |||
| void Net::fuse_network() | |||
| int Net::fuse_network() | |||
| { | |||
| // set up the int8 op fusion: requantize | |||
| #if NCNN_STRING && NCNN_REQUANT | |||
| // fprintf(stderr, "Test op fusion to int8 implement:\n"); | |||
| // scan the network to determine whether it is a quantized model | |||
| bool net_quantized = false; | |||
| for (size_t i=0; i<layers.size(); i++) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise") | |||
| { | |||
| if (layer->type == "Convolution" && (((Convolution*)layer)->use_int8_inference == false)) | |||
| continue; | |||
| if (layer->type == "ConvolutionDepthWise" && (((ConvolutionDepthWise*)layer)->use_int8_inference == false)) | |||
| continue; | |||
| net_quantized = true; | |||
| } | |||
| } | |||
| if (net_quantized == false) | |||
| return 0; | |||
| for (size_t i=0; i<layers.size(); i++) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise") | |||
| { | |||
| if (((Convolution*)layer)->use_int8_inference == false) | |||
| if (layer->type == "Convolution" && (((Convolution*)layer)->use_int8_inference == false)) | |||
| continue; | |||
| if (layer->type == "ConvolutionDepthWise" && (((ConvolutionDepthWise*)layer)->use_int8_inference == false)) | |||
| continue; | |||
| for (size_t n=0; n<blobs[layer->tops[0]].consumers.size(); n++) | |||
| @@ -858,6 +878,11 @@ void Net::fuse_network() | |||
| if (layer_next_2->type == "Convolution" || layer_next_2->type == "ConvolutionDepthWise") | |||
| { | |||
| if (layer_next_2->type == "Convolution" && ((Convolution*)layer_next_2)->use_int8_inference == false) | |||
| continue; | |||
| if (layer_next_2->type == "ConvolutionDepthWise" && ((ConvolutionDepthWise*)layer_next_2)->use_int8_inference == false) | |||
| continue; | |||
| // fprintf(stderr, "%s, %s, %s\n", layer->name.c_str(), layer_next->name.c_str(), layer_next_2->name.c_str()); | |||
| if (layer->type == "Convolution" && layer_next_2->type == "Convolution") | |||
| { | |||
| @@ -934,6 +959,7 @@ void Net::fuse_network() | |||
| } | |||
| } | |||
| #endif | |||
| return 0; | |||
| } | |||
| void Net::clear() | |||
| @@ -96,7 +96,7 @@ public: | |||
| protected: | |||
| // parse the structure of the network | |||
| // fuse int8 dequantize and quantize ops into a single requantize op | |||
| void fuse_network(); | |||
| int fuse_network(); | |||
| #if NCNN_VULKAN | |||