From 57a7101fc609cf01a1a1bceb4dfdf4cc08e585c7 Mon Sep 17 00:00:00 2001 From: nihuini Date: Fri, 11 Feb 2022 16:56:03 +0800 Subject: [PATCH] fix ci, second try --- src/layer/x86/convolution_3x3_pack8to1_int8.h | 8 ++++---- src/layer/x86/convolution_x86.cpp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/layer/x86/convolution_3x3_pack8to1_int8.h b/src/layer/x86/convolution_3x3_pack8to1_int8.h index 4cf050cae..f7b8b42f1 100644 --- a/src/layer/x86/convolution_3x3_pack8to1_int8.h +++ b/src/layer/x86/convolution_3x3_pack8to1_int8.h @@ -968,10 +968,10 @@ static void conv3x3s1_winograd42_pack8to1_int8_sse(const Mat& bottom_blob, Mat& _sum1 = _mm_add_epi32(_sum1, _mm_unpackhi_epi16(_sl0, _sh0)); _sum2 = _mm_add_epi32(_sum2, _mm_unpacklo_epi16(_sl1, _sh1)); _sum3 = _mm_add_epi32(_sum3, _mm_unpackhi_epi16(_sl1, _sh1)); - _sum4 = _mm_add_epi32(_sum0, _mm_unpacklo_epi16(_sl2, _sh2)); - _sum5 = _mm_add_epi32(_sum1, _mm_unpackhi_epi16(_sl2, _sh2)); - _sum6 = _mm_add_epi32(_sum2, _mm_unpacklo_epi16(_sl3, _sh3)); - _sum7 = _mm_add_epi32(_sum3, _mm_unpackhi_epi16(_sl3, _sh3)); + _sum4 = _mm_add_epi32(_sum4, _mm_unpacklo_epi16(_sl2, _sh2)); + _sum5 = _mm_add_epi32(_sum5, _mm_unpackhi_epi16(_sl2, _sh2)); + _sum6 = _mm_add_epi32(_sum6, _mm_unpacklo_epi16(_sl3, _sh3)); + _sum7 = _mm_add_epi32(_sum7, _mm_unpackhi_epi16(_sl3, _sh3)); k0 += 8; r0 += 32; diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp index 1bfdf1d20..16a21c7ff 100644 --- a/src/layer/x86/convolution_x86.cpp +++ b/src/layer/x86/convolution_x86.cpp @@ -1342,10 +1342,10 @@ int Convolution_x86::create_pipeline_int8_x86(const Option& opt) { convolution_im2col_sgemm_transform_kernel_pack8to1_int8_sse(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h); } - // else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) - // { - // conv3x3s1_winograd42_transform_kernel_pack8to1_int8_sse(weight_data, weight_3x3_winograd42_data, num_input, num_output, opt); - // } + else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) + { + conv3x3s1_winograd42_transform_kernel_pack8to1_int8_sse(weight_data, weight_3x3_winograd42_data, num_input, num_output, opt); + } else if (opt.use_sgemm_convolution) // TODO better condition && num_input >= 8 && num_output >= 8) { convolution_im2col_sgemm_transform_kernel_pack8to1_int8_sse(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h); @@ -1569,10 +1569,10 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con { conv1x1s2_sgemm_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_sgemm_data, opt); } - // else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) - // { - // conv3x3s1_winograd42_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_3x3_winograd42_data, opt); - // } + else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) + { + conv3x3s1_winograd42_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_3x3_winograd42_data, opt); + } else if (opt.use_sgemm_convolution) // TODO better condition && num_input >= 8 && num_output >= 8) { convolution_im2col_sgemm_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_sgemm_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt);