Browse Source

fix ci, second try

tags/20220216
nihuini 4 years ago
parent
commit
57a7101fc6
No known key found for this signature in database GPG Key ID: 98FD8F4EBC3E5DB8
2 changed files with 12 additions and 12 deletions
  1. +4
    -4
      src/layer/x86/convolution_3x3_pack8to1_int8.h
  2. +8
    -8
      src/layer/x86/convolution_x86.cpp

+ 4
- 4
src/layer/x86/convolution_3x3_pack8to1_int8.h View File

@@ -968,10 +968,10 @@ static void conv3x3s1_winograd42_pack8to1_int8_sse(const Mat& bottom_blob, Mat&
_sum1 = _mm_add_epi32(_sum1, _mm_unpackhi_epi16(_sl0, _sh0));
_sum2 = _mm_add_epi32(_sum2, _mm_unpacklo_epi16(_sl1, _sh1));
_sum3 = _mm_add_epi32(_sum3, _mm_unpackhi_epi16(_sl1, _sh1));
- _sum4 = _mm_add_epi32(_sum0, _mm_unpacklo_epi16(_sl2, _sh2));
- _sum5 = _mm_add_epi32(_sum1, _mm_unpackhi_epi16(_sl2, _sh2));
- _sum6 = _mm_add_epi32(_sum2, _mm_unpacklo_epi16(_sl3, _sh3));
- _sum7 = _mm_add_epi32(_sum3, _mm_unpackhi_epi16(_sl3, _sh3));
+ _sum4 = _mm_add_epi32(_sum4, _mm_unpacklo_epi16(_sl2, _sh2));
+ _sum5 = _mm_add_epi32(_sum5, _mm_unpackhi_epi16(_sl2, _sh2));
+ _sum6 = _mm_add_epi32(_sum6, _mm_unpacklo_epi16(_sl3, _sh3));
+ _sum7 = _mm_add_epi32(_sum7, _mm_unpackhi_epi16(_sl3, _sh3));

k0 += 8;
r0 += 32;


+ 8
- 8
src/layer/x86/convolution_x86.cpp View File

@@ -1342,10 +1342,10 @@ int Convolution_x86::create_pipeline_int8_x86(const Option& opt)
{
convolution_im2col_sgemm_transform_kernel_pack8to1_int8_sse(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h);
}
- // else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
- // {
- // conv3x3s1_winograd42_transform_kernel_pack8to1_int8_sse(weight_data, weight_3x3_winograd42_data, num_input, num_output, opt);
- // }
+ else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
+ {
+ conv3x3s1_winograd42_transform_kernel_pack8to1_int8_sse(weight_data, weight_3x3_winograd42_data, num_input, num_output, opt);
+ }
else if (opt.use_sgemm_convolution) // TODO better condition && num_input >= 8 && num_output >= 8)
{
convolution_im2col_sgemm_transform_kernel_pack8to1_int8_sse(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h);
@@ -1569,10 +1569,10 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con
{
conv1x1s2_sgemm_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_sgemm_data, opt);
}
- // else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
- // {
- // conv3x3s1_winograd42_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_3x3_winograd42_data, opt);
- // }
+ else if (opt.use_winograd_convolution && kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
+ {
+ conv3x3s1_winograd42_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_3x3_winograd42_data, opt);
+ }
else if (opt.use_sgemm_convolution) // TODO better condition && num_input >= 8 && num_output >= 8)
{
convolution_im2col_sgemm_pack8to1_int8_sse(bottom_blob_bordered, top_blob_int32, weight_sgemm_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, opt);


Loading…
Cancel
Save