From f268e0f83643f2b55db9eb331bc3a1d7808210a5 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Mon, 22 Jun 2020 19:55:27 +0800 Subject: [PATCH] fix(dnn/x86): fix x86 compile in gcc 5.4 GitOrigin-RevId: b8683b1ad2afba5119f434b1e0b231ee0918db16 --- .../x86/matrix_mul/int8/kernel_avx2_4x16x2.h | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/dnn/src/x86/matrix_mul/int8/kernel_avx2_4x16x2.h b/dnn/src/x86/matrix_mul/int8/kernel_avx2_4x16x2.h index 01565354..ff32e2d3 100644 --- a/dnn/src/x86/matrix_mul/int8/kernel_avx2_4x16x2.h +++ b/dnn/src/x86/matrix_mul/int8/kernel_avx2_4x16x2.h @@ -17,13 +17,21 @@ #include "src/common/utils.h" #include "src/x86/matrix_mul/common/common.h" +#define DNN_AVX2_TARGET +#if !defined(__clang__) +//! bypass gcc bug https://bugs.launchpad.net/ubuntu/+source/gcc-5/+bug/1642109 +#pragma GCC target("avx2") +#else +#undef DNN_AVX2_TARGET +#define DNN_AVX2_TARGET MEGDNN_ATTRIBUTE_TARGET("avx2") +#endif namespace megdnn { namespace x86 { namespace matmul_avx2_4x16x2 { + template -MEGDNN_ATTRIBUTE_TARGET("avx2") -void store_overflow(void* ptr, __m256i a); +DNN_AVX2_TARGET void store_overflow(void* ptr, __m256i a); template <> void store_overflow(void* ptr, __m256i a) { @@ -33,13 +41,14 @@ void store_overflow(void* ptr, __m256i a) { a = _mm256_permutevar8x32_epi32(a, idx); _mm_storeu_si128((__m128i*)ptr, _mm256_extractf128_si256(a, 0)); } + template <> void store_overflow(void* ptr, __m256i a) { _mm256_storeu_si256((__m256i*)(ptr), a); } + template -MEGDNN_ATTRIBUTE_TARGET("avx2") -void store_overflow(void* ptr, __m256i a, int remain); +DNN_AVX2_TARGET void store_overflow(void* ptr, __m256i a, int remain); template <> void store_overflow(void* ptr, __m256i a, int remain) { @@ -51,6 +60,7 @@ void store_overflow(void* ptr, __m256i a, int remain) { _mm_maskmoveu_si128(_mm256_extractf128_si256(a, 0), mask, reinterpret_cast(ptr)); } + template <> void store_overflow(void* ptr, __m256i a, int remain) { __m256i mask = _m256_continue_mask(remain); @@ -870,4 +880,9 @@ static inline void gemm_s8s8s32_avx2_4x16x2_pack_at(dt_int16* out, } // namespace x86 } // namespace megdnn +#if !defined(__clang__) +#pragma GCC reset_options +#endif +#undef DNN_AVX2_TARGET + // vim: syntax=cpp.doxygen \ No newline at end of file