move x86 abs_mathfun to x86_mathfun (#4659)

* move x86 abs_mathfun to x86_mathfun * Improve the implementation of AVX and AVX512F abs. * Unified naming rules --------- Co-authored-by: MouriNaruto <Mouri_Naruto@Outlook.com>
3 years ago · bd15e32517
--- a/src/layer/x86/avx512_mathfun.h
+++ b/src/layer/x86/avx512_mathfun.h
@@ -847,4 +847,13 @@ static NCNN_FORCEINLINE __m512 atan2512_ps(__m512 y, __m512 x)
    return _mm512_mask_mov_ps(special_result, normal_mode, normal_result);
 }

 static NCNN_FORCEINLINE __m512 abs512_ps(__m512 x)
 {
    // Lane-wise absolute value of 16 packed floats.
    //
    // -0.0f has only the sign bit set, so it serves as the sign-bit mask.
    const __m512i sign_mask = _mm512_castps_si512(_mm512_set1_ps(-0.0f));

    // Bitwise (~sign_mask & x): clears the sign bit and keeps every other bit.
    // The integer form is used on purpose: _mm512_andnot_ps belongs to
    // AVX512DQ, while _mm512_andnot_epi32 only needs AVX512F. The casts are
    // pure reinterpretations.
    return _mm512_castsi512_ps(_mm512_andnot_epi32(sign_mask, _mm512_castps_si512(x)));
 }

 #endif // AVX512_MATHFUN_H
--- a/src/layer/x86/avx_mathfun.h
+++ b/src/layer/x86/avx_mathfun.h
@@ -1078,4 +1078,13 @@ static NCNN_FORCEINLINE __m256 atan2256_ps(__m256 y, __m256 x)
               _mm256_andnot_ps(normal_mode, special_result));
 }

 static NCNN_FORCEINLINE __m256 abs256_ps(__m256 x)
 {
    // Lane-wise absolute value of 8 packed floats.
    // -0.0f carries only the sign bit, so the bitwise (~mask & x) below
    // clears each lane's sign bit and leaves the remaining bits untouched.
    return _mm256_andnot_ps(_mm256_set1_ps(-0.0f), x);
 }

 #endif // AVX_MATHFUN_H
--- a/src/layer/x86/sse_mathfun.h
+++ b/src/layer/x86/sse_mathfun.h
@@ -1148,4 +1148,13 @@ static NCNN_FORCEINLINE __m128 atan2_ps(__m128 y, __m128 x)
               _mm_andnot_ps(normal_mode, special_result));
 }

 static NCNN_FORCEINLINE __m128 abs_ps(__m128 inputs)
 {
    // Lane-wise absolute value of 4 packed floats.
    // -0.0f carries only the sign bit, so the bitwise (~mask & inputs) below
    // clears each lane's sign bit and leaves the remaining bits untouched.
    return _mm_andnot_ps(_mm_set_ps1(-0.0f), inputs);
 }

 #endif // SSE_MATHFUN_H
--- a/src/layer/x86/unaryop_x86.cpp
+++ b/src/layer/x86/unaryop_x86.cpp
@@ -106,17 +106,17 @@ struct unary_op_abs
 #if __SSE2__
    // Absolute-value operation on one __m128 value.
    __m128 func_pack4(const __m128& x) const
    {
        return abs_sse(x);
        // NOTE(review): unreachable, the return above always exits first.
        // This looks like the old and new line of a diff shown together;
        // confirm which helper (abs_sse or abs_ps) is the intended call.
        return abs_ps(x);
    }
 #if __AVX__
    // Absolute-value operation on one __m256 value.
    __m256 func_pack8(const __m256& x) const
    {
        return abs_avx(x);
        // NOTE(review): unreachable, the return above always exits first.
        // This looks like the old and new line of a diff shown together;
        // confirm which helper (abs_avx or abs256_ps) is the intended call.
        return abs256_ps(x);
    }
 #if __AVX512F__
    // Absolute-value operation on one __m512 value.
    __m512 func_pack16(const __m512& x) const
    {
        return abs_avx512(x);
        // NOTE(review): unreachable, the return above always exits first.
        // This looks like the old and new line of a diff shown together;
        // confirm which helper (abs_avx512 or abs512_ps) is the intended call.
        return abs512_ps(x);
    }
 #endif // __AVX512F__
 #endif // __AVX__
--- a/src/layer/x86/x86_activation.h
+++ b/src/layer/x86/x86_activation.h
@@ -56,15 +56,6 @@ static NCNN_FORCEINLINE __m128 hardswish_sse(__m128 inputs, __m128 a, __m128 b)
    return _mm_mul_ps(b, inputs);
 }

 static NCNN_FORCEINLINE __m128 abs_sse(__m128 inputs)
 {
    // Lane-wise absolute value of 4 packed floats.
    // Broadcasting -0.0f yields a mask with only the sign bit set per lane.
    const __m128 sign_mask = _mm_set_ps1(-0.0f);

    // Bitwise (~sign_mask & inputs): drop the sign bit, keep the magnitude.
    return _mm_andnot_ps(sign_mask, inputs);
 }

 static NCNN_FORCEINLINE __m128 lrelu_sse(__m128 inputs, float slope)
 {
    __m128 pos = _mm_max_ps(_mm_setzero_ps(), inputs);
@@ -169,11 +160,6 @@ static NCNN_FORCEINLINE __m256 hardswish_avx(__m256 inputs, __m256 a, __m256 b)
    return _mm256_mul_ps(b, inputs);
 }

 static NCNN_FORCEINLINE __m256 abs_avx(__m256 inputs)
 {
    // Lane-wise absolute value of 8 packed floats.
    //
    // Implemented by clearing the sign bit rather than max(0 - x, x): the max
    // instruction returns its second operand when both operands are zero, so
    // the max form yields -0.0f for an input of -0.0f, and it leaves the sign
    // bit of a NaN input unchanged.
    const __m256 sign_mask = _mm256_set1_ps(-0.0f);

    // Bitwise (~sign_mask & inputs): drop the sign bit, keep the magnitude.
    return _mm256_andnot_ps(sign_mask, inputs);
 }

 static NCNN_FORCEINLINE __m256 lrelu_avx(__m256 inputs, float slope)
 {
    __m256 pos = _mm256_max_ps(_mm256_setzero_ps(), inputs);
@@ -273,11 +259,6 @@ static NCNN_FORCEINLINE __m512 hardswish_avx512(__m512 inputs, __m512 a, __m512
    return _mm512_mul_ps(b, inputs);
 }

 static NCNN_FORCEINLINE __m512 abs_avx512(__m512 inputs)
 {
    // Lane-wise absolute value of 16 packed floats, computed on the raw bits:
    // 0x7fffffff keeps every bit of a 32-bit lane except the sign bit.
    const __m512i raw_bits = _mm512_castps_si512(inputs);
    const __m512i magnitude_mask = _mm512_set1_epi32(0x7fffffff);

    return _mm512_castsi512_ps(_mm512_and_epi32(raw_bits, magnitude_mask));
 }

 static NCNN_FORCEINLINE __m512 lrelu_avx512(__m512 inputs, float slope)
 {
    __mmask16 _is_negative = _mm512_cmp_ps_mask(inputs, _mm512_setzero_ps(), _CMP_LT_OQ);