|
|
|
@@ -691,6 +691,18 @@ static NCNN_FORCEINLINE void sincos256_ps(__m256 x, __m256* s, __m256* c) |
|
|
|
*c = _mm256_xor_ps(xmm2, sign_bit_cos); |
|
|
|
} |
|
|
|
|
|
|
|
// Packed single-precision tangent: returns sin(x) / cos(x) for each lane of x,
// using the sine and cosine produced by sincos256_ps.
static NCNN_FORCEINLINE __m256 tan256_ps(__m256 x)
{
    __m256 sin_v, cos_v;
    sincos256_ps(x, &sin_v, &cos_v);

    // Lanes whose cosine compares equal to zero get an all-ones bit pattern.
    const __m256 zero_lanes = _mm256_cmp_ps(cos_v, _mm256_setzero_ps(), _CMP_EQ_OS);

    // Keep the 1e-8 offset only in those lanes; every other lane gets +0.0f.
    const __m256 nudge = _mm256_and_ps(_mm256_set1_ps(1E-8f), zero_lanes);

    // Divide by the adjusted cosine so the divisor is never exactly zero.
    return _mm256_div_ps(sin_v, _mm256_add_ps(cos_v, nudge));
}
|
|
|
|
|
|
|
static NCNN_FORCEINLINE __m256 pow256_ps(__m256 a, __m256 b) |
|
|
|
{ |
|
|
|
// pow(x, m) = exp(m * log(x)) |
|
|
|
|