Browse Source

arm neon optimization for tan pack4 (#3722)

Co-authored-by: jasonZhang892 <jasonZhang892@users.noreply.github.com>
tags/20220701
jasonZhang GitHub 4 years ago
parent
commit
8e2e4db3f4
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 8 deletions
  1. +8
    -0
      src/layer/arm/neon_mathfun.h
  2. +1
    -8
      src/layer/arm/unaryop_arm.cpp

+ 8
- 0
src/layer/arm/neon_mathfun.h View File

@@ -310,6 +310,14 @@ static inline float32x4_t div_ps(float32x4_t a, float32x4_t b)
#endif
}

// Returns div_ps(s, c), where s and c are the two outputs that sincos_ps
// writes for the input vector x.
// NOTE(review): presumably s = sin(x) and c = cos(x) per lane, making the
// result a lane-wise tangent — sincos_ps and div_ps bodies are not visible
// here, so confirm their exact semantics (e.g. behaviour when c is 0).
static inline float32x4_t tan_ps(float32x4_t x)
{
    float32x4_t sin_lanes;
    float32x4_t cos_lanes;

    sincos_ps(x, &sin_lanes, &cos_lanes);

    return div_ps(sin_lanes, cos_lanes);
}

static inline float32x4_t pow_ps(float32x4_t a, float32x4_t b)
{
// pow(x, m) = exp(m * log(x))


+ 1
- 8
src/layer/arm/unaryop_arm.cpp View File

@@ -185,14 +185,7 @@ struct unary_op_tan_pack4
{
// Maps a four-lane float vector x to a four-lane result for the "tan" unary op.
// NOTE(review): this listing is a diff rendered without its +/- markers. The
// "+1 / -8" summary for this file suggests the scalar code from the TODO
// comment through `return vld1q_f32(tmp);` is the removed (old) body and
// `return tan_ps(x);` is the single added (new) line — confirm against the
// actual file before relying on either path.
float32x4_t operator()(const float32x4_t& x) const
{
// TODO neon optimize
float tmp[4];
// Store the four lanes of x into the scalar buffer.
vst1q_f32(tmp, x);
// Apply tan to each element in place, one lane at a time.
tmp[0] = tan(tmp[0]);
tmp[1] = tan(tmp[1]);
tmp[2] = tan(tmp[2]);
tmp[3] = tan(tmp[3]);
// Load the four results back into a vector and return them.
return vld1q_f32(tmp);
// Read literally, this statement follows a return and is unreachable; it is
// most likely the replacement body introduced by the commit (see note above).
return tan_ps(x);
}
};



Loading…
Cancel
Save