From d764b5b87eeffbc21595bfd3187a574eb7e0d44a Mon Sep 17 00:00:00 2001
From: nihui <shuizhuyuanluo@126.com>
Date: Wed, 9 Jun 2021 14:12:48 +0800
Subject: [PATCH] fix rvv tanh_ps for large inputs

---
 src/layer/riscv/rvv_mathfun.h       | 3 +--
 src/layer/riscv/rvv_mathfun_fp16s.h | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/layer/riscv/rvv_mathfun.h b/src/layer/riscv/rvv_mathfun.h
index b4d208070..58d49f429 100644
--- a/src/layer/riscv/rvv_mathfun.h
+++ b/src/layer/riscv/rvv_mathfun.h
@@ -333,8 +333,7 @@ _RVV_FLOAT32_COS_OP(8, 4)
         y = vfadd_vv_f32m##LMUL(y, x, vl);                                                                                            \
                                                                                                                                       \
         /* abs(x) > HALFMAXLOGF */                                                                                                    \
-        vbool##MLEN##_t mask_pos = vmfgt_vf_f32m##LMUL##_b##MLEN(x, 0.f, vl);                                                         \
-        vfloat32m##LMUL##_t y1 = vfmerge_vfm_f32m##LMUL(mask_pos, vfmv_v_f_f32m##LMUL(1.f, vl), -1.f, vl);                            \
+        vfloat32m##LMUL##_t y1 = vfsgnj_vv_f32m##LMUL(vfmv_v_f_f32m##LMUL(1.f, vl), x, vl);                                           \
                                                                                                                                       \
         y = vmerge_vvm_f32m##LMUL(mask_l, y, y0, vl);                                                                                 \
         y = vmerge_vvm_f32m##LMUL(mask_l2, y, y1, vl);                                                                                \
diff --git a/src/layer/riscv/rvv_mathfun_fp16s.h b/src/layer/riscv/rvv_mathfun_fp16s.h
index b2777ac23..475cbecbf 100644
--- a/src/layer/riscv/rvv_mathfun_fp16s.h
+++ b/src/layer/riscv/rvv_mathfun_fp16s.h
@@ -333,8 +333,7 @@ _RVV_FLOAT16_COS_OP(8, 2)
         y = vfadd_vv_f16m##LMUL(y, x, vl);                                                                                            \
                                                                                                                                       \
         /* abs(x) > HALFMAXLOGF */                                                                                                    \
-        vbool##MLEN##_t mask_pos = vmfgt_vf_f16m##LMUL##_b##MLEN(x, 0.f, vl);                                                         \
-        vfloat16m##LMUL##_t y1 = vfmerge_vfm_f16m##LMUL(mask_pos, vfmv_v_f_f16m##LMUL(1.f, vl), -1.f, vl);                            \
+        vfloat16m##LMUL##_t y1 = vfsgnj_vv_f16m##LMUL(vfmv_v_f_f16m##LMUL(1.f, vl), x, vl);                                           \
                                                                                                                                       \
         y = vmerge_vvm_f16m##LMUL(mask_l, y, y0, vl);                                                                                 \
         y = vmerge_vvm_f16m##LMUL(mask_l2, y, y1, vl);                                                                                \