| @@ -133,31 +133,49 @@ constexpr uint16_t kFp16MaxValidExp = 0x001E; | |||
| constexpr uint16_t kFp16MaxMan = 0x03FF; | |||
| /// @ingroup fp16 basic operator | |||
| /// @brief get sign of fp16 | |||
| #define FP16_EXTRAC_SIGN(x) (((x) >> 15) & 1) | |||
/// @param [in] x raw 16-bit pattern of an fp16 value
/// @return bit 15 of x (the sign bit): 1 when set, 0 otherwise
/// @note constexpr so the helper stays usable in constant expressions,
///       as the macro it replaces was.
constexpr uint16_t FP16_EXTRAC_SIGN(const uint16_t x) {
  // x is promoted to int by the shift; cast back explicitly instead of narrowing implicitly.
  return static_cast<uint16_t>((x >> 15) & 1);
}
| /// @ingroup fp16 basic operator | |||
| /// @brief get exponent of fp16 | |||
| #define FP16_EXTRAC_EXP(x) (((x) >> 10) & kFp16MaxExp) | |||
| inline uint16_t FP16_EXTRAC_EXP(const uint16_t x) { | |||
| return (((x) >> 10) & kFp16MaxExp); | |||
| } | |||
| /// @ingroup fp16 basic operator | |||
| /// @brief get mantissa of fp16 | |||
| #define FP16_EXTRAC_MAN(x) ((((x) >> 0) & 0x3FF) | (((((x) >> 10) & 0x1F) > 0 ? 1 : 0) * 0x400)) | |||
/// @param [in] x raw 16-bit pattern of an fp16 value
/// @return the low 10 bits of x, with bit 10 (0x400) additionally set when
///         bits 10..14 of x are not all zero
/// @note constexpr so the helper stays usable in constant expressions,
///       as the macro it replaces was.
constexpr uint16_t FP16_EXTRAC_MAN(const uint16_t x) {
  // 0x400 is OR-ed in whenever the 5 bits above the low 10 are non-zero;
  // when they are all zero only the stored 10 bits are returned.
  return static_cast<uint16_t>((x & 0x3FF) | ((((x >> 10) & 0x1F) != 0) ? 0x400 : 0));
}
| /// @ingroup fp16 basic operator | |||
| /// @brief constructor of fp16 from sign exponent and mantissa | |||
| #define FP16_CONSTRUCTOR(s, e, m) (((s) << kFp16SignIndex) | ((e) << kFp16ManLen) | ((m) & kFp16MaxMan)) | |||
| inline uint16_t FP16_CONSTRUCTOR(const uint16_t s, const uint16_t e, const uint16_t m) { | |||
| return (((s) << kFp16SignIndex) | ((e) << kFp16ManLen) | ((m) & kFp16MaxMan)); | |||
| } | |||
| /// @ingroup fp16 special value judgment | |||
| /// @brief whether a fp16 is zero | |||
| #define FP16_IS_ZERO(x) (((x) & kFp16AbsMax) == 0) | |||
| inline bool FP16_IS_ZERO(const uint16_t x) { | |||
| return (((x) & kFp16AbsMax) == 0); | |||
| } | |||
| /// @ingroup fp16 special value judgment | |||
| /// @brief whether a fp16 is a denormalized value | |||
| #define FP16_IS_DENORM(x) ((((x) & kFp16ExpMask) == 0)) | |||
| inline bool FP16_IS_DENORM(const uint16_t x) { | |||
| return ((((x) & kFp16ExpMask) == 0)); | |||
| } | |||
| /// @ingroup fp16 special value judgment | |||
| /// @brief whether a fp16 is infinite | |||
| #define FP16_IS_INF(x) (((x)&kFp16AbsMax) == kFp16ExpMask) | |||
| inline bool FP16_IS_INF(const uint16_t x) { | |||
| return (((x)&kFp16AbsMax) == kFp16ExpMask); | |||
| } | |||
| /// @ingroup fp16 special value judgment | |||
| /// @brief whether a fp16 is NaN | |||
| #define FP16_IS_NAN(x) ((((x) & kFp16ExpMask) == kFp16ExpMask) && ((x) & kFp16ManMask)) | |||
| inline bool FP16_IS_NAN(const uint16_t x) { | |||
| return ((((x) & kFp16ExpMask) == kFp16ExpMask) && ((x) & kFp16ManMask)); | |||
| } | |||
| /// @ingroup fp16 special value judgment | |||
| /// @brief whether a fp16 is invalid | |||
| #define FP16_IS_INVALID(x) (((x) & kFp16ExpMask) == kFp16ExpMask) | |||
| inline bool FP16_IS_INVALID(const uint16_t x) { | |||
| return (((x) & kFp16ExpMask) == kFp16ExpMask); | |||
| } | |||
| /// @ingroup fp32 basic parameter | |||
| /// @brief fp32 exponent bias | |||
| constexpr uint16_t kFp32ExpBias = 127U; | |||
| @@ -601,7 +619,7 @@ T GetManSum(int16_t e_a, const T &m_a, int16_t e_b, const T &m_b) { | |||
| T sum = 0; | |||
| if (e_a != e_b) { | |||
| T m_tmp = 0; | |||
| int16_t e_tmp = staic_cast<int16_t>(std::abs(e_a - e_b)); | |||
| int16_t e_tmp = std::abs(e_a - e_b); | |||
| if (e_a > e_b) { | |||
| m_tmp = m_b; | |||
| m_tmp = RightShift(m_tmp, e_tmp); | |||