|
|
|
@@ -131,10 +131,6 @@ constexpr uint16_t kFp16MaxValidExp = 0x001E; |
|
|
|
/// @ingroup fp16 basic parameter |
|
|
|
/// @brief maximum mantissa value of fp16: all ten mantissa bits set (11111 11111 = 0x03FF) |
|
|
|
constexpr uint16_t kFp16MaxMan = 0x03FF; |
|
|
|
/// @ingroup fp16 basic parameter
/// @brief absolute minimum normal value of fp16, stored as its raw bit pattern
///        (E=1,M=0 D=2^(-14)=0.00006103515625)
/// @note The mantissa occupies bits 0..9 and the sign bit 15, so the exponent field
///       is bits 10..14; E=1 with M=0 therefore encodes as 0x0400. A floating-point
///       initializer cannot be used here: converting a value below 1 to uint16_t
///       truncates it to 0.
constexpr uint16_t kFp16MinNormal = 0x0400;
|
|
|
/// @ingroup fp16 basic operator |
|
|
|
/// @brief get sign of fp16: shifts x right by 15 and masks with 1, yielding bit 15 as 0 or 1 |
|
|
|
#define FP16_EXTRAC_SIGN(x) (((x) >> 15) & 1) |
|
|
|
@@ -605,14 +601,14 @@ T GetManSum(int16_t e_a, const T &m_a, int16_t e_b, const T &m_b) { |
|
|
|
T sum = 0; |
|
|
|
if (e_a != e_b) { |
|
|
|
T m_tmp = 0; |
|
|
|
int16_t e_tmp = std::abs(e_a - e_b); |
|
|
|
int16_t e_tmp = staic_cast<int16_t>(std::abs(e_a - e_b)); |
|
|
|
if (e_a > e_b) { |
|
|
|
m_tmp = m_b; |
|
|
|
m_tmp = RightShift(m_tmp, e_tmp); |
|
|
|
sum = m_a + m_tmp; |
|
|
|
} else { |
|
|
|
m_tmp = m_a; |
|
|
|
m_tmp = RightShift(m_tmp, e_tmp); |
|
|
|
m_tm= RightShift(m_tmp, e_tmp); |
|
|
|
sum = m_tmp + m_b; |
|
|
|
} |
|
|
|
} else { |
|
|
|
|