| @@ -150,7 +150,7 @@ static double Fp16ToDouble(const uint16_t &fp_val) { | |||
| /// @return Return uint8 value of fp16_t object | |||
| static uint8_t GetUint8ValByMan(uint8_t s_ret, const uint64_t &long_int_m, const uint16_t &shift_out) { | |||
| bool need_round = IsRoundOne(long_int_m, shift_out + kFp16ManLen); | |||
| auto m_ret = static_cast<uint8_t>((long_int_m >> (kFp16ManLen + shift_out)) & kBitLen8Max); | |||
| auto m_ret = static_cast<uint8_t>((long_int_m >> static_cast<uint64_t>(kFp16ManLen + shift_out)) & kBitLen8Max); | |||
| need_round = need_round && ((s_ret == 0 && m_ret < kInt8Max) || (s_ret == 1 && m_ret <= kInt8Max)); | |||
| if (need_round) { | |||
| m_ret++; | |||
| @@ -258,7 +258,7 @@ static uint8_t Fp16ToUInt8(const uint16_t &fp_val) { | |||
| } | |||
| if (overflow_flag == 0U) { | |||
| bool need_round = IsRoundOne(long_int_m, shift_out + kFp16ManLen); | |||
| m_ret = static_cast<uint8_t>((long_int_m >> (kFp16ManLen + shift_out)) & kBitLen8Max); | |||
| m_ret = static_cast<uint8_t>((long_int_m >> static_cast<uint64_t>(kFp16ManLen + shift_out)) & kBitLen8Max); | |||
| if (need_round && m_ret != kBitLen8Max) { | |||
| m_ret++; | |||
| } | |||
| @@ -280,7 +280,7 @@ static uint8_t Fp16ToUInt8(const uint16_t &fp_val) { | |||
| /// @return Return uint16 value of fp16_t object | |||
| static uint16_t GetUint16ValByMan(uint16_t s_ret, const uint64_t &long_int_m, const uint16_t &shift_out) { | |||
| bool need_round = IsRoundOne(long_int_m, shift_out + kFp16ManLen); | |||
| auto m_ret = static_cast<uint16_t>((long_int_m >> (kFp16ManLen + shift_out)) & kBitLen16Max); | |||
| auto m_ret = static_cast<uint16_t>((long_int_m >> static_cast<uint64_t>(kFp16ManLen + shift_out)) & kBitLen16Max); | |||
| if (need_round && m_ret < kInt16Max) { | |||
| m_ret++; | |||
| } | |||
| @@ -378,7 +378,7 @@ static uint16_t Fp16ToUInt16(const uint16_t &fp_val) { | |||
| } | |||
| } | |||
| bool need_round = IsRoundOne(long_int_m, shift_out + kFp16ManLen); | |||
| m_ret = static_cast<uint16_t>((long_int_m >> (kFp16ManLen + shift_out)) & kBitLen16Max); | |||
| m_ret = static_cast<uint16_t>((long_int_m >> static_cast<uint64_t>(kFp16ManLen + shift_out)) & kBitLen16Max); | |||
| if (need_round && m_ret != kBitLen16Max) { | |||
| m_ret++; | |||
| } | |||
| @@ -419,7 +419,7 @@ static int32_t Fp16ToInt32(const uint16_t &fp_val) { | |||
| } | |||
| } | |||
| bool need_round = IsRoundOne(long_int_m, shift_out + kFp16ManLen); | |||
| auto m_ret = static_cast<uint32_t>((long_int_m >> (kFp16ManLen + shift_out)) & kBitLen32Max); | |||
| auto m_ret = static_cast<uint32_t>((long_int_m >> static_cast<uint64_t>(kFp16ManLen + shift_out)) & kBitLen32Max); | |||
| if (need_round && m_ret < kInt32Max) { | |||
| m_ret++; | |||
| } | |||
| @@ -468,7 +468,7 @@ static uint32_t Fp16ToUInt32(const uint16_t &fp_val) { | |||
| } | |||
| } | |||
| bool need_round = IsRoundOne(long_int_m, shift_out + kFp16ManLen); | |||
| m_ret = static_cast<uint32_t>(long_int_m >> (kFp16ManLen + shift_out)) & kBitLen32Max; | |||
| m_ret = static_cast<uint32_t>(long_int_m >> static_cast<uint64_t>(kFp16ManLen + shift_out)) & kBitLen32Max; | |||
| if (need_round && m_ret != kBitLen32Max) { | |||
| m_ret++; | |||
| } | |||
| @@ -562,10 +562,10 @@ static uint16_t Fp16Add(uint16_t v_1, uint16_t v_2) { | |||
| int16_t e_ret = std::max(e_a, e_b); | |||
| int16_t e_tmp = std::abs(e_a - e_b); | |||
| if (e_a > e_b) { | |||
| m_trunc = (m_b << (static_cast<uint16_t>(kBitShift32) - static_cast<uint16_t>(e_tmp))); | |||
| m_trunc = (m_b << static_cast<uint32_t>(static_cast<uint16_t>(kBitShift32) - static_cast<uint16_t>(e_tmp))); | |||
| m_b = RightShift(m_b, e_tmp); | |||
| } else if (e_a < e_b) { | |||
| m_trunc = (m_a << (static_cast<uint16_t>(kBitShift32) - static_cast<uint16_t>(e_tmp))); | |||
| m_trunc = (m_a << static_cast<uint32_t>(static_cast<uint16_t>(kBitShift32) - static_cast<uint16_t>(e_tmp))); | |||
| m_a = RightShift(m_a, e_tmp); | |||
| } | |||
| // calculate mantissa | |||
| @@ -959,7 +959,7 @@ static void SetValByUint16Val(const uint16_t &input_val, const uint16_t &sign, u | |||
| for (int i = 1; i < e_tmp; i++) { | |||
| trunc_mask = (trunc_mask << 1) + 1; | |||
| } | |||
| uint32_t m_trunc = (m_tmp & trunc_mask) << (static_cast<uint16_t>(kBitShift32) - e_tmp); | |||
| uint32_t m_trunc = (m_tmp & trunc_mask) << static_cast<uint32_t>(static_cast<uint16_t>(kBitShift32) - e_tmp); | |||
| for (int i = 0; i < e_tmp; i++) { | |||
| m_tmp = (m_tmp >> 1); | |||
| e_ret = e_ret + 1; | |||
| @@ -978,7 +978,7 @@ static void SetValByUint16Val(const uint16_t &input_val, const uint16_t &sign, u | |||
| } | |||
| } else { | |||
| e_ret = static_cast<int16_t>(kFp16ExpBias); | |||
| m_tmp = m_tmp << (kManBitLength - len); | |||
| m_tmp = m_tmp << static_cast<uint16_t>(kManBitLength - len); | |||
| e_ret = e_ret + (len - 1); | |||
| } | |||
| auto m_ret = static_cast<uint16_t>(m_tmp); | |||
| @@ -1018,7 +1018,7 @@ fp16_t &fp16_t::operator=(const uint16_t &ui_val) { | |||
| for (int i = 1; i < e_tmp; i++) { | |||
| trunc_mask = (trunc_mask << 1) + 1; | |||
| } | |||
| m_trunc = (m_ret & trunc_mask) << (static_cast<uint16_t>(kBitShift32) - e_tmp); | |||
| m_trunc = (m_ret & trunc_mask) << static_cast<uint32_t>(static_cast<uint16_t>(kBitShift32) - e_tmp); | |||
| for (int i = 0; i < e_tmp; i++) { | |||
| m_ret = (m_ret >> 1); | |||
| e_ret = e_ret + 1; | |||
| @@ -1040,7 +1040,7 @@ fp16_t &fp16_t::operator=(const uint16_t &ui_val) { | |||
| } | |||
| } else { | |||
| e_ret = static_cast<int16_t>(kFp16ExpBias); | |||
| m_ret = m_ret << (static_cast<uint16_t>(kDim11) - len); | |||
| m_ret = m_ret << static_cast<uint16_t>(static_cast<uint16_t>(kDim11) - len); | |||
| e_ret = e_ret + (len - 1); | |||
| } | |||
| val = FP16_CONSTRUCTOR(0u, static_cast<uint16_t>(e_ret), m_ret); | |||
| @@ -1062,7 +1062,7 @@ static void SetValByUint32Val(const uint32_t &input_val, const uint16_t &sign, u | |||
| for (int i = 1; i < e_tmp; i++) { | |||
| trunc_mask = (trunc_mask << 1) + 1; | |||
| } | |||
| m_trunc = (m_tmp & trunc_mask) << (static_cast<uint16_t>(kBitShift32) - e_tmp); | |||
| m_trunc = (m_tmp & trunc_mask) << static_cast<uint32_t>(static_cast<uint16_t>(kBitShift32) - e_tmp); | |||
| for (int i = 0; i < e_tmp; i++) { | |||
| m_tmp = (m_tmp >> 1); | |||
| e_ret = e_ret + 1; | |||
| @@ -1085,7 +1085,7 @@ static void SetValByUint32Val(const uint32_t &input_val, const uint16_t &sign, u | |||
| } | |||
| } else { | |||
| e_ret = static_cast<int16_t>(kFp16ExpBias); | |||
| m_tmp = m_tmp << (static_cast<uint16_t>(kDim11) - len); | |||
| m_tmp = m_tmp << static_cast<uint32_t>(static_cast<uint16_t>(kDim11) - len); | |||
| e_ret = e_ret + (len - 1); | |||
| } | |||
| auto m_ret = static_cast<uint16_t>(m_tmp); | |||
| @@ -1147,7 +1147,7 @@ fp16_t &fp16_t::operator=(const uint32_t &ui_val) { | |||
| } | |||
| } else { | |||
| e_ret = static_cast<int16_t>(kFp16ExpBias); | |||
| m_tmp = m_tmp << (static_cast<uint16_t>(kDim11) - len); | |||
| m_tmp = m_tmp << static_cast<uint32_t>((static_cast<uint16_t>(kDim11) - len)); | |||
| e_ret = e_ret + (len - 1); | |||
| } | |||
| auto m_ret = static_cast<uint16_t>(m_tmp); | |||