Browse Source

Fix the fp16-to-fp32 conversion bug

tags/v0.7.0-beta
cjh9368 5 years ago
parent
commit
8a29d90d3c
9 changed files with 85 additions and 127 deletions
  1. +0
    -1
      mindspore/lite/schema/model.fbs
  2. +0
    -5
      mindspore/lite/schema/ops.fbs
  3. +8
    -4
      mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc
  4. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.c
  5. +2
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.h
  6. +63
    -112
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
  7. +3
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h
  8. +1
    -1
      mindspore/lite/tools/converter/parser/tflite/tflite_dequantize_parser.cc
  9. +1
    -0
      mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc

+ 0
- 1
mindspore/lite/schema/model.fbs View File

@@ -90,7 +90,6 @@ union PrimitiveType {
Rsqrt,
ExpandDims,
Tile,
Fp16Cast,
Cast,
Shape,
Nchw2Nhwc,


+ 0
- 5
mindspore/lite/schema/ops.fbs View File

@@ -581,11 +581,6 @@ table Cast {
dstT: int;
}

table Fp16Cast {
srcT: int;
dstT: int;
}

table QuantDTypeCast {
srcT: int;
dstT: int;


+ 8
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc View File

@@ -27,7 +27,6 @@ using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Cast;
using mindspore::schema::PrimitiveType_Fp16Cast;

namespace mindspore::kernel {
namespace {
@@ -74,6 +73,9 @@ int CastCPUKernel::DoCast(int thread_id) {
if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) {
Float32ToInt32(reinterpret_cast<float *>(input->Data()) + offset,
reinterpret_cast<int32_t *>(output_data) + offset, data_num);
} else if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeFloat16) {
Float32ToFp16(reinterpret_cast<float *>(input->Data()) + offset,
reinterpret_cast<uint16_t *>(output_data) + offset, data_num);
} else {
MS_LOG(ERROR) << "Unsupported datatype from " << input_data_type << " to " << output_data_type;
return RET_ERROR;
@@ -89,8 +91,8 @@ int CastCPUKernel::DoCast(int thread_id) {
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
case kNumberTypeFloat16:
Fp16ToFloat32(reinterpret_cast<int16_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
Fp16ToFloat32(reinterpret_cast<uint16_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
default:
MS_LOG(ERROR) << "Unsupported input data type " << input_data_type;
@@ -144,5 +146,7 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Ten
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, CpuCastFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Fp16Cast, CpuCastFp32KernelCreator)
#ifndef ENABLE_ARM64
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, CpuCastFp32KernelCreator)
#endif
} // namespace mindspore::kernel

+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.c View File

@@ -41,12 +41,18 @@ void Int32ToFloat32(const int32_t *input, float *output, int number) {
}
}

// Converts an array of IEEE-754 half-precision (fp16) values, passed as raw
// uint16_t bit patterns, to single-precision floats.
//   input:  fp16 bit patterns to convert
//   output: destination buffer for the widened float values
//   number: element count of both buffers
void Fp16ToFloat32(const uint16_t *input, float *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = ShortToFloat32(input[i]);
  }
}

// Converts an array of single-precision floats to IEEE-754 half-precision
// (fp16) bit patterns, rounding to nearest-even.
//   input:  float values to narrow
//   output: destination buffer for the fp16 bit patterns
//   number: element count of both buffers
void Float32ToFp16(const float *input, uint16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = Float32ToShort(input[i]);
  }
}

void Float32ToInt32(const float *input, int32_t *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (int32_t)input[i];


+ 2
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.h View File

@@ -35,7 +35,8 @@ void Uint8ToFloat32(const uint8_t *input, float *output, int number);
void Uint8ToInt8(const uint8_t *input, int8_t *output, int number);
void Int8ToUint8(const int8_t *input, uint8_t *output, int number);
void Int32ToFloat32(const int32_t *input, float *output, int number);
void Fp16ToFloat32(const int16_t *input, float *output, int number);
void Fp16ToFloat32(const uint16_t *input, float *output, int number);
void Float32ToFp16(const float *input, uint16_t *output, int number);
void Float32ToInt32(const float *input, int32_t *output, int number);
#ifdef __cplusplus
}


+ 63
- 112
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c View File

@@ -126,123 +126,74 @@ void PostConvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bi
return;
}

// Bit-level view of a float, used for fp16<->fp32 conversion without
// going through floating-point classification functions.
union float32_bits {
  unsigned int u;
  float f;
};
typedef union float32_bits float32_bits;

// Converts an IEEE-754 half-precision bit pattern to a float.
// Handles normals, subnormals, signed zeros, infinities and NaNs.
// (Branch-light technique after Fabian Giesen's half_to_float_fast.)
float ShortToFloat32(uint16_t srcValue) {
  const float32_bits magic = {113 << 23};         // 2^-14, the smallest normal half, as a float
  const unsigned int shifted_exp = 0x7c00 << 13;  // half exponent mask, moved to the float position
  float32_bits o;

  o.u = (srcValue & 0x7fff) << 13;       // move exponent/mantissa bits into float position
  unsigned int exp = shifted_exp & o.u;  // isolate just the exponent field
  o.u += (127 - 15) << 23;               // rebias exponent: half bias 15 -> float bias 127

  // handle exponent special cases
  if (exp == shifted_exp) {   // Inf/NaN: exponent must become all-ones in float too
    o.u += (128 - 16) << 23;  // extra exponent adjust up to 255
  } else if (exp == 0) {      // zero or subnormal half
    o.u += 1 << 23;           // extra exponent adjust
    o.f -= magic.f;           // renormalize via FP subtract (maps +/-0 correctly too)
  }

  o.u |= (srcValue & 0x8000) << 16;  // reattach the sign bit
  return o.f;
}

// Converts a float to an IEEE-754 half-precision bit pattern with
// round-to-nearest-even; overflow saturates to +/-Inf, NaN becomes qNaN.
// Equivalent to __gnu_f2h_ieee.
// (Technique after Fabian Giesen's float_to_half_fast3.)
uint16_t Float32ToShort(float srcValue) {
  float32_bits f;
  f.f = srcValue;

  const float32_bits f32infty = {255 << 23};       // float +Inf bit pattern
  const float32_bits f16max = {(127 + 16) << 23};  // smallest float magnitude that overflows half
  const float32_bits denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};  // aligns subnormal mantissa
  unsigned int sign_mask = 0x80000000u;
  uint16_t o;

  unsigned int sign = f.u & sign_mask;
  f.u ^= sign;  // work on |x|; the sign is reattached at the end

  // NOTE all the integer compares in this function can be safely
  // compiled into signed compares since all operands are below
  // 0x80000000 once the sign bit has been stripped.

  if (f.u >= f16max.u) {                       // result is Inf or NaN (all exponent bits set)
    o = (f.u > f32infty.u) ? 0x7e00 : 0x7c00;  // NaN->qNaN and Inf->Inf
  } else {                                     // (de)normalized number or zero
    if (f.u < (113 << 23)) {                   // resulting half is subnormal or zero
      // use a magic value to align our 10 mantissa bits at the bottom of
      // the float. as long as FP addition is round-to-nearest-even this
      // just works.
      f.f += denorm_magic.f;

      // and one integer subtract of the bias later, we have our final half!
      o = (uint16_t)(f.u - denorm_magic.u);
    } else {
      unsigned int mant_odd = (f.u >> 13) & 1;  // resulting mantissa is odd

      // update exponent, rounding bias part 1
      f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
      // rounding bias part 2: break ties to even
      f.u += mant_odd;
      // take the bits!
      o = (uint16_t)(f.u >> 13);
    }
  }

  o |= (uint16_t)(sign >> 16);
  return o;
}

+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h View File

@@ -37,9 +37,10 @@ void MatrixSub(const float *a_ptr, const float *b_ptr, float *dst, size_t a_stri
size_t row, size_t col);
void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr, size_t row, size_t col,
size_t c_stride, size_t x_stride);
int16_t Float32ToShort(float srcValue);
float ShortToFloat32(uint16_t srcValue);

uint16_t Float32ToShort(float srcValue);

float ShortToFloat32(int16_t srcValue);

#ifdef ENABLE_ARM
void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width,


+ 1
- 1
mindspore/lite/tools/converter/parser/tflite/tflite_dequantize_parser.cc View File

@@ -58,7 +58,7 @@ STATUS TfliteDequantizeParser::Parse(const std::unique_ptr<tflite::OperatorT> &t
return RET_ERROR;
}

op->primitive->value.type = schema::PrimitiveType_Fp16Cast;
op->primitive->value.type = schema::PrimitiveType_Cast;
op->primitive->value.value = attr.release();
return 0;
}


+ 1
- 0
mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc View File

@@ -161,6 +161,7 @@ const AnfNodePtr ConstFoldPass::Process(const FuncGraphPtr &func_graph, const An
MS_LOG(EXCEPTION) << "run kernel failed, name: " << lite_kernel->name();
}
auto new_parameter = CreateNewParamter(func_graph, output_tensors.front());
new_parameter->set_name(input_node->fullname_with_scope());
any_node->set_input(i, new_parameter);
}
}


Loading…
Cancel
Save