Browse Source

Fix the fp16-to-fp32 conversion bug

tags/v0.7.0-beta
cjh9368 5 years ago
parent
commit
8a29d90d3c
9 changed files with 85 additions and 127 deletions
  1. +0
    -1
      mindspore/lite/schema/model.fbs
  2. +0
    -5
      mindspore/lite/schema/ops.fbs
  3. +8
    -4
      mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc
  4. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.c
  5. +2
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.h
  6. +63
    -112
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
  7. +3
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h
  8. +1
    -1
      mindspore/lite/tools/converter/parser/tflite/tflite_dequantize_parser.cc
  9. +1
    -0
      mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc

+ 0
- 1
mindspore/lite/schema/model.fbs View File

@@ -90,7 +90,6 @@ union PrimitiveType {
Rsqrt,
ExpandDims,
Tile,
Fp16Cast,
Cast,
Shape,
Nchw2Nhwc,


+ 0
- 5
mindspore/lite/schema/ops.fbs View File

@@ -581,11 +581,6 @@ table Cast {
dstT: int;
}

table Fp16Cast {
srcT: int;
dstT: int;
}

table QuantDTypeCast {
srcT: int;
dstT: int;


+ 8
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc View File

@@ -27,7 +27,6 @@ using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Cast;
using mindspore::schema::PrimitiveType_Fp16Cast;

namespace mindspore::kernel {
namespace {
@@ -74,6 +73,9 @@ int CastCPUKernel::DoCast(int thread_id) {
if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) {
Float32ToInt32(reinterpret_cast<float *>(input->Data()) + offset,
reinterpret_cast<int32_t *>(output_data) + offset, data_num);
} else if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeFloat16) {
Float32ToFp16(reinterpret_cast<float *>(input->Data()) + offset,
reinterpret_cast<uint16_t *>(output_data) + offset, data_num);
} else {
MS_LOG(ERROR) << "Unsupported datatype from " << input_data_type << " to " << output_data_type;
return RET_ERROR;
@@ -89,8 +91,8 @@ int CastCPUKernel::DoCast(int thread_id) {
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
case kNumberTypeFloat16:
Fp16ToFloat32(reinterpret_cast<int16_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
Fp16ToFloat32(reinterpret_cast<uint16_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
default:
MS_LOG(ERROR) << "Unsupported input data type " << input_data_type;
@@ -144,5 +146,7 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Ten
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, CpuCastFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Fp16Cast, CpuCastFp32KernelCreator)
#ifndef ENABLE_ARM64
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, CpuCastFp32KernelCreator)
#endif
} // namespace mindspore::kernel

+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.c View File

@@ -41,12 +41,18 @@ void Int32ToFloat32(const int32_t *input, float *output, int number) {
}
}

// Converts an array of IEEE-754 half-precision (fp16) values, passed as raw
// uint16_t bit patterns, to single-precision floats.
//   input:  fp16 bit patterns to convert
//   output: destination buffer for the widened float values
//   number: element count of both buffers
void Fp16ToFloat32(const uint16_t *input, float *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = ShortToFloat32(input[i]);
  }
}

// Converts an array of single-precision floats to IEEE-754 half-precision
// (fp16) bit patterns, rounding to nearest-even.
//   input:  float values to narrow
//   output: destination buffer for the fp16 bit patterns
//   number: element count of both buffers
void Float32ToFp16(const float *input, uint16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = Float32ToShort(input[i]);
  }
}

void Float32ToInt32(const float *input, int32_t *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (int32_t)input[i];


+ 2
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.h View File

@@ -35,7 +35,8 @@ void Uint8ToFloat32(const uint8_t *input, float *output, int number);
void Uint8ToInt8(const uint8_t *input, int8_t *output, int number);
void Int8ToUint8(const int8_t *input, uint8_t *output, int number);
void Int32ToFloat32(const int32_t *input, float *output, int number);
void Fp16ToFloat32(const int16_t *input, float *output, int number);
void Fp16ToFloat32(const uint16_t *input, float *output, int number);
void Float32ToFp16(const float *input, uint16_t *output, int number);
void Float32ToInt32(const float *input, int32_t *output, int number);
#ifdef __cplusplus
}


+ 63
- 112
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c View File

@@ -126,123 +126,74 @@ void PostConvFuncFp32C8(const float *c8_out_ptr, float *out_ptr, const float *bi
return;
}

// Bit-level view of a float, used for fp16<->fp32 conversion without
// going through floating-point classification functions.
union float32_bits {
  unsigned int u;
  float f;
};
typedef union float32_bits float32_bits;

// Converts an IEEE-754 half-precision bit pattern to a float.
// Handles normals, subnormals, signed zeros, infinities and NaNs.
// (Branch-light technique after Fabian Giesen's half_to_float_fast.)
float ShortToFloat32(uint16_t srcValue) {
  const float32_bits magic = {113 << 23};         // 2^-14, the smallest normal half, as a float
  const unsigned int shifted_exp = 0x7c00 << 13;  // half exponent mask, moved to the float position
  float32_bits o;

  o.u = (srcValue & 0x7fff) << 13;       // move exponent/mantissa bits into float position
  unsigned int exp = shifted_exp & o.u;  // isolate just the exponent field
  o.u += (127 - 15) << 23;               // rebias exponent: half bias 15 -> float bias 127

  // handle exponent special cases
  if (exp == shifted_exp) {   // Inf/NaN: exponent must become all-ones in float too
    o.u += (128 - 16) << 23;  // extra exponent adjust up to 255
  } else if (exp == 0) {      // zero or subnormal half
    o.u += 1 << 23;           // extra exponent adjust
    o.f -= magic.f;           // renormalize via FP subtract (maps +/-0 correctly too)
  }

  o.u |= (srcValue & 0x8000) << 16;  // reattach the sign bit
  return o.f;
}

// Converts a float to an IEEE-754 half-precision bit pattern with
// round-to-nearest-even; overflow saturates to +/-Inf, NaN becomes qNaN.
// Equivalent to __gnu_f2h_ieee.
// (Technique after Fabian Giesen's float_to_half_fast3.)
uint16_t Float32ToShort(float srcValue) {
  float32_bits f;
  f.f = srcValue;

  const float32_bits f32infty = {255 << 23};       // float +Inf bit pattern
  const float32_bits f16max = {(127 + 16) << 23};  // smallest float magnitude that overflows half
  const float32_bits denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};  // aligns subnormal mantissa
  unsigned int sign_mask = 0x80000000u;
  uint16_t o;

  unsigned int sign = f.u & sign_mask;
  f.u ^= sign;  // work on |x|; the sign is reattached at the end

  // NOTE all the integer compares in this function can be safely
  // compiled into signed compares since all operands are below
  // 0x80000000 once the sign bit has been stripped.

  if (f.u >= f16max.u) {                       // result is Inf or NaN (all exponent bits set)
    o = (f.u > f32infty.u) ? 0x7e00 : 0x7c00;  // NaN->qNaN and Inf->Inf
  } else {                                     // (de)normalized number or zero
    if (f.u < (113 << 23)) {                   // resulting half is subnormal or zero
      // use a magic value to align our 10 mantissa bits at the bottom of
      // the float. as long as FP addition is round-to-nearest-even this
      // just works.
      f.f += denorm_magic.f;

      // and one integer subtract of the bias later, we have our final half!
      o = (uint16_t)(f.u - denorm_magic.u);
    } else {
      unsigned int mant_odd = (f.u >> 13) & 1;  // resulting mantissa is odd

      // update exponent, rounding bias part 1
      f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
      // rounding bias part 2: break ties to even
      f.u += mant_odd;
      // take the bits!
      o = (uint16_t)(f.u >> 13);
    }
  }

  o |= (uint16_t)(sign >> 16);
  return o;
}

+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h View File

@@ -37,9 +37,10 @@ void MatrixSub(const float *a_ptr, const float *b_ptr, float *dst, size_t a_stri
size_t row, size_t col);
void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr, size_t row, size_t col,
size_t c_stride, size_t x_stride);
int16_t Float32ToShort(float srcValue);
float ShortToFloat32(uint16_t srcValue);

uint16_t Float32ToShort(float srcValue);

float ShortToFloat32(int16_t srcValue);

#ifdef ENABLE_ARM
void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width,


+ 1
- 1
mindspore/lite/tools/converter/parser/tflite/tflite_dequantize_parser.cc View File

@@ -58,7 +58,7 @@ STATUS TfliteDequantizeParser::Parse(const std::unique_ptr<tflite::OperatorT> &t
return RET_ERROR;
}

op->primitive->value.type = schema::PrimitiveType_Fp16Cast;
op->primitive->value.type = schema::PrimitiveType_Cast;
op->primitive->value.value = attr.release();
return 0;
}


+ 1
- 0
mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc View File

@@ -161,6 +161,7 @@ const AnfNodePtr ConstFoldPass::Process(const FuncGraphPtr &func_graph, const An
MS_LOG(EXCEPTION) << "run kernel failed, name: " << lite_kernel->name();
}
auto new_parameter = CreateNewParamter(func_graph, output_tensors.front());
new_parameter->set_name(input_node->fullname_with_scope());
any_node->set_input(i, new_parameter);
}
}


Loading…
Cancel
Save