Browse Source

!22231 [MS][LITE][Develop] enable fp16 nc4hw4

Merge pull request !22231 from sunsuodong/enable_fp16_nc4hw4
tags/v1.5.0-rc1
i-robot Gitee 4 years ago
parent
commit
899b2fb192
4 changed files with 12 additions and 7 deletions
  1. +5
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c
  2. +5
    -0
      mindspore/lite/src/runtime/runtime_pass.cc
  3. +1
    -1
      mindspore/lite/test/config/models_tf_fp16.cfg
  4. +1
    -1
      mindspore/lite/test/config/models_tflite_fp16.cfg

+ 5
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c View File

@@ -93,13 +93,13 @@ void ConvOutNc8hw8Fp16(const float16_t *input_data, float16_t *packed_input, con
#else
RowMajor2Col12MajorFp16Opt(packed_input, col_major_input, tile_n, deep);
#endif
for (int j = 0; j < weight_block; j++) {
const float16_t *cur_weight = packed_weight;
const float16_t *cur_bias = bias_data;
for (int j = 0; j < weight_block; j++, cur_weight += C8NUM * deep, cur_bias += C8NUM) {
int real_weight_row = (j != weight_block - 1) ? C8NUM : conv_param->output_channel_ - j * C8NUM;
int weight_offset = j * C8NUM * deep;
int bias_offset = j * real_weight_row;
int out_offset = j * output_hw * C8NUM + i * tile_n * real_weight_row;
MatMulFp16(col_major_input, packed_weight + weight_offset, output_data + out_offset, bias_data + bias_offset,
conv_param->act_type_, deep, real_in_row, real_weight_row, real_weight_row, OutType_Nhwc);
MatMulFp16(col_major_input, cur_weight, output_data + out_offset, cur_bias, conv_param->act_type_, deep,
real_in_row, real_weight_row, real_weight_row, OutType_Nhwc);
}
}
}


+ 5
- 0
mindspore/lite/src/runtime/runtime_pass.cc View File

@@ -144,6 +144,11 @@ bool Nc4hw4PassValid(const InnerContext *context, std::vector<kernel::LiteKernel
}
}
}

if (context->IsCpuFloat16Enabled()) {
return true;
}

return false;
}



+ 1
- 1
mindspore/lite/test/config/models_tf_fp16.cfg View File

@@ -68,7 +68,7 @@ ml_vision_guide_detection2.pb;1;1,320,320,1 1
ml_tts_encoder.pb;4:2,4,3,1;1,44:1:1:1 9
# encoder_0111_control_flow.pb is the same as ml_tts_encoder_control_flow.pb
#encoder_0111_control_flow.pb;4;1:1,44:1:1 10
ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 11
ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 12.1
ml_video_edit_img_segment_adaptise.pb;2:2,1 40
ml_video_edit_person_divison_video;2:2,1 38
ml_video_edit_oneclick_adaptis.pb;3:2,1,3 6


+ 1
- 1
mindspore/lite/test/config/models_tflite_fp16.cfg View File

@@ -213,7 +213,7 @@ bloom_isface.tflite 0.5
# The output values of the conv layers range from -e±5 to e±5, which almost reaches the representation limit of fp16. In
# this range, fp16 data has a large rounding error, and the accumulation of this error lowers the final precision.
hiai_object_detect_814.tflite 14
ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 11
ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 12.1
ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2:2,1 0.5
hdc_tb_cn_neg.tflite;3:3,1,2 295
# The input of hiai_cv_labelDetectorModel_v3.tflite is between 0-255.


Loading…
Cancel
Save