!22231 [MS][LITE][Develop] enable fp16 nc4hw4

Merge pull request !22231 from sunsuodong/enable_fp16_nc4hw4
4 years ago · 899b2fb192
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c
@@ -93,13 +93,13 @@ void ConvOutNc8hw8Fp16(const float16_t *input_data, float16_t *packed_input, con
 #else
      RowMajor2Col12MajorFp16Opt(packed_input, col_major_input, tile_n, deep);
 #endif
      for (int j = 0; j < weight_block; j++) {
      const float16_t *cur_weight = packed_weight;
      const float16_t *cur_bias = bias_data;
      for (int j = 0; j < weight_block; j++, cur_weight += C8NUM * deep, cur_bias += C8NUM) {
        int real_weight_row = (j != weight_block - 1) ? C8NUM : conv_param->output_channel_ - j * C8NUM;
        int weight_offset = j * C8NUM * deep;
        int bias_offset = j * real_weight_row;
        int out_offset = j * output_hw * C8NUM + i * tile_n * real_weight_row;
        MatMulFp16(col_major_input, packed_weight + weight_offset, output_data + out_offset, bias_data + bias_offset,
                   conv_param->act_type_, deep, real_in_row, real_weight_row, real_weight_row, OutType_Nhwc);
        MatMulFp16(col_major_input, cur_weight, output_data + out_offset, cur_bias, conv_param->act_type_, deep,
                   real_in_row, real_weight_row, real_weight_row, OutType_Nhwc);
      }
    }
  }
--- a/mindspore/lite/src/runtime/runtime_pass.cc
+++ b/mindspore/lite/src/runtime/runtime_pass.cc
@@ -144,6 +144,11 @@ bool Nc4hw4PassValid(const InnerContext *context, std::vector<kernel::LiteKernel
      }
    }
  }

  if (context->IsCpuFloat16Enabled()) {
    return true;
  }

  return false;
 }

--- a/mindspore/lite/test/config/models_tf_fp16.cfg
+++ b/mindspore/lite/test/config/models_tf_fp16.cfg
@@ -68,7 +68,7 @@ ml_vision_guide_detection2.pb;1;1,320,320,1 1
 ml_tts_encoder.pb;4:2,4,3,1;1,44:1:1:1 9
 # encoder_0111_control_flow.pb is the same as ml_tts_encoder_control_flow.pb
 #encoder_0111_control_flow.pb;4;1:1,44:1:1 10
 ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 11
 ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 12.1
 ml_video_edit_img_segment_adaptise.pb;2:2,1 40
 ml_video_edit_person_divison_video;2:2,1 38
 ml_video_edit_oneclick_adaptis.pb;3:2,1,3 6
--- a/mindspore/lite/test/config/models_tflite_fp16.cfg
+++ b/mindspore/lite/test/config/models_tflite_fp16.cfg
@@ -213,7 +213,7 @@ bloom_isface.tflite 0.5
 # The output values of conv layers range from -e±5 to e±5, which almost reaches the representation limit of fp16. In
 # this range, the fp16 data will have a large error, and the accumulation of this error lowers the final precision.
 hiai_object_detect_814.tflite 14
 ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 11
 ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 12.1
 ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2:2,1 0.5
 hdc_tb_cn_neg.tflite;3:3,1,2 295
 # The input of hiai_cv_labelDetectorModel_v3.tflite is between 0 and 255.