!5210 resize int8 precision improve

Merge pull request !5210 from zhaozhenlong/lite/issue/resize_int8_weight_calc
5 years ago · ab9641558f
--- a/mindspore/lite/nnacl/int8/resize.c
+++ b/mindspore/lite/nnacl/int8/resize.c
@@ -86,6 +86,62 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int
  return NNACL_OK;
 }

 // Bilinear resize of an int8 tensor using float interpolation weights.
 //
 // input_shape is read as {batch, height, width, channel}; only elements 1 and 2 of output_shape are
 // read here (as the output height and width). The batch and channel extents of the output are taken
 // from input_shape. Work is split across threads by output row: this call handles rows
 // tid, tid + thread_num, tid + 2 * thread_num, ...
 //
 // For every output element the four neighbouring input samples have the input zero-point
 // (quant_in->zp_) removed, are blended with the float weights, requantized through mul_arg, shifted by
 // the output zero-point (quant_out->zp_) and saturated to the int8 range.
 //
 // Returns NNACL_NULL_PTR when any pointer argument is NULL, NNACL_OK otherwise.
 int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
                                      const int *output_shape, const bool align_corners, QuantArg *quant_in,
                                      QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num) {
  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) {
    return NNACL_NULL_PTR;
  }
  // The quantization arguments are dereferenced for every output element, so reject NULL up front.
  if (quant_in == NULL || quant_out == NULL || mul_arg == NULL) {
    return NNACL_NULL_PTR;
  }

  int32_t in_n = input_shape[0];
  int32_t in_h = input_shape[1];
  int32_t in_w = input_shape[2];
  int32_t in_c = input_shape[3];

  int32_t new_height = output_shape[1];
  int32_t new_width = output_shape[2];
  float height_scale, width_scale;
  ComputeScaleFloat(in_h, new_height, align_corners, &height_scale);
  ComputeScaleFloat(in_w, new_width, align_corners, &width_scale);

  int n, h, w, c;
  for (n = 0; n < in_n; n++) {
    // thread_num must be positive, otherwise this loop never advances.
    for (h = tid; h < new_height; h += thread_num) {
      float actual_y;
      int bottom, top;
      float bottom_weight, top_weight;
      ComputeInterpolationArgsFloatWeight(h, height_scale, in_h, &actual_y, &bottom, &bottom_weight, &top, &top_weight);
      for (w = 0; w < new_width; w++) {
        float actual_x;
        int left, right;
        float left_weight, right_weight;
        ComputeInterpolationArgsFloatWeight(w, width_scale, in_w, &actual_x, &left, &left_weight, &right,
                                            &right_weight);
        for (c = 0; c < in_c; c++) {
          float bottom_left_value = ((int32_t)input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) *
                                    bottom_weight * left_weight;
          float bottom_right_value = ((int32_t)input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) *
                                     bottom_weight * right_weight;
          float top_left_value =
            ((int32_t)input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * top_weight * left_weight;
          float top_right_value =
            ((int32_t)input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * top_weight * right_weight;
          float interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value;

          // The cast truncates the blended value toward zero before requantization.
          int out_interp_value = MultiplyByQuantizedMultiplier((int32_t)interp_value, mul_arg->multiplier_,
                                                               mul_arg->left_shift_, mul_arg->right_shift_) +
                                 quant_out->zp_;
          // Saturate while the value is still a full-width int. Narrowing to int8_t first would wrap
          // values below INT8_MIN into the positive range and make the lower-bound check unreachable.
          if (out_interp_value > INT8_MAX) {
            out_interp_value = INT8_MAX;
          } else if (out_interp_value < INT8_MIN) {
            out_interp_value = INT8_MIN;
          }
          output_data[offset(output_shape, n, h, w, c)] = (int8_t)out_interp_value;
        }
      }
    }
  }
  return NNACL_OK;
 }

 int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
                                    const int *output_shape, const bool align_corners, int tid, int thread_num) {
  int batch, y, x, c;
@@ -133,6 +189,22 @@ void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int3
  *scaled_high_weight = *scaled_pos - (1 << 10) * (*low);
 }

 // Writes the float ratio between source and destination extents to *scale.
 // With align_corners set and more than one output sample, the ratio is taken between the
 // last indices (in_value - 1 over out_value - 1); otherwise it is in_value over out_value.
 void ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale) {
  const bool use_last_index_ratio = align_corners && out_value > 1;
  if (use_last_index_ratio) {
    *scale = (float)(in_value - 1) / (out_value - 1);
  } else {
    *scale = (float)in_value / out_value;
  }
 }

 void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
                                         int32_t *low, float *low_weight, int32_t *high, float *high_weight) {
  *actual_pos = pos * scale;
  *low = *actual_pos > 0 ? floor(*actual_pos) : 0;
  *low_weight = 1.0 - (*actual_pos - *low);
  *high = *low + 1 < size ? *low + 1 : size - 1;
  *high_weight = *actual_pos - (*low);
 }

 void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
                               int32_t *nearest) {
  if (new_size == 0) {
--- a/mindspore/lite/nnacl/int8/resize.h
+++ b/mindspore/lite/nnacl/int8/resize.h
@@ -31,6 +31,20 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int
                       const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg,
                       int tid, int thread_num);

 // Bilinear int8 resize that blends the four neighbouring input samples with float weights.
 // Shapes are indexed as {batch, height, width, channel}; output rows are distributed across
 // threads starting at `tid` with a stride of `thread_num`.
 // Returns NNACL_NULL_PTR if input_data, output_data, input_shape or output_shape is NULL,
 // NNACL_OK otherwise.
 int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
                                      const int *output_shape, const bool align_corners, QuantArg *quant_in,
                                      QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num);

 // Integer-scale counterpart of ComputeScaleFloat.
 // NOTE(review): the definition is not visible from here; confirm the fixed-point format of *scale.
 void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale);

 // Integer-weight counterpart of ComputeInterpolationArgsFloatWeight.
 // NOTE(review): only the last statement of the definition is visible; it derives
 // *scaled_high_weight from *scaled_pos and (1 << 10) * (*low). Confirm the rest against resize.c.
 void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
                               int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight);

 // Writes in_value / out_value to *scale as a float, or (in_value - 1) / (out_value - 1) when
 // align_corners is set and out_value > 1.
 void ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale);

 // For output index `pos`, writes the source coordinate pos * scale to *actual_pos, the surrounding
 // source indices to *low / *high (with *high limited to size - 1), and their float blend weights to
 // *low_weight / *high_weight.
 void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
                                         int32_t *low, float *low_weight, int32_t *high, float *high_weight);

 int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
                                    const int *output_shape, const bool align_corners, int tid, int thread_num);

@@ -38,11 +52,6 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con
                              const int *output_shape, const bool align_corners, const QuantMulArg *multiplier,
                              QuantArg *quant_in, QuantArg *quant_out, int tid, int thread_num);

 void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale);

 void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
                              int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight);

 void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
                               int32_t *nearest);
 #ifdef __cplusplus
--- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
@@ -35,9 +35,9 @@ int ResizeInt8CPUKernel::Init() {
  if (ret != RET_OK) {
    return ret;
  }
  quant_in_ = new(std::nothrow) QuantArg;
  quant_in_ = new (std::nothrow) QuantArg;
  MS_ASSERT(quant_in_);
  quant_out_ = new(std::nothrow) QuantArg;
  quant_out_ = new (std::nothrow) QuantArg;
  MS_ASSERT(quant_out_);
  auto input = in_tensors_.at(0);
  quant_in_->zp_ = input->GetQuantParams().front().zeroPoint;
@@ -46,7 +46,7 @@ int ResizeInt8CPUKernel::Init() {
  quant_out_->zp_ = output->GetQuantParams().front().zeroPoint;
  quant_out_->scale_ = output->GetQuantParams().front().scale;

  multiplier_ = new(std::nothrow) QuantMulArg;
  multiplier_ = new (std::nothrow) QuantMulArg;
  MS_ASSERT(multiplier_);
  QuantizeRoundParameter(quant_in_->scale_ / quant_out_->scale_, &multiplier_->multiplier_, &multiplier_->left_shift_,
                         &multiplier_->right_shift_);
@@ -85,9 +85,14 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
  int ret = 0;
  switch (method_) {
    case static_cast<int>(schema::ResizeMethod_BILINEAR): {
      ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
                               align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_);

      if (quant_in_->zp_ == 0) {
        ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
                                 align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_);
      } else {
        ret = ResizeBilinearInt8WithFloatWeight(input_data, output_data, input_shape.data(),
                                                out_tensors_[0]->shape().data(), align_corners_, quant_in_, quant_out_,
                                                multiplier_, task_id, context_->thread_num_);
      }
      break;
    }
    case static_cast<int>(schema::ResizeMethod_NEAREST_NEIGHBOR): {
@@ -95,25 +100,12 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
      bool same_scale = abs(quant_out_->scale_ - quant_in_->scale_) < 1e-6;
      if (same_zp && same_scale) {
        ret =
            ResizeNearestNeighborInt8Simple(input_data,
                                            output_data,
                                            input_shape.data(),
                                            out_tensors_[0]->shape().data(),
                                            align_corners_,
                                            task_id,
                                            context_->thread_num_);
          ResizeNearestNeighborInt8Simple(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
                                          align_corners_, task_id, context_->thread_num_);
      } else {
        ret =
            ResizeNearestNeighborInt8(input_data,
                                      output_data,
                                      input_shape.data(),
                                      out_tensors_[0]->shape().data(),
                                      align_corners_,
                                      multiplier_,
                                      quant_in_,
                                      quant_out_,
                                      task_id,
                                      context_->thread_num_);
          ResizeNearestNeighborInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
                                    align_corners_, multiplier_, quant_in_, quant_out_, task_id, context_->thread_num_);
      }
      break;
    }
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
@@ -84,14 +84,13 @@ TEST_F(TestResizeBilinearInt8, Bilinear0) {
  int8_t output_data[16] = {0};
  std::vector<int> in_shape = {1, 2, 2, 1};
  std::vector<int> out_shape = {1, 4, 4, 1};
  const lite::tensor::QuantArg quant_in = {0.005f, 2};
  const lite::tensor::QuantArg quant_out = {0.008f, 5};
  const lite::tensor::QuantArg quant_in = {0.005f, 0};
  const lite::tensor::QuantArg quant_out = {0.008f, 0};
  bool align_corners = false;
  int thread_num = 1;
  int8_t expect[16] = {4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 6};
  int8_t expect[16] = {0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2};

  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
  kernel_->Init();
  kernel_->Run();

  CompareOutputInt8(output_data, expect, 16, err_percent_);
@@ -104,20 +103,19 @@ TEST_F(TestResizeBilinearInt8, Bilinear1) {
  int8_t input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39};
  int8_t output_data[160] = {0};
  const lite::tensor::QuantArg quant_in = {0.005f, 2};
  const lite::tensor::QuantArg quant_out = {0.008f, 5};
  const lite::tensor::QuantArg quant_in = {0.005f, 0};
  const lite::tensor::QuantArg quant_out = {0.008f, 0};
  int thread_num = 1;
  bool align_corners = false;
  int8_t expect[160] = {4,  4,  5,  6,  6,  5,  6,  7,  7,  8,  7,  8,  8,  9,  9,  7,  8,  8,  9,  9,  7,  8,  8,
                        9,  9,  8,  9,  10, 10, 11, 10, 11, 11, 12, 13, 10, 11, 11, 12, 13, 10, 11, 11, 12, 13, 12,
                        12, 13, 13, 14, 13, 14, 14, 15, 16, 13, 14, 14, 15, 16, 10, 11, 11, 12, 13, 12, 12, 13, 13,
                        14, 13, 14, 14, 15, 16, 13, 14, 14, 15, 16, 16, 17, 18, 18, 19, 18, 18, 19, 20, 20, 19, 20,
                        21, 21, 22, 19, 20, 21, 21, 22, 19, 20, 21, 21, 22, 21, 22, 22, 23, 23, 23, 23, 24, 24, 25,
                        23, 23, 24, 24, 25, 23, 23, 24, 24, 25, 24, 25, 25, 26, 27, 26, 26, 27, 28, 28, 26, 26, 27,
                        28, 28, 23, 23, 24, 24, 25, 24, 25, 25, 26, 27, 26, 26, 27, 28, 28, 26, 26, 27, 28, 28};
  int8_t expect[160] = {0,  1,  1,  2,  2,  2,  2,  3,  3,  4,  3,  4,  4,  5,  6,  3,  4,  4,  5,  6,  3,  4,  4,
                        5,  6,  5,  5,  6,  7,  7,  6,  7,  8,  8,  9,  6,  7,  8,  8,  9,  6,  7,  7,  8,  9,  8,
                        8,  9,  10, 10, 9,  10, 11, 11, 12, 9,  10, 11, 11, 12, 6,  7,  7,  8,  9,  8,  8,  9,  10,
                        10, 9,  10, 11, 11, 12, 9,  10, 11, 11, 12, 13, 13, 14, 14, 15, 14, 15, 15, 16, 17, 16, 16,
                        17, 18, 18, 16, 16, 17, 18, 18, 16, 16, 17, 18, 18, 17, 18, 18, 19, 20, 19, 19, 20, 21, 21,
                        19, 19, 20, 21, 21, 19, 19, 20, 21, 21, 20, 21, 22, 22, 23, 22, 23, 23, 24, 24, 22, 23, 23,
                        24, 24, 19, 19, 20, 21, 21, 20, 21, 22, 22, 23, 22, 23, 23, 24, 24, 22, 23, 23, 24, 24};

  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
  kernel_->Init();
  kernel_->Run();

  CompareOutputInt8(output_data, expect, 160, err_percent_);
@@ -131,22 +129,49 @@ TEST_F(TestResizeBilinearInt8, Bilinear2) {
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39};
  int8_t output_data[160] = {0};

  const lite::tensor::QuantArg quant_in = {0.005f, 0};
  const lite::tensor::QuantArg quant_out = {0.008f, 0};
  int thread_num = 2;
  bool align_corners = true;
  int8_t expect[160] = {0,  1,  1,  2,  2,  1,  2,  2,  3,  4,  2,  3,  3,  4,  5,  3,  4,  4,  5,  6,  2,  3,  3,
                        4,  5,  3,  4,  4,  5,  6,  4,  5,  5,  6,  7,  5,  6,  6,  7,  8,  4,  5,  5,  6,  7,  5,
                        6,  6,  7,  8,  6,  7,  8,  8,  9,  7,  8,  9,  9,  10, 6,  7,  7,  8,  9,  7,  8,  9,  9,
                        10, 8,  9,  10, 10, 11, 9,  10, 11, 11, 12, 13, 13, 14, 14, 15, 14, 14, 15, 15, 16, 15, 15,
                        16, 16, 17, 16, 16, 17, 18, 18, 15, 15, 16, 16, 17, 16, 16, 17, 18, 18, 17, 17, 18, 19, 19,
                        18, 18, 19, 20, 20, 17, 17, 18, 19, 19, 18, 18, 19, 20, 20, 19, 19, 20, 21, 21, 20, 20, 21,
                        22, 22, 19, 19, 20, 21, 21, 20, 20, 21, 22, 22, 21, 21, 22, 23, 23, 22, 23, 23, 24, 24};

  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
  kernel_->Run();

  CompareOutputInt8(output_data, expect, 160, err_percent_);
 }

 // 2*2*2*5 -> 2*4*4*5 thread num 2, align corners, input zp 2
 TEST_F(TestResizeBilinearInt8, Bilinear3) {
  // Resizes a {2, 2, 2, 5} int8 tensor to {2, 4, 4, 5} with align_corners enabled and two threads.
  // The input zero-point is non-zero (2).
  // NOTE(review): quant_out and expect are each declared twice below, which looks like old and new
  // diff lines interleaved. Confirm which declaration of each is the intended one.
  std::vector<int> in_shape = {2, 2, 2, 5};
  std::vector<int> out_shape = {2, 4, 4, 5};
  int8_t input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39};
  int8_t output_data[160] = {0};

  const lite::tensor::QuantArg quant_in = {0.005f, 2};
  const lite::tensor::QuantArg quant_out = {0.008f, 5};
  const lite::tensor::QuantArg quant_out = {0.005f, 2};
  int thread_num = 2;
  bool align_corners = true;
  int8_t expect[160] = {4,  4,  5,  6,  6,  5,  5,  6,  7,  7,  6,  6,  7,  8,  8,  7,  8,  8,  9,  9,  6,  6,  7,
                        8,  8,  7,  8,  8,  9,  9,  8,  9,  9,  10, 10, 9,  10, 10, 11, 11, 8,  9,  9,  10, 10, 9,
                        10, 10, 11, 11, 10, 11, 11, 12, 13, 11, 12, 12, 13, 14, 10, 11, 11, 12, 13, 11, 12, 12, 13,
                        14, 12, 13, 13, 14, 15, 13, 14, 14, 15, 16, 16, 17, 18, 18, 19, 17, 18, 19, 19, 20, 18, 19,
                        20, 20, 21, 19, 20, 21, 21, 22, 18, 19, 20, 20, 21, 19, 20, 21, 21, 22, 20, 21, 22, 22, 23,
                        21, 22, 23, 23, 24, 20, 21, 22, 22, 23, 21, 22, 23, 23, 24, 23, 23, 24, 24, 25, 24, 24, 25,
                        25, 26, 23, 23, 24, 24, 25, 24, 24, 25, 25, 26, 25, 25, 26, 26, 27, 26, 26, 27, 28, 28};
  int8_t expect[160] = {0,  1,  2,  3,  4,  2,  3,  4,  5,  6,  3,  4,  5,  6,  7,  5,  6,  7,  8,  9,  3,  4,  5,
                        6,  7,  5,  6,  7,  8,  9,  7,  8,  9,  10, 11, 8,  9,  10, 11, 12, 7,  8,  9,  10, 11, 8,
                        9,  10, 11, 12, 10, 11, 12, 13, 14, 12, 13, 14, 15, 16, 10, 11, 12, 13, 14, 12, 13, 14, 15,
                        16, 13, 14, 15, 16, 17, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 22, 23, 24, 25, 26, 23, 24,
                        25, 26, 27, 25, 26, 27, 28, 29, 23, 24, 25, 26, 27, 25, 26, 27, 28, 29, 27, 28, 29, 30, 31,
                        28, 29, 30, 31, 32, 27, 28, 29, 30, 31, 28, 29, 30, 31, 32, 30, 31, 32, 33, 34, 32, 33, 34,
                        35, 36, 30, 31, 32, 33, 34, 32, 33, 34, 35, 36, 33, 34, 35, 36, 37, 35, 36, 37, 38, 39};

  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
  kernel_->Init();
  kernel_->Run();

  // The comparison tolerance is overridden for this case before checking the output.
  err_percent_ = 0.325f;
  CompareOutputInt8(output_data, expect, 160, err_percent_);
 }

 }  // namespace mindspore