Browse Source

optimize int8 resize bilinear

tags/v1.1.0
fuzhiye 5 years ago
parent
commit
e2695df21c
5 changed files with 399 additions and 187 deletions
  1. +119
    -157
      mindspore/lite/nnacl/int8/resize_int8.c
  2. +5
    -16
      mindspore/lite/nnacl/int8/resize_int8.h
  3. +22
    -0
      mindspore/lite/nnacl/quantization/quantize.h
  4. +239
    -5
      mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
  5. +14
    -9
      mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h

+ 119
- 157
mindspore/lite/nnacl/int8/resize_int8.c View File

@@ -19,129 +19,131 @@
#include "nnacl/quantization/fixed_point.h"
#include "nnacl/errorcode.h"

int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape,
const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg,
int tid, int thread_num) {
if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) {
return NNACL_NULL_PTR;
}

int32_t in_n = input_shape[0];
int32_t in_h = input_shape[1];
int32_t in_w = input_shape[2];
int32_t in_c = input_shape[3];

int32_t new_height = output_shape[1];
int32_t new_width = output_shape[2];
int32_t height_scale = 0, width_scale = 0;
ComputeScale(in_h, new_height, align_corners, &height_scale);
ComputeScale(in_w, new_width, align_corners, &width_scale);

int n, h, w, c;
for (n = 0; n < in_n; n++) {
for (h = tid; h < new_height; h += thread_num) {
const int base_offset = 20;
int scaled_actual_y;
int bottom, top;
int scaled_bottom_weight, scaled_top_weight;
ComputeInterpolationArgs(h, height_scale, in_h, &scaled_actual_y, &bottom, &scaled_bottom_weight, &top,
&scaled_top_weight);
for (w = 0; w < new_width; w++) {
int scaled_actual_x;
int left, right;
int scaled_left_weight, scaled_right_weight;
ComputeInterpolationArgs(w, width_scale, in_w, &scaled_actual_x, &left, &scaled_left_weight, &right,
&scaled_right_weight);
for (c = 0; c < in_c; c++) {
const int64_t bottom_left_value =
(int64_t)(input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * scaled_bottom_weight *
scaled_left_weight;
const int64_t bottom_right_value =
(int64_t)(input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * scaled_bottom_weight *
scaled_right_weight;
const int64_t top_left_value = (int64_t)(input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) *
scaled_top_weight * scaled_left_weight;
const int64_t top_right_value = (int64_t)(input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) *
scaled_top_weight * scaled_right_weight;
const int64_t scaled_interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value;
int32_t interp_value;
if (scaled_interp_value >= 0) {
interp_value = (scaled_interp_value + (1 << 19)) / (1 << 20);
} else {
interp_value = (scaled_interp_value - (1 << 19)) / (1 << 20);
}

const int out_interp_value =
MultiplyByQuantizedMultiplier(interp_value, mul_arg->multiplier_, mul_arg->left_shift_ + base_offset,
mul_arg->right_shift_ - base_offset) +
quant_out->zp_;
int8_t out_value;
out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value;
out_value = out_value < INT8_MIN ? INT8_MIN : out_value;
output_data[offset(output_shape, n, h, w, c)] = out_value;
}
// Bilinear resize on int8 NHWC data — pure fixed-point path (used when the
// input zero-point is 0). The per-axis weights held in quant_arg are Q10
// fixed-point, so the four corner coefficients sum to exactly 1 << 20 and the
// accumulated value divided by 1 << 20 is a weighted average of int8 inputs;
// no extra clamping is required.
//   index: flattened (h * out_w + w) position of the first output pixel this
//          call produces; output_ptr must already point at that pixel.
//   count: number of output pixels produced per batch (thread partition).
// Returns NNACL_NULL_PTR on null buffers, NNACL_OK otherwise.
int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w,
                       int channel, int index, int count, ResizeQuantArg quant_arg) {
  if (input_ptr == NULL || output_ptr == NULL) {
    return NNACL_NULL_PTR;
  }
  int in_plane = in_h * in_w;
  int out_plane = out_h * out_w;
  for (int n = 0; n < batch; n++) {
    const int8_t *in_b_ptr = input_ptr + n * in_plane * channel;
    int8_t *out_b_ptr = output_ptr + n * out_plane * channel;
    for (int t = 0; t < count; t++) {
      int ori_out_h = (index + t) / out_w;
      int ori_out_w = (index + t) % out_w;
      int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w];
      int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w];
      int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h];
      int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h];
      // Q10 fractional distance to the lower neighbour on each axis.
      int32_t weight_x = quant_arg.x_axis_index_[ori_out_w] - (1 << 10) * x_lower_value;
      int32_t one_minus_weight_x = (1 << 10) - weight_x;
      int32_t weight_y = quant_arg.y_axis_index_[ori_out_h] - (1 << 10) * y_lower_value;
      int32_t one_minus_weight_y = (1 << 10) - weight_y;
      int64_t left_bottom_coef = (int64_t)(one_minus_weight_x * one_minus_weight_y);
      int64_t left_top_coef = (int64_t)(weight_y * one_minus_weight_x);
      int64_t right_bottom_coef = (int64_t)(weight_x * one_minus_weight_y);
      int64_t right_top_coef = (int64_t)(weight_x * weight_y);
      int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel;
      int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel;
      int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel;
      int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel;
      for (int c = 0; c < channel; c++) {
        int64_t out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index];
        int64_t out_left_top = left_top_coef * in_b_ptr[input_lt_index];
        int64_t out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index];
        int64_t out_right_top = right_top_coef * in_b_ptr[input_rt_index];
        int64_t out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top;
        // Round to nearest, half away from zero. A plain
        // (out_value + (1 << 19)) / (1 << 20) is biased for negative sums,
        // because C integer division truncates toward zero.
        if (out_value >= 0) {
          out_b_ptr[0] = (int8_t)((out_value + (1 << 19)) / (1 << 20));
        } else {
          out_b_ptr[0] = (int8_t)((out_value - (1 << 19)) / (1 << 20));
        }
        input_lb_index++;
        input_lt_index++;
        input_rb_index++;
        input_rt_index++;
        out_b_ptr++;
      }
    }
  }
  return NNACL_OK;
}

int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
const int *output_shape, const bool align_corners, QuantArg *quant_in,
QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num) {
if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) {
return NNACL_NULL_PTR;
}

int32_t in_n = input_shape[0];
int32_t in_h = input_shape[1];
int32_t in_w = input_shape[2];
int32_t in_c = input_shape[3];

int32_t new_height = output_shape[1];
int32_t new_width = output_shape[2];
float height_scale, width_scale;
int ret = ComputeScaleFloat(in_h, new_height, align_corners, &height_scale);
if (ret != NNACL_OK) {
return ret;
}
ret = ComputeScaleFloat(in_w, new_width, align_corners, &width_scale);
if (ret != NNACL_OK) {
return ret;
}

int n, h, w, c;
for (n = 0; n < in_n; n++) {
for (h = tid; h < new_height; h += thread_num) {
float actual_y;
int bottom, top;
float bottom_weight, top_weight;
ComputeInterpolationArgsFloatWeight(h, height_scale, in_h, &actual_y, &bottom, &bottom_weight, &top, &top_weight);
for (w = 0; w < new_width; w++) {
float actual_x;
int left, right;
float left_weight, right_weight;
ComputeInterpolationArgsFloatWeight(w, width_scale, in_w, &actual_x, &left, &left_weight, &right,
&right_weight);
for (c = 0; c < in_c; c++) {
float bottom_left_value = ((int32_t)input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) *
bottom_weight * left_weight;
float bottom_right_value = ((int32_t)input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) *
bottom_weight * right_weight;
float top_left_value =
((int32_t)input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * top_weight * left_weight;
float top_right_value =
((int32_t)input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * top_weight * right_weight;
float interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value;

const int out_interp_value = MultiplyByQuantizedMultiplier((int32_t)interp_value, mul_arg->multiplier_,
mul_arg->left_shift_, mul_arg->right_shift_) +
quant_out->zp_;
int8_t out_value;
out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value;
out_value = out_value < INT8_MIN ? INT8_MIN : out_value;
output_data[offset(output_shape, n, h, w, c)] = out_value;
}
int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w,
int out_h, int out_w, int channel, int index, int count,
ResizeFloatScaleQuantArg quant_arg) {
int in_plane = in_h * in_w;
int out_plane = out_h * out_w;
for (int n = 0; n < batch; n++) {
const int8_t *in_b_ptr = input_ptr + n * in_plane * channel;
int8_t *out_b_ptr = output_ptr + n * out_plane * channel;
for (int t = 0; t < count; t++) {
int ori_out_h = (index + t) / out_w;
int ori_out_w = (index + t) % out_w;
int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w];
int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w];
int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h];
int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h];
float weight_x = quant_arg.x_axis_index_[ori_out_w] - x_lower_value;
float one_minus_weight_x = 1 - weight_x;
float weight_y = quant_arg.y_axis_index_[ori_out_h] - y_lower_value;
float one_minus_weight_y = 1 - weight_y;
float left_bottom_coef = one_minus_weight_x * one_minus_weight_y;
float left_top_coef = weight_y * one_minus_weight_x;
float right_bottom_coef = weight_x * one_minus_weight_y;
float right_top_coef = weight_x * weight_y;
int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel;
int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel;
int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel;
int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel;
int c = 0;
#ifdef ENABLE_ARM
for (; c < channel; c += 4) {
float32x4_t in_lb;
in_lb[0] = (float)in_b_ptr[input_lb_index];
in_lb[1] = (float)in_b_ptr[input_lb_index + 1];
in_lb[2] = (float)in_b_ptr[input_lb_index + 2];
in_lb[3] = (float)in_b_ptr[input_lb_index + 3];
float32x4_t out_left_bottom = vmulq_n_f32(in_lb, left_bottom_coef);
float32x4_t in_lt;
in_lt[0] = (float)in_b_ptr[input_lt_index];
in_lt[1] = (float)in_b_ptr[input_lt_index + 1];
in_lt[2] = (float)in_b_ptr[input_lt_index + 2];
in_lt[3] = (float)in_b_ptr[input_lt_index + 3];
float32x4_t out_left_top = vmulq_n_f32(in_lt, left_top_coef);
float32x4_t in_rb;
in_rb[0] = (float)in_b_ptr[input_rb_index];
in_rb[1] = (float)in_b_ptr[input_rb_index + 1];
in_rb[2] = (float)in_b_ptr[input_rb_index + 2];
in_rb[3] = (float)in_b_ptr[input_rb_index + 3];
float32x4_t out_right_bottom = vmulq_n_f32(in_rb, right_bottom_coef);
float32x4_t in_rt;
in_rt[0] = (float)in_b_ptr[input_rt_index];
in_rt[1] = (float)in_b_ptr[input_rt_index + 1];
in_rt[2] = (float)in_b_ptr[input_rt_index + 2];
in_rt[3] = (float)in_b_ptr[input_rt_index + 3];
float32x4_t out_right_top = vmulq_n_f32(in_rt, right_top_coef);
float32x4_t out_value1 = vaddq_f32(out_left_bottom, out_left_top);
float32x4_t out_value2 = vaddq_f32(out_right_top, out_right_bottom);
float32x4_t out_value = vaddq_f32(out_value1, out_value2);
out_b_ptr[0] = (int8_t)(out_value[0]);
out_b_ptr[1] = (int8_t)(out_value[1]);
out_b_ptr[2] = (int8_t)(out_value[2]);
out_b_ptr[3] = (int8_t)(out_value[3]);
input_lb_index += 4;
input_lt_index += 4;
input_rb_index += 4;
input_rt_index += 4;
out_b_ptr += 4;
}
#endif
for (; c < channel; c++) {
float out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index];
float out_left_top = left_top_coef * in_b_ptr[input_lt_index];
float out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index];
float out_right_top = right_top_coef * in_b_ptr[input_rt_index];
float out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top;
out_b_ptr[0] = (int8_t)(out_value);
input_lb_index++;
input_lt_index++;
input_rb_index++;
input_rt_index++;
out_b_ptr++;
}
}
}
@@ -175,46 +177,6 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat
return NNACL_OK;
}

// Converts an (input size, output size) pair into a Q10 fixed-point resize
// ratio, rounded to nearest. With align_corners the corner pixels of the two
// grids are mapped onto each other, which uses (size - 1) on both axes.
// Does nothing when out_value is 0 (caller keeps the previous *scale).
void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) {
  if (out_value == 0) {
    return;
  }
  if (align_corners && out_value > 1) {
    *scale = ((in_value - 1) * (1 << 10) + (out_value - 1) / 2) / (out_value - 1);
  } else {
    *scale = (in_value * (1 << 10) + out_value / 2) / out_value;
  }
}

// For one output coordinate, derives the two neighbouring source coordinates
// and their Q10 interpolation weights (the weights sum to 1 << 10).
// low is clamped to >= 0 and high to <= size - 1.
void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
                              int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight) {
  *scaled_pos = pos * scale;
  const int base = *scaled_pos / (1 << 10);
  if (base > 0) {
    *low = base;
  } else {
    *low = 0;
  }
  const int32_t frac = *scaled_pos - (1 << 10) * (*low);
  *scaled_low_weight = (1 << 10) - frac;
  if (base + 1 < size) {
    *high = base + 1;
  } else {
    *high = size - 1;
  }
  *scaled_high_weight = frac;
}

// Float counterpart of ComputeScale: writes in/out size ratio into *scale.
// align_corners uses (size - 1) on both axes so corner pixels coincide.
// Returns NNACL_ERRCODE_DIVISOR_ZERO when out_value is 0, NNACL_OK otherwise.
int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale) {
  if (out_value == 0) {
    return NNACL_ERRCODE_DIVISOR_ZERO;
  }
  if (align_corners && out_value > 1) {
    *scale = (float)(in_value - 1) / (out_value - 1);
  } else {
    *scale = (float)in_value / out_value;
  }
  return NNACL_OK;
}

void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
int32_t *low, float *low_weight, int32_t *high, float *high_weight) {
*actual_pos = pos * scale;
*low = *actual_pos > 0 ? floor(*actual_pos) : 0;
*low_weight = 1.0 - (*actual_pos - *low);
*high = *low + 1 < size ? *low + 1 : size - 1;
*high_weight = *actual_pos - (*low);
}

void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
int32_t *nearest) {
if (new_size == 0) {


+ 5
- 16
mindspore/lite/nnacl/int8/resize_int8.h View File

@@ -27,23 +27,12 @@
#ifdef __cplusplus
extern "C" {
#endif
int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape,
const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg,
int tid, int thread_num);
int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w,
int channel, int index, int count, ResizeQuantArg quant_arg);

int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
const int *output_shape, const bool align_corners, QuantArg *quant_in,
QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num);

void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale);

void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight);

int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale);

void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
int32_t *low, float *low_weight, int32_t *high, float *high_weight);
int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w,
int out_h, int out_w, int channel, int index, int count,
ResizeFloatScaleQuantArg quant_arg);

int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
const int *output_shape, const bool align_corners, int tid, int thread_num);


+ 22
- 0
mindspore/lite/nnacl/quantization/quantize.h View File

@@ -260,6 +260,28 @@ typedef struct LeakyReluQuantArg {
int element_num;
} LeakyReluQuantArg;

// Precomputed lookup tables for the fixed-point (Q10) int8 bilinear resize
// path. The arrays are heap-allocated, indexed by output column (x_*) or
// output row (y_*), and owned/freed by the kernel that fills them.
typedef struct ResizeQuantArg {
  int32_t ratio_x_;        // Q10 horizontal ratio (in_w / out_w, rounded)
  int32_t ratio_y_;        // Q10 vertical ratio (in_h / out_h, rounded)
  int32_t *x_axis_index_;  // per output column: Q10 scaled source x = col * ratio_x_
  int32_t *x_axis_lower_;  // per output column: left source column, clamped >= 0
  int32_t *x_axis_upper_;  // per output column: right source column, clamped <= in_w - 1
  int32_t *y_axis_index_;  // per output row: Q10 scaled source y = row * ratio_y_
  int32_t *y_axis_lower_;  // per output row: lower source row, clamped >= 0
  int32_t *y_axis_upper_;  // per output row: upper source row, clamped <= in_h - 1
} ResizeQuantArg;

// Float-weight counterpart of ResizeQuantArg, used when the input zero-point
// is non-zero. Index arrays hold the exact (fractional) source coordinate;
// lower/upper arrays hold the clamped integer neighbours.
typedef struct ResizeFloatScaleQuantArg {
  float ratio_x_;          // horizontal ratio (in_w / out_w)
  float ratio_y_;          // vertical ratio (in_h / out_h)
  float *x_axis_index_;    // per output column: exact source x = col * ratio_x_
  int32_t *x_axis_lower_;  // per output column: floor(source x), clamped >= 0
  int32_t *x_axis_upper_;  // per output column: floor(source x) + 1, clamped <= in_w - 1
  float *y_axis_index_;    // per output row: exact source y = row * ratio_y_
  int32_t *y_axis_lower_;  // per output row: floor(source y), clamped >= 0
  int32_t *y_axis_upper_;  // per output row: floor(source y) + 1, clamped <= in_h - 1
} ResizeFloatScaleQuantArg;

#ifdef __cplusplus
extern "C" {
#endif


+ 239
- 5
mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc View File

@@ -16,6 +16,7 @@

#include "src/runtime/kernel/arm/int8/resize_int8.h"
#include <vector>
#include <algorithm>
#include "include/errorcode.h"
#include "nnacl/int8/resize_int8.h"
#include "schema/model_generated.h"
@@ -32,6 +33,40 @@ using mindspore::lite::RET_OK;
using mindspore::lite::KernelRegistrar;

namespace mindspore::kernel {
// Releases the fixed-point interpolation tables allocated by
// InitResizeQuantArg(). Pointers are reset to nullptr afterwards so a repeated
// call (e.g. ReSize after a failure followed by the destructor) cannot
// double-free.
void ResizeInt8CPUKernel::FreeResizeBiLinear() {
  free(resize_quant_arg_.x_axis_index_);
  resize_quant_arg_.x_axis_index_ = nullptr;
  free(resize_quant_arg_.x_axis_lower_);
  resize_quant_arg_.x_axis_lower_ = nullptr;
  free(resize_quant_arg_.x_axis_upper_);
  resize_quant_arg_.x_axis_upper_ = nullptr;
  free(resize_quant_arg_.y_axis_index_);
  resize_quant_arg_.y_axis_index_ = nullptr;
  free(resize_quant_arg_.y_axis_lower_);
  resize_quant_arg_.y_axis_lower_ = nullptr;
  free(resize_quant_arg_.y_axis_upper_);
  resize_quant_arg_.y_axis_upper_ = nullptr;
}

// Releases the float-weight interpolation tables allocated by
// InitResizeFloatQuantArg(). Pointers are reset to nullptr afterwards so a
// repeated call cannot double-free.
void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() {
  free(resize_float_quant_arg_.x_axis_index_);
  resize_float_quant_arg_.x_axis_index_ = nullptr;
  free(resize_float_quant_arg_.x_axis_lower_);
  resize_float_quant_arg_.x_axis_lower_ = nullptr;
  free(resize_float_quant_arg_.x_axis_upper_);
  resize_float_quant_arg_.x_axis_upper_ = nullptr;
  free(resize_float_quant_arg_.y_axis_index_);
  resize_float_quant_arg_.y_axis_index_ = nullptr;
  free(resize_float_quant_arg_.y_axis_lower_);
  resize_float_quant_arg_.y_axis_lower_ = nullptr;
  free(resize_float_quant_arg_.y_axis_upper_);
  resize_float_quant_arg_.y_axis_upper_ = nullptr;
}

// Frees the interpolation tables (bilinear path only) and the quant parameter
// objects. The fixed-point vs. float table choice mirrors ReSize(), which
// selects the path on quant_in_->zp_ == 0; guard against quant_in_ being null
// when initialization never completed.
// NOTE(review): the table pointers are only assigned inside ReSize(); if the
// kernel is destroyed before ReSize() ran they may be uninitialized — confirm
// the members are zero-initialized at construction.
ResizeInt8CPUKernel::~ResizeInt8CPUKernel() {
  if (method_ == schema::ResizeMethod_LINEAR && quant_in_ != nullptr) {
    if (quant_in_->zp_ == 0) {
      FreeResizeBiLinear();
    } else {
      FreeFloatResizeBiLinear();
    }
  }
  delete quant_out_;
  quant_out_ = nullptr;
  delete quant_in_;
  quant_in_ = nullptr;
  delete multiplier_;
  multiplier_ = nullptr;
}

int ResizeInt8CPUKernel::Init() {
auto ret = ResizeBaseCPUKernel::Init();
if (ret != RET_OK) {
@@ -58,6 +93,195 @@ int ResizeInt8CPUKernel::Init() {
return ReSize();
}

int ResizeInt8CPUKernel::InitResizeQuantArg() {
auto out_shape = out_tensors_.front()->shape();
resize_quant_arg_.x_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_quant_arg_.x_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc x axis index array failed.";
return RET_ERROR;
}
resize_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_quant_arg_.x_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_lower_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_quant_arg_.x_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_upper_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.y_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_quant_arg_.y_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_index_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_quant_arg_.y_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_lower_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_quant_arg_.y_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_upper_ array failed.";
return RET_ERROR;
}
return RET_OK;
}

// Computes the Q10 fixed-point x/y resize ratios (input size over output
// size, rounded to nearest). When align_corners_ is set and the output axis
// has more than one pixel, the corner pixels of the two grids are mapped onto
// each other by using (size - 1) on both axes.
int ResizeInt8CPUKernel::CalRatio() {
  auto in_tensor = in_tensors_.front();
  auto out_tensor = out_tensors_.front();
  const auto in_width = in_tensor->Width();
  const auto in_height = in_tensor->Height();
  const auto out_width = out_tensor->Width();
  const auto out_height = out_tensor->Height();
  if (align_corners_ && out_width > 1) {
    resize_quant_arg_.ratio_x_ = ((1 << 10) * (in_width - 1) + (out_width - 1) / 2) / (out_width - 1);
  } else {
    resize_quant_arg_.ratio_x_ = ((1 << 10) * in_width + out_width / 2) / out_width;
  }
  if (align_corners_ && out_height > 1) {
    resize_quant_arg_.ratio_y_ = ((1 << 10) * (in_height - 1) + (out_height - 1) / 2) / (out_height - 1);
  } else {
    resize_quant_arg_.ratio_y_ = ((1 << 10) * in_height + out_height / 2) / out_height;
  }
  return RET_OK;
}

// Fills the per-output-row and per-output-column lookup tables: the Q10
// scaled source coordinate plus the clamped lower/upper neighbour indices.
int ResizeInt8CPUKernel::CalInterpolationRange() {
  const int in_height = in_tensors_.front()->Height();
  const int in_width = in_tensors_.front()->Width();
  const int out_height = out_tensors_.front()->Height();
  const int out_width = out_tensors_.front()->Width();
  for (int i = 0; i < out_height; ++i) {
    const int32_t scaled_index = i * resize_quant_arg_.ratio_y_;
    const int lower = scaled_index / (1 << 10);
    resize_quant_arg_.y_axis_index_[i] = scaled_index;
    resize_quant_arg_.y_axis_lower_[i] = std::max(lower, 0);
    resize_quant_arg_.y_axis_upper_[i] = std::min(lower + 1, in_height - 1);
  }
  for (int i = 0; i < out_width; ++i) {
    const int32_t scaled_index = i * resize_quant_arg_.ratio_x_;
    const int lower = scaled_index / (1 << 10);
    resize_quant_arg_.x_axis_index_[i] = scaled_index;
    resize_quant_arg_.x_axis_lower_[i] = std::max(lower, 0);
    resize_quant_arg_.x_axis_upper_[i] = std::min(lower + 1, in_width - 1);
  }
  return RET_OK;
}

int ResizeInt8CPUKernel::InitResizeFloatQuantArg() {
auto out_shape = out_tensors_.front()->shape();
resize_float_quant_arg_.x_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[2] * sizeof(float)));
if (resize_float_quant_arg_.x_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc x axis index array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_float_quant_arg_.x_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_lower_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_float_quant_arg_.x_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_upper_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.y_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[1] * sizeof(float)));
if (resize_float_quant_arg_.y_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_index_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_float_quant_arg_.y_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_lower_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_float_quant_arg_.y_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_upper_ array failed.";
return RET_ERROR;
}
return RET_OK;
}

// Computes the float x/y resize ratios (input size over output size). When
// align_corners_ is set and the output axis has more than one pixel, the
// corner pixels of the two grids are mapped onto each other via (size - 1).
int ResizeInt8CPUKernel::CalFloatRatio() {
  auto in_tensor = in_tensors_.front();
  auto out_tensor = out_tensors_.front();
  const auto in_width = in_tensor->Width();
  const auto in_height = in_tensor->Height();
  const auto out_width = out_tensor->Width();
  const auto out_height = out_tensor->Height();
  if (align_corners_ && out_width > 1) {
    resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width - 1) / (out_width - 1);
  } else {
    resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width) / out_width;
  }
  if (align_corners_ && out_height > 1) {
    resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height - 1) / (out_height - 1);
  } else {
    resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height) / out_height;
  }
  return RET_OK;
}

// Fills the per-output-row and per-output-column lookup tables for the float
// path: the exact scaled source coordinate plus the clamped floor/ceil
// neighbour indices.
int ResizeInt8CPUKernel::CalFloatInterpolationRange() {
  const int in_height = in_tensors_.front()->Height();
  const int in_width = in_tensors_.front()->Width();
  const int out_height = out_tensors_.front()->Height();
  const int out_width = out_tensors_.front()->Width();
  for (int i = 0; i < out_height; ++i) {
    const float scaled_index = i * resize_float_quant_arg_.ratio_y_;
    const int lower_index = std::floor(scaled_index);
    resize_float_quant_arg_.y_axis_index_[i] = scaled_index;
    resize_float_quant_arg_.y_axis_lower_[i] = std::max(lower_index, 0);
    resize_float_quant_arg_.y_axis_upper_[i] = std::min(lower_index + 1, in_height - 1);
  }
  for (int i = 0; i < out_width; ++i) {
    const float scaled_index = i * resize_float_quant_arg_.ratio_x_;
    const int lower_index = std::floor(scaled_index);
    resize_float_quant_arg_.x_axis_index_[i] = scaled_index;
    resize_float_quant_arg_.x_axis_lower_[i] = std::max(lower_index, 0);
    resize_float_quant_arg_.x_axis_upper_[i] = std::min(lower_index + 1, in_width - 1);
  }
  return RET_OK;
}

// Prepares the fixed-point bilinear path: allocates the lookup tables, then
// fills in the Q10 ratios and the per-output-pixel interpolation ranges.
int ResizeInt8CPUKernel::InitResizeBiLinear() {
  int ret = InitResizeQuantArg();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Resize Int8 Op Resize Failed.";
    return ret;
  }
  if ((ret = CalRatio()) != RET_OK) {
    MS_LOG(ERROR) << "Cal ratio Failed.";
    return ret;
  }
  if ((ret = CalInterpolationRange()) != RET_OK) {
    MS_LOG(ERROR) << "Cal range of interpolation Failed.";
    return ret;
  }
  return RET_OK;
}

// Prepares the float-weight bilinear path: allocates the lookup tables, then
// fills in the float ratios and the per-output-pixel interpolation ranges.
int ResizeInt8CPUKernel::InitFloatResizeBiLinear() {
  int ret = InitResizeFloatQuantArg();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Resize Int8 Op Resize Failed.";
    return ret;
  }
  if ((ret = CalFloatRatio()) != RET_OK) {
    MS_LOG(ERROR) << "Cal ratio Failed.";
    return ret;
  }
  if ((ret = CalFloatInterpolationRange()) != RET_OK) {
    MS_LOG(ERROR) << "Cal range of interpolation Failed.";
    return ret;
  }
  return RET_OK;
}

// Rebuilds the interpolation tables after a shape change. Only the bilinear
// method needs precomputed tables; a zero input zero-point selects the pure
// fixed-point path, anything else the float-weight path.
int ResizeInt8CPUKernel::ReSize() {
  if (method_ != schema::ResizeMethod_LINEAR) {
    return RET_OK;
  }
  return quant_in_->zp_ == 0 ? InitResizeBiLinear() : InitFloatResizeBiLinear();
}

int ResizeInt8Impl(void *cdata, int task_id) {
auto resize = reinterpret_cast<ResizeInt8CPUKernel *>(cdata);
auto error_code = resize->RunImpl(task_id);
@@ -87,14 +311,24 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
int ret = 0;
switch (method_) {
case static_cast<int>(schema::ResizeMethod_LINEAR): {
auto out_tensor = out_tensors_.front();
auto out_c = out_tensor->Channel();
int plane = out_tensor->Height() * out_tensor->Width();
int num = UP_DIV(plane, context_->thread_num_);
int start_index = task_id * num;
int count = plane - start_index;
count = count > num ? num : count;
auto out_ptr = output_data + start_index * out_c;
if (quant_in_->zp_ == 0) {
ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_);
ret =
ResizeBilinearInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), input->Width(),
out_tensor->Height(), out_tensor->Width(), out_c, start_index, count, resize_quant_arg_);
} else {
ret = ResizeBilinearInt8WithFloatWeight(input_data, output_data, input_shape.data(),
out_tensors_[0]->shape().data(), align_corners_, quant_in_, quant_out_,
multiplier_, task_id, context_->thread_num_);
ret = ResizeBilinearWithFloatScaleInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(),
input->Width(), out_tensor->Height(), out_tensor->Width(), out_c,
start_index, count, resize_float_quant_arg_);
}

break;
}
case static_cast<int>(schema::ResizeMethod_NEAREST): {


+ 14
- 9
mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h View File

@@ -32,17 +32,20 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel {
const mindspore::lite::PrimitiveC *primitive)
: ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

~ResizeInt8CPUKernel() {
delete quant_out_;
quant_out_ = nullptr;
delete quant_in_;
quant_in_ = nullptr;
delete multiplier_;
multiplier_ = nullptr;
}
~ResizeInt8CPUKernel() override;

int Init() override;
int ReSize() override { return 0; };
int ReSize() override;
int InitResizeBiLinear();
int InitFloatResizeBiLinear();
int InitResizeQuantArg();
int CalRatio();
int CalInterpolationRange();
void FreeResizeBiLinear();
int InitResizeFloatQuantArg();
int CalFloatRatio();
int CalFloatInterpolationRange();
void FreeFloatResizeBiLinear();
int Run() override;
int RunImpl(int task_id);

@@ -50,6 +53,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel {
QuantArg *quant_in_;
QuantArg *quant_out_;
QuantMulArg *multiplier_;
ResizeQuantArg resize_quant_arg_;
ResizeFloatScaleQuantArg resize_float_quant_arg_;
};
} // namespace mindspore::kernel



Loading…
Cancel
Save