Browse Source

optimize int8 resize bilinear

tags/v1.1.0
fuzhiye 5 years ago
parent
commit
e2695df21c
5 changed files with 399 additions and 187 deletions
  1. +119
    -157
      mindspore/lite/nnacl/int8/resize_int8.c
  2. +5
    -16
      mindspore/lite/nnacl/int8/resize_int8.h
  3. +22
    -0
      mindspore/lite/nnacl/quantization/quantize.h
  4. +239
    -5
      mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
  5. +14
    -9
      mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h

+ 119
- 157
mindspore/lite/nnacl/int8/resize_int8.c View File

@@ -19,129 +19,131 @@
#include "nnacl/quantization/fixed_point.h"
#include "nnacl/errorcode.h"

int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape,
const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg,
int tid, int thread_num) {
if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) {
return NNACL_NULL_PTR;
}

int32_t in_n = input_shape[0];
int32_t in_h = input_shape[1];
int32_t in_w = input_shape[2];
int32_t in_c = input_shape[3];

int32_t new_height = output_shape[1];
int32_t new_width = output_shape[2];
int32_t height_scale = 0, width_scale = 0;
ComputeScale(in_h, new_height, align_corners, &height_scale);
ComputeScale(in_w, new_width, align_corners, &width_scale);

int n, h, w, c;
for (n = 0; n < in_n; n++) {
for (h = tid; h < new_height; h += thread_num) {
const int base_offset = 20;
int scaled_actual_y;
int bottom, top;
int scaled_bottom_weight, scaled_top_weight;
ComputeInterpolationArgs(h, height_scale, in_h, &scaled_actual_y, &bottom, &scaled_bottom_weight, &top,
&scaled_top_weight);
for (w = 0; w < new_width; w++) {
int scaled_actual_x;
int left, right;
int scaled_left_weight, scaled_right_weight;
ComputeInterpolationArgs(w, width_scale, in_w, &scaled_actual_x, &left, &scaled_left_weight, &right,
&scaled_right_weight);
for (c = 0; c < in_c; c++) {
const int64_t bottom_left_value =
(int64_t)(input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * scaled_bottom_weight *
scaled_left_weight;
const int64_t bottom_right_value =
(int64_t)(input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * scaled_bottom_weight *
scaled_right_weight;
const int64_t top_left_value = (int64_t)(input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) *
scaled_top_weight * scaled_left_weight;
const int64_t top_right_value = (int64_t)(input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) *
scaled_top_weight * scaled_right_weight;
const int64_t scaled_interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value;
int32_t interp_value;
if (scaled_interp_value >= 0) {
interp_value = (scaled_interp_value + (1 << 19)) / (1 << 20);
} else {
interp_value = (scaled_interp_value - (1 << 19)) / (1 << 20);
}

const int out_interp_value =
MultiplyByQuantizedMultiplier(interp_value, mul_arg->multiplier_, mul_arg->left_shift_ + base_offset,
mul_arg->right_shift_ - base_offset) +
quant_out->zp_;
int8_t out_value;
out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value;
out_value = out_value < INT8_MIN ? INT8_MIN : out_value;
output_data[offset(output_shape, n, h, w, c)] = out_value;
}
// Bilinear resize on int8 NHWC data — pure fixed-point path (used when the
// input zero-point is 0). The per-axis weights held in quant_arg are Q10
// fixed-point, so the four corner coefficients sum to exactly 1 << 20 and the
// accumulated value divided by 1 << 20 is a weighted average of int8 inputs;
// no extra clamping is required.
//   index: flattened (h * out_w + w) position of the first output pixel this
//          call produces; output_ptr must already point at that pixel.
//   count: number of output pixels produced per batch (thread partition).
// Returns NNACL_NULL_PTR on null buffers, NNACL_OK otherwise.
int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w,
                       int channel, int index, int count, ResizeQuantArg quant_arg) {
  if (input_ptr == NULL || output_ptr == NULL) {
    return NNACL_NULL_PTR;
  }
  int in_plane = in_h * in_w;
  int out_plane = out_h * out_w;
  for (int n = 0; n < batch; n++) {
    const int8_t *in_b_ptr = input_ptr + n * in_plane * channel;
    int8_t *out_b_ptr = output_ptr + n * out_plane * channel;
    for (int t = 0; t < count; t++) {
      int ori_out_h = (index + t) / out_w;
      int ori_out_w = (index + t) % out_w;
      int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w];
      int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w];
      int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h];
      int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h];
      // Q10 fractional distance to the lower neighbour on each axis.
      int32_t weight_x = quant_arg.x_axis_index_[ori_out_w] - (1 << 10) * x_lower_value;
      int32_t one_minus_weight_x = (1 << 10) - weight_x;
      int32_t weight_y = quant_arg.y_axis_index_[ori_out_h] - (1 << 10) * y_lower_value;
      int32_t one_minus_weight_y = (1 << 10) - weight_y;
      int64_t left_bottom_coef = (int64_t)(one_minus_weight_x * one_minus_weight_y);
      int64_t left_top_coef = (int64_t)(weight_y * one_minus_weight_x);
      int64_t right_bottom_coef = (int64_t)(weight_x * one_minus_weight_y);
      int64_t right_top_coef = (int64_t)(weight_x * weight_y);
      int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel;
      int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel;
      int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel;
      int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel;
      for (int c = 0; c < channel; c++) {
        int64_t out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index];
        int64_t out_left_top = left_top_coef * in_b_ptr[input_lt_index];
        int64_t out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index];
        int64_t out_right_top = right_top_coef * in_b_ptr[input_rt_index];
        int64_t out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top;
        // Round to nearest, half away from zero. A plain
        // (out_value + (1 << 19)) / (1 << 20) is biased for negative sums,
        // because C integer division truncates toward zero.
        if (out_value >= 0) {
          out_b_ptr[0] = (int8_t)((out_value + (1 << 19)) / (1 << 20));
        } else {
          out_b_ptr[0] = (int8_t)((out_value - (1 << 19)) / (1 << 20));
        }
        input_lb_index++;
        input_lt_index++;
        input_rb_index++;
        input_rt_index++;
        out_b_ptr++;
      }
    }
  }
  return NNACL_OK;
}

int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
const int *output_shape, const bool align_corners, QuantArg *quant_in,
QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num) {
if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) {
return NNACL_NULL_PTR;
}

int32_t in_n = input_shape[0];
int32_t in_h = input_shape[1];
int32_t in_w = input_shape[2];
int32_t in_c = input_shape[3];

int32_t new_height = output_shape[1];
int32_t new_width = output_shape[2];
float height_scale, width_scale;
int ret = ComputeScaleFloat(in_h, new_height, align_corners, &height_scale);
if (ret != NNACL_OK) {
return ret;
}
ret = ComputeScaleFloat(in_w, new_width, align_corners, &width_scale);
if (ret != NNACL_OK) {
return ret;
}

int n, h, w, c;
for (n = 0; n < in_n; n++) {
for (h = tid; h < new_height; h += thread_num) {
float actual_y;
int bottom, top;
float bottom_weight, top_weight;
ComputeInterpolationArgsFloatWeight(h, height_scale, in_h, &actual_y, &bottom, &bottom_weight, &top, &top_weight);
for (w = 0; w < new_width; w++) {
float actual_x;
int left, right;
float left_weight, right_weight;
ComputeInterpolationArgsFloatWeight(w, width_scale, in_w, &actual_x, &left, &left_weight, &right,
&right_weight);
for (c = 0; c < in_c; c++) {
float bottom_left_value = ((int32_t)input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) *
bottom_weight * left_weight;
float bottom_right_value = ((int32_t)input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) *
bottom_weight * right_weight;
float top_left_value =
((int32_t)input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * top_weight * left_weight;
float top_right_value =
((int32_t)input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * top_weight * right_weight;
float interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value;

const int out_interp_value = MultiplyByQuantizedMultiplier((int32_t)interp_value, mul_arg->multiplier_,
mul_arg->left_shift_, mul_arg->right_shift_) +
quant_out->zp_;
int8_t out_value;
out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value;
out_value = out_value < INT8_MIN ? INT8_MIN : out_value;
output_data[offset(output_shape, n, h, w, c)] = out_value;
}
int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w,
int out_h, int out_w, int channel, int index, int count,
ResizeFloatScaleQuantArg quant_arg) {
int in_plane = in_h * in_w;
int out_plane = out_h * out_w;
for (int n = 0; n < batch; n++) {
const int8_t *in_b_ptr = input_ptr + n * in_plane * channel;
int8_t *out_b_ptr = output_ptr + n * out_plane * channel;
for (int t = 0; t < count; t++) {
int ori_out_h = (index + t) / out_w;
int ori_out_w = (index + t) % out_w;
int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w];
int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w];
int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h];
int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h];
float weight_x = quant_arg.x_axis_index_[ori_out_w] - x_lower_value;
float one_minus_weight_x = 1 - weight_x;
float weight_y = quant_arg.y_axis_index_[ori_out_h] - y_lower_value;
float one_minus_weight_y = 1 - weight_y;
float left_bottom_coef = one_minus_weight_x * one_minus_weight_y;
float left_top_coef = weight_y * one_minus_weight_x;
float right_bottom_coef = weight_x * one_minus_weight_y;
float right_top_coef = weight_x * weight_y;
int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel;
int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel;
int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel;
int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel;
int c = 0;
#ifdef ENABLE_ARM
for (; c < channel; c += 4) {
float32x4_t in_lb;
in_lb[0] = (float)in_b_ptr[input_lb_index];
in_lb[1] = (float)in_b_ptr[input_lb_index + 1];
in_lb[2] = (float)in_b_ptr[input_lb_index + 2];
in_lb[3] = (float)in_b_ptr[input_lb_index + 3];
float32x4_t out_left_bottom = vmulq_n_f32(in_lb, left_bottom_coef);
float32x4_t in_lt;
in_lt[0] = (float)in_b_ptr[input_lt_index];
in_lt[1] = (float)in_b_ptr[input_lt_index + 1];
in_lt[2] = (float)in_b_ptr[input_lt_index + 2];
in_lt[3] = (float)in_b_ptr[input_lt_index + 3];
float32x4_t out_left_top = vmulq_n_f32(in_lt, left_top_coef);
float32x4_t in_rb;
in_rb[0] = (float)in_b_ptr[input_rb_index];
in_rb[1] = (float)in_b_ptr[input_rb_index + 1];
in_rb[2] = (float)in_b_ptr[input_rb_index + 2];
in_rb[3] = (float)in_b_ptr[input_rb_index + 3];
float32x4_t out_right_bottom = vmulq_n_f32(in_rb, right_bottom_coef);
float32x4_t in_rt;
in_rt[0] = (float)in_b_ptr[input_rt_index];
in_rt[1] = (float)in_b_ptr[input_rt_index + 1];
in_rt[2] = (float)in_b_ptr[input_rt_index + 2];
in_rt[3] = (float)in_b_ptr[input_rt_index + 3];
float32x4_t out_right_top = vmulq_n_f32(in_rt, right_top_coef);
float32x4_t out_value1 = vaddq_f32(out_left_bottom, out_left_top);
float32x4_t out_value2 = vaddq_f32(out_right_top, out_right_bottom);
float32x4_t out_value = vaddq_f32(out_value1, out_value2);
out_b_ptr[0] = (int8_t)(out_value[0]);
out_b_ptr[1] = (int8_t)(out_value[1]);
out_b_ptr[2] = (int8_t)(out_value[2]);
out_b_ptr[3] = (int8_t)(out_value[3]);
input_lb_index += 4;
input_lt_index += 4;
input_rb_index += 4;
input_rt_index += 4;
out_b_ptr += 4;
}
#endif
for (; c < channel; c++) {
float out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index];
float out_left_top = left_top_coef * in_b_ptr[input_lt_index];
float out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index];
float out_right_top = right_top_coef * in_b_ptr[input_rt_index];
float out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top;
out_b_ptr[0] = (int8_t)(out_value);
input_lb_index++;
input_lt_index++;
input_rb_index++;
input_rt_index++;
out_b_ptr++;
}
}
}
@@ -175,46 +177,6 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat
return NNACL_OK;
}

// Converts an (input size, output size) pair into a Q10 fixed-point resize
// ratio, rounded to nearest. With align_corners the corner pixels of the two
// grids are mapped onto each other, which uses (size - 1) on both axes.
// Does nothing when out_value is 0 (caller keeps the previous *scale).
void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) {
  if (out_value == 0) {
    return;
  }
  if (align_corners && out_value > 1) {
    *scale = ((in_value - 1) * (1 << 10) + (out_value - 1) / 2) / (out_value - 1);
  } else {
    *scale = (in_value * (1 << 10) + out_value / 2) / out_value;
  }
}

// For one output coordinate, derives the two neighbouring source coordinates
// and their Q10 interpolation weights (the weights sum to 1 << 10).
// low is clamped to >= 0 and high to <= size - 1.
void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
                              int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight) {
  *scaled_pos = pos * scale;
  const int base = *scaled_pos / (1 << 10);
  if (base > 0) {
    *low = base;
  } else {
    *low = 0;
  }
  const int32_t frac = *scaled_pos - (1 << 10) * (*low);
  *scaled_low_weight = (1 << 10) - frac;
  if (base + 1 < size) {
    *high = base + 1;
  } else {
    *high = size - 1;
  }
  *scaled_high_weight = frac;
}

// Float counterpart of ComputeScale: writes in/out size ratio into *scale.
// align_corners uses (size - 1) on both axes so corner pixels coincide.
// Returns NNACL_ERRCODE_DIVISOR_ZERO when out_value is 0, NNACL_OK otherwise.
int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale) {
  if (out_value == 0) {
    return NNACL_ERRCODE_DIVISOR_ZERO;
  }
  if (align_corners && out_value > 1) {
    *scale = (float)(in_value - 1) / (out_value - 1);
  } else {
    *scale = (float)in_value / out_value;
  }
  return NNACL_OK;
}

void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
int32_t *low, float *low_weight, int32_t *high, float *high_weight) {
*actual_pos = pos * scale;
*low = *actual_pos > 0 ? floor(*actual_pos) : 0;
*low_weight = 1.0 - (*actual_pos - *low);
*high = *low + 1 < size ? *low + 1 : size - 1;
*high_weight = *actual_pos - (*low);
}

void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
int32_t *nearest) {
if (new_size == 0) {


+ 5
- 16
mindspore/lite/nnacl/int8/resize_int8.h View File

@@ -27,23 +27,12 @@
#ifdef __cplusplus
extern "C" {
#endif
int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape,
const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg,
int tid, int thread_num);
int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w,
int channel, int index, int count, ResizeQuantArg quant_arg);

int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
const int *output_shape, const bool align_corners, QuantArg *quant_in,
QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num);

void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale);

void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight);

int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale);

void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
int32_t *low, float *low_weight, int32_t *high, float *high_weight);
int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w,
int out_h, int out_w, int channel, int index, int count,
ResizeFloatScaleQuantArg quant_arg);

int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
const int *output_shape, const bool align_corners, int tid, int thread_num);


+ 22
- 0
mindspore/lite/nnacl/quantization/quantize.h View File

@@ -260,6 +260,28 @@ typedef struct LeakyReluQuantArg {
int element_num;
} LeakyReluQuantArg;

// Precomputed lookup tables for the fixed-point (Q10) int8 bilinear resize
// path. The arrays are heap-allocated, indexed by output column (x_*) or
// output row (y_*), and owned/freed by the kernel that fills them.
typedef struct ResizeQuantArg {
  int32_t ratio_x_;        // Q10 horizontal ratio (in_w / out_w, rounded)
  int32_t ratio_y_;        // Q10 vertical ratio (in_h / out_h, rounded)
  int32_t *x_axis_index_;  // per output column: Q10 scaled source x = col * ratio_x_
  int32_t *x_axis_lower_;  // per output column: left source column, clamped >= 0
  int32_t *x_axis_upper_;  // per output column: right source column, clamped <= in_w - 1
  int32_t *y_axis_index_;  // per output row: Q10 scaled source y = row * ratio_y_
  int32_t *y_axis_lower_;  // per output row: lower source row, clamped >= 0
  int32_t *y_axis_upper_;  // per output row: upper source row, clamped <= in_h - 1
} ResizeQuantArg;

// Float-weight counterpart of ResizeQuantArg, used when the input zero-point
// is non-zero. Index arrays hold the exact (fractional) source coordinate;
// lower/upper arrays hold the clamped integer neighbours.
typedef struct ResizeFloatScaleQuantArg {
  float ratio_x_;          // horizontal ratio (in_w / out_w)
  float ratio_y_;          // vertical ratio (in_h / out_h)
  float *x_axis_index_;    // per output column: exact source x = col * ratio_x_
  int32_t *x_axis_lower_;  // per output column: floor(source x), clamped >= 0
  int32_t *x_axis_upper_;  // per output column: floor(source x) + 1, clamped <= in_w - 1
  float *y_axis_index_;    // per output row: exact source y = row * ratio_y_
  int32_t *y_axis_lower_;  // per output row: floor(source y), clamped >= 0
  int32_t *y_axis_upper_;  // per output row: floor(source y) + 1, clamped <= in_h - 1
} ResizeFloatScaleQuantArg;

#ifdef __cplusplus
extern "C" {
#endif


+ 239
- 5
mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc View File

@@ -16,6 +16,7 @@

#include "src/runtime/kernel/arm/int8/resize_int8.h"
#include <vector>
#include <algorithm>
#include "include/errorcode.h"
#include "nnacl/int8/resize_int8.h"
#include "schema/model_generated.h"
@@ -32,6 +33,40 @@ using mindspore::lite::RET_OK;
using mindspore::lite::KernelRegistrar;

namespace mindspore::kernel {
// Releases the fixed-point interpolation tables allocated by
// InitResizeQuantArg(). Pointers are reset to nullptr afterwards so a repeated
// call (e.g. ReSize after a failure followed by the destructor) cannot
// double-free.
void ResizeInt8CPUKernel::FreeResizeBiLinear() {
  free(resize_quant_arg_.x_axis_index_);
  resize_quant_arg_.x_axis_index_ = nullptr;
  free(resize_quant_arg_.x_axis_lower_);
  resize_quant_arg_.x_axis_lower_ = nullptr;
  free(resize_quant_arg_.x_axis_upper_);
  resize_quant_arg_.x_axis_upper_ = nullptr;
  free(resize_quant_arg_.y_axis_index_);
  resize_quant_arg_.y_axis_index_ = nullptr;
  free(resize_quant_arg_.y_axis_lower_);
  resize_quant_arg_.y_axis_lower_ = nullptr;
  free(resize_quant_arg_.y_axis_upper_);
  resize_quant_arg_.y_axis_upper_ = nullptr;
}

// Releases the float-weight interpolation tables allocated by
// InitResizeFloatQuantArg(). Pointers are reset to nullptr afterwards so a
// repeated call cannot double-free.
void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() {
  free(resize_float_quant_arg_.x_axis_index_);
  resize_float_quant_arg_.x_axis_index_ = nullptr;
  free(resize_float_quant_arg_.x_axis_lower_);
  resize_float_quant_arg_.x_axis_lower_ = nullptr;
  free(resize_float_quant_arg_.x_axis_upper_);
  resize_float_quant_arg_.x_axis_upper_ = nullptr;
  free(resize_float_quant_arg_.y_axis_index_);
  resize_float_quant_arg_.y_axis_index_ = nullptr;
  free(resize_float_quant_arg_.y_axis_lower_);
  resize_float_quant_arg_.y_axis_lower_ = nullptr;
  free(resize_float_quant_arg_.y_axis_upper_);
  resize_float_quant_arg_.y_axis_upper_ = nullptr;
}

// Frees the interpolation tables (bilinear path only) and the quant parameter
// objects. The fixed-point vs. float table choice mirrors ReSize(), which
// selects the path on quant_in_->zp_ == 0; guard against quant_in_ being null
// when initialization never completed.
// NOTE(review): the table pointers are only assigned inside ReSize(); if the
// kernel is destroyed before ReSize() ran they may be uninitialized — confirm
// the members are zero-initialized at construction.
ResizeInt8CPUKernel::~ResizeInt8CPUKernel() {
  if (method_ == schema::ResizeMethod_LINEAR && quant_in_ != nullptr) {
    if (quant_in_->zp_ == 0) {
      FreeResizeBiLinear();
    } else {
      FreeFloatResizeBiLinear();
    }
  }
  delete quant_out_;
  quant_out_ = nullptr;
  delete quant_in_;
  quant_in_ = nullptr;
  delete multiplier_;
  multiplier_ = nullptr;
}

int ResizeInt8CPUKernel::Init() {
auto ret = ResizeBaseCPUKernel::Init();
if (ret != RET_OK) {
@@ -58,6 +93,195 @@ int ResizeInt8CPUKernel::Init() {
return ReSize();
}

int ResizeInt8CPUKernel::InitResizeQuantArg() {
auto out_shape = out_tensors_.front()->shape();
resize_quant_arg_.x_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_quant_arg_.x_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc x axis index array failed.";
return RET_ERROR;
}
resize_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_quant_arg_.x_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_lower_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_quant_arg_.x_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_upper_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.y_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_quant_arg_.y_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_index_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_quant_arg_.y_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_lower_ array failed.";
return RET_ERROR;
}
resize_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_quant_arg_.y_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_upper_ array failed.";
return RET_ERROR;
}
return RET_OK;
}

// Computes the Q10 fixed-point x/y resize ratios (input size over output
// size, rounded to nearest). When align_corners_ is set and the output axis
// has more than one pixel, the corner pixels of the two grids are mapped onto
// each other by using (size - 1) on both axes.
int ResizeInt8CPUKernel::CalRatio() {
  auto in_tensor = in_tensors_.front();
  auto out_tensor = out_tensors_.front();
  const auto in_width = in_tensor->Width();
  const auto in_height = in_tensor->Height();
  const auto out_width = out_tensor->Width();
  const auto out_height = out_tensor->Height();
  if (align_corners_ && out_width > 1) {
    resize_quant_arg_.ratio_x_ = ((1 << 10) * (in_width - 1) + (out_width - 1) / 2) / (out_width - 1);
  } else {
    resize_quant_arg_.ratio_x_ = ((1 << 10) * in_width + out_width / 2) / out_width;
  }
  if (align_corners_ && out_height > 1) {
    resize_quant_arg_.ratio_y_ = ((1 << 10) * (in_height - 1) + (out_height - 1) / 2) / (out_height - 1);
  } else {
    resize_quant_arg_.ratio_y_ = ((1 << 10) * in_height + out_height / 2) / out_height;
  }
  return RET_OK;
}

// Fills the per-output-row and per-output-column lookup tables: the Q10
// scaled source coordinate plus the clamped lower/upper neighbour indices.
int ResizeInt8CPUKernel::CalInterpolationRange() {
  const int in_height = in_tensors_.front()->Height();
  const int in_width = in_tensors_.front()->Width();
  const int out_height = out_tensors_.front()->Height();
  const int out_width = out_tensors_.front()->Width();
  for (int i = 0; i < out_height; ++i) {
    const int32_t scaled_index = i * resize_quant_arg_.ratio_y_;
    const int lower = scaled_index / (1 << 10);
    resize_quant_arg_.y_axis_index_[i] = scaled_index;
    resize_quant_arg_.y_axis_lower_[i] = std::max(lower, 0);
    resize_quant_arg_.y_axis_upper_[i] = std::min(lower + 1, in_height - 1);
  }
  for (int i = 0; i < out_width; ++i) {
    const int32_t scaled_index = i * resize_quant_arg_.ratio_x_;
    const int lower = scaled_index / (1 << 10);
    resize_quant_arg_.x_axis_index_[i] = scaled_index;
    resize_quant_arg_.x_axis_lower_[i] = std::max(lower, 0);
    resize_quant_arg_.x_axis_upper_[i] = std::min(lower + 1, in_width - 1);
  }
  return RET_OK;
}

int ResizeInt8CPUKernel::InitResizeFloatQuantArg() {
auto out_shape = out_tensors_.front()->shape();
resize_float_quant_arg_.x_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[2] * sizeof(float)));
if (resize_float_quant_arg_.x_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc x axis index array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_float_quant_arg_.x_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_lower_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t)));
if (resize_float_quant_arg_.x_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc x_axis_upper_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.y_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[1] * sizeof(float)));
if (resize_float_quant_arg_.y_axis_index_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_index_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_float_quant_arg_.y_axis_lower_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_lower_ array failed.";
return RET_ERROR;
}
resize_float_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t)));
if (resize_float_quant_arg_.y_axis_upper_ == nullptr) {
MS_LOG(ERROR) << "malloc y_axis_upper_ array failed.";
return RET_ERROR;
}
return RET_OK;
}

// Computes the float x/y resize ratios (input size over output size). When
// align_corners_ is set and the output axis has more than one pixel, the
// corner pixels of the two grids are mapped onto each other via (size - 1).
int ResizeInt8CPUKernel::CalFloatRatio() {
  auto in_tensor = in_tensors_.front();
  auto out_tensor = out_tensors_.front();
  const auto in_width = in_tensor->Width();
  const auto in_height = in_tensor->Height();
  const auto out_width = out_tensor->Width();
  const auto out_height = out_tensor->Height();
  if (align_corners_ && out_width > 1) {
    resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width - 1) / (out_width - 1);
  } else {
    resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width) / out_width;
  }
  if (align_corners_ && out_height > 1) {
    resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height - 1) / (out_height - 1);
  } else {
    resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height) / out_height;
  }
  return RET_OK;
}

// Fills the per-output-row and per-output-column lookup tables for the float
// path: the exact scaled source coordinate plus the clamped floor/ceil
// neighbour indices.
int ResizeInt8CPUKernel::CalFloatInterpolationRange() {
  const int in_height = in_tensors_.front()->Height();
  const int in_width = in_tensors_.front()->Width();
  const int out_height = out_tensors_.front()->Height();
  const int out_width = out_tensors_.front()->Width();
  for (int i = 0; i < out_height; ++i) {
    const float scaled_index = i * resize_float_quant_arg_.ratio_y_;
    const int lower_index = std::floor(scaled_index);
    resize_float_quant_arg_.y_axis_index_[i] = scaled_index;
    resize_float_quant_arg_.y_axis_lower_[i] = std::max(lower_index, 0);
    resize_float_quant_arg_.y_axis_upper_[i] = std::min(lower_index + 1, in_height - 1);
  }
  for (int i = 0; i < out_width; ++i) {
    const float scaled_index = i * resize_float_quant_arg_.ratio_x_;
    const int lower_index = std::floor(scaled_index);
    resize_float_quant_arg_.x_axis_index_[i] = scaled_index;
    resize_float_quant_arg_.x_axis_lower_[i] = std::max(lower_index, 0);
    resize_float_quant_arg_.x_axis_upper_[i] = std::min(lower_index + 1, in_width - 1);
  }
  return RET_OK;
}

// Prepares the fixed-point bilinear path: allocates the lookup tables, then
// fills in the Q10 ratios and the per-output-pixel interpolation ranges.
int ResizeInt8CPUKernel::InitResizeBiLinear() {
  int ret = InitResizeQuantArg();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Resize Int8 Op Resize Failed.";
    return ret;
  }
  if ((ret = CalRatio()) != RET_OK) {
    MS_LOG(ERROR) << "Cal ratio Failed.";
    return ret;
  }
  if ((ret = CalInterpolationRange()) != RET_OK) {
    MS_LOG(ERROR) << "Cal range of interpolation Failed.";
    return ret;
  }
  return RET_OK;
}

// Prepares the float-weight bilinear path: allocates the lookup tables, then
// fills in the float ratios and the per-output-pixel interpolation ranges.
int ResizeInt8CPUKernel::InitFloatResizeBiLinear() {
  int ret = InitResizeFloatQuantArg();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Resize Int8 Op Resize Failed.";
    return ret;
  }
  if ((ret = CalFloatRatio()) != RET_OK) {
    MS_LOG(ERROR) << "Cal ratio Failed.";
    return ret;
  }
  if ((ret = CalFloatInterpolationRange()) != RET_OK) {
    MS_LOG(ERROR) << "Cal range of interpolation Failed.";
    return ret;
  }
  return RET_OK;
}

// Rebuilds the interpolation tables after a shape change. Only the bilinear
// method needs precomputed tables; a zero input zero-point selects the pure
// fixed-point path, anything else the float-weight path.
int ResizeInt8CPUKernel::ReSize() {
  if (method_ != schema::ResizeMethod_LINEAR) {
    return RET_OK;
  }
  return quant_in_->zp_ == 0 ? InitResizeBiLinear() : InitFloatResizeBiLinear();
}

int ResizeInt8Impl(void *cdata, int task_id) {
auto resize = reinterpret_cast<ResizeInt8CPUKernel *>(cdata);
auto error_code = resize->RunImpl(task_id);
@@ -87,14 +311,24 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
int ret = 0;
switch (method_) {
case static_cast<int>(schema::ResizeMethod_LINEAR): {
auto out_tensor = out_tensors_.front();
auto out_c = out_tensor->Channel();
int plane = out_tensor->Height() * out_tensor->Width();
int num = UP_DIV(plane, context_->thread_num_);
int start_index = task_id * num;
int count = plane - start_index;
count = count > num ? num : count;
auto out_ptr = output_data + start_index * out_c;
if (quant_in_->zp_ == 0) {
ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_);
ret =
ResizeBilinearInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), input->Width(),
out_tensor->Height(), out_tensor->Width(), out_c, start_index, count, resize_quant_arg_);
} else {
ret = ResizeBilinearInt8WithFloatWeight(input_data, output_data, input_shape.data(),
out_tensors_[0]->shape().data(), align_corners_, quant_in_, quant_out_,
multiplier_, task_id, context_->thread_num_);
ret = ResizeBilinearWithFloatScaleInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(),
input->Width(), out_tensor->Height(), out_tensor->Width(), out_c,
start_index, count, resize_float_quant_arg_);
}

break;
}
case static_cast<int>(schema::ResizeMethod_NEAREST): {


+ 14
- 9
mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h View File

@@ -32,17 +32,20 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel {
const mindspore::lite::PrimitiveC *primitive)
: ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

~ResizeInt8CPUKernel() {
delete quant_out_;
quant_out_ = nullptr;
delete quant_in_;
quant_in_ = nullptr;
delete multiplier_;
multiplier_ = nullptr;
}
~ResizeInt8CPUKernel() override;

int Init() override;
int ReSize() override { return 0; };
int ReSize() override;
int InitResizeBiLinear();
int InitFloatResizeBiLinear();
int InitResizeQuantArg();
int CalRatio();
int CalInterpolationRange();
void FreeResizeBiLinear();
int InitResizeFloatQuantArg();
int CalFloatRatio();
int CalFloatInterpolationRange();
void FreeFloatResizeBiLinear();
int Run() override;
int RunImpl(int task_id);

@@ -50,6 +53,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel {
QuantArg *quant_in_;
QuantArg *quant_out_;
QuantMulArg *multiplier_;
ResizeQuantArg resize_quant_arg_;
ResizeFloatScaleQuantArg resize_float_quant_arg_;
};
} // namespace mindspore::kernel



Loading…
Cancel
Save