diff --git a/mindspore/lite/nnacl/int8/resize_int8.c b/mindspore/lite/nnacl/int8/resize_int8.c index 84d937f5a6..c8c3cc61af 100644 --- a/mindspore/lite/nnacl/int8/resize_int8.c +++ b/mindspore/lite/nnacl/int8/resize_int8.c @@ -19,129 +19,131 @@ #include "nnacl/quantization/fixed_point.h" #include "nnacl/errorcode.h" -int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape, - const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg, - int tid, int thread_num) { - if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) { - return NNACL_NULL_PTR; - } - - int32_t in_n = input_shape[0]; - int32_t in_h = input_shape[1]; - int32_t in_w = input_shape[2]; - int32_t in_c = input_shape[3]; - - int32_t new_height = output_shape[1]; - int32_t new_width = output_shape[2]; - int32_t height_scale = 0, width_scale = 0; - ComputeScale(in_h, new_height, align_corners, &height_scale); - ComputeScale(in_w, new_width, align_corners, &width_scale); - - int n, h, w, c; - for (n = 0; n < in_n; n++) { - for (h = tid; h < new_height; h += thread_num) { - const int base_offset = 20; - int scaled_actual_y; - int bottom, top; - int scaled_bottom_weight, scaled_top_weight; - ComputeInterpolationArgs(h, height_scale, in_h, &scaled_actual_y, &bottom, &scaled_bottom_weight, &top, - &scaled_top_weight); - for (w = 0; w < new_width; w++) { - int scaled_actual_x; - int left, right; - int scaled_left_weight, scaled_right_weight; - ComputeInterpolationArgs(w, width_scale, in_w, &scaled_actual_x, &left, &scaled_left_weight, &right, - &scaled_right_weight); - for (c = 0; c < in_c; c++) { - const int64_t bottom_left_value = - (int64_t)(input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * scaled_bottom_weight * - scaled_left_weight; - const int64_t bottom_right_value = - (int64_t)(input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * 
scaled_bottom_weight * - scaled_right_weight; - const int64_t top_left_value = (int64_t)(input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * - scaled_top_weight * scaled_left_weight; - const int64_t top_right_value = (int64_t)(input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * - scaled_top_weight * scaled_right_weight; - const int64_t scaled_interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value; - int32_t interp_value; - if (scaled_interp_value >= 0) { - interp_value = (scaled_interp_value + (1 << 19)) / (1 << 20); - } else { - interp_value = (scaled_interp_value - (1 << 19)) / (1 << 20); - } - - const int out_interp_value = - MultiplyByQuantizedMultiplier(interp_value, mul_arg->multiplier_, mul_arg->left_shift_ + base_offset, - mul_arg->right_shift_ - base_offset) + - quant_out->zp_; - int8_t out_value; - out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value; - out_value = out_value < INT8_MIN ? INT8_MIN : out_value; - output_data[offset(output_shape, n, h, w, c)] = out_value; - } +int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w, + int channel, int index, int count, ResizeQuantArg quant_arg) { + int in_plane = in_h * in_w; + int out_plane = out_h * out_w; + for (int n = 0; n < batch; n++) { + const int8_t *in_b_ptr = input_ptr + n * in_plane * channel; + int8_t *out_b_ptr = output_ptr + n * out_plane * channel; + for (int t = 0; t < count; t++) { + int ori_out_h = (index + t) / out_w; + int ori_out_w = (index + t) % out_w; + int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w]; + int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w]; + int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h]; + int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h]; + int32_t weight_x = quant_arg.x_axis_index_[ori_out_w] - (1 << 10) * x_lower_value; + int32_t one_minus_weight_x = (1 << 10) - weight_x; + 
int32_t weight_y = quant_arg.y_axis_index_[ori_out_h] - (1 << 10) * y_lower_value; + int32_t one_minus_weight_y = (1 << 10) - weight_y; + int64_t left_bottom_coef = (int64_t)(one_minus_weight_x * one_minus_weight_y); + int64_t left_top_coef = (int64_t)(weight_y * one_minus_weight_x); + int64_t right_bottom_coef = (int64_t)(weight_x * one_minus_weight_y); + int64_t right_top_coef = (int64_t)(weight_x * weight_y); + int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel; + int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel; + int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel; + int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel; + int c = 0; + for (; c < channel; c++) { + int64_t out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index]; + int64_t out_left_top = left_top_coef * in_b_ptr[input_lt_index]; + int64_t out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index]; + int64_t out_right_top = right_top_coef * in_b_ptr[input_rt_index]; + int64_t out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top; + out_b_ptr[0] = (int8_t)((out_value + (1 << 19)) / (1 << 20)); + input_lb_index++; + input_lt_index++; + input_rb_index++; + input_rt_index++; + out_b_ptr++; } } } return NNACL_OK; } -int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape, - const int *output_shape, const bool align_corners, QuantArg *quant_in, - QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num) { - if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) { - return NNACL_NULL_PTR; - } - - int32_t in_n = input_shape[0]; - int32_t in_h = input_shape[1]; - int32_t in_w = input_shape[2]; - int32_t in_c = input_shape[3]; - - int32_t new_height = output_shape[1]; - int32_t new_width = output_shape[2]; - float height_scale, width_scale; - int ret = ComputeScaleFloat(in_h, new_height, 
align_corners, &height_scale); - if (ret != NNACL_OK) { - return ret; - } - ret = ComputeScaleFloat(in_w, new_width, align_corners, &width_scale); - if (ret != NNACL_OK) { - return ret; - } - - int n, h, w, c; - for (n = 0; n < in_n; n++) { - for (h = tid; h < new_height; h += thread_num) { - float actual_y; - int bottom, top; - float bottom_weight, top_weight; - ComputeInterpolationArgsFloatWeight(h, height_scale, in_h, &actual_y, &bottom, &bottom_weight, &top, &top_weight); - for (w = 0; w < new_width; w++) { - float actual_x; - int left, right; - float left_weight, right_weight; - ComputeInterpolationArgsFloatWeight(w, width_scale, in_w, &actual_x, &left, &left_weight, &right, - &right_weight); - for (c = 0; c < in_c; c++) { - float bottom_left_value = ((int32_t)input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * - bottom_weight * left_weight; - float bottom_right_value = ((int32_t)input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * - bottom_weight * right_weight; - float top_left_value = - ((int32_t)input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * top_weight * left_weight; - float top_right_value = - ((int32_t)input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * top_weight * right_weight; - float interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value; - - const int out_interp_value = MultiplyByQuantizedMultiplier((int32_t)interp_value, mul_arg->multiplier_, - mul_arg->left_shift_, mul_arg->right_shift_) + - quant_out->zp_; - int8_t out_value; - out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value; - out_value = out_value < INT8_MIN ? 
INT8_MIN : out_value; - output_data[offset(output_shape, n, h, w, c)] = out_value; - } +int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, + int out_h, int out_w, int channel, int index, int count, + ResizeFloatScaleQuantArg quant_arg) { + int in_plane = in_h * in_w; + int out_plane = out_h * out_w; + for (int n = 0; n < batch; n++) { + const int8_t *in_b_ptr = input_ptr + n * in_plane * channel; + int8_t *out_b_ptr = output_ptr + n * out_plane * channel; + for (int t = 0; t < count; t++) { + int ori_out_h = (index + t) / out_w; + int ori_out_w = (index + t) % out_w; + int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w]; + int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w]; + int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h]; + int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h]; + float weight_x = quant_arg.x_axis_index_[ori_out_w] - x_lower_value; + float one_minus_weight_x = 1 - weight_x; + float weight_y = quant_arg.y_axis_index_[ori_out_h] - y_lower_value; + float one_minus_weight_y = 1 - weight_y; + float left_bottom_coef = one_minus_weight_x * one_minus_weight_y; + float left_top_coef = weight_y * one_minus_weight_x; + float right_bottom_coef = weight_x * one_minus_weight_y; + float right_top_coef = weight_x * weight_y; + int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel; + int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel; + int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel; + int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel; + int c = 0; +#ifdef ENABLE_ARM + for (; c < channel; c += 4) { + float32x4_t in_lb; + in_lb[0] = (float)in_b_ptr[input_lb_index]; + in_lb[1] = (float)in_b_ptr[input_lb_index + 1]; + in_lb[2] = (float)in_b_ptr[input_lb_index + 2]; + in_lb[3] = (float)in_b_ptr[input_lb_index + 3]; + float32x4_t out_left_bottom = vmulq_n_f32(in_lb, left_bottom_coef); + float32x4_t 
in_lt; + in_lt[0] = (float)in_b_ptr[input_lt_index]; + in_lt[1] = (float)in_b_ptr[input_lt_index + 1]; + in_lt[2] = (float)in_b_ptr[input_lt_index + 2]; + in_lt[3] = (float)in_b_ptr[input_lt_index + 3]; + float32x4_t out_left_top = vmulq_n_f32(in_lt, left_top_coef); + float32x4_t in_rb; + in_rb[0] = (float)in_b_ptr[input_rb_index]; + in_rb[1] = (float)in_b_ptr[input_rb_index + 1]; + in_rb[2] = (float)in_b_ptr[input_rb_index + 2]; + in_rb[3] = (float)in_b_ptr[input_rb_index + 3]; + float32x4_t out_right_bottom = vmulq_n_f32(in_rb, right_bottom_coef); + float32x4_t in_rt; + in_rt[0] = (float)in_b_ptr[input_rt_index]; + in_rt[1] = (float)in_b_ptr[input_rt_index + 1]; + in_rt[2] = (float)in_b_ptr[input_rt_index + 2]; + in_rt[3] = (float)in_b_ptr[input_rt_index + 3]; + float32x4_t out_right_top = vmulq_n_f32(in_rt, right_top_coef); + float32x4_t out_value1 = vaddq_f32(out_left_bottom, out_left_top); + float32x4_t out_value2 = vaddq_f32(out_right_top, out_right_bottom); + float32x4_t out_value = vaddq_f32(out_value1, out_value2); + out_b_ptr[0] = (int8_t)(out_value[0]); + out_b_ptr[1] = (int8_t)(out_value[1]); + out_b_ptr[2] = (int8_t)(out_value[2]); + out_b_ptr[3] = (int8_t)(out_value[3]); + input_lb_index += 4; + input_lt_index += 4; + input_rb_index += 4; + input_rt_index += 4; + out_b_ptr += 4; + } +#endif + for (; c < channel; c++) { + float out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index]; + float out_left_top = left_top_coef * in_b_ptr[input_lt_index]; + float out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index]; + float out_right_top = right_top_coef * in_b_ptr[input_rt_index]; + float out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top; + out_b_ptr[0] = (int8_t)(out_value); + input_lb_index++; + input_lt_index++; + input_rb_index++; + input_rt_index++; + out_b_ptr++; } } } @@ -175,46 +177,6 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat return NNACL_OK; } -void 
ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) { - if (out_value == 0) { - return; - } - *scale = (in_value * (1 << 10) + out_value / 2) / out_value; - if (align_corners && out_value > 1) { - *scale = ((in_value - 1) * (1 << 10) + (out_value - 1) / 2) / (out_value - 1); - } -} - -void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos, - int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight) { - *scaled_pos = pos * scale; - int scale_back = *scaled_pos / (1 << 10); - *low = scale_back > 0 ? scale_back : 0; - *scaled_low_weight = (1 << 10) - (*scaled_pos - (1 << 10) * (*low)); - *high = scale_back + 1 < size ? scale_back + 1 : size - 1; - *scaled_high_weight = *scaled_pos - (1 << 10) * (*low); -} - -int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale) { - if (out_value == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } - *scale = (float)in_value / out_value; - if (align_corners && out_value > 1) { - *scale = (float)(in_value - 1) / (out_value - 1); - } - return NNACL_OK; -} - -void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos, - int32_t *low, float *low_weight, int32_t *high, float *high_weight) { - *actual_pos = pos * scale; - *low = *actual_pos > 0 ? floor(*actual_pos) : 0; - *low_weight = 1.0 - (*actual_pos - *low); - *high = *low + 1 < size ? 
*low + 1 : size - 1; - *high_weight = *actual_pos - (*low); -} - void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners, int32_t *nearest) { if (new_size == 0) { diff --git a/mindspore/lite/nnacl/int8/resize_int8.h b/mindspore/lite/nnacl/int8/resize_int8.h index d13c78c90c..438bba4d7e 100644 --- a/mindspore/lite/nnacl/int8/resize_int8.h +++ b/mindspore/lite/nnacl/int8/resize_int8.h @@ -27,23 +27,12 @@ #ifdef __cplusplus extern "C" { #endif -int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape, - const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg, - int tid, int thread_num); +int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w, + int channel, int index, int count, ResizeQuantArg quant_arg); -int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape, - const int *output_shape, const bool align_corners, QuantArg *quant_in, - QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num); - -void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale); - -void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos, - int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight); - -int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale); - -void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos, - int32_t *low, float *low_weight, int32_t *high, float *high_weight); +int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, + int out_h, int out_w, int channel, int index, int count, + 
ResizeFloatScaleQuantArg quant_arg); int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape, const bool align_corners, int tid, int thread_num); diff --git a/mindspore/lite/nnacl/quantization/quantize.h b/mindspore/lite/nnacl/quantization/quantize.h index f0e8f9a873..b43bbc1931 100644 --- a/mindspore/lite/nnacl/quantization/quantize.h +++ b/mindspore/lite/nnacl/quantization/quantize.h @@ -260,6 +260,28 @@ typedef struct LeakyReluQuantArg { int element_num; } LeakyReluQuantArg; +typedef struct ResizeQuantArg { + int32_t ratio_x_; + int32_t ratio_y_; + int32_t *x_axis_index_; + int32_t *x_axis_lower_; + int32_t *x_axis_upper_; + int32_t *y_axis_index_; + int32_t *y_axis_lower_; + int32_t *y_axis_upper_; +} ResizeQuantArg; + +typedef struct ResizeFloatScaleQuantArg { + float ratio_x_; + float ratio_y_; + float *x_axis_index_; + int32_t *x_axis_lower_; + int32_t *x_axis_upper_; + float *y_axis_index_; + int32_t *y_axis_lower_; + int32_t *y_axis_upper_; +} ResizeFloatScaleQuantArg; + #ifdef __cplusplus extern "C" { #endif diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc index b8710e2f73..8c37c43920 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc @@ -16,6 +16,7 @@ #include "src/runtime/kernel/arm/int8/resize_int8.h" #include +#include #include "include/errorcode.h" #include "nnacl/int8/resize_int8.h" #include "schema/model_generated.h" @@ -32,6 +33,40 @@ using mindspore::lite::RET_OK; using mindspore::lite::KernelRegistrar; namespace mindspore::kernel { +void ResizeInt8CPUKernel::FreeResizeBiLinear() { + free(resize_quant_arg_.x_axis_index_); + free(resize_quant_arg_.x_axis_lower_); + free(resize_quant_arg_.x_axis_upper_); + free(resize_quant_arg_.y_axis_index_); + free(resize_quant_arg_.y_axis_lower_); + 
free(resize_quant_arg_.y_axis_upper_); +} + +void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() { + free(resize_float_quant_arg_.x_axis_index_); + free(resize_float_quant_arg_.x_axis_lower_); + free(resize_float_quant_arg_.x_axis_upper_); + free(resize_float_quant_arg_.y_axis_index_); + free(resize_float_quant_arg_.y_axis_lower_); + free(resize_float_quant_arg_.y_axis_upper_); +} + +ResizeInt8CPUKernel::~ResizeInt8CPUKernel() { + if (method_ == schema::ResizeMethod_LINEAR) { + if (quant_in_->zp_ == 0) { + FreeResizeBiLinear(); + } else { + FreeFloatResizeBiLinear(); + } + } + delete quant_out_; + quant_out_ = nullptr; + delete quant_in_; + quant_in_ = nullptr; + delete multiplier_; + multiplier_ = nullptr; +} + int ResizeInt8CPUKernel::Init() { auto ret = ResizeBaseCPUKernel::Init(); if (ret != RET_OK) { @@ -58,6 +93,195 @@ int ResizeInt8CPUKernel::Init() { return ReSize(); } +int ResizeInt8CPUKernel::InitResizeQuantArg() { + auto out_shape = out_tensors_.front()->shape(); + resize_quant_arg_.x_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); + if (resize_quant_arg_.x_axis_index_ == nullptr) { + MS_LOG(ERROR) << "malloc x axis index array failed."; + return RET_ERROR; + } + resize_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); + if (resize_quant_arg_.x_axis_lower_ == nullptr) { + MS_LOG(ERROR) << "malloc x_axis_lower_ array failed."; + return RET_ERROR; + } + resize_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); + if (resize_quant_arg_.x_axis_upper_ == nullptr) { + MS_LOG(ERROR) << "malloc x_axis_upper_ array failed."; + return RET_ERROR; + } + resize_quant_arg_.y_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); + if (resize_quant_arg_.y_axis_index_ == nullptr) { + MS_LOG(ERROR) << "malloc y_axis_index_ array failed."; + return RET_ERROR; + } + resize_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); + if 
(resize_quant_arg_.y_axis_lower_ == nullptr) { + MS_LOG(ERROR) << "malloc y_axis_lower_ array failed."; + return RET_ERROR; + } + resize_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); + if (resize_quant_arg_.y_axis_upper_ == nullptr) { + MS_LOG(ERROR) << "malloc y_axis_upper_ array failed."; + return RET_ERROR; + } + return RET_OK; +} + +int ResizeInt8CPUKernel::CalRatio() { + auto in_tensor = in_tensors_.front(); + auto in_width = in_tensor->Width(); + auto in_height = in_tensor->Height(); + auto out_tensor = out_tensors_.front(); + auto out_width = out_tensor->Width(); + auto out_height = out_tensor->Height(); + resize_quant_arg_.ratio_x_ = ((1 << 10) * in_width + out_width / 2) / out_width; + resize_quant_arg_.ratio_y_ = ((1 << 10) * in_height + out_height / 2) / out_height; + if (align_corners_ && out_width > 1) { + resize_quant_arg_.ratio_x_ = ((1 << 10) * (in_width - 1) + (out_width - 1) / 2) / (out_width - 1); + } + if (align_corners_ && out_height > 1) { + resize_quant_arg_.ratio_y_ = ((1 << 10) * (in_height - 1) + (out_height - 1) / 2) / (out_height - 1); + } + return RET_OK; +} + +int ResizeInt8CPUKernel::CalInterpolationRange() { + for (int i = 0; i < out_tensors_.front()->Height(); ++i) { + int32_t scaled_index = i * resize_quant_arg_.ratio_y_; + resize_quant_arg_.y_axis_index_[i] = scaled_index; + resize_quant_arg_.y_axis_lower_[i] = std::max(scaled_index / (1 << 10), 0); + resize_quant_arg_.y_axis_upper_[i] = std::min(scaled_index / (1 << 10) + 1, in_tensors_.front()->Height() - 1); + } + for (int i = 0; i < out_tensors_.front()->Width(); ++i) { + int32_t scaled_index = i * resize_quant_arg_.ratio_x_; + resize_quant_arg_.x_axis_index_[i] = scaled_index; + resize_quant_arg_.x_axis_lower_[i] = std::max(scaled_index / (1 << 10), 0); + resize_quant_arg_.x_axis_upper_[i] = std::min(scaled_index / (1 << 10) + 1, in_tensors_.front()->Width() - 1); + } + return RET_OK; +} + +int 
ResizeInt8CPUKernel::InitResizeFloatQuantArg() { + auto out_shape = out_tensors_.front()->shape(); + resize_float_quant_arg_.x_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[2] * sizeof(float))); + if (resize_float_quant_arg_.x_axis_index_ == nullptr) { + MS_LOG(ERROR) << "malloc x axis index array failed."; + return RET_ERROR; + } + resize_float_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); + if (resize_float_quant_arg_.x_axis_lower_ == nullptr) { + MS_LOG(ERROR) << "malloc x_axis_lower_ array failed."; + return RET_ERROR; + } + resize_float_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); + if (resize_float_quant_arg_.x_axis_upper_ == nullptr) { + MS_LOG(ERROR) << "malloc x_axis_upper_ array failed."; + return RET_ERROR; + } + resize_float_quant_arg_.y_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[1] * sizeof(float))); + if (resize_float_quant_arg_.y_axis_index_ == nullptr) { + MS_LOG(ERROR) << "malloc y_axis_index_ array failed."; + return RET_ERROR; + } + resize_float_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); + if (resize_float_quant_arg_.y_axis_lower_ == nullptr) { + MS_LOG(ERROR) << "malloc y_axis_lower_ array failed."; + return RET_ERROR; + } + resize_float_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); + if (resize_float_quant_arg_.y_axis_upper_ == nullptr) { + MS_LOG(ERROR) << "malloc y_axis_upper_ array failed."; + return RET_ERROR; + } + return RET_OK; +} + +int ResizeInt8CPUKernel::CalFloatRatio() { + auto in_tensor = in_tensors_.front(); + auto in_width = in_tensor->Width(); + auto in_height = in_tensor->Height(); + auto out_tensor = out_tensors_.front(); + auto out_width = out_tensor->Width(); + auto out_height = out_tensor->Height(); + resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width) / out_width; + resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height) / out_height; + if 
(align_corners_ && out_width > 1) { + resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width - 1) / (out_width - 1); + } + if (align_corners_ && out_height > 1) { + resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height - 1) / (out_height - 1); + } + return RET_OK; +} + +int ResizeInt8CPUKernel::CalFloatInterpolationRange() { + for (int i = 0; i < out_tensors_.front()->Height(); ++i) { + float scaled_index = i * resize_float_quant_arg_.ratio_y_; + int lower_index = std::floor(scaled_index); + resize_float_quant_arg_.y_axis_index_[i] = scaled_index; + resize_float_quant_arg_.y_axis_lower_[i] = std::max(lower_index, 0); + resize_float_quant_arg_.y_axis_upper_[i] = std::min(lower_index + 1, in_tensors_.front()->Height() - 1); + } + for (int i = 0; i < out_tensors_.front()->Width(); ++i) { + float scaled_index = i * resize_float_quant_arg_.ratio_x_; + int lower_index = std::floor(scaled_index); + resize_float_quant_arg_.x_axis_index_[i] = scaled_index; + resize_float_quant_arg_.x_axis_lower_[i] = std::max(lower_index, 0); + resize_float_quant_arg_.x_axis_upper_[i] = std::min(lower_index + 1, in_tensors_.front()->Width() - 1); + } + return RET_OK; +} + +int ResizeInt8CPUKernel::InitResizeBiLinear() { + auto ret = InitResizeQuantArg(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Resize Int8 Op Resize Failed."; + return ret; + } + ret = CalRatio(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Cal ratio Failed."; + return ret; + } + ret = CalInterpolationRange(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Cal range of interpolation Failed."; + return ret; + } + return RET_OK; +} + +int ResizeInt8CPUKernel::InitFloatResizeBiLinear() { + auto ret = InitResizeFloatQuantArg(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Resize Int8 Op Resize Failed."; + return ret; + } + ret = CalFloatRatio(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Cal ratio Failed."; + return ret; + } + ret = CalFloatInterpolationRange(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Cal range of 
interpolation Failed."; + return ret; + } + return RET_OK; +} + +int ResizeInt8CPUKernel::ReSize() { + if (method_ == schema::ResizeMethod_LINEAR) { + if (quant_in_->zp_ == 0) { + return InitResizeBiLinear(); + } else { + return InitFloatResizeBiLinear(); + } + } + return RET_OK; +} + int ResizeInt8Impl(void *cdata, int task_id) { auto resize = reinterpret_cast<ResizeInt8CPUKernel *>(cdata); auto error_code = resize->RunImpl(task_id); @@ -87,14 +311,24 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) { int ret = 0; switch (method_) { case static_cast<int>(schema::ResizeMethod_LINEAR): { + auto out_tensor = out_tensors_.front(); + auto out_c = out_tensor->Channel(); + int plane = out_tensor->Height() * out_tensor->Width(); + int num = UP_DIV(plane, context_->thread_num_); + int start_index = task_id * num; + int count = plane - start_index; + count = count > num ? num : count; + auto out_ptr = output_data + start_index * out_c; if (quant_in_->zp_ == 0) { - ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(), - align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_); + ret = + ResizeBilinearInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), input->Width(), + out_tensor->Height(), out_tensor->Width(), out_c, start_index, count, resize_quant_arg_); } else { - ret = ResizeBilinearInt8WithFloatWeight(input_data, output_data, input_shape.data(), - out_tensors_[0]->shape().data(), align_corners_, quant_in_, quant_out_, - multiplier_, task_id, context_->thread_num_); + ret = ResizeBilinearWithFloatScaleInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), + input->Width(), out_tensor->Height(), out_tensor->Width(), out_c, + start_index, count, resize_float_quant_arg_); } + break; } case static_cast<int>(schema::ResizeMethod_NEAREST): { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h index ba858c5072..8ee8dd7bfc 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h @@ -32,17 +32,20 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { const mindspore::lite::PrimitiveC *primitive) : ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ResizeInt8CPUKernel() { - delete quant_out_; - quant_out_ = nullptr; - delete quant_in_; - quant_in_ = nullptr; - delete multiplier_; - multiplier_ = nullptr; - } + ~ResizeInt8CPUKernel() override; int Init() override; - int ReSize() override { return 0; }; + int ReSize() override; + int InitResizeBiLinear(); + int InitFloatResizeBiLinear(); + int InitResizeQuantArg(); + int CalRatio(); + int CalInterpolationRange(); + void FreeResizeBiLinear(); + int InitResizeFloatQuantArg(); + int CalFloatRatio(); + int CalFloatInterpolationRange(); + void FreeFloatResizeBiLinear(); int Run() override; int RunImpl(int task_id); @@ -50,6 +53,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { QuantArg *quant_in_; QuantArg *quant_out_; QuantMulArg *multiplier_; + ResizeQuantArg resize_quant_arg_; + ResizeFloatScaleQuantArg resize_float_quant_arg_; }; } // namespace mindspore::kernel