| @@ -19,129 +19,131 @@ | |||
| #include "nnacl/quantization/fixed_point.h" | |||
| #include "nnacl/errorcode.h" | |||
| int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape, | |||
| const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg, | |||
| int tid, int thread_num) { | |||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| int32_t in_n = input_shape[0]; | |||
| int32_t in_h = input_shape[1]; | |||
| int32_t in_w = input_shape[2]; | |||
| int32_t in_c = input_shape[3]; | |||
| int32_t new_height = output_shape[1]; | |||
| int32_t new_width = output_shape[2]; | |||
| int32_t height_scale = 0, width_scale = 0; | |||
| ComputeScale(in_h, new_height, align_corners, &height_scale); | |||
| ComputeScale(in_w, new_width, align_corners, &width_scale); | |||
| int n, h, w, c; | |||
| for (n = 0; n < in_n; n++) { | |||
| for (h = tid; h < new_height; h += thread_num) { | |||
| const int base_offset = 20; | |||
| int scaled_actual_y; | |||
| int bottom, top; | |||
| int scaled_bottom_weight, scaled_top_weight; | |||
| ComputeInterpolationArgs(h, height_scale, in_h, &scaled_actual_y, &bottom, &scaled_bottom_weight, &top, | |||
| &scaled_top_weight); | |||
| for (w = 0; w < new_width; w++) { | |||
| int scaled_actual_x; | |||
| int left, right; | |||
| int scaled_left_weight, scaled_right_weight; | |||
| ComputeInterpolationArgs(w, width_scale, in_w, &scaled_actual_x, &left, &scaled_left_weight, &right, | |||
| &scaled_right_weight); | |||
| for (c = 0; c < in_c; c++) { | |||
| const int64_t bottom_left_value = | |||
| (int64_t)(input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * scaled_bottom_weight * | |||
| scaled_left_weight; | |||
| const int64_t bottom_right_value = | |||
| (int64_t)(input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * scaled_bottom_weight * | |||
| scaled_right_weight; | |||
| const int64_t top_left_value = (int64_t)(input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * | |||
| scaled_top_weight * scaled_left_weight; | |||
| const int64_t top_right_value = (int64_t)(input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * | |||
| scaled_top_weight * scaled_right_weight; | |||
| const int64_t scaled_interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value; | |||
| int32_t interp_value; | |||
| if (scaled_interp_value >= 0) { | |||
| interp_value = (scaled_interp_value + (1 << 19)) / (1 << 20); | |||
| } else { | |||
| interp_value = (scaled_interp_value - (1 << 19)) / (1 << 20); | |||
| } | |||
| const int out_interp_value = | |||
| MultiplyByQuantizedMultiplier(interp_value, mul_arg->multiplier_, mul_arg->left_shift_ + base_offset, | |||
| mul_arg->right_shift_ - base_offset) + | |||
| quant_out->zp_; | |||
| int8_t out_value; | |||
| out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value; | |||
| out_value = out_value < INT8_MIN ? INT8_MIN : out_value; | |||
| output_data[offset(output_shape, n, h, w, c)] = out_value; | |||
| } | |||
// Bilinear int8 resize using precomputed Q10 fixed-point lookup tables.
// input_ptr/output_ptr: NHWC int8 data; index/count select the slice of output
// pixels (linear position within out_h * out_w) this call produces, so the
// work can be split across threads. Returns NNACL_NULL_PTR on null inputs,
// NNACL_OK otherwise.
int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w,
                       int channel, int index, int count, ResizeQuantArg quant_arg) {
  if (input_ptr == NULL || output_ptr == NULL || quant_arg.x_axis_index_ == NULL || quant_arg.x_axis_lower_ == NULL ||
      quant_arg.x_axis_upper_ == NULL || quant_arg.y_axis_index_ == NULL || quant_arg.y_axis_lower_ == NULL ||
      quant_arg.y_axis_upper_ == NULL) {
    return NNACL_NULL_PTR;
  }
  int in_plane = in_h * in_w;
  int out_plane = out_h * out_w;
  for (int n = 0; n < batch; n++) {
    const int8_t *in_b_ptr = input_ptr + n * in_plane * channel;
    int8_t *out_b_ptr = output_ptr + n * out_plane * channel;
    for (int t = 0; t < count; t++) {
      // (index + t) is the linear position of this output pixel within H*W.
      int ori_out_h = (index + t) / out_w;
      int ori_out_w = (index + t) % out_w;
      int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w];
      int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w];
      int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h];
      int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h];
      // x_axis_index_ holds the source position scaled by (1 << 10), so the
      // fractional weight is the Q10 remainder past the lower integer index.
      int32_t weight_x = quant_arg.x_axis_index_[ori_out_w] - (1 << 10) * x_lower_value;
      int32_t one_minus_weight_x = (1 << 10) - weight_x;
      int32_t weight_y = quant_arg.y_axis_index_[ori_out_h] - (1 << 10) * y_lower_value;
      int32_t one_minus_weight_y = (1 << 10) - weight_y;
      // The four corner coefficients sum to exactly (1 << 20).
      int64_t left_bottom_coef = (int64_t)(one_minus_weight_x * one_minus_weight_y);
      int64_t left_top_coef = (int64_t)(weight_y * one_minus_weight_x);
      int64_t right_bottom_coef = (int64_t)(weight_x * one_minus_weight_y);
      int64_t right_top_coef = (int64_t)(weight_x * weight_y);
      int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel;
      int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel;
      int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel;
      int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel;
      for (int c = 0; c < channel; c++) {
        int64_t out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index];
        int64_t out_left_top = left_top_coef * in_b_ptr[input_lt_index];
        int64_t out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index];
        int64_t out_right_top = right_top_coef * in_b_ptr[input_rt_index];
        int64_t out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top;
        // Round to nearest, ties away from zero. C integer division truncates
        // toward zero, so negative sums need the offset subtracted instead of
        // added (otherwise negatives are biased upward by up to one).
        if (out_value >= 0) {
          out_b_ptr[0] = (int8_t)((out_value + (1 << 19)) / (1 << 20));
        } else {
          out_b_ptr[0] = (int8_t)((out_value - (1 << 19)) / (1 << 20));
        }
        input_lb_index++;
        input_lt_index++;
        input_rb_index++;
        input_rt_index++;
        out_b_ptr++;
      }
    }
  }
  return NNACL_OK;
}
| int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape, | |||
| const int *output_shape, const bool align_corners, QuantArg *quant_in, | |||
| QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num) { | |||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| int32_t in_n = input_shape[0]; | |||
| int32_t in_h = input_shape[1]; | |||
| int32_t in_w = input_shape[2]; | |||
| int32_t in_c = input_shape[3]; | |||
| int32_t new_height = output_shape[1]; | |||
| int32_t new_width = output_shape[2]; | |||
| float height_scale, width_scale; | |||
| int ret = ComputeScaleFloat(in_h, new_height, align_corners, &height_scale); | |||
| if (ret != NNACL_OK) { | |||
| return ret; | |||
| } | |||
| ret = ComputeScaleFloat(in_w, new_width, align_corners, &width_scale); | |||
| if (ret != NNACL_OK) { | |||
| return ret; | |||
| } | |||
| int n, h, w, c; | |||
| for (n = 0; n < in_n; n++) { | |||
| for (h = tid; h < new_height; h += thread_num) { | |||
| float actual_y; | |||
| int bottom, top; | |||
| float bottom_weight, top_weight; | |||
| ComputeInterpolationArgsFloatWeight(h, height_scale, in_h, &actual_y, &bottom, &bottom_weight, &top, &top_weight); | |||
| for (w = 0; w < new_width; w++) { | |||
| float actual_x; | |||
| int left, right; | |||
| float left_weight, right_weight; | |||
| ComputeInterpolationArgsFloatWeight(w, width_scale, in_w, &actual_x, &left, &left_weight, &right, | |||
| &right_weight); | |||
| for (c = 0; c < in_c; c++) { | |||
| float bottom_left_value = ((int32_t)input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * | |||
| bottom_weight * left_weight; | |||
| float bottom_right_value = ((int32_t)input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * | |||
| bottom_weight * right_weight; | |||
| float top_left_value = | |||
| ((int32_t)input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * top_weight * left_weight; | |||
| float top_right_value = | |||
| ((int32_t)input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * top_weight * right_weight; | |||
| float interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value; | |||
| const int out_interp_value = MultiplyByQuantizedMultiplier((int32_t)interp_value, mul_arg->multiplier_, | |||
| mul_arg->left_shift_, mul_arg->right_shift_) + | |||
| quant_out->zp_; | |||
| int8_t out_value; | |||
| out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value; | |||
| out_value = out_value < INT8_MIN ? INT8_MIN : out_value; | |||
| output_data[offset(output_shape, n, h, w, c)] = out_value; | |||
| } | |||
// Bilinear int8 resize using precomputed float-weight lookup tables.
// Same threading contract as ResizeBilinearInt8: index/count pick this call's
// slice of output pixels (linear position within out_h * out_w).
int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w,
                                     int out_h, int out_w, int channel, int index, int count,
                                     ResizeFloatScaleQuantArg quant_arg) {
  int in_plane = in_h * in_w;
  int out_plane = out_h * out_w;
  for (int n = 0; n < batch; n++) {
    const int8_t *in_b_ptr = input_ptr + n * in_plane * channel;
    int8_t *out_b_ptr = output_ptr + n * out_plane * channel;
    for (int t = 0; t < count; t++) {
      int ori_out_h = (index + t) / out_w;
      int ori_out_w = (index + t) % out_w;
      int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w];
      int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w];
      int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h];
      int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h];
      // x/y_axis_index_ hold the fractional source position; subtracting the
      // lower integer index leaves the interpolation weight in [0, 1).
      float weight_x = quant_arg.x_axis_index_[ori_out_w] - x_lower_value;
      float one_minus_weight_x = 1 - weight_x;
      float weight_y = quant_arg.y_axis_index_[ori_out_h] - y_lower_value;
      float one_minus_weight_y = 1 - weight_y;
      float left_bottom_coef = one_minus_weight_x * one_minus_weight_y;
      float left_top_coef = weight_y * one_minus_weight_x;
      float right_bottom_coef = weight_x * one_minus_weight_y;
      float right_top_coef = weight_x * weight_y;
      int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel;
      int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel;
      int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel;
      int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel;
      int c = 0;
#ifdef ENABLE_ARM
      // Vectorize 4 channels per iteration. The bound must be c + 4 <= channel
      // (NOT c < channel): with c < channel and c += 4 the last iteration would
      // read and write up to 3 elements past the channel dimension when
      // channel is not a multiple of 4. The scalar loop below handles the
      // remainder.
      for (; c + 4 <= channel; c += 4) {
        float32x4_t in_lb;
        in_lb[0] = (float)in_b_ptr[input_lb_index];
        in_lb[1] = (float)in_b_ptr[input_lb_index + 1];
        in_lb[2] = (float)in_b_ptr[input_lb_index + 2];
        in_lb[3] = (float)in_b_ptr[input_lb_index + 3];
        float32x4_t out_left_bottom = vmulq_n_f32(in_lb, left_bottom_coef);
        float32x4_t in_lt;
        in_lt[0] = (float)in_b_ptr[input_lt_index];
        in_lt[1] = (float)in_b_ptr[input_lt_index + 1];
        in_lt[2] = (float)in_b_ptr[input_lt_index + 2];
        in_lt[3] = (float)in_b_ptr[input_lt_index + 3];
        float32x4_t out_left_top = vmulq_n_f32(in_lt, left_top_coef);
        float32x4_t in_rb;
        in_rb[0] = (float)in_b_ptr[input_rb_index];
        in_rb[1] = (float)in_b_ptr[input_rb_index + 1];
        in_rb[2] = (float)in_b_ptr[input_rb_index + 2];
        in_rb[3] = (float)in_b_ptr[input_rb_index + 3];
        float32x4_t out_right_bottom = vmulq_n_f32(in_rb, right_bottom_coef);
        float32x4_t in_rt;
        in_rt[0] = (float)in_b_ptr[input_rt_index];
        in_rt[1] = (float)in_b_ptr[input_rt_index + 1];
        in_rt[2] = (float)in_b_ptr[input_rt_index + 2];
        in_rt[3] = (float)in_b_ptr[input_rt_index + 3];
        float32x4_t out_right_top = vmulq_n_f32(in_rt, right_top_coef);
        float32x4_t out_value1 = vaddq_f32(out_left_bottom, out_left_top);
        float32x4_t out_value2 = vaddq_f32(out_right_top, out_right_bottom);
        float32x4_t out_value = vaddq_f32(out_value1, out_value2);
        // NOTE(review): float->int8 conversion truncates toward zero here
        // (same as the scalar tail); no rounding/saturation is applied.
        out_b_ptr[0] = (int8_t)(out_value[0]);
        out_b_ptr[1] = (int8_t)(out_value[1]);
        out_b_ptr[2] = (int8_t)(out_value[2]);
        out_b_ptr[3] = (int8_t)(out_value[3]);
        input_lb_index += 4;
        input_lt_index += 4;
        input_rb_index += 4;
        input_rt_index += 4;
        out_b_ptr += 4;
      }
#endif
      // Scalar tail: remaining channels (all of them on non-ARM builds).
      for (; c < channel; c++) {
        float out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index];
        float out_left_top = left_top_coef * in_b_ptr[input_lt_index];
        float out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index];
        float out_right_top = right_top_coef * in_b_ptr[input_rt_index];
        float out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top;
        out_b_ptr[0] = (int8_t)(out_value);
        input_lb_index++;
        input_lt_index++;
        input_rb_index++;
        input_rt_index++;
        out_b_ptr++;
      }
    }
  }
| @@ -175,46 +177,6 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat | |||
| return NNACL_OK; | |||
| } | |||
// Computes the Q10 fixed-point resize ratio in_value / out_value, rounded to
// nearest. With align_corners and more than one output element, the ratio is
// taken over (size - 1) so the corner samples map onto each other. Leaves
// *scale untouched when out_value is zero.
void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) {
  if (out_value == 0) {
    return;
  }
  int32_t numerator = in_value;
  int32_t denominator = out_value;
  if (align_corners && out_value > 1) {
    numerator = in_value - 1;
    denominator = out_value - 1;
  }
  *scale = (numerator * (1 << 10) + denominator / 2) / denominator;
}
// Derives the interpolation neighbors and Q10 weights for one output index.
// scale is a Q10 fixed-point ratio; *scaled_pos = pos * scale is the source
// position scaled by (1 << 10). *low/*high are the clamped neighbor indices,
// and *scaled_low_weight + *scaled_high_weight == (1 << 10).
void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
                              int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight) {
  const int32_t scaled = pos * scale;
  const int32_t base = scaled / (1 << 10);
  const int32_t lo = (base > 0) ? base : 0;
  const int32_t frac = scaled - (1 << 10) * lo;
  *scaled_pos = scaled;
  *low = lo;
  *scaled_low_weight = (1 << 10) - frac;
  *high = (base + 1 < size) ? (base + 1) : (size - 1);
  *scaled_high_weight = frac;
}
// Computes the floating-point resize ratio in_value / out_value. With
// align_corners and more than one output element, the ratio is taken over
// (size - 1) instead. Returns NNACL_ERRCODE_DIVISOR_ZERO when out_value is
// zero, NNACL_OK otherwise.
int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale) {
  if (out_value == 0) {
    return NNACL_ERRCODE_DIVISOR_ZERO;
  }
  if (align_corners && out_value > 1) {
    *scale = (float)(in_value - 1) / (out_value - 1);
  } else {
    *scale = (float)in_value / out_value;
  }
  return NNACL_OK;
}
// Derives the interpolation neighbors and float weights for one output index.
// *actual_pos = pos * scale is the (fractional) source position; *low/*high
// are the clamped neighbor indices, and *low_weight + *high_weight == 1.
void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
                                         int32_t *low, float *low_weight, int32_t *high, float *high_weight) {
  const float fpos = pos * scale;
  int32_t lo = 0;
  if (fpos > 0) {
    lo = (int32_t)floor(fpos);
  }
  *actual_pos = fpos;
  *low = lo;
  *low_weight = 1.0 - (fpos - lo);
  *high = (lo + 1 < size) ? (lo + 1) : (size - 1);
  *high_weight = fpos - lo;
}
| void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners, | |||
| int32_t *nearest) { | |||
| if (new_size == 0) { | |||
| @@ -27,23 +27,12 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape, | |||
| const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg, | |||
| int tid, int thread_num); | |||
| int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w, | |||
| int channel, int index, int count, ResizeQuantArg quant_arg); | |||
| int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape, | |||
| const int *output_shape, const bool align_corners, QuantArg *quant_in, | |||
| QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num); | |||
| void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale); | |||
| void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos, | |||
| int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight); | |||
| int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale); | |||
| void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos, | |||
| int32_t *low, float *low_weight, int32_t *high, float *high_weight); | |||
| int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, | |||
| int out_h, int out_w, int channel, int index, int count, | |||
| ResizeFloatScaleQuantArg quant_arg); | |||
| int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape, | |||
| const int *output_shape, const bool align_corners, int tid, int thread_num); | |||
| @@ -260,6 +260,28 @@ typedef struct LeakyReluQuantArg { | |||
| int element_num; | |||
| } LeakyReluQuantArg; | |||
// Precomputed per-axis lookup tables for fixed-point bilinear int8 resize.
// Ratios and index values are stored in Q10 fixed point (value * (1 << 10)).
// The lower/upper arrays hold the clamped integer neighbor indices used for
// interpolation (one entry per output column/row).
typedef struct ResizeQuantArg {
  int32_t ratio_x_;        // horizontal in/out ratio, Q10 fixed point
  int32_t ratio_y_;        // vertical in/out ratio, Q10 fixed point
  int32_t *x_axis_index_;  // per output column: source x position, Q10
  int32_t *x_axis_lower_;  // per output column: lower neighbor column index
  int32_t *x_axis_upper_;  // per output column: upper neighbor column index
  int32_t *y_axis_index_;  // per output row: source y position, Q10
  int32_t *y_axis_lower_;  // per output row: lower neighbor row index
  int32_t *y_axis_upper_;  // per output row: upper neighbor row index
} ResizeQuantArg;
// Precomputed per-axis lookup tables for float-weight bilinear int8 resize.
// Same layout as ResizeQuantArg, but ratios and source positions are kept as
// floats instead of Q10 fixed point.
typedef struct ResizeFloatScaleQuantArg {
  float ratio_x_;          // horizontal in/out ratio
  float ratio_y_;          // vertical in/out ratio
  float *x_axis_index_;    // per output column: fractional source x position
  int32_t *x_axis_lower_;  // per output column: lower neighbor column index
  int32_t *x_axis_upper_;  // per output column: upper neighbor column index
  float *y_axis_index_;    // per output row: fractional source y position
  int32_t *y_axis_lower_;  // per output row: lower neighbor row index
  int32_t *y_axis_upper_;  // per output row: upper neighbor row index
} ResizeFloatScaleQuantArg;
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| @@ -16,6 +16,7 @@ | |||
| #include "src/runtime/kernel/arm/int8/resize_int8.h" | |||
| #include <vector> | |||
| #include <algorithm> | |||
| #include "include/errorcode.h" | |||
| #include "nnacl/int8/resize_int8.h" | |||
| #include "schema/model_generated.h" | |||
| @@ -32,6 +33,40 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::lite::KernelRegistrar; | |||
| namespace mindspore::kernel { | |||
| void ResizeInt8CPUKernel::FreeResizeBiLinear() { | |||
| free(resize_quant_arg_.x_axis_index_); | |||
| free(resize_quant_arg_.x_axis_lower_); | |||
| free(resize_quant_arg_.x_axis_upper_); | |||
| free(resize_quant_arg_.y_axis_index_); | |||
| free(resize_quant_arg_.y_axis_lower_); | |||
| free(resize_quant_arg_.y_axis_upper_); | |||
| } | |||
| void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() { | |||
| free(resize_float_quant_arg_.x_axis_index_); | |||
| free(resize_float_quant_arg_.x_axis_lower_); | |||
| free(resize_float_quant_arg_.x_axis_upper_); | |||
| free(resize_float_quant_arg_.y_axis_index_); | |||
| free(resize_float_quant_arg_.y_axis_lower_); | |||
| free(resize_float_quant_arg_.y_axis_upper_); | |||
| } | |||
// Destructor: releases the bilinear lookup tables (only allocated for the
// LINEAR method) and the quantization argument objects.
// NOTE(review): the zp_ == 0 test mirrors the dispatch in ReSize()/RunImpl()
// that chooses between the fixed-point and float-weight paths; if that
// dispatch condition ever changes, this must change with it. It also assumes
// quant_in_ is non-null whenever method_ is LINEAR — verify against Init().
ResizeInt8CPUKernel::~ResizeInt8CPUKernel() {
  if (method_ == schema::ResizeMethod_LINEAR) {
    if (quant_in_->zp_ == 0) {
      FreeResizeBiLinear();
    } else {
      FreeFloatResizeBiLinear();
    }
  }
  delete quant_out_;
  quant_out_ = nullptr;
  delete quant_in_;
  quant_in_ = nullptr;
  delete multiplier_;
  multiplier_ = nullptr;
}
| int ResizeInt8CPUKernel::Init() { | |||
| auto ret = ResizeBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| @@ -58,6 +93,195 @@ int ResizeInt8CPUKernel::Init() { | |||
| return ReSize(); | |||
| } | |||
| int ResizeInt8CPUKernel::InitResizeQuantArg() { | |||
| auto out_shape = out_tensors_.front()->shape(); | |||
| resize_quant_arg_.x_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||
| if (resize_quant_arg_.x_axis_index_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc x axis index array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||
| if (resize_quant_arg_.x_axis_lower_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc x_axis_lower_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||
| if (resize_quant_arg_.x_axis_upper_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc x_axis_upper_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_quant_arg_.y_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||
| if (resize_quant_arg_.y_axis_index_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc y_axis_index_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||
| if (resize_quant_arg_.y_axis_lower_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc y_axis_lower_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||
| if (resize_quant_arg_.y_axis_upper_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc y_axis_upper_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::CalRatio() { | |||
| auto in_tensor = in_tensors_.front(); | |||
| auto in_width = in_tensor->Width(); | |||
| auto in_height = in_tensor->Height(); | |||
| auto out_tensor = out_tensors_.front(); | |||
| auto out_width = out_tensor->Width(); | |||
| auto out_height = out_tensor->Height(); | |||
| resize_quant_arg_.ratio_x_ = ((1 << 10) * in_width + out_width / 2) / out_width; | |||
| resize_quant_arg_.ratio_y_ = ((1 << 10) * in_height + out_height / 2) / out_height; | |||
| if (align_corners_ && out_width > 1) { | |||
| resize_quant_arg_.ratio_x_ = ((1 << 10) * (in_width - 1) + (out_width - 1) / 2) / (out_width - 1); | |||
| } | |||
| if (align_corners_ && out_height > 1) { | |||
| resize_quant_arg_.ratio_y_ = ((1 << 10) * (in_height - 1) + (out_height - 1) / 2) / (out_height - 1); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::CalInterpolationRange() { | |||
| for (int i = 0; i < out_tensors_.front()->Height(); ++i) { | |||
| int32_t scaled_index = i * resize_quant_arg_.ratio_y_; | |||
| resize_quant_arg_.y_axis_index_[i] = scaled_index; | |||
| resize_quant_arg_.y_axis_lower_[i] = std::max(scaled_index / (1 << 10), 0); | |||
| resize_quant_arg_.y_axis_upper_[i] = std::min(scaled_index / (1 << 10) + 1, in_tensors_.front()->Height() - 1); | |||
| } | |||
| for (int i = 0; i < out_tensors_.front()->Width(); ++i) { | |||
| int32_t scaled_index = i * resize_quant_arg_.ratio_x_; | |||
| resize_quant_arg_.x_axis_index_[i] = scaled_index; | |||
| resize_quant_arg_.x_axis_lower_[i] = std::max(scaled_index / (1 << 10), 0); | |||
| resize_quant_arg_.x_axis_upper_[i] = std::min(scaled_index / (1 << 10) + 1, in_tensors_.front()->Width() - 1); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::InitResizeFloatQuantArg() { | |||
| auto out_shape = out_tensors_.front()->shape(); | |||
| resize_float_quant_arg_.x_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[2] * sizeof(float))); | |||
| if (resize_float_quant_arg_.x_axis_index_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc x axis index array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_float_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||
| if (resize_float_quant_arg_.x_axis_lower_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc x_axis_lower_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_float_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||
| if (resize_float_quant_arg_.x_axis_upper_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc x_axis_upper_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_float_quant_arg_.y_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[1] * sizeof(float))); | |||
| if (resize_float_quant_arg_.y_axis_index_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc y_axis_index_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_float_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||
| if (resize_float_quant_arg_.y_axis_lower_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc y_axis_lower_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| resize_float_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||
| if (resize_float_quant_arg_.y_axis_upper_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc y_axis_upper_ array failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::CalFloatRatio() { | |||
| auto in_tensor = in_tensors_.front(); | |||
| auto in_width = in_tensor->Width(); | |||
| auto in_height = in_tensor->Height(); | |||
| auto out_tensor = out_tensors_.front(); | |||
| auto out_width = out_tensor->Width(); | |||
| auto out_height = out_tensor->Height(); | |||
| resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width) / out_width; | |||
| resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height) / out_height; | |||
| if (align_corners_ && out_width > 1) { | |||
| resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width - 1) / (out_width - 1); | |||
| } | |||
| if (align_corners_ && out_height > 1) { | |||
| resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height - 1) / (out_height - 1); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::CalFloatInterpolationRange() { | |||
| for (int i = 0; i < out_tensors_.front()->Height(); ++i) { | |||
| float scaled_index = i * resize_float_quant_arg_.ratio_y_; | |||
| int lower_index = std::floor(scaled_index); | |||
| resize_float_quant_arg_.y_axis_index_[i] = scaled_index; | |||
| resize_float_quant_arg_.y_axis_lower_[i] = std::max(lower_index, 0); | |||
| resize_float_quant_arg_.y_axis_upper_[i] = std::min(lower_index + 1, in_tensors_.front()->Height() - 1); | |||
| } | |||
| for (int i = 0; i < out_tensors_.front()->Width(); ++i) { | |||
| float scaled_index = i * resize_float_quant_arg_.ratio_x_; | |||
| int lower_index = std::floor(scaled_index); | |||
| resize_float_quant_arg_.x_axis_index_[i] = scaled_index; | |||
| resize_float_quant_arg_.x_axis_lower_[i] = std::max(lower_index, 0); | |||
| resize_float_quant_arg_.x_axis_upper_[i] = std::min(lower_index + 1, in_tensors_.front()->Width() - 1); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::InitResizeBiLinear() { | |||
| auto ret = InitResizeQuantArg(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Resize Int8 Op Resize Failed."; | |||
| return ret; | |||
| } | |||
| ret = CalRatio(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Cal ratio Failed."; | |||
| return ret; | |||
| } | |||
| ret = CalInterpolationRange(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Cal range of interpolation Failed."; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::InitFloatResizeBiLinear() { | |||
| auto ret = InitResizeFloatQuantArg(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Resize Int8 Op Resize Failed."; | |||
| return ret; | |||
| } | |||
| ret = CalFloatRatio(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Cal ratio Failed."; | |||
| return ret; | |||
| } | |||
| ret = CalFloatInterpolationRange(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Cal range of interpolation Failed."; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8CPUKernel::ReSize() { | |||
| if (method_ == schema::ResizeMethod_LINEAR) { | |||
| if (quant_in_->zp_ == 0) { | |||
| return InitResizeBiLinear(); | |||
| } else { | |||
| return InitFloatResizeBiLinear(); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeInt8Impl(void *cdata, int task_id) { | |||
| auto resize = reinterpret_cast<ResizeInt8CPUKernel *>(cdata); | |||
| auto error_code = resize->RunImpl(task_id); | |||
| @@ -87,14 +311,24 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) { | |||
| int ret = 0; | |||
| switch (method_) { | |||
| case static_cast<int>(schema::ResizeMethod_LINEAR): { | |||
| auto out_tensor = out_tensors_.front(); | |||
| auto out_c = out_tensor->Channel(); | |||
| int plane = out_tensor->Height() * out_tensor->Width(); | |||
| int num = UP_DIV(plane, context_->thread_num_); | |||
| int start_index = task_id * num; | |||
| int count = plane - start_index; | |||
| count = count > num ? num : count; | |||
| auto out_ptr = output_data + start_index * out_c; | |||
| if (quant_in_->zp_ == 0) { | |||
| ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(), | |||
| align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_); | |||
| ret = | |||
| ResizeBilinearInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), input->Width(), | |||
| out_tensor->Height(), out_tensor->Width(), out_c, start_index, count, resize_quant_arg_); | |||
| } else { | |||
| ret = ResizeBilinearInt8WithFloatWeight(input_data, output_data, input_shape.data(), | |||
| out_tensors_[0]->shape().data(), align_corners_, quant_in_, quant_out_, | |||
| multiplier_, task_id, context_->thread_num_); | |||
| ret = ResizeBilinearWithFloatScaleInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), | |||
| input->Width(), out_tensor->Height(), out_tensor->Width(), out_c, | |||
| start_index, count, resize_float_quant_arg_); | |||
| } | |||
| break; | |||
| } | |||
| case static_cast<int>(schema::ResizeMethod_NEAREST): { | |||
| @@ -32,17 +32,20 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ResizeInt8CPUKernel() { | |||
| delete quant_out_; | |||
| quant_out_ = nullptr; | |||
| delete quant_in_; | |||
| quant_in_ = nullptr; | |||
| delete multiplier_; | |||
| multiplier_ = nullptr; | |||
| } | |||
| ~ResizeInt8CPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override { return 0; }; | |||
| int ReSize() override; | |||
| int InitResizeBiLinear(); | |||
| int InitFloatResizeBiLinear(); | |||
| int InitResizeQuantArg(); | |||
| int CalRatio(); | |||
| int CalInterpolationRange(); | |||
| void FreeResizeBiLinear(); | |||
| int InitResizeFloatQuantArg(); | |||
| int CalFloatRatio(); | |||
| int CalFloatInterpolationRange(); | |||
| void FreeFloatResizeBiLinear(); | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| @@ -50,6 +53,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { | |||
| QuantArg *quant_in_; | |||
| QuantArg *quant_out_; | |||
| QuantMulArg *multiplier_; | |||
| ResizeQuantArg resize_quant_arg_; | |||
| ResizeFloatScaleQuantArg resize_float_quant_arg_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||