| @@ -19,129 +19,131 @@ | |||||
| #include "nnacl/quantization/fixed_point.h" | #include "nnacl/quantization/fixed_point.h" | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape, | |||||
| const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg, | |||||
| int tid, int thread_num) { | |||||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int32_t in_n = input_shape[0]; | |||||
| int32_t in_h = input_shape[1]; | |||||
| int32_t in_w = input_shape[2]; | |||||
| int32_t in_c = input_shape[3]; | |||||
| int32_t new_height = output_shape[1]; | |||||
| int32_t new_width = output_shape[2]; | |||||
| int32_t height_scale = 0, width_scale = 0; | |||||
| ComputeScale(in_h, new_height, align_corners, &height_scale); | |||||
| ComputeScale(in_w, new_width, align_corners, &width_scale); | |||||
| int n, h, w, c; | |||||
| for (n = 0; n < in_n; n++) { | |||||
| for (h = tid; h < new_height; h += thread_num) { | |||||
| const int base_offset = 20; | |||||
| int scaled_actual_y; | |||||
| int bottom, top; | |||||
| int scaled_bottom_weight, scaled_top_weight; | |||||
| ComputeInterpolationArgs(h, height_scale, in_h, &scaled_actual_y, &bottom, &scaled_bottom_weight, &top, | |||||
| &scaled_top_weight); | |||||
| for (w = 0; w < new_width; w++) { | |||||
| int scaled_actual_x; | |||||
| int left, right; | |||||
| int scaled_left_weight, scaled_right_weight; | |||||
| ComputeInterpolationArgs(w, width_scale, in_w, &scaled_actual_x, &left, &scaled_left_weight, &right, | |||||
| &scaled_right_weight); | |||||
| for (c = 0; c < in_c; c++) { | |||||
| const int64_t bottom_left_value = | |||||
| (int64_t)(input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * scaled_bottom_weight * | |||||
| scaled_left_weight; | |||||
| const int64_t bottom_right_value = | |||||
| (int64_t)(input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * scaled_bottom_weight * | |||||
| scaled_right_weight; | |||||
| const int64_t top_left_value = (int64_t)(input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * | |||||
| scaled_top_weight * scaled_left_weight; | |||||
| const int64_t top_right_value = (int64_t)(input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * | |||||
| scaled_top_weight * scaled_right_weight; | |||||
| const int64_t scaled_interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value; | |||||
| int32_t interp_value; | |||||
| if (scaled_interp_value >= 0) { | |||||
| interp_value = (scaled_interp_value + (1 << 19)) / (1 << 20); | |||||
| } else { | |||||
| interp_value = (scaled_interp_value - (1 << 19)) / (1 << 20); | |||||
| } | |||||
| const int out_interp_value = | |||||
| MultiplyByQuantizedMultiplier(interp_value, mul_arg->multiplier_, mul_arg->left_shift_ + base_offset, | |||||
| mul_arg->right_shift_ - base_offset) + | |||||
| quant_out->zp_; | |||||
| int8_t out_value; | |||||
| out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value; | |||||
| out_value = out_value < INT8_MIN ? INT8_MIN : out_value; | |||||
| output_data[offset(output_shape, n, h, w, c)] = out_value; | |||||
| } | |||||
| int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w, | |||||
| int channel, int index, int count, ResizeQuantArg quant_arg) { | |||||
| int in_plane = in_h * in_w; | |||||
| int out_plane = out_h * out_w; | |||||
| for (int n = 0; n < batch; n++) { | |||||
| const int8_t *in_b_ptr = input_ptr + n * in_plane * channel; | |||||
| int8_t *out_b_ptr = output_ptr + n * out_plane * channel; | |||||
| for (int t = 0; t < count; t++) { | |||||
| int ori_out_h = (index + t) / out_w; | |||||
| int ori_out_w = (index + t) % out_w; | |||||
| int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w]; | |||||
| int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w]; | |||||
| int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h]; | |||||
| int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h]; | |||||
| int32_t weight_x = quant_arg.x_axis_index_[ori_out_w] - (1 << 10) * x_lower_value; | |||||
| int32_t one_minus_weight_x = (1 << 10) - weight_x; | |||||
| int32_t weight_y = quant_arg.y_axis_index_[ori_out_h] - (1 << 10) * y_lower_value; | |||||
| int32_t one_minus_weight_y = (1 << 10) - weight_y; | |||||
| int64_t left_bottom_coef = (int64_t)(one_minus_weight_x * one_minus_weight_y); | |||||
| int64_t left_top_coef = (int64_t)(weight_y * one_minus_weight_x); | |||||
| int64_t right_bottom_coef = (int64_t)(weight_x * one_minus_weight_y); | |||||
| int64_t right_top_coef = (int64_t)(weight_x * weight_y); | |||||
| int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel; | |||||
| int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel; | |||||
| int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel; | |||||
| int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel; | |||||
| int c = 0; | |||||
| for (; c < channel; c++) { | |||||
| int64_t out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index]; | |||||
| int64_t out_left_top = left_top_coef * in_b_ptr[input_lt_index]; | |||||
| int64_t out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index]; | |||||
| int64_t out_right_top = right_top_coef * in_b_ptr[input_rt_index]; | |||||
| int64_t out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top; | |||||
| out_b_ptr[0] = (int8_t)((out_value + (1 << 19)) / (1 << 20)); | |||||
| input_lb_index++; | |||||
| input_lt_index++; | |||||
| input_rb_index++; | |||||
| input_rt_index++; | |||||
| out_b_ptr++; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape, | |||||
| const int *output_shape, const bool align_corners, QuantArg *quant_in, | |||||
| QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num) { | |||||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) { | |||||
| return NNACL_NULL_PTR; | |||||
| } | |||||
| int32_t in_n = input_shape[0]; | |||||
| int32_t in_h = input_shape[1]; | |||||
| int32_t in_w = input_shape[2]; | |||||
| int32_t in_c = input_shape[3]; | |||||
| int32_t new_height = output_shape[1]; | |||||
| int32_t new_width = output_shape[2]; | |||||
| float height_scale, width_scale; | |||||
| int ret = ComputeScaleFloat(in_h, new_height, align_corners, &height_scale); | |||||
| if (ret != NNACL_OK) { | |||||
| return ret; | |||||
| } | |||||
| ret = ComputeScaleFloat(in_w, new_width, align_corners, &width_scale); | |||||
| if (ret != NNACL_OK) { | |||||
| return ret; | |||||
| } | |||||
| int n, h, w, c; | |||||
| for (n = 0; n < in_n; n++) { | |||||
| for (h = tid; h < new_height; h += thread_num) { | |||||
| float actual_y; | |||||
| int bottom, top; | |||||
| float bottom_weight, top_weight; | |||||
| ComputeInterpolationArgsFloatWeight(h, height_scale, in_h, &actual_y, &bottom, &bottom_weight, &top, &top_weight); | |||||
| for (w = 0; w < new_width; w++) { | |||||
| float actual_x; | |||||
| int left, right; | |||||
| float left_weight, right_weight; | |||||
| ComputeInterpolationArgsFloatWeight(w, width_scale, in_w, &actual_x, &left, &left_weight, &right, | |||||
| &right_weight); | |||||
| for (c = 0; c < in_c; c++) { | |||||
| float bottom_left_value = ((int32_t)input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) * | |||||
| bottom_weight * left_weight; | |||||
| float bottom_right_value = ((int32_t)input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) * | |||||
| bottom_weight * right_weight; | |||||
| float top_left_value = | |||||
| ((int32_t)input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * top_weight * left_weight; | |||||
| float top_right_value = | |||||
| ((int32_t)input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * top_weight * right_weight; | |||||
| float interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value; | |||||
| const int out_interp_value = MultiplyByQuantizedMultiplier((int32_t)interp_value, mul_arg->multiplier_, | |||||
| mul_arg->left_shift_, mul_arg->right_shift_) + | |||||
| quant_out->zp_; | |||||
| int8_t out_value; | |||||
| out_value = out_interp_value > INT8_MAX ? INT8_MAX : out_interp_value; | |||||
| out_value = out_value < INT8_MIN ? INT8_MIN : out_value; | |||||
| output_data[offset(output_shape, n, h, w, c)] = out_value; | |||||
| } | |||||
| int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, | |||||
| int out_h, int out_w, int channel, int index, int count, | |||||
| ResizeFloatScaleQuantArg quant_arg) { | |||||
| int in_plane = in_h * in_w; | |||||
| int out_plane = out_h * out_w; | |||||
| for (int n = 0; n < batch; n++) { | |||||
| const int8_t *in_b_ptr = input_ptr + n * in_plane * channel; | |||||
| int8_t *out_b_ptr = output_ptr + n * out_plane * channel; | |||||
| for (int t = 0; t < count; t++) { | |||||
| int ori_out_h = (index + t) / out_w; | |||||
| int ori_out_w = (index + t) % out_w; | |||||
| int32_t x_lower_value = quant_arg.x_axis_lower_[ori_out_w]; | |||||
| int32_t x_upper_value = quant_arg.x_axis_upper_[ori_out_w]; | |||||
| int32_t y_lower_value = quant_arg.y_axis_lower_[ori_out_h]; | |||||
| int32_t y_upper_value = quant_arg.y_axis_upper_[ori_out_h]; | |||||
| float weight_x = quant_arg.x_axis_index_[ori_out_w] - x_lower_value; | |||||
| float one_minus_weight_x = 1 - weight_x; | |||||
| float weight_y = quant_arg.y_axis_index_[ori_out_h] - y_lower_value; | |||||
| float one_minus_weight_y = 1 - weight_y; | |||||
| float left_bottom_coef = one_minus_weight_x * one_minus_weight_y; | |||||
| float left_top_coef = weight_y * one_minus_weight_x; | |||||
| float right_bottom_coef = weight_x * one_minus_weight_y; | |||||
| float right_top_coef = weight_x * weight_y; | |||||
| int input_lb_index = (y_lower_value * in_w + x_lower_value) * channel; | |||||
| int input_lt_index = (y_upper_value * in_w + x_lower_value) * channel; | |||||
| int input_rb_index = (y_lower_value * in_w + x_upper_value) * channel; | |||||
| int input_rt_index = (y_upper_value * in_w + x_upper_value) * channel; | |||||
| int c = 0; | |||||
| #ifdef ENABLE_ARM | |||||
| for (; c < channel; c += 4) { | |||||
| float32x4_t in_lb; | |||||
| in_lb[0] = (float)in_b_ptr[input_lb_index]; | |||||
| in_lb[1] = (float)in_b_ptr[input_lb_index + 1]; | |||||
| in_lb[2] = (float)in_b_ptr[input_lb_index + 2]; | |||||
| in_lb[3] = (float)in_b_ptr[input_lb_index + 3]; | |||||
| float32x4_t out_left_bottom = vmulq_n_f32(in_lb, left_bottom_coef); | |||||
| float32x4_t in_lt; | |||||
| in_lt[0] = (float)in_b_ptr[input_lt_index]; | |||||
| in_lt[1] = (float)in_b_ptr[input_lt_index + 1]; | |||||
| in_lt[2] = (float)in_b_ptr[input_lt_index + 2]; | |||||
| in_lt[3] = (float)in_b_ptr[input_lt_index + 3]; | |||||
| float32x4_t out_left_top = vmulq_n_f32(in_lt, left_top_coef); | |||||
| float32x4_t in_rb; | |||||
| in_rb[0] = (float)in_b_ptr[input_rb_index]; | |||||
| in_rb[1] = (float)in_b_ptr[input_rb_index + 1]; | |||||
| in_rb[2] = (float)in_b_ptr[input_rb_index + 2]; | |||||
| in_rb[3] = (float)in_b_ptr[input_rb_index + 3]; | |||||
| float32x4_t out_right_bottom = vmulq_n_f32(in_rb, right_bottom_coef); | |||||
| float32x4_t in_rt; | |||||
| in_rt[0] = (float)in_b_ptr[input_rt_index]; | |||||
| in_rt[1] = (float)in_b_ptr[input_rt_index + 1]; | |||||
| in_rt[2] = (float)in_b_ptr[input_rt_index + 2]; | |||||
| in_rt[3] = (float)in_b_ptr[input_rt_index + 3]; | |||||
| float32x4_t out_right_top = vmulq_n_f32(in_rt, right_top_coef); | |||||
| float32x4_t out_value1 = vaddq_f32(out_left_bottom, out_left_top); | |||||
| float32x4_t out_value2 = vaddq_f32(out_right_top, out_right_bottom); | |||||
| float32x4_t out_value = vaddq_f32(out_value1, out_value2); | |||||
| out_b_ptr[0] = (int8_t)(out_value[0]); | |||||
| out_b_ptr[1] = (int8_t)(out_value[1]); | |||||
| out_b_ptr[2] = (int8_t)(out_value[2]); | |||||
| out_b_ptr[3] = (int8_t)(out_value[3]); | |||||
| input_lb_index += 4; | |||||
| input_lt_index += 4; | |||||
| input_rb_index += 4; | |||||
| input_rt_index += 4; | |||||
| out_b_ptr += 4; | |||||
| } | |||||
| #endif | |||||
| for (; c < channel; c++) { | |||||
| float out_left_bottom = left_bottom_coef * in_b_ptr[input_lb_index]; | |||||
| float out_left_top = left_top_coef * in_b_ptr[input_lt_index]; | |||||
| float out_right_bottom = right_bottom_coef * in_b_ptr[input_rb_index]; | |||||
| float out_right_top = right_top_coef * in_b_ptr[input_rt_index]; | |||||
| float out_value = out_left_bottom + out_left_top + out_right_bottom + out_right_top; | |||||
| out_b_ptr[0] = (int8_t)(out_value); | |||||
| input_lb_index++; | |||||
| input_lt_index++; | |||||
| input_rb_index++; | |||||
| input_rt_index++; | |||||
| out_b_ptr++; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -175,46 +177,6 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
// Computes the in/out resize ratio as a Q10 fixed-point value, rounded to
// nearest. With align_corners (and more than one output element) the corner
// pixels are mapped onto each other, i.e. (in - 1) / (out - 1).
// Leaves *scale untouched when out_value is 0.
void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) {
  if (out_value == 0) {
    return;
  }
  int32_t numerator = in_value;
  int32_t denominator = out_value;
  if (align_corners && out_value > 1) {
    numerator = in_value - 1;
    denominator = out_value - 1;
  }
  *scale = (numerator * (1 << 10) + denominator / 2) / denominator;
}
// For output coordinate `pos` and Q10 ratio `scale`, derives the Q10 source
// position plus the two neighbouring source indices (clamped to [0, size-1])
// and their complementary Q10 interpolation weights (they sum to 1 << 10).
void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
                              int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight) {
  const int32_t q10 = 1 << 10;
  *scaled_pos = pos * scale;
  const int32_t floor_pos = *scaled_pos / q10;
  *low = (floor_pos > 0) ? floor_pos : 0;
  *high = (floor_pos + 1 < size) ? (floor_pos + 1) : (size - 1);
  // Weight of the upper neighbour is the Q10 fractional part past `low`;
  // the lower neighbour takes the remainder.
  *scaled_high_weight = *scaled_pos - q10 * (*low);
  *scaled_low_weight = q10 - *scaled_high_weight;
}
// Float counterpart of ComputeScale: writes in/out (or (in-1)/(out-1) with
// align_corners and out_value > 1) into *scale.
// Returns NNACL_ERRCODE_DIVISOR_ZERO when out_value is 0, NNACL_OK otherwise.
int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale) {
  if (out_value == 0) {
    return NNACL_ERRCODE_DIVISOR_ZERO;
  }
  if (align_corners && out_value > 1) {
    *scale = (float)(in_value - 1) / (out_value - 1);
  } else {
    *scale = (float)in_value / out_value;
  }
  return NNACL_OK;
}
// Float-weight counterpart of ComputeInterpolationArgs: maps output coordinate
// `pos` through `scale` to a real source position, the two neighbouring
// indices (clamped to [0, size-1]) and their complementary weights.
void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
                                         int32_t *low, float *low_weight, int32_t *high, float *high_weight) {
  const float src = pos * scale;
  *actual_pos = src;
  *low = (src > 0) ? (int32_t)floor(src) : 0;
  *high = (*low + 1 < size) ? (*low + 1) : (size - 1);
  *high_weight = src - (*low);
  *low_weight = 1.0 - (src - *low);
}
| void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners, | void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners, | ||||
| int32_t *nearest) { | int32_t *nearest) { | ||||
| if (new_size == 0) { | if (new_size == 0) { | ||||
| @@ -27,23 +27,12 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const int *output_shape, | |||||
| const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg, | |||||
| int tid, int thread_num); | |||||
| int ResizeBilinearInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, int out_h, int out_w, | |||||
| int channel, int index, int count, ResizeQuantArg quant_arg); | |||||
| int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape, | |||||
| const int *output_shape, const bool align_corners, QuantArg *quant_in, | |||||
| QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num); | |||||
| void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale); | |||||
| void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos, | |||||
| int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight); | |||||
| int ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale); | |||||
| void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos, | |||||
| int32_t *low, float *low_weight, int32_t *high, float *high_weight); | |||||
| int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr, int batch, int in_h, int in_w, | |||||
| int out_h, int out_w, int channel, int index, int count, | |||||
| ResizeFloatScaleQuantArg quant_arg); | |||||
| int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape, | int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape, | ||||
| const int *output_shape, const bool align_corners, int tid, int thread_num); | const int *output_shape, const bool align_corners, int tid, int thread_num); | ||||
| @@ -260,6 +260,28 @@ typedef struct LeakyReluQuantArg { | |||||
| int element_num; | int element_num; | ||||
| } LeakyReluQuantArg; | } LeakyReluQuantArg; | ||||
// Precomputed lookup tables for fixed-point (Q10) int8 bilinear resize.
// The *_axis_index_ arrays hold the Q10-scaled source coordinate per output
// column/row; *_axis_lower_/*_axis_upper_ hold the two clamped neighbour
// indices. Arrays are sized by output width (x) / height (y); ownership and
// lifetime are managed by the kernel that fills them.
typedef struct ResizeQuantArg {
  int32_t ratio_x_;         // Q10 horizontal ratio (in_w / out_w)
  int32_t ratio_y_;         // Q10 vertical ratio (in_h / out_h)
  int32_t *x_axis_index_;   // per output column: Q10 source x
  int32_t *x_axis_lower_;   // per output column: floor source x, clamped >= 0
  int32_t *x_axis_upper_;   // per output column: floor + 1, clamped < in_w
  int32_t *y_axis_index_;   // per output row: Q10 source y
  int32_t *y_axis_lower_;   // per output row: floor source y, clamped >= 0
  int32_t *y_axis_upper_;   // per output row: floor + 1, clamped < in_h
} ResizeQuantArg;
// Float-scale variant of ResizeQuantArg: the ratios and per-axis source
// coordinates are kept as floats, while the neighbour indices remain int32.
// Arrays are sized by output width (x) / height (y); ownership and lifetime
// are managed by the kernel that fills them.
typedef struct ResizeFloatScaleQuantArg {
  float ratio_x_;           // horizontal ratio (in_w / out_w)
  float ratio_y_;           // vertical ratio (in_h / out_h)
  float *x_axis_index_;     // per output column: real source x
  int32_t *x_axis_lower_;   // per output column: floor source x, clamped >= 0
  int32_t *x_axis_upper_;   // per output column: floor + 1, clamped < in_w
  float *y_axis_index_;     // per output row: real source y
  int32_t *y_axis_lower_;   // per output row: floor source y, clamped >= 0
  int32_t *y_axis_upper_;   // per output row: floor + 1, clamped < in_h
} ResizeFloatScaleQuantArg;
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| @@ -16,6 +16,7 @@ | |||||
| #include "src/runtime/kernel/arm/int8/resize_int8.h" | #include "src/runtime/kernel/arm/int8/resize_int8.h" | ||||
| #include <vector> | #include <vector> | ||||
| #include <algorithm> | |||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "nnacl/int8/resize_int8.h" | #include "nnacl/int8/resize_int8.h" | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| @@ -32,6 +33,40 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| void ResizeInt8CPUKernel::FreeResizeBiLinear() { | |||||
| free(resize_quant_arg_.x_axis_index_); | |||||
| free(resize_quant_arg_.x_axis_lower_); | |||||
| free(resize_quant_arg_.x_axis_upper_); | |||||
| free(resize_quant_arg_.y_axis_index_); | |||||
| free(resize_quant_arg_.y_axis_lower_); | |||||
| free(resize_quant_arg_.y_axis_upper_); | |||||
| } | |||||
| void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() { | |||||
| free(resize_float_quant_arg_.x_axis_index_); | |||||
| free(resize_float_quant_arg_.x_axis_lower_); | |||||
| free(resize_float_quant_arg_.x_axis_upper_); | |||||
| free(resize_float_quant_arg_.y_axis_index_); | |||||
| free(resize_float_quant_arg_.y_axis_lower_); | |||||
| free(resize_float_quant_arg_.y_axis_upper_); | |||||
| } | |||||
| ResizeInt8CPUKernel::~ResizeInt8CPUKernel() { | |||||
| if (method_ == schema::ResizeMethod_LINEAR) { | |||||
| if (quant_in_->zp_ == 0) { | |||||
| FreeResizeBiLinear(); | |||||
| } else { | |||||
| FreeFloatResizeBiLinear(); | |||||
| } | |||||
| } | |||||
| delete quant_out_; | |||||
| quant_out_ = nullptr; | |||||
| delete quant_in_; | |||||
| quant_in_ = nullptr; | |||||
| delete multiplier_; | |||||
| multiplier_ = nullptr; | |||||
| } | |||||
| int ResizeInt8CPUKernel::Init() { | int ResizeInt8CPUKernel::Init() { | ||||
| auto ret = ResizeBaseCPUKernel::Init(); | auto ret = ResizeBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -58,6 +93,195 @@ int ResizeInt8CPUKernel::Init() { | |||||
| return ReSize(); | return ReSize(); | ||||
| } | } | ||||
| int ResizeInt8CPUKernel::InitResizeQuantArg() { | |||||
| auto out_shape = out_tensors_.front()->shape(); | |||||
| resize_quant_arg_.x_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||||
| if (resize_quant_arg_.x_axis_index_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc x axis index array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||||
| if (resize_quant_arg_.x_axis_lower_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc x_axis_lower_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||||
| if (resize_quant_arg_.x_axis_upper_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc x_axis_upper_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_quant_arg_.y_axis_index_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||||
| if (resize_quant_arg_.y_axis_index_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc y_axis_index_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||||
| if (resize_quant_arg_.y_axis_lower_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc y_axis_lower_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||||
| if (resize_quant_arg_.y_axis_upper_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc y_axis_upper_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::CalRatio() { | |||||
| auto in_tensor = in_tensors_.front(); | |||||
| auto in_width = in_tensor->Width(); | |||||
| auto in_height = in_tensor->Height(); | |||||
| auto out_tensor = out_tensors_.front(); | |||||
| auto out_width = out_tensor->Width(); | |||||
| auto out_height = out_tensor->Height(); | |||||
| resize_quant_arg_.ratio_x_ = ((1 << 10) * in_width + out_width / 2) / out_width; | |||||
| resize_quant_arg_.ratio_y_ = ((1 << 10) * in_height + out_height / 2) / out_height; | |||||
| if (align_corners_ && out_width > 1) { | |||||
| resize_quant_arg_.ratio_x_ = ((1 << 10) * (in_width - 1) + (out_width - 1) / 2) / (out_width - 1); | |||||
| } | |||||
| if (align_corners_ && out_height > 1) { | |||||
| resize_quant_arg_.ratio_y_ = ((1 << 10) * (in_height - 1) + (out_height - 1) / 2) / (out_height - 1); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::CalInterpolationRange() { | |||||
| for (int i = 0; i < out_tensors_.front()->Height(); ++i) { | |||||
| int32_t scaled_index = i * resize_quant_arg_.ratio_y_; | |||||
| resize_quant_arg_.y_axis_index_[i] = scaled_index; | |||||
| resize_quant_arg_.y_axis_lower_[i] = std::max(scaled_index / (1 << 10), 0); | |||||
| resize_quant_arg_.y_axis_upper_[i] = std::min(scaled_index / (1 << 10) + 1, in_tensors_.front()->Height() - 1); | |||||
| } | |||||
| for (int i = 0; i < out_tensors_.front()->Width(); ++i) { | |||||
| int32_t scaled_index = i * resize_quant_arg_.ratio_x_; | |||||
| resize_quant_arg_.x_axis_index_[i] = scaled_index; | |||||
| resize_quant_arg_.x_axis_lower_[i] = std::max(scaled_index / (1 << 10), 0); | |||||
| resize_quant_arg_.x_axis_upper_[i] = std::min(scaled_index / (1 << 10) + 1, in_tensors_.front()->Width() - 1); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::InitResizeFloatQuantArg() { | |||||
| auto out_shape = out_tensors_.front()->shape(); | |||||
| resize_float_quant_arg_.x_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[2] * sizeof(float))); | |||||
| if (resize_float_quant_arg_.x_axis_index_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc x axis index array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_float_quant_arg_.x_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||||
| if (resize_float_quant_arg_.x_axis_lower_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc x_axis_lower_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_float_quant_arg_.x_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[2] * sizeof(int32_t))); | |||||
| if (resize_float_quant_arg_.x_axis_upper_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc x_axis_upper_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_float_quant_arg_.y_axis_index_ = reinterpret_cast<float *>(malloc(out_shape[1] * sizeof(float))); | |||||
| if (resize_float_quant_arg_.y_axis_index_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc y_axis_index_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_float_quant_arg_.y_axis_lower_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||||
| if (resize_float_quant_arg_.y_axis_lower_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc y_axis_lower_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| resize_float_quant_arg_.y_axis_upper_ = reinterpret_cast<int32_t *>(malloc(out_shape[1] * sizeof(int32_t))); | |||||
| if (resize_float_quant_arg_.y_axis_upper_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc y_axis_upper_ array failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::CalFloatRatio() { | |||||
| auto in_tensor = in_tensors_.front(); | |||||
| auto in_width = in_tensor->Width(); | |||||
| auto in_height = in_tensor->Height(); | |||||
| auto out_tensor = out_tensors_.front(); | |||||
| auto out_width = out_tensor->Width(); | |||||
| auto out_height = out_tensor->Height(); | |||||
| resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width) / out_width; | |||||
| resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height) / out_height; | |||||
| if (align_corners_ && out_width > 1) { | |||||
| resize_float_quant_arg_.ratio_x_ = static_cast<float>(in_width - 1) / (out_width - 1); | |||||
| } | |||||
| if (align_corners_ && out_height > 1) { | |||||
| resize_float_quant_arg_.ratio_y_ = static_cast<float>(in_height - 1) / (out_height - 1); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::CalFloatInterpolationRange() { | |||||
| for (int i = 0; i < out_tensors_.front()->Height(); ++i) { | |||||
| float scaled_index = i * resize_float_quant_arg_.ratio_y_; | |||||
| int lower_index = std::floor(scaled_index); | |||||
| resize_float_quant_arg_.y_axis_index_[i] = scaled_index; | |||||
| resize_float_quant_arg_.y_axis_lower_[i] = std::max(lower_index, 0); | |||||
| resize_float_quant_arg_.y_axis_upper_[i] = std::min(lower_index + 1, in_tensors_.front()->Height() - 1); | |||||
| } | |||||
| for (int i = 0; i < out_tensors_.front()->Width(); ++i) { | |||||
| float scaled_index = i * resize_float_quant_arg_.ratio_x_; | |||||
| int lower_index = std::floor(scaled_index); | |||||
| resize_float_quant_arg_.x_axis_index_[i] = scaled_index; | |||||
| resize_float_quant_arg_.x_axis_lower_[i] = std::max(lower_index, 0); | |||||
| resize_float_quant_arg_.x_axis_upper_[i] = std::min(lower_index + 1, in_tensors_.front()->Width() - 1); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::InitResizeBiLinear() { | |||||
| auto ret = InitResizeQuantArg(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Resize Int8 Op Resize Failed."; | |||||
| return ret; | |||||
| } | |||||
| ret = CalRatio(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Cal ratio Failed."; | |||||
| return ret; | |||||
| } | |||||
| ret = CalInterpolationRange(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Cal range of interpolation Failed."; | |||||
| return ret; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::InitFloatResizeBiLinear() { | |||||
| auto ret = InitResizeFloatQuantArg(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Resize Int8 Op Resize Failed."; | |||||
| return ret; | |||||
| } | |||||
| ret = CalFloatRatio(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Cal ratio Failed."; | |||||
| return ret; | |||||
| } | |||||
| ret = CalFloatInterpolationRange(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Cal range of interpolation Failed."; | |||||
| return ret; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8CPUKernel::ReSize() { | |||||
| if (method_ == schema::ResizeMethod_LINEAR) { | |||||
| if (quant_in_->zp_ == 0) { | |||||
| return InitResizeBiLinear(); | |||||
| } else { | |||||
| return InitFloatResizeBiLinear(); | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ResizeInt8Impl(void *cdata, int task_id) { | int ResizeInt8Impl(void *cdata, int task_id) { | ||||
| auto resize = reinterpret_cast<ResizeInt8CPUKernel *>(cdata); | auto resize = reinterpret_cast<ResizeInt8CPUKernel *>(cdata); | ||||
| auto error_code = resize->RunImpl(task_id); | auto error_code = resize->RunImpl(task_id); | ||||
| @@ -87,14 +311,24 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) { | |||||
| int ret = 0; | int ret = 0; | ||||
| switch (method_) { | switch (method_) { | ||||
| case static_cast<int>(schema::ResizeMethod_LINEAR): { | case static_cast<int>(schema::ResizeMethod_LINEAR): { | ||||
| auto out_tensor = out_tensors_.front(); | |||||
| auto out_c = out_tensor->Channel(); | |||||
| int plane = out_tensor->Height() * out_tensor->Width(); | |||||
| int num = UP_DIV(plane, context_->thread_num_); | |||||
| int start_index = task_id * num; | |||||
| int count = plane - start_index; | |||||
| count = count > num ? num : count; | |||||
| auto out_ptr = output_data + start_index * out_c; | |||||
| if (quant_in_->zp_ == 0) { | if (quant_in_->zp_ == 0) { | ||||
| ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(), | |||||
| align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_); | |||||
| ret = | |||||
| ResizeBilinearInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), input->Width(), | |||||
| out_tensor->Height(), out_tensor->Width(), out_c, start_index, count, resize_quant_arg_); | |||||
| } else { | } else { | ||||
| ret = ResizeBilinearInt8WithFloatWeight(input_data, output_data, input_shape.data(), | |||||
| out_tensors_[0]->shape().data(), align_corners_, quant_in_, quant_out_, | |||||
| multiplier_, task_id, context_->thread_num_); | |||||
| ret = ResizeBilinearWithFloatScaleInt8(input_data, out_ptr, out_tensor->Batch(), input->Height(), | |||||
| input->Width(), out_tensor->Height(), out_tensor->Width(), out_c, | |||||
| start_index, count, resize_float_quant_arg_); | |||||
| } | } | ||||
| break; | break; | ||||
| } | } | ||||
| case static_cast<int>(schema::ResizeMethod_NEAREST): { | case static_cast<int>(schema::ResizeMethod_NEAREST): { | ||||
| @@ -32,17 +32,20 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { | |||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | : ResizeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | ||||
| ~ResizeInt8CPUKernel() { | |||||
| delete quant_out_; | |||||
| quant_out_ = nullptr; | |||||
| delete quant_in_; | |||||
| quant_in_ = nullptr; | |||||
| delete multiplier_; | |||||
| multiplier_ = nullptr; | |||||
| } | |||||
| ~ResizeInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override { return 0; }; | |||||
| int ReSize() override; | |||||
| int InitResizeBiLinear(); | |||||
| int InitFloatResizeBiLinear(); | |||||
| int InitResizeQuantArg(); | |||||
| int CalRatio(); | |||||
| int CalInterpolationRange(); | |||||
| void FreeResizeBiLinear(); | |||||
| int InitResizeFloatQuantArg(); | |||||
| int CalFloatRatio(); | |||||
| int CalFloatInterpolationRange(); | |||||
| void FreeFloatResizeBiLinear(); | |||||
| int Run() override; | int Run() override; | ||||
| int RunImpl(int task_id); | int RunImpl(int task_id); | ||||
| @@ -50,6 +53,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { | |||||
| QuantArg *quant_in_; | QuantArg *quant_in_; | ||||
| QuantArg *quant_out_; | QuantArg *quant_out_; | ||||
| QuantMulArg *multiplier_; | QuantMulArg *multiplier_; | ||||
| ResizeQuantArg resize_quant_arg_; | |||||
| ResizeFloatScaleQuantArg resize_float_quant_arg_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||