From: @zhaozhenlong Reviewed-by: @zhanghaibo5,@zhang_xue_tong Signed-off-by: @zhang_xue_tong pull/15694/MERGE
| @@ -15,11 +15,11 @@ | |||||
| */ | */ | ||||
| #include "nnacl/base/slice_base.h" | #include "nnacl/base/slice_base.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void PadSliceParameterTo4D(SliceParameter *param) { | |||||
| int32_t begin[DIMENSION_4D]; | |||||
| int32_t end[DIMENSION_4D]; | |||||
| int32_t slice_size[DIMENSION_4D]; | |||||
| int32_t data_shape[DIMENSION_4D]; | |||||
| void PadSliceParameterTo8D(SliceParameter *param) { | |||||
| int32_t begin[DIMENSION_8D]; | |||||
| int32_t end[DIMENSION_8D]; | |||||
| int32_t slice_size[DIMENSION_8D]; | |||||
| int32_t data_shape[DIMENSION_8D]; | |||||
| for (int32_t i = 0; i < param->param_length_; ++i) { | for (int32_t i = 0; i < param->param_length_; ++i) { | ||||
| begin[i] = param->begin_[i]; | begin[i] = param->begin_[i]; | ||||
| end[i] = param->end_[i]; | end[i] = param->end_[i]; | ||||
| @@ -27,7 +27,7 @@ void PadSliceParameterTo4D(SliceParameter *param) { | |||||
| data_shape[i] = param->shape_[i]; | data_shape[i] = param->shape_[i]; | ||||
| } | } | ||||
| int32_t real_index = param->param_length_ - 1; | int32_t real_index = param->param_length_ - 1; | ||||
| for (int32_t i = DIMENSION_4D - 1; i >= 0; --i) { | |||||
| for (int32_t i = DIMENSION_8D - 1; i >= 0; --i) { | |||||
| if (real_index >= 0) { | if (real_index >= 0) { | ||||
| param->begin_[i] = begin[real_index]; | param->begin_[i] = begin[real_index]; | ||||
| param->end_[i] = end[real_index]; | param->end_[i] = end[real_index]; | ||||
| @@ -40,39 +40,55 @@ void PadSliceParameterTo4D(SliceParameter *param) { | |||||
| param->shape_[i] = 1; | param->shape_[i] = 1; | ||||
| } | } | ||||
| } | } | ||||
| param->param_length_ = DIMENSION_4D; | |||||
| param->param_length_ = DIMENSION_8D; | |||||
| } | } | ||||
| void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size) { | void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size) { | ||||
| int8_t *int8_in = (int8_t *)input; | int8_t *int8_in = (int8_t *)input; | ||||
| int8_t *int8_out = (int8_t *)output; | int8_t *int8_out = (int8_t *)output; | ||||
| int32_t out_dim1 = param->size_[1]; | |||||
| int32_t out_dim2 = param->size_[2]; | |||||
| int32_t out_dim3 = param->size_[3]; | |||||
| size_t out_stride2 = out_dim3; | |||||
| size_t out_stride1 = out_stride2 * out_dim2; | |||||
| size_t out_stride0 = out_stride1 * out_dim1; | |||||
| size_t count_per_thread = UP_DIV(out_dim1, param->op_parameter_.thread_num_); | |||||
| size_t thread_stride = thread_id * count_per_thread; | |||||
| size_t copy_size = param->size_[3] * data_size; | |||||
| size_t in_stride2 = param->shape_[3]; | |||||
| size_t in_stride1 = param->shape_[2] * in_stride2; | |||||
| size_t in_stride0 = param->shape_[1] * in_stride1; | |||||
| for (int i = 0; i < param->size_[0]; ++i) { | |||||
| size_t out_offset0 = i * out_stride0; | |||||
| size_t in_offset0 = (i + param->begin_[0]) * in_stride0 + param->begin_[3]; | |||||
| for (size_t j = 0; j < count_per_thread; ++j) { | |||||
| size_t k = j + thread_stride; | |||||
| if (k >= out_dim1) { | |||||
| break; | |||||
| } | |||||
| size_t out_offset1 = k * out_stride1 + out_offset0; | |||||
| size_t in_offset1 = (k + param->begin_[1]) * in_stride1 + in_offset0; | |||||
| for (int l = 0; l < out_dim2; ++l) { | |||||
| size_t out_offset = out_offset1 + l * out_stride2; | |||||
| size_t in_offset = in_offset1 + (l + param->begin_[2]) * in_stride2; | |||||
| memcpy(int8_out + out_offset * data_size, int8_in + in_offset * data_size, copy_size); | |||||
| size_t out_stride[8]; | |||||
| out_stride[7] = 1; | |||||
| for (int i = 6; i >= 0; --i) { | |||||
| out_stride[i] = out_stride[i + 1] * param->size_[i + 1]; | |||||
| } | |||||
| size_t count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_); | |||||
| size_t thread_begin = thread_id * count_per_thread; | |||||
| size_t thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread); | |||||
| size_t copy_size = param->size_[7] * data_size; | |||||
| size_t in_stride[8]; | |||||
| in_stride[7] = 1; | |||||
| for (int i = 6; i >= 0; --i) { | |||||
| in_stride[i] = param->shape_[i + 1] * in_stride[i + 1]; | |||||
| } | |||||
| for (int ii = 0; ii < param->size_[0]; ++ii) { | |||||
| size_t out_offset0 = ii * out_stride[0]; | |||||
| size_t in_offset0 = (ii + param->begin_[0]) * in_stride[0] + param->begin_[7]; | |||||
| for (int jj = 0; jj < param->size_[1]; ++jj) { | |||||
| size_t out_offset1 = jj * out_stride[1] + out_offset0; | |||||
| size_t in_offset1 = (jj + param->begin_[1]) * in_stride[1] + in_offset0; | |||||
| for (int kk = 0; kk < param->size_[2]; ++kk) { | |||||
| size_t out_offset2 = kk * out_stride[2] + out_offset1; | |||||
| size_t in_offset2 = (kk + param->begin_[2]) * in_stride[2] + in_offset1; | |||||
| for (int ll = 0; ll < param->size_[3]; ++ll) { | |||||
| size_t out_offset3 = ll * out_stride[3] + out_offset2; | |||||
| size_t in_offset3 = (ll + param->begin_[3]) * in_stride[3] + in_offset2; | |||||
| for (int i = 0; i < param->size_[4]; ++i) { | |||||
| size_t out_offset4 = i * out_stride[4] + out_offset3; | |||||
| size_t in_offset4 = (i + param->begin_[4]) * in_stride[4] + in_offset3; | |||||
| for (size_t j = thread_begin; j < thread_end; ++j) { | |||||
| size_t out_offset5 = j * out_stride[5] + out_offset4; | |||||
| size_t in_offset5 = (j + param->begin_[5]) * in_stride[5] + in_offset4; | |||||
| for (int k = 0; k < param->size_[6]; ++k) { | |||||
| size_t out_offset6 = k * out_stride[6] + out_offset5; | |||||
| size_t in_offset6 = (k + param->begin_[6]) * in_stride[6] + in_offset5; | |||||
| memcpy(int8_out + out_offset6 * data_size, int8_in + in_offset6 * data_size, copy_size); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -82,19 +98,34 @@ void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, i | |||||
| int8_t *int8_in = (int8_t *)input; | int8_t *int8_in = (int8_t *)input; | ||||
| int8_t *int8_out = (int8_t *)output; | int8_t *int8_out = (int8_t *)output; | ||||
| size_t copy_size = param->size_[3] * data_size; | |||||
| size_t in_stride2 = param->shape_[3]; | |||||
| size_t in_stride1 = param->shape_[2] * in_stride2; | |||||
| size_t in_stride0 = param->shape_[1] * in_stride1; | |||||
| size_t copy_size = param->size_[7] * data_size; | |||||
| size_t in_stride[8]; | |||||
| in_stride[7] = 1; | |||||
| for (int i = 6; i >= 0; --i) { | |||||
| in_stride[i] = param->shape_[i + 1] * in_stride[i + 1]; | |||||
| } | |||||
| size_t out_offset = 0; | size_t out_offset = 0; | ||||
| for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) { | for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) { | ||||
| size_t in_offset0 = dim0 * in_stride0 + param->begin_[3]; | |||||
| size_t in_offset0 = dim0 * in_stride[0] + param->begin_[7]; | |||||
| for (size_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) { | for (size_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) { | ||||
| size_t in_offset1 = dim1 * in_stride1 + in_offset0; | |||||
| size_t in_offset1 = dim1 * in_stride[1] + in_offset0; | |||||
| for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) { | for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) { | ||||
| size_t in_offset = in_offset1 + dim2 * in_stride2; | |||||
| memcpy(int8_out + out_offset * data_size, int8_in + in_offset * data_size, copy_size); | |||||
| out_offset += param->size_[3]; | |||||
| size_t in_offset2 = in_offset1 + dim2 * in_stride[2]; | |||||
| for (int32_t dim3 = param->begin_[3]; dim3 < param->end_[3]; ++dim3) { | |||||
| size_t in_offset3 = in_offset2 + dim3 * in_stride[3]; | |||||
| for (int32_t dim4 = param->begin_[4]; dim4 < param->end_[4]; ++dim4) { | |||||
| size_t in_offset4 = in_offset3 + dim4 * in_stride[4]; | |||||
| for (int32_t dim5 = param->begin_[5]; dim5 < param->end_[5]; ++dim5) { | |||||
| size_t in_offset5 = in_offset4 + dim5 * in_stride[5]; | |||||
| for (int32_t dim6 = param->begin_[6]; dim6 < param->end_[6]; ++dim6) { | |||||
| size_t in_offset6 = in_offset5 + dim6 * in_stride[6]; | |||||
| memcpy(int8_out + out_offset * data_size, int8_in + in_offset6 * data_size, copy_size); | |||||
| out_offset += param->size_[7]; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -23,7 +23,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void PadSliceParameterTo4D(SliceParameter *param); | |||||
| void PadSliceParameterTo8D(SliceParameter *param); | |||||
| void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size); | void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size); | ||||
| void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size); | void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size); | ||||
| @@ -16,104 +16,77 @@ | |||||
| #include "nnacl/int8/slice_int8.h" | #include "nnacl/int8/slice_int8.h" | ||||
| int SliceInt8NoParallel(const int8_t *input, int8_t *output, SliceParameter *param) { | |||||
| int SliceInt8(const int8_t *input, int8_t *output, SliceParameter *param, int thread_id) { | |||||
| double input_scale = param->quant_arg_.in_args_.scale_; | double input_scale = param->quant_arg_.in_args_.scale_; | ||||
| int input_zp = param->quant_arg_.in_args_.zp_; | int input_zp = param->quant_arg_.in_args_.zp_; | ||||
| double output_scale = param->quant_arg_.out_args_.scale_; | double output_scale = param->quant_arg_.out_args_.scale_; | ||||
| int output_zp = param->quant_arg_.out_args_.zp_; | int output_zp = param->quant_arg_.out_args_.zp_; | ||||
| const int base_offset = 20; | |||||
| int act_min = param->quant_arg_.output_activation_min_; | int act_min = param->quant_arg_.output_activation_min_; | ||||
| int act_max = param->quant_arg_.output_activation_max_; | int act_max = param->quant_arg_.output_activation_max_; | ||||
| int equal_quant = 0; | |||||
| double multiplier = input_scale / output_scale; | |||||
| if (input_scale == output_scale && input_zp == output_zp) { | |||||
| equal_quant = 1; | |||||
| size_t out_stride[8]; | |||||
| out_stride[7] = 1; | |||||
| for (int i = 6; i >= 0; --i) { | |||||
| out_stride[i] = out_stride[i + 1] * param->size_[i + 1]; | |||||
| } | } | ||||
| int32_t end_n = param->begin_[0] + param->size_[0]; | |||||
| int32_t end_h = param->begin_[1] + param->size_[1]; | |||||
| int32_t end_w = param->begin_[2] + param->size_[2]; | |||||
| int unit_count = param->size_[3]; | |||||
| int unit_size = unit_count * sizeof(int8_t); | |||||
| int in_stride2 = param->shape_[3]; | |||||
| int in_stride1 = param->shape_[2] * in_stride2; | |||||
| int in_stride0 = param->shape_[1] * in_stride1; | |||||
| int out_offset = 0; | |||||
| int n, h, w, c; | |||||
| for (n = param->begin_[0]; n < end_n; ++n) { | |||||
| size_t in_offset0 = n * in_stride0; | |||||
| for (h = param->begin_[1]; h < end_h; ++h) { | |||||
| size_t in_offset1 = h * in_stride1 + in_offset0; | |||||
| for (w = param->begin_[2]; w < end_w; ++w) { | |||||
| size_t in_offset = in_offset1 + w * in_stride2; | |||||
| if (equal_quant == 1) { | |||||
| memcpy(output + out_offset, input + in_offset, unit_size); | |||||
| } else { | |||||
| for (c = 0; c < unit_count; ++c) { | |||||
| int32_t output_val = round(multiplier * (input[in_offset + c] - input_zp)) + output_zp; | |||||
| output[c + out_offset] = (int8_t)MSMAX(act_min, MSMIN(output_val, act_max)); | |||||
| } | |||||
| } | |||||
| out_offset += unit_count; | |||||
| } | |||||
| } | |||||
| int count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_); | |||||
| size_t thread_begin = thread_id * count_per_thread; | |||||
| size_t thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread); | |||||
| int unit_size = param->size_[7] * sizeof(int8_t); | |||||
| size_t in_stride[8]; | |||||
| in_stride[7] = 1; | |||||
| for (int i = 6; i >= 0; --i) { | |||||
| in_stride[i] = param->shape_[i + 1] * in_stride[i + 1]; | |||||
| } | } | ||||
| return 0; | |||||
| } | |||||
| int SliceInt8(const int8_t *input, int8_t *output, SliceParameter *param, int thread_id) { | |||||
| double input_scale = param->quant_arg_.in_args_.scale_; | |||||
| int input_zp = param->quant_arg_.in_args_.zp_; | |||||
| double output_scale = param->quant_arg_.out_args_.scale_; | |||||
| int output_zp = param->quant_arg_.out_args_.zp_; | |||||
| int act_min = param->quant_arg_.output_activation_min_; | |||||
| int act_max = param->quant_arg_.output_activation_max_; | |||||
| int32_t out_dim1 = param->size_[1]; | |||||
| int32_t out_dim2 = param->size_[2]; | |||||
| int32_t out_dim3 = param->size_[3]; | |||||
| int out_stride2 = out_dim3; | |||||
| int out_stride1 = out_stride2 * out_dim2; | |||||
| int out_stride0 = out_stride1 * out_dim1; | |||||
| int count_per_thread = UP_DIV(out_dim1, param->op_parameter_.thread_num_); | |||||
| int thread_stride = thread_id * count_per_thread; | |||||
| int unit_size = param->size_[3] * sizeof(int8_t); | |||||
| int in_stride2 = param->shape_[3]; | |||||
| int in_stride1 = param->shape_[2] * in_stride2; | |||||
| int in_stride0 = param->shape_[1] * in_stride1; | |||||
| int n, h, w, c; | |||||
| int i, j, k, l, n, h, w, c; | |||||
| int equal_quant = 0; | int equal_quant = 0; | ||||
| double multiplier = input_scale / output_scale; | |||||
| if (input_scale == output_scale && input_zp == output_zp) { | if (input_scale == output_scale && input_zp == output_zp) { | ||||
| equal_quant = 1; | equal_quant = 1; | ||||
| } | } | ||||
| for (n = 0; n < param->size_[0]; ++n) { | |||||
| size_t out_offset0 = n * out_stride0; | |||||
| size_t in_offset0 = (n + param->begin_[0]) * in_stride0 + param->begin_[3]; | |||||
| for (h = 0; h < count_per_thread; ++h) { | |||||
| size_t k = h + thread_stride; | |||||
| if (k >= out_dim1) { | |||||
| break; | |||||
| } | |||||
| size_t out_offset1 = k * out_stride1 + out_offset0; | |||||
| size_t in_offset1 = (k + param->begin_[1]) * in_stride1 + in_offset0; | |||||
| for (w = 0; w < out_dim2; ++w) { | |||||
| size_t out_offset = out_offset1 + w * out_stride2; | |||||
| size_t in_offset = in_offset1 + (w + param->begin_[2]) * in_stride2; | |||||
| if (equal_quant == 1) { | |||||
| memcpy(output + out_offset, input + in_offset, unit_size); | |||||
| } else { | |||||
| for (c = 0; c < out_dim3; ++c) { | |||||
| int32_t output_val = round(multiplier * (input[in_offset + c] - input_zp)) + output_zp; | |||||
| output[c + out_offset] = (int8_t)MSMAX(act_min, MSMIN(output_val, act_max)); | |||||
| for (i = 0; i < param->size_[0]; ++i) { | |||||
| size_t out_offset0 = i * out_stride[0]; | |||||
| size_t in_offset0 = (i + param->begin_[0]) * in_stride[0] + param->begin_[7]; | |||||
| for (j = 0; j < param->size_[1]; ++j) { | |||||
| size_t out_offset1 = j * out_stride[1] + out_offset0; | |||||
| size_t in_offset1 = (j + param->begin_[1]) * in_stride[1] + in_offset0; | |||||
| for (k = 0; k < param->size_[2]; ++k) { | |||||
| size_t out_offset2 = k * out_stride[2] + out_offset1; | |||||
| size_t in_offset2 = (k + param->begin_[2]) * in_stride[2] + in_offset1; | |||||
| for (l = 0; l < param->size_[3]; ++l) { | |||||
| size_t out_offset3 = l * out_stride[3] + out_offset2; | |||||
| size_t in_offset3 = (l + param->begin_[3]) * in_stride[3] + in_offset2; | |||||
| for (n = 0; n < param->size_[4]; ++n) { | |||||
| size_t out_offset4 = n * out_stride[4] + out_offset3; | |||||
| size_t in_offset4 = (n + param->begin_[4]) * in_stride[4] + in_offset3; | |||||
| for (h = thread_begin; h < thread_end; ++h) { | |||||
| size_t out_offset5 = h * out_stride[5] + out_offset4; | |||||
| size_t in_offset5 = (h + param->begin_[5]) * in_stride[5] + in_offset4; | |||||
| for (w = 0; w < param->size_[6]; ++w) { | |||||
| size_t out_offset = w * out_stride[6] + out_offset5; | |||||
| size_t in_offset = (w + param->begin_[6]) * in_stride[6] + in_offset5; | |||||
| if (equal_quant == 1) { | |||||
| memcpy(output + out_offset, input + in_offset, unit_size); | |||||
| } else { | |||||
| for (c = 0; c < param->size_[7]; ++c) { | |||||
| int32_t output_val = MultiplyByQuantizedMultiplier( | |||||
| input[in_offset + c] - input_zp, param->quant_arg_.multiplier_.multiplier_, | |||||
| param->quant_arg_.multiplier_.left_shift_ + base_offset, | |||||
| param->quant_arg_.multiplier_.right_shift_ - base_offset) + | |||||
| output_zp; | |||||
| output_val = MSMAX(INT8_MIN, MSMIN(output_val, INT8_MAX)); | |||||
| output[c + out_offset] = (int8_t)MSMAX(act_min, MSMIN(output_val, act_max)); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -25,7 +25,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int SliceInt8NoParallel(const int8_t *input, int8_t *output, SliceParameter *param); | |||||
| int SliceInt8NoParallel(const int8_t *input, int8_t *output, SliceParameter *param, const QuantMulArg *multiplier); | |||||
| int SliceInt8(const int8_t *input, int8_t *output, SliceParameter *param, int thread_id); | int SliceInt8(const int8_t *input, int8_t *output, SliceParameter *param, int thread_id); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -49,6 +49,7 @@ | |||||
| #define DIMENSION_4D 4 | #define DIMENSION_4D 4 | ||||
| #define DIMENSION_6D 6 | #define DIMENSION_6D 6 | ||||
| #define DIMENSION_7D 7 | #define DIMENSION_7D 7 | ||||
| #define DIMENSION_8D 8 | |||||
| #define kInputIndex 0 | #define kInputIndex 0 | ||||
| #define kWeightIndex 1 | #define kWeightIndex 1 | ||||
| #define kBiasIndex 2 | #define kBiasIndex 2 | ||||
| @@ -24,6 +24,7 @@ typedef struct SliceQuantArg { | |||||
| QuantArg out_args_; | QuantArg out_args_; | ||||
| int output_activation_min_; | int output_activation_min_; | ||||
| int output_activation_max_; | int output_activation_max_; | ||||
| QuantMulArg multiplier_; | |||||
| } SliceQuantArg; | } SliceQuantArg; | ||||
| typedef struct SliceParameter { | typedef struct SliceParameter { | ||||
| @@ -31,11 +32,11 @@ typedef struct SliceParameter { | |||||
| OpParameter op_parameter_; | OpParameter op_parameter_; | ||||
| // shape correlative | // shape correlative | ||||
| int32_t shape_[COMM_SHAPE_SIZE]; | |||||
| int32_t begin_[COMM_SHAPE_SIZE]; | |||||
| int32_t end_[COMM_SHAPE_SIZE]; | |||||
| int32_t size_[COMM_SHAPE_SIZE]; | |||||
| int32_t axis_[COMM_SHAPE_SIZE]; | |||||
| int32_t shape_[DIMENSION_8D]; | |||||
| int32_t begin_[DIMENSION_8D]; | |||||
| int32_t end_[DIMENSION_8D]; | |||||
| int32_t size_[DIMENSION_8D]; | |||||
| int32_t axis_[DIMENSION_8D]; | |||||
| // other parameter | // other parameter | ||||
| SliceQuantArg quant_arg_; | SliceQuantArg quant_arg_; | ||||
| @@ -40,14 +40,14 @@ int SliceCPUKernel::ReSize() { | |||||
| MS_ASSERT(in_tensor->shape().size() == begin_tensor->ElementsNum()); | MS_ASSERT(in_tensor->shape().size() == begin_tensor->ElementsNum()); | ||||
| MS_ASSERT(in_tensor->shape().size() == size_tensor->ElementsNum()); | MS_ASSERT(in_tensor->shape().size() == size_tensor->ElementsNum()); | ||||
| MS_ASSERT(in_tensor->shape().size() <= DIMENSION_4D); | |||||
| MS_ASSERT(in_tensor->shape().size() <= DIMENSION_8D); | |||||
| auto begin = reinterpret_cast<int32_t *>(begin_tensor->data_c()); | auto begin = reinterpret_cast<int32_t *>(begin_tensor->data_c()); | ||||
| auto size = reinterpret_cast<int32_t *>(size_tensor->data_c()); | auto size = reinterpret_cast<int32_t *>(size_tensor->data_c()); | ||||
| param_->param_length_ = in_tensor->shape().size(); | param_->param_length_ = in_tensor->shape().size(); | ||||
| if (param_->param_length_ > DIMENSION_4D) { | |||||
| MS_LOG(ERROR) << "input dimension num should <= " << DIMENSION_4D; | |||||
| if (param_->param_length_ > DIMENSION_8D) { | |||||
| MS_LOG(ERROR) << "input dimension num should <= " << DIMENSION_8D; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| for (int i = 0; i < param_->param_length_; ++i) { | for (int i = 0; i < param_->param_length_; ++i) { | ||||
| @@ -56,8 +56,8 @@ int SliceCPUKernel::ReSize() { | |||||
| param_->size_[i] = size[i] < 0 ? param_->shape_[i] - param_->begin_[i] : size[i]; | param_->size_[i] = size[i] < 0 ? param_->shape_[i] - param_->begin_[i] : size[i]; | ||||
| param_->end_[i] = param_->begin_[i] + param_->size_[i]; | param_->end_[i] = param_->begin_[i] + param_->size_[i]; | ||||
| } | } | ||||
| if (param_->param_length_ < DIMENSION_4D) { | |||||
| PadSliceParameterTo4D(param_); | |||||
| if (param_->param_length_ < DIMENSION_8D) { | |||||
| PadSliceParameterTo8D(param_); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -76,7 +76,8 @@ int SliceCPUKernel::SliceParallelRun(int thread_id) { | |||||
| } | } | ||||
| int SliceCPUKernel::Run() { | int SliceCPUKernel::Run() { | ||||
| if (param_->size_[1] < op_parameter_->thread_num_) { | |||||
| // param_ shape info has already been extended to 8d | |||||
| if (param_->size_[5] < op_parameter_->thread_num_) { | |||||
| DoSliceNoParallel(in_tensors_.at(0)->data_c(), out_tensors_.at(0)->data_c(), param_, | DoSliceNoParallel(in_tensors_.at(0)->data_c(), out_tensors_.at(0)->data_c(), param_, | ||||
| lite::DataTypeSize(in_tensors_.at(0)->data_type())); | lite::DataTypeSize(in_tensors_.at(0)->data_type())); | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -42,6 +42,11 @@ int SliceInt8CPUKernel::Init() { | |||||
| param_->quant_arg_.out_args_.scale_ = out_quant_args.front().scale; | param_->quant_arg_.out_args_.scale_ = out_quant_args.front().scale; | ||||
| param_->quant_arg_.out_args_.zp_ = out_quant_args.front().zeroPoint; | param_->quant_arg_.out_args_.zp_ = out_quant_args.front().zeroPoint; | ||||
| QuantizeRoundParameterWithDoublePrecision(param_->quant_arg_.in_args_.scale_ / param_->quant_arg_.out_args_.scale_, | |||||
| &param_->quant_arg_.multiplier_.multiplier_, | |||||
| &param_->quant_arg_.multiplier_.left_shift_, | |||||
| &param_->quant_arg_.multiplier_.right_shift_); | |||||
| param_->quant_arg_.output_activation_max_ = std::numeric_limits<int8_t>::max(); | param_->quant_arg_.output_activation_max_ = std::numeric_limits<int8_t>::max(); | ||||
| param_->quant_arg_.output_activation_min_ = std::numeric_limits<int8_t>::min(); | param_->quant_arg_.output_activation_min_ = std::numeric_limits<int8_t>::min(); | ||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| @@ -51,9 +56,9 @@ int SliceInt8CPUKernel::Init() { | |||||
| } | } | ||||
| int SliceInt8CPUKernel::DoSlice(int task_id) { | int SliceInt8CPUKernel::DoSlice(int task_id) { | ||||
| const int8_t *input_data = reinterpret_cast<const int8_t *>(in_tensors_.at(0)->MutableData()); | |||||
| const int8_t *input_data = reinterpret_cast<const int8_t *>(in_tensors_.at(0)->data_c()); | |||||
| MS_ASSERT(input_data); | MS_ASSERT(input_data); | ||||
| int8_t *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData()); | |||||
| int8_t *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->data_c()); | |||||
| MS_ASSERT(output_data); | MS_ASSERT(output_data); | ||||
| auto ret = SliceInt8(input_data, output_data, param_, task_id); | auto ret = SliceInt8(input_data, output_data, param_, task_id); | ||||
| @@ -73,17 +78,9 @@ int SliceInt8Run(void *cdata, int task_id) { | |||||
| } | } | ||||
| int SliceInt8CPUKernel::Run() { | int SliceInt8CPUKernel::Run() { | ||||
| const int8_t *input_data = reinterpret_cast<const int8_t *>(in_tensors_.at(0)->MutableData()); | |||||
| MS_ASSERT(input_data); | |||||
| int8_t *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData()); | |||||
| MS_ASSERT(output_data); | |||||
| mindspore::lite::STATUS ret = RET_ERROR; | |||||
| if (param_->size_[1] < param_->op_parameter_.thread_num_) { | |||||
| ret = SliceInt8NoParallel(input_data, output_data, param_); | |||||
| } else { | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceInt8Run, this, | |||||
| op_parameter_->thread_num_); | |||||
| } | |||||
| // param_ shape info has already been extended to 8d | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceInt8Run, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "SliceInt8Run error, error_code[" << ret << "]"; | MS_LOG(ERROR) << "SliceInt8Run error, error_code[" << ret << "]"; | ||||
| @@ -30,29 +30,29 @@ class TestSliceInt8 : public mindspore::CommonTest { | |||||
| TEST_F(TestSliceInt8, SliceInt8) { | TEST_F(TestSliceInt8, SliceInt8) { | ||||
| lite::Tensor in_tensor(kNumberTypeInt8, {1, 3, 2, 3}); | lite::Tensor in_tensor(kNumberTypeInt8, {1, 3, 2, 3}); | ||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 2, 3}); | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 2, 2, 3}); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | ||||
| int8_t output_data[12]; | int8_t output_data[12]; | ||||
| in_tensor.set_data(input_data); | in_tensor.set_data(input_data); | ||||
| out_tensor.set_data(output_data); | out_tensor.set_data(output_data); | ||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {4}); | |||||
| int begin_data[4] = {0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {4}); | |||||
| int size_data[4] = {1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | ||||
| const lite::QuantArg quant_out = {0.00784314f, 0}; | const lite::QuantArg quant_out = {0.00784314f, 0}; | ||||
| in_tensor.AddQuantParam(quant_in0); | in_tensor.AddQuantParam(quant_in0); | ||||
| out_tensor.AddQuantParam(quant_out); | out_tensor.AddQuantParam(quant_out); | ||||
| std::vector<lite::Tensor *> inputs = {&in_tensor}; | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | std::vector<lite::Tensor *> outputs = {&out_tensor}; | ||||
| SliceParameter parameter; | |||||
| parameter.begin_[0] = 1; | |||||
| parameter.begin_[1] = 0; | |||||
| parameter.begin_[2] = 0; | |||||
| parameter.size_[0] = -1; | |||||
| parameter.size_[1] = -1; | |||||
| parameter.size_[2] = -1; | |||||
| parameter.param_length_ = 3; | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter; | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | ||||
| @@ -61,7 +61,7 @@ TEST_F(TestSliceInt8, SliceInt8) { | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | auto ctx = std::make_shared<lite::InnerContext>(); | ||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | ASSERT_EQ(lite::RET_OK, ctx->Init()); | ||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | ASSERT_NE(kernel, nullptr); | ||||
| auto ret = kernel->Run(); | auto ret = kernel->Run(); | ||||
| EXPECT_EQ(0, ret); | EXPECT_EQ(0, ret); | ||||
| @@ -72,6 +72,373 @@ TEST_F(TestSliceInt8, SliceInt8) { | |||||
| } | } | ||||
| in_tensor.set_data(nullptr); | in_tensor.set_data(nullptr); | ||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | out_tensor.set_data(nullptr); | ||||
| delete kernel; | |||||
| } | |||||
| TEST_F(TestSliceInt8, Slice5D) { | |||||
| lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 3, 2, 3}); | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 2, 2, 3}); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68, | |||||
| 105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| int8_t output_data[12]; | |||||
| in_tensor.set_data(input_data); | |||||
| out_tensor.set_data(output_data); | |||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {5}); | |||||
| int begin_data[5] = {0, 0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {5}); | |||||
| int size_data[5] = {1, 1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||||
| const lite::QuantArg quant_out = {0.00784314f, 0}; | |||||
| in_tensor.AddQuantParam(quant_in0); | |||||
| out_tensor.AddQuantParam(quant_out); | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter; | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto ret = kernel->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| int8_t expect0[12] = {16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| for (int i = 0; i < 12; ++i) { | |||||
| EXPECT_EQ(output_data[i], expect0[i]); | |||||
| } | |||||
| in_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | |||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| delete kernel; | |||||
| } | |||||
| // Slices a 6-D int8 tensor of shape {2, 1, 1, 3, 2, 3} with begin {0, 0, 0, 1, 0, 0} and | |||||
| // size {1, 1, 1, 2, 2, 3}. Input and output carry identical quant args, and the 12 expected | |||||
| // values are exactly input_data[6..17]. | |||||
| TEST_F(TestSliceInt8, Slice6D) { | |||||
| // Allocate the op parameter before any tensor is bound to a stack buffer and verify it: | |||||
| // new (std::nothrow) returns nullptr on failure and the pointer is dereferenced right below. | |||||
| // Value-initialisation zeroes every field this test does not set explicitly. | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter(); | |||||
| ASSERT_NE(parameter, nullptr); | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 1, 3, 2, 3}); | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 1, 2, 2, 3}); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68, | |||||
| 105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| int8_t output_data[12]; | |||||
| in_tensor.set_data(input_data); | |||||
| out_tensor.set_data(output_data); | |||||
| // begin/size are passed to the kernel as int32 input tensors, one entry per dimension. | |||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {6}); | |||||
| int begin_data[6] = {0, 0, 0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {6}); | |||||
| int size_data[6] = {1, 1, 1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||||
| const lite::QuantArg quant_out = {0.00784314f, 0}; | |||||
| in_tensor.AddQuantParam(quant_in0); | |||||
| out_tensor.AddQuantParam(quant_out); | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto ret = kernel->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| int8_t expect0[12] = {16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| for (int i = 0; i < 12; ++i) { | |||||
| EXPECT_EQ(output_data[i], expect0[i]); | |||||
| } | |||||
| // Detach the stack buffers before the tensors go out of scope. | |||||
| // NOTE(review): presumably the tensor destructor would otherwise try to release them - confirm. | |||||
| in_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | |||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| delete kernel; | |||||
| } | |||||
| // Slices a 7-D int8 tensor of shape {2, 1, 1, 1, 3, 2, 3} with begin {0, 0, 0, 0, 1, 0, 0} | |||||
| // and size {1, 1, 1, 1, 2, 2, 3}. Input and output carry identical quant args, and the 12 | |||||
| // expected values are exactly input_data[6..17]. | |||||
| TEST_F(TestSliceInt8, Slice7D) { | |||||
| // Allocate the op parameter before any tensor is bound to a stack buffer and verify it: | |||||
| // new (std::nothrow) returns nullptr on failure and the pointer is dereferenced right below. | |||||
| // Value-initialisation zeroes every field this test does not set explicitly. | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter(); | |||||
| ASSERT_NE(parameter, nullptr); | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 1, 1, 3, 2, 3}); | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 1, 1, 2, 2, 3}); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68, | |||||
| 105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| int8_t output_data[12]; | |||||
| in_tensor.set_data(input_data); | |||||
| out_tensor.set_data(output_data); | |||||
| // begin/size are passed to the kernel as int32 input tensors, one entry per dimension. | |||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {7}); | |||||
| int begin_data[7] = {0, 0, 0, 0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {7}); | |||||
| int size_data[7] = {1, 1, 1, 1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||||
| const lite::QuantArg quant_out = {0.00784314f, 0}; | |||||
| in_tensor.AddQuantParam(quant_in0); | |||||
| out_tensor.AddQuantParam(quant_out); | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto ret = kernel->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| int8_t expect0[12] = {16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| for (int i = 0; i < 12; ++i) { | |||||
| EXPECT_EQ(output_data[i], expect0[i]); | |||||
| } | |||||
| // Detach the stack buffers before the tensors go out of scope. | |||||
| // NOTE(review): presumably the tensor destructor would otherwise try to release them - confirm. | |||||
| in_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | |||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| delete kernel; | |||||
| } | |||||
| // Slices an 8-D int8 tensor of shape {2, 1, 1, 1, 1, 3, 2, 3} with begin | |||||
| // {1, 0, 0, 0, 0, 1, 0, 0} and size {1, 1, 1, 1, 1, 2, 2, 3}. Input and output carry | |||||
| // identical quant args, and the 12 expected values are exactly input_data[24..35]. | |||||
| TEST_F(TestSliceInt8, Slice8D) { | |||||
| // Allocate the op parameter before any tensor is bound to a stack buffer and verify it: | |||||
| // new (std::nothrow) returns nullptr on failure and the pointer is dereferenced right below. | |||||
| // Value-initialisation zeroes every field this test does not set explicitly. | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter(); | |||||
| ASSERT_NE(parameter, nullptr); | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 1, 1, 1, 3, 2, 3}); | |||||
| // The output shape mirrors size_data (1*1*1*1*1*2*2*3 = 12 elements), so the declared | |||||
| // tensor agrees with output_data and expect0. | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 1, 1, 1, 2, 2, 3}); | |||||
| // begin/size are passed to the kernel as int32 input tensors, one entry per dimension. | |||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {8}); | |||||
| int begin_data[8] = {1, 0, 0, 0, 0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {8}); | |||||
| int size_data[8] = {1, 1, 1, 1, 1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68, | |||||
| 105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| int8_t output_data[12]; | |||||
| in_tensor.set_data(input_data); | |||||
| out_tensor.set_data(output_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||||
| const lite::QuantArg quant_out = {0.00784314f, 0}; | |||||
| in_tensor.AddQuantParam(quant_in0); | |||||
| out_tensor.AddQuantParam(quant_out); | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto ret = kernel->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| int8_t expect0[12] = {16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| for (int i = 0; i < 12; ++i) { | |||||
| EXPECT_EQ(output_data[i], expect0[i]); | |||||
| } | |||||
| // Detach the stack buffers before the tensors go out of scope. | |||||
| // NOTE(review): presumably the tensor destructor would otherwise try to release them - confirm. | |||||
| in_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | |||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| delete kernel; | |||||
| } | |||||
| // Same 8-D slice as Slice8D, but the output tensor's first quant field (0.01568628f) is twice | |||||
| // the input's (0.00784314f). expect0 holds the selected input values divided by two, with | |||||
| // halves rounded away from zero (45 -> 23, -49 -> -25). | |||||
| TEST_F(TestSliceInt8, SliceDiffQuantArgs) { | |||||
| // Allocate the op parameter before any tensor is bound to a stack buffer and verify it: | |||||
| // new (std::nothrow) returns nullptr on failure and the pointer is dereferenced right below. | |||||
| // Value-initialisation zeroes every field this test does not set explicitly. | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter(); | |||||
| ASSERT_NE(parameter, nullptr); | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 1, 1, 1, 3, 2, 3}); | |||||
| // The output shape mirrors size_data (1*1*1*1*1*2*2*3 = 12 elements), so the declared | |||||
| // tensor agrees with output_data and expect0. | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 1, 1, 1, 2, 2, 3}); | |||||
| // begin/size are passed to the kernel as int32 input tensors, one entry per dimension. | |||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {8}); | |||||
| int begin_data[8] = {1, 0, 0, 0, 0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {8}); | |||||
| int size_data[8] = {1, 1, 1, 1, 1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68, | |||||
| 105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| int8_t output_data[12]; | |||||
| in_tensor.set_data(input_data); | |||||
| out_tensor.set_data(output_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||||
| const lite::QuantArg quant_out = {0.01568628f, 0}; | |||||
| in_tensor.AddQuantParam(quant_in0); | |||||
| out_tensor.AddQuantParam(quant_out); | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto ret = kernel->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| int8_t expect0[12] = {8, 23, 34, -25, -58, 53, -49, 60, 52, 41, -57, 34}; | |||||
| for (int i = 0; i < 12; ++i) { | |||||
| EXPECT_EQ(output_data[i], expect0[i]); | |||||
| } | |||||
| // Detach the stack buffers before the tensors go out of scope. | |||||
| // NOTE(review): presumably the tensor destructor would otherwise try to release them - confirm. | |||||
| in_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | |||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| delete kernel; | |||||
| } | |||||
| // Same 8-D slice and quant args as Slice8D, run with the context's thread_num_ set to 1 | |||||
| // before the context is initialised. The expected values are input_data[24..35]. | |||||
| TEST_F(TestSliceInt8, SliceSingleThread) { | |||||
| // Allocate the op parameter before any tensor is bound to a stack buffer and verify it: | |||||
| // new (std::nothrow) returns nullptr on failure and the pointer is dereferenced right below. | |||||
| // Value-initialisation zeroes every field this test does not set explicitly. | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter(); | |||||
| ASSERT_NE(parameter, nullptr); | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 1, 1, 1, 3, 2, 3}); | |||||
| // The output shape mirrors size_data (1*1*1*1*1*2*2*3 = 12 elements), so the declared | |||||
| // tensor agrees with output_data and expect0. | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 1, 1, 1, 2, 2, 3}); | |||||
| // begin/size are passed to the kernel as int32 input tensors, one entry per dimension. | |||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {8}); | |||||
| int begin_data[8] = {1, 0, 0, 0, 0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {8}); | |||||
| int size_data[8] = {1, 1, 1, 1, 1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68, | |||||
| 105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| int8_t output_data[12]; | |||||
| in_tensor.set_data(input_data); | |||||
| out_tensor.set_data(output_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||||
| const lite::QuantArg quant_out = {0.00784314f, 0}; | |||||
| in_tensor.AddQuantParam(quant_in0); | |||||
| out_tensor.AddQuantParam(quant_out); | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||||
| // The thread count is configured before Init() is called on the context. | |||||
| ctx->thread_num_ = 1; | |||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto ret = kernel->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| int8_t expect0[12] = {16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| for (int i = 0; i < 12; ++i) { | |||||
| EXPECT_EQ(output_data[i], expect0[i]); | |||||
| } | |||||
| // Detach the stack buffers before the tensors go out of scope. | |||||
| // NOTE(review): presumably the tensor destructor would otherwise try to release them - confirm. | |||||
| in_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | |||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| delete kernel; | |||||
| } | |||||
| // Same 8-D slice and quant args as Slice8D, run with the context's thread_num_ set to 4 | |||||
| // before the context is initialised. The expected values are input_data[24..35]. | |||||
| TEST_F(TestSliceInt8, Slice4Thread) { | |||||
| // Allocate the op parameter before any tensor is bound to a stack buffer and verify it: | |||||
| // new (std::nothrow) returns nullptr on failure and the pointer is dereferenced right below. | |||||
| // Value-initialisation zeroes every field this test does not set explicitly. | |||||
| SliceParameter *parameter = new (std::nothrow) SliceParameter(); | |||||
| ASSERT_NE(parameter, nullptr); | |||||
| parameter->op_parameter_.infer_flag_ = true; | |||||
| lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 1, 1, 1, 3, 2, 3}); | |||||
| // The output shape mirrors size_data (1*1*1*1*1*2*2*3 = 12 elements), so the declared | |||||
| // tensor agrees with output_data and expect0. | |||||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 1, 1, 1, 1, 2, 2, 3}); | |||||
| // begin/size are passed to the kernel as int32 input tensors, one entry per dimension. | |||||
| lite::Tensor begin_tensor(kNumberTypeInt32, {8}); | |||||
| int begin_data[8] = {1, 0, 0, 0, 0, 1, 0, 0}; | |||||
| begin_tensor.set_data(begin_data); | |||||
| lite::Tensor size_tensor(kNumberTypeInt32, {8}); | |||||
| int size_data[8] = {1, 1, 1, 1, 1, 2, 2, 3}; | |||||
| size_tensor.set_data(size_data); | |||||
| int8_t input_data[] = {105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68, | |||||
| 105, 35, -27, 0, -63, 99, 16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| int8_t output_data[12]; | |||||
| in_tensor.set_data(input_data); | |||||
| out_tensor.set_data(output_data); | |||||
| const lite::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||||
| const lite::QuantArg quant_out = {0.00784314f, 0}; | |||||
| in_tensor.AddQuantParam(quant_in0); | |||||
| out_tensor.AddQuantParam(quant_out); | |||||
| std::vector<lite::Tensor *> inputs = {&in_tensor, &begin_tensor, &size_tensor}; | |||||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SliceFusion}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||||
| // The thread count is configured before Init() is called on the context. | |||||
| ctx->thread_num_ = 4; | |||||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto ret = kernel->Run(); | |||||
| EXPECT_EQ(0, ret); | |||||
| int8_t expect0[12] = {16, 45, 67, -49, -115, 106, -98, 119, 103, 81, -114, 68}; | |||||
| for (int i = 0; i < 12; ++i) { | |||||
| EXPECT_EQ(output_data[i], expect0[i]); | |||||
| } | |||||
| // Detach the stack buffers before the tensors go out of scope. | |||||
| // NOTE(review): presumably the tensor destructor would otherwise try to release them - confirm. | |||||
| in_tensor.set_data(nullptr); | |||||
| out_tensor.set_data(nullptr); | |||||
| begin_tensor.set_data(nullptr); | |||||
| size_tensor.set_data(nullptr); | |||||
| delete kernel; | |||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||