From: @yangruoqi713 Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tongpull/15716/MERGE
| @@ -28,19 +28,19 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i | |||||
| } | } | ||||
| void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, | void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, | ||||
| MulQuantArg para, int *index) { | |||||
| int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_); | |||||
| int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_); | |||||
| int32x4_t right_shift_out_vec = vdupq_n_s32(-para.shift_right_); | |||||
| int16x8_t out_zp_vec = vdupq_n_s16(para.out_quant_arg_.zp_); | |||||
| int8x16_t out_min_vec = vdupq_n_s8(para.output_activation_min_); | |||||
| int8x16_t out_max_vec = vdupq_n_s8(para.output_activation_max_); | |||||
| int8x8_t out_min_vec_s8 = vdup_n_s8(para.output_activation_min_); | |||||
| int8x8_t out_max_vec_s8 = vdup_n_s8(para.output_activation_max_); | |||||
| MulQuantArg *quant_arg, int *index) { | |||||
| int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); | |||||
| int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); | |||||
| int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); | |||||
| int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); | |||||
| int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_); | |||||
| int8x16_t out_max_vec = vdupq_n_s8(quant_arg->output_activation_max_); | |||||
| int8x8_t out_min_vec_s8 = vdup_n_s8(quant_arg->output_activation_min_); | |||||
| int8x8_t out_max_vec_s8 = vdup_n_s8(quant_arg->output_activation_max_); | |||||
| for (; (*index) <= real_dst_count - 16; (*index) += 16) { | for (; (*index) <= real_dst_count - 16; (*index) += 16) { | ||||
| int16x8_t zp1_vec = vdupq_n_s16(para.in_quant_args_[0].zp_); | |||||
| int16x8_t zp2_vec = vdupq_n_s16(para.in_quant_args_[1].zp_); | |||||
| int16x8_t zp1_vec = vdupq_n_s16(quant_arg->in_quant_args_[0].zp_); | |||||
| int16x8_t zp2_vec = vdupq_n_s16(quant_arg->in_quant_args_[1].zp_); | |||||
| int8x16_t input0_vec = vld1q_s8(input0_data + *index); | int8x16_t input0_vec = vld1q_s8(input0_data + *index); | ||||
| int8x16_t input1_vec = vld1q_s8(input1_data + *index); | int8x16_t input1_vec = vld1q_s8(input1_data + *index); | ||||
| int16x8_t input0_low = vmovl_s8(vget_low_s8(input0_vec)); | int16x8_t input0_low = vmovl_s8(vget_low_s8(input0_vec)); | ||||
| @@ -81,8 +81,8 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, | |||||
| output_data += 16; | output_data += 16; | ||||
| } | } | ||||
| for (; (*index) <= real_dst_count - 8; (*index) += 8) { | for (; (*index) <= real_dst_count - 8; (*index) += 8) { | ||||
| int16x8_t input0_val = LoadAndAddOffset(input0_data, *index, para.in_quant_args_[0].zp_); | |||||
| int16x8_t input1_val = LoadAndAddOffset(input1_data, *index, para.in_quant_args_[1].zp_); | |||||
| int16x8_t input0_val = LoadAndAddOffset(input0_data, *index, quant_arg->in_quant_args_[0].zp_); | |||||
| int16x8_t input1_val = LoadAndAddOffset(input1_data, *index, quant_arg->in_quant_args_[1].zp_); | |||||
| int16x4_t input0_low = vget_low_s16(input0_val); | int16x4_t input0_low = vget_low_s16(input0_val); | ||||
| int16x4_t input0_high = vget_high_s16(input0_val); | int16x4_t input0_high = vget_high_s16(input0_val); | ||||
| @@ -105,23 +105,23 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, | |||||
| #endif | #endif | ||||
| void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, | void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, | ||||
| bool input1_broad, MulQuantArg para) { | |||||
| bool input1_broad, MulQuantArg *quant_arg) { | |||||
| // input0 need broadcast | // input0 need broadcast | ||||
| int32_t zp1 = para.in_quant_args_[0].zp_; | |||||
| int32_t zp2 = para.in_quant_args_[1].zp_; | |||||
| int32_t zp1 = quant_arg->in_quant_args_[0].zp_; | |||||
| int32_t zp2 = quant_arg->in_quant_args_[1].zp_; | |||||
| if (input1_broad) { | if (input1_broad) { | ||||
| zp1 = para.in_quant_args_[1].zp_; | |||||
| zp2 = para.in_quant_args_[0].zp_; | |||||
| zp1 = quant_arg->in_quant_args_[1].zp_; | |||||
| zp2 = quant_arg->in_quant_args_[0].zp_; | |||||
| } | } | ||||
| #ifdef ENABLE_ARM | #ifdef ENABLE_ARM | ||||
| int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_); | |||||
| int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_); | |||||
| int32x4_t right_shift_out_vec = vdupq_n_s32(-para.shift_right_); | |||||
| int16x8_t out_zp_vec = vdupq_n_s16(para.out_quant_arg_.zp_); | |||||
| int8x16_t out_min_vec = vdupq_n_s8(para.output_activation_min_); | |||||
| int8x16_t out_max_vec = vdupq_n_s8(para.output_activation_max_); | |||||
| int8x8_t out_min_vec_s8 = vdup_n_s8(para.output_activation_min_); | |||||
| int8x8_t out_max_vec_s8 = vdup_n_s8(para.output_activation_max_); | |||||
| int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); | |||||
| int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); | |||||
| int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); | |||||
| int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); | |||||
| int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_); | |||||
| int8x16_t out_max_vec = vdupq_n_s8(quant_arg->output_activation_max_); | |||||
| int8x8_t out_min_vec_s8 = vdup_n_s8(quant_arg->output_activation_min_); | |||||
| int8x8_t out_max_vec_s8 = vdup_n_s8(quant_arg->output_activation_max_); | |||||
| int16x8_t zp1_vec = vdupq_n_s16(zp1); | int16x8_t zp1_vec = vdupq_n_s16(zp1); | ||||
| int16x8_t zp2_vec = vdupq_n_s16(zp2); | int16x8_t zp2_vec = vdupq_n_s16(zp2); | ||||
| #endif | #endif | ||||
| @@ -199,13 +199,14 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int | |||||
| for (; j < depth; ++j) { | for (; j < depth; ++j) { | ||||
| const int32_t input0_val = zp1 + input0_data[j]; | const int32_t input0_val = zp1 + input0_data[j]; | ||||
| const int32_t input1_val = zp2 + input1_data[0]; | const int32_t input1_val = zp2 + input1_data[0]; | ||||
| int32_t mul_result = RoundingDivideByPOT( | |||||
| SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << para.shift_left_), para.output_multiplier_), | |||||
| para.shift_right_); | |||||
| mul_result += para.out_quant_arg_.zp_; | |||||
| mul_result = mul_result < para.output_activation_max_ ? mul_result : para.output_activation_max_; | |||||
| mul_result = mul_result > para.output_activation_min_ ? mul_result : para.output_activation_min_; | |||||
| int32_t mul_result = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_), | |||||
| quant_arg->output_multiplier_), | |||||
| quant_arg->shift_right_); | |||||
| mul_result += quant_arg->out_quant_arg_.zp_; | |||||
| mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_; | |||||
| mul_result = mul_result > quant_arg->output_activation_min_ ? mul_result : quant_arg->output_activation_min_; | |||||
| output_data[0] = (int8_t)mul_result; | output_data[0] = (int8_t)mul_result; | ||||
| input1_data++; | input1_data++; | ||||
| output_data++; | output_data++; | ||||
| @@ -214,21 +215,23 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int | |||||
| return; | return; | ||||
| } | } | ||||
| void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg para) { | |||||
| void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, | |||||
| MulQuantArg *quant_arg) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, para, &index); | |||||
| MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index); | |||||
| #endif | #endif | ||||
| for (; index < real_dst_count; ++index) { | for (; index < real_dst_count; ++index) { | ||||
| const int32_t input0_val = para.in_quant_args_[0].zp_ + input0_data[index]; | |||||
| const int32_t input1_val = para.in_quant_args_[1].zp_ + input1_data[index]; | |||||
| int32_t mul_result = RoundingDivideByPOT( | |||||
| SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << para.shift_left_), para.output_multiplier_), | |||||
| para.shift_right_); | |||||
| mul_result += para.out_quant_arg_.zp_; | |||||
| mul_result = mul_result < para.output_activation_max_ ? mul_result : para.output_activation_max_; | |||||
| mul_result = mul_result > para.output_activation_min_ ? mul_result : para.output_activation_min_; | |||||
| const int32_t input0_val = quant_arg->in_quant_args_[0].zp_ + input0_data[index]; | |||||
| const int32_t input1_val = quant_arg->in_quant_args_[1].zp_ + input1_data[index]; | |||||
| int32_t mul_result = | |||||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_), | |||||
| quant_arg->output_multiplier_), | |||||
| quant_arg->shift_right_); | |||||
| mul_result += quant_arg->out_quant_arg_.zp_; | |||||
| mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_; | |||||
| mul_result = mul_result > quant_arg->output_activation_min_ ? mul_result : quant_arg->output_activation_min_; | |||||
| output_data[index] = (int8_t)mul_result; | output_data[index] = (int8_t)mul_result; | ||||
| } | } | ||||
| return; | return; | ||||
| @@ -28,9 +28,9 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg para); | |||||
| void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg *quant_arg); | |||||
| void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, | void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, | ||||
| bool input1_broad, MulQuantArg para); | |||||
| bool input1_broad, MulQuantArg *quant_arg); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -17,7 +17,7 @@ | |||||
| #include "nnacl/int8/softmax_int8.h" | #include "nnacl/int8/softmax_int8.h" | ||||
| int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, | int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, | ||||
| SoftmaxQuantArg quant_param, SoftmaxParameter *parameter) { | |||||
| SoftmaxQuantArg *quant_param, SoftmaxParameter *parameter) { | |||||
| int32_t axis = parameter->axis_; | int32_t axis = parameter->axis_; | ||||
| int n_dim = parameter->n_dim_; | int n_dim = parameter->n_dim_; | ||||
| int *input_shape = parameter->input_shape_; | int *input_shape = parameter->input_shape_; | ||||
| @@ -32,7 +32,7 @@ int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp | |||||
| int outter_offset = o * axis_shape_size * inner_size; | int outter_offset = o * axis_shape_size * inner_size; | ||||
| for (int c = 0; c < inner_size; c++) { | for (int c = 0; c < inner_size; c++) { | ||||
| int8_t max_row = quant_param.output_activation_min_; | |||||
| int8_t max_row = quant_param->output_activation_min_; | |||||
| for (int i = 0; i < axis_shape_size; ++i) { | for (int i = 0; i < axis_shape_size; ++i) { | ||||
| int axis_offset = outter_offset + c + i * inner_size; | int axis_offset = outter_offset + c + i * inner_size; | ||||
| max_row = MSMAX(max_row, input_ptr[axis_offset]); | max_row = MSMAX(max_row, input_ptr[axis_offset]); | ||||
| @@ -43,7 +43,7 @@ int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp | |||||
| int axis_offset = outter_offset + c + i * inner_size; | int axis_offset = outter_offset + c + i * inner_size; | ||||
| const int32_t input_val = input_ptr[axis_offset] - max_row; | const int32_t input_val = input_ptr[axis_offset] - max_row; | ||||
| const int32_t input_scaled = SaturatingRoundingDoublingHighMul( | const int32_t input_scaled = SaturatingRoundingDoublingHighMul( | ||||
| input_val * (1 << (unsigned int)quant_param.shift_left_), quant_param.output_multiplier_); | |||||
| input_val * (1 << (unsigned int)quant_param->shift_left_), quant_param->output_multiplier_); | |||||
| int exp_val = exp_on_negative_values(input_scaled, 5); | int exp_val = exp_on_negative_values(input_scaled, 5); | ||||
| exp_data[axis_offset] = exp_val; | exp_data[axis_offset] = exp_val; | ||||
| exp_sum = exp_sum + Rescale(exp_val, 0, 12); | exp_sum = exp_sum + Rescale(exp_val, 0, 12); | ||||
| @@ -58,9 +58,9 @@ int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp | |||||
| int unsat_output = RoundingDivideByPOT( | int unsat_output = RoundingDivideByPOT( | ||||
| SaturatingRoundingDoublingHighMul(shifted_scale, exp_data[axis_offset + c]), num_bits_over_unit + 31 - 8); | SaturatingRoundingDoublingHighMul(shifted_scale, exp_data[axis_offset + c]), num_bits_over_unit + 31 - 8); | ||||
| int raw_output = unsat_output + quant_param.output_activation_min_; | |||||
| int raw_output = unsat_output + quant_param->output_activation_min_; | |||||
| output_ptr[axis_offset + c] = | output_ptr[axis_offset + c] = | ||||
| (int8_t)MSMAX(quant_param.output_activation_min_, MSMIN(raw_output, quant_param.output_activation_max_)); | |||||
| (int8_t)MSMAX(quant_param->output_activation_min_, MSMIN(raw_output, quant_param->output_activation_max_)); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -27,7 +27,7 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, | int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, | ||||
| SoftmaxQuantArg quant_param, SoftmaxParameter *parameter); | |||||
| SoftmaxQuantArg *quant_param, SoftmaxParameter *parameter); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -34,7 +34,6 @@ typedef struct MulParameter { | |||||
| OpParameter op_parameter_; | OpParameter op_parameter_; | ||||
| // other parameter | // other parameter | ||||
| int thread_count_; | int thread_count_; | ||||
| MulQuantArg mul_quant_arg_; | |||||
| } MulParameter; | } MulParameter; | ||||
| #endif // MINDSPORE_NNACL_MUL_PARAMETER_H_ | #endif // MINDSPORE_NNACL_MUL_PARAMETER_H_ | ||||
| @@ -69,7 +69,7 @@ class DefaultAllocator : public Allocator { | |||||
| std::unordered_map<void *, MemBuf *> allocatedList_; | std::unordered_map<void *, MemBuf *> allocatedList_; | ||||
| std::multimap<size_t, MemBuf *> freeList_; | std::multimap<size_t, MemBuf *> freeList_; | ||||
| // 6 is empirical value | // 6 is empirical value | ||||
| int shiftFactor_ = 6; | |||||
| unsigned shiftFactor_ = 6; | |||||
| bool lockFlag_ = true; | bool lockFlag_ = true; | ||||
| }; | }; | ||||
| @@ -42,7 +42,7 @@ class ConstantOfShapeCPUKernel : public LiteKernel { | |||||
| private: | private: | ||||
| ConstantOfShapeParameter *param_ = nullptr; | ConstantOfShapeParameter *param_ = nullptr; | ||||
| void *output_ptr_ = nullptr; | void *output_ptr_ = nullptr; | ||||
| int thread_stride_; | |||||
| int thread_stride_ = 0; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -33,16 +33,22 @@ ConvParameter *CreateNewConvParameter(ConvParameter *parameter) { | |||||
| return conv_parameter; | return conv_parameter; | ||||
| } | } | ||||
| void FreeCurrentConv(ConvParameter *conv_param, const std::vector<lite::Tensor *> *new_inputs, | |||||
| const std::vector<lite::Tensor *> *new_outputs) { | |||||
| void FreeCurrentConv(ConvParameter *conv_param, std::vector<lite::Tensor *> *new_inputs, | |||||
| std::vector<lite::Tensor *> *new_outputs) { | |||||
| if (conv_param != nullptr) { | if (conv_param != nullptr) { | ||||
| free(conv_param); | free(conv_param); | ||||
| } | } | ||||
| for (auto &in_tensor : *new_inputs) { | |||||
| delete in_tensor; | |||||
| if (new_inputs != nullptr) { | |||||
| for (auto &in_tensor : *new_inputs) { | |||||
| delete in_tensor; | |||||
| in_tensor = nullptr; | |||||
| } | |||||
| } | } | ||||
| for (auto &out_tensor : *new_outputs) { | |||||
| delete out_tensor; | |||||
| if (new_outputs != nullptr) { | |||||
| for (auto &out_tensor : *new_outputs) { | |||||
| delete out_tensor; | |||||
| out_tensor = nullptr; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -112,6 +118,7 @@ void GroupConvCreator::FreeGroupConvs() { | |||||
| delete out_tensor; | delete out_tensor; | ||||
| } | } | ||||
| delete sub_conv; | delete sub_conv; | ||||
| sub_conv = nullptr; | |||||
| } | } | ||||
| group_convs_.clear(); | group_convs_.clear(); | ||||
| } | } | ||||
| @@ -37,7 +37,7 @@ class TileCPUKernel : public LiteKernel { | |||||
| int RunSimpleTile(); | int RunSimpleTile(); | ||||
| void ComputeStrides(const int *shape, int *strides, int ndim); | void ComputeStrides(const int *shape, int *strides, int ndim); | ||||
| void FillOneDimTileParam(); | void FillOneDimTileParam(); | ||||
| bool one_dim_tile_; | |||||
| bool one_dim_tile_ = false; | |||||
| uint8_t *input_addr_ = nullptr; | uint8_t *input_addr_ = nullptr; | ||||
| uint8_t *output_addr_ = nullptr; | uint8_t *output_addr_ = nullptr; | ||||
| TileParameter *tile_parameter_ = nullptr; | TileParameter *tile_parameter_ = nullptr; | ||||
| @@ -33,7 +33,7 @@ class GroupConvolutionFP16CPUKernel : public GroupConvolutionBaseCPUKernel { | |||||
| : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { | : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { | ||||
| } // opParameter(in channel, out channel) in this kernel has been split to groups, if | } // opParameter(in channel, out channel) in this kernel has been split to groups, if | ||||
| // you want to get real params, multiply in channel / out channel with group num | // you want to get real params, multiply in channel / out channel with group num | ||||
| ~GroupConvolutionFP16CPUKernel() = default; | |||||
| int SeparateInput(int group_id) override; | int SeparateInput(int group_id) override; | ||||
| int PostConcat(int group_id) override; | int PostConcat(int group_id) override; | ||||
| }; | }; | ||||
| @@ -36,8 +36,8 @@ class BatchToSpaceCPUKernel : public LiteKernel { | |||||
| int Processinput(); | int Processinput(); | ||||
| private: | private: | ||||
| int32_t block_shape_[BATCH_TO_SPACE_BLOCK_SHAPE_SIZE]; | |||||
| int32_t crops_[COMM_SHAPE_SIZE]; | |||||
| int32_t block_shape_[BATCH_TO_SPACE_BLOCK_SHAPE_SIZE] = {0}; | |||||
| int32_t crops_[COMM_SHAPE_SIZE] = {0}; | |||||
| bool no_crop_ = false; | bool no_crop_ = false; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -25,34 +25,44 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_BroadcastTo; | using mindspore::schema::PrimitiveType_BroadcastTo; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| BroadcastToCPUKernel::~BroadcastToCPUKernel() { | |||||
| if (shape_info_ != nullptr) { | |||||
| free(shape_info_); | |||||
| shape_info_ = nullptr; | |||||
| } | |||||
| } | |||||
| int BroadcastToCPUKernel::ReSize() { | int BroadcastToCPUKernel::ReSize() { | ||||
| auto input_shape = in_tensors_.at(0)->shape(); | auto input_shape = in_tensors_.at(0)->shape(); | ||||
| for (size_t i = 0; i < input_shape.size(); ++i) { | for (size_t i = 0; i < input_shape.size(); ++i) { | ||||
| shape_info_.input_shape_[i] = input_shape[i]; | |||||
| shape_info_->input_shape_[i] = input_shape[i]; | |||||
| } | } | ||||
| shape_info_.input_shape_size_ = static_cast<int>(input_shape.size()); | |||||
| shape_info_->input_shape_size_ = static_cast<int>(input_shape.size()); | |||||
| auto output_shape = out_tensors_.at(0)->shape(); | auto output_shape = out_tensors_.at(0)->shape(); | ||||
| for (size_t i = 0; i < output_shape.size(); ++i) { | for (size_t i = 0; i < output_shape.size(); ++i) { | ||||
| shape_info_.output_shape_[i] = output_shape[i]; | |||||
| shape_info_->output_shape_[i] = output_shape[i]; | |||||
| } | } | ||||
| shape_info_.output_shape_size_ = static_cast<int>(output_shape.size()); | |||||
| shape_info_->output_shape_size_ = static_cast<int>(output_shape.size()); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int BroadcastToCPUKernel::Init() { | int BroadcastToCPUKernel::Init() { | ||||
| shape_info_ = reinterpret_cast<BroadcastShapeInfo *>(malloc(sizeof(BroadcastShapeInfo))); | |||||
| if (shape_info_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc BroadcastShapeInfo failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | return ReSize(); | ||||
| } | } | ||||
| int BroadcastToCPUKernel::Run() { | int BroadcastToCPUKernel::Run() { | ||||
| const auto input_data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | const auto input_data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | ||||
| auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | ||||
| return BroadcastTo(float, input_data, &shape_info_, output_data); | |||||
| return BroadcastTo(float, input_data, shape_info_, output_data); | |||||
| } | } | ||||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BroadcastTo, LiteKernelCreator<BroadcastToCPUKernel>) | REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BroadcastTo, LiteKernelCreator<BroadcastToCPUKernel>) | ||||
| @@ -27,14 +27,14 @@ class BroadcastToCPUKernel : public LiteKernel { | |||||
| BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx) {} | : LiteKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~BroadcastToCPUKernel() = default; | |||||
| ~BroadcastToCPUKernel(); | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| BroadcastShapeInfo shape_info_; | |||||
| BroadcastShapeInfo *shape_info_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -168,9 +168,13 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() { | |||||
| kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | ||||
| const InnerContext *ctx) { | const InnerContext *ctx) { | ||||
| if (opParameter == nullptr) { | |||||
| MS_LOG(ERROR) << "Get null opParameter for CpuConvDwFp32KernelCreator."; | |||||
| return nullptr; | |||||
| } | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | ||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| if (opParameter != nullptr && opParameter->infer_flag_) { | |||||
| if (opParameter->infer_flag_) { | |||||
| #if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX)) | #if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX)) | ||||
| if (CheckConvDw1DWinograd(conv_param, ctx->thread_num_)) { | if (CheckConvDw1DWinograd(conv_param, ctx->thread_num_)) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3CPUKernel(opParameter, inputs, outputs, ctx); | kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3CPUKernel(opParameter, inputs, outputs, ctx); | ||||
| @@ -209,9 +213,14 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Tensor | |||||
| group_conv_creator.get_group_conv()->emplace_back(new (std::nothrow) ConvolutionDelegateCPUKernel( | group_conv_creator.get_group_conv()->emplace_back(new (std::nothrow) ConvolutionDelegateCPUKernel( | ||||
| reinterpret_cast<OpParameter *>(new_conv_param), new_inputs, new_outputs, ctx)); | reinterpret_cast<OpParameter *>(new_conv_param), new_inputs, new_outputs, ctx)); | ||||
| } | } | ||||
| return new (std::nothrow) | |||||
| auto group_kernel = new (std::nothrow) | |||||
| GroupConvolutionFp32CPUKernel(op_parameter, inputs, outputs, ctx, *(group_conv_creator.get_group_conv()), | GroupConvolutionFp32CPUKernel(op_parameter, inputs, outputs, ctx, *(group_conv_creator.get_group_conv()), | ||||
| reinterpret_cast<ConvParameter *>(op_parameter)->group_); | reinterpret_cast<ConvParameter *>(op_parameter)->group_); | ||||
| if (group_kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "New GroupConvolutionFp32CPUKernel failed."; | |||||
| return nullptr; | |||||
| } | |||||
| return group_kernel; | |||||
| } | } | ||||
| /* creator func */ | /* creator func */ | ||||
| @@ -38,11 +38,11 @@ namespace mindspore::kernel { | |||||
| int ConvolutionCPUKernel::InitWeightBias() { | int ConvolutionCPUKernel::InitWeightBias() { | ||||
| auto filter_tensor = in_tensors_.at(kWeightIndex); | auto filter_tensor = in_tensors_.at(kWeightIndex); | ||||
| int in_channel = filter_tensor->Channel(); | |||||
| int out_channel = filter_tensor->Batch(); | |||||
| size_t in_channel = filter_tensor->Channel(); | |||||
| size_t out_channel = filter_tensor->Batch(); | |||||
| conv_param_->input_channel_ = in_channel; | conv_param_->input_channel_ = in_channel; | ||||
| conv_param_->output_channel_ = out_channel; | conv_param_->output_channel_ = out_channel; | ||||
| int kernel_plane = filter_tensor->Height() * filter_tensor->Width(); | |||||
| size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width(); | |||||
| int oc_block_num = UP_ROUND(out_channel, OC_BLOCK); | int oc_block_num = UP_ROUND(out_channel, OC_BLOCK); | ||||
| int pack_weight_size = oc_block_num * in_channel * kernel_plane; | int pack_weight_size = oc_block_num * in_channel * kernel_plane; | ||||
| @@ -25,8 +25,8 @@ using mindspore::lite::RET_MEMORY_FAILED; | |||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_data, float *matrix_g, float *matrix_gt, | |||||
| int oc_block) { | |||||
| int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_data, float *matrix_g, | |||||
| const float *matrix_gt, int oc_block) { | |||||
| if (oc_block == 0) { | if (oc_block == 0) { | ||||
| MS_LOG(ERROR) << "Divide by zero"; | MS_LOG(ERROR) << "Divide by zero"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -48,7 +48,7 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int InitWeightBias(); | int InitWeightBias(); | ||||
| int InitTmpBuffer(); | int InitTmpBuffer(); | ||||
| int ConfigInputOutput(); | int ConfigInputOutput(); | ||||
| int WinogradFilterTransform(const float *weight_data, float *matrix_g, float *matrix_gt, int oc_block); | |||||
| int WinogradFilterTransform(const float *weight_data, float *matrix_g, const float *matrix_gt, int oc_block); | |||||
| private: | private: | ||||
| void FreeTmpBuffer() { | void FreeTmpBuffer() { | ||||
| @@ -82,8 +82,8 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| float *col_buffer_ = nullptr; | float *col_buffer_ = nullptr; | ||||
| float *trans_weight_ = nullptr; | float *trans_weight_ = nullptr; | ||||
| TmpBufferAddress tmp_buffer_address_list_[4]; | TmpBufferAddress tmp_buffer_address_list_[4]; | ||||
| InputTransFunc in_func_; | |||||
| OutputTransFunc out_func_; | |||||
| InputTransFunc in_func_ = nullptr; | |||||
| OutputTransFunc out_func_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -32,6 +32,7 @@ class GroupConvolutionFp32CPUKernel : public GroupConvolutionBaseCPUKernel { | |||||
| : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { | : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { | ||||
| } // opParameter(in channel, out channel) in this kernel has been split to groups, if | } // opParameter(in channel, out channel) in this kernel has been split to groups, if | ||||
| // you want to get real params, multiply in channel / out channel with group num | // you want to get real params, multiply in channel / out channel with group num | ||||
| ~GroupConvolutionFp32CPUKernel() = default; | |||||
| int SeparateInput(int group_id) override; | int SeparateInput(int group_id) override; | ||||
| int PostConcat(int group_id) override; | int PostConcat(int group_id) override; | ||||
| }; | }; | ||||
| @@ -51,7 +51,7 @@ class TransposeCPUKernel : public LiteKernel { | |||||
| TransposeParameter *param_ = nullptr; | TransposeParameter *param_ = nullptr; | ||||
| TransposeFunc NHNCTransposeFunc_ = nullptr; | TransposeFunc NHNCTransposeFunc_ = nullptr; | ||||
| int thread_count_ = 0; | int thread_count_ = 0; | ||||
| int nhnc_param_[3]; | |||||
| int nhnc_param_[3] = {0}; | |||||
| int dims_ = 0; | int dims_ = 0; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -35,6 +35,7 @@ class PhiloxRandom { | |||||
| counter_[2] = static_cast<uint32_t>(seed_hi); | counter_[2] = static_cast<uint32_t>(seed_hi); | ||||
| counter_[3] = static_cast<uint32_t>(seed_hi >> 32); | counter_[3] = static_cast<uint32_t>(seed_hi >> 32); | ||||
| } | } | ||||
| ~PhiloxRandom() = default; | |||||
| // Skip the specified number of samples of 128-bits in the current stream. | // Skip the specified number of samples of 128-bits in the current stream. | ||||
| void Skip(uint64_t count) { | void Skip(uint64_t count) { | ||||
| @@ -27,40 +27,52 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_AddFusion; | using mindspore::schema::PrimitiveType_AddFusion; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| QuantizedAddCPUKernel::~QuantizedAddCPUKernel() { | |||||
| if (para_ != nullptr) { | |||||
| free(para_); | |||||
| para_ = nullptr; | |||||
| } | |||||
| } | |||||
| int QuantizedAddCPUKernel::Init() { | int QuantizedAddCPUKernel::Init() { | ||||
| para_ = reinterpret_cast<AddQuantParameter *>(malloc(sizeof(AddQuantParameter))); | |||||
| if (para_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc AddQuantParameter for add int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto *input0 = in_tensors_.at(0); | auto *input0 = in_tensors_.at(0); | ||||
| auto *input1 = in_tensors_.at(1); | auto *input1 = in_tensors_.at(1); | ||||
| auto *output = out_tensors_.at(0); | auto *output = out_tensors_.at(0); | ||||
| para_.in0_args_.zp_ = input0->quant_params().front().zeroPoint * -1; | |||||
| para_.in1_args_.zp_ = input1->quant_params().front().zeroPoint * -1; | |||||
| para_.out_zp_ = output->quant_params().front().zeroPoint; | |||||
| para_->in0_args_.zp_ = input0->quant_params().front().zeroPoint * -1; | |||||
| para_->in1_args_.zp_ = input1->quant_params().front().zeroPoint * -1; | |||||
| para_->out_zp_ = output->quant_params().front().zeroPoint; | |||||
| const double in0_scale = input0->quant_params().front().scale; | const double in0_scale = input0->quant_params().front().scale; | ||||
| const double in1_scale = input1->quant_params().front().scale; | const double in1_scale = input1->quant_params().front().scale; | ||||
| const double out_scale = output->quant_params().front().scale; | const double out_scale = output->quant_params().front().scale; | ||||
| para_.left_shift_ = 20; | |||||
| para_->left_shift_ = 20; | |||||
| const double twice_max_input_scale = 2 * std::max(in0_scale, in1_scale); | const double twice_max_input_scale = 2 * std::max(in0_scale, in1_scale); | ||||
| const double in0_multiplier = in0_scale / twice_max_input_scale; | const double in0_multiplier = in0_scale / twice_max_input_scale; | ||||
| const double in1_multiplier = in1_scale / twice_max_input_scale; | const double in1_multiplier = in1_scale / twice_max_input_scale; | ||||
| const double out_multiplier = twice_max_input_scale / ((1 << para_.left_shift_) * out_scale); | |||||
| const double out_multiplier = twice_max_input_scale / ((1 << para_->left_shift_) * out_scale); | |||||
| QuantizeMultiplierSmallerThanOne(in0_multiplier, ¶_.in0_args_.multiplier_, ¶_.in0_args_.left_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(in1_multiplier, ¶_.in1_args_.multiplier_, ¶_.in1_args_.left_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(out_multiplier, ¶_.out_multiplier_, ¶_.out_left_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(in0_multiplier, &(para_->in0_args_.multiplier_), &(para_->in0_args_.left_shift_)); | |||||
| QuantizeMultiplierSmallerThanOne(in1_multiplier, &(para_->in1_args_.multiplier_), &(para_->in1_args_.left_shift_)); | |||||
| QuantizeMultiplierSmallerThanOne(out_multiplier, &(para_->out_multiplier_), &(para_->out_left_shift_)); | |||||
| para_.in0_args_.right_shift_ = -para_.in0_args_.left_shift_ > 0 ? 0 : para_.in0_args_.left_shift_; | |||||
| para_.in1_args_.right_shift_ = -para_.in1_args_.left_shift_ > 0 ? 0 : para_.in1_args_.left_shift_; | |||||
| para_.out_right_shift_ = -para_.out_left_shift_ > 0 ? 0 : para_.out_left_shift_; | |||||
| para_->in0_args_.right_shift_ = -para_->in0_args_.left_shift_ > 0 ? 0 : para_->in0_args_.left_shift_; | |||||
| para_->in1_args_.right_shift_ = -para_->in1_args_.left_shift_ > 0 ? 0 : para_->in1_args_.left_shift_; | |||||
| para_->out_right_shift_ = -para_->out_left_shift_ > 0 ? 0 : para_->out_left_shift_; | |||||
| para_.in0_args_.left_shift_ = -para_.in0_args_.left_shift_ > 0 ? -para_.in0_args_.left_shift_ : 0; | |||||
| para_.in1_args_.left_shift_ = -para_.in1_args_.left_shift_ > 0 ? -para_.in1_args_.left_shift_ : 0; | |||||
| para_.out_left_shift_ = -para_.out_left_shift_ > 0 ? -para_.out_left_shift_ : 0; | |||||
| para_->in0_args_.left_shift_ = -para_->in0_args_.left_shift_ > 0 ? -para_->in0_args_.left_shift_ : 0; | |||||
| para_->in1_args_.left_shift_ = -para_->in1_args_.left_shift_ > 0 ? -para_->in1_args_.left_shift_ : 0; | |||||
| para_->out_left_shift_ = -para_->out_left_shift_ > 0 ? -para_->out_left_shift_ : 0; | |||||
| auto act = arith_para_->activation_type_; | auto act = arith_para_->activation_type_; | ||||
| CalculateActivationRangeQuantized(act == ActType_Relu, act == ActType_Relu6, para_.out_zp_, | |||||
| static_cast<float>(out_scale), ¶_.min_, ¶_.max_); | |||||
| CalculateActivationRangeQuantized(act == ActType_Relu, act == ActType_Relu6, para_->out_zp_, | |||||
| static_cast<float>(out_scale), &(para_->min_), &(para_->max_)); | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -154,9 +166,9 @@ void QuantizedAddCPUKernel::BroadcastRun(int task_id) { | |||||
| cur_out = output_data_ + task_id * stride * in_size_ + i * in_size_; | cur_out = output_data_ + task_id * stride * in_size_ + i * in_size_; | ||||
| } | } | ||||
| #ifdef ENABLE_AVX | #ifdef ENABLE_AVX | ||||
| AddInt8_AVX2(cur_in0, cur_in1, cur_out, in_size_, ¶_); | |||||
| AddInt8_AVX2(cur_in0, cur_in1, cur_out, in_size_, para_); | |||||
| #else | #else | ||||
| AddInt8(cur_in0, cur_in1, cur_out, in_size_, ¶_); | |||||
| AddInt8(cur_in0, cur_in1, cur_out, in_size_, para_); | |||||
| #endif | #endif | ||||
| } | } | ||||
| return; | return; | ||||
| @@ -182,18 +194,18 @@ int QuantizedAddCPUKernel::DoExecute(int task_id) { | |||||
| if (support_opt_add_) { | if (support_opt_add_) { | ||||
| int8_t *ptr_in = arith_para_->in_elements_num0_ == 1 ? cur_in1 : cur_in0; | int8_t *ptr_in = arith_para_->in_elements_num0_ == 1 ? cur_in1 : cur_in0; | ||||
| int8_t element_in = arith_para_->in_elements_num0_ == 1 ? input0_data_[0] : input1_data_[0]; | int8_t element_in = arith_para_->in_elements_num0_ == 1 ? input0_data_[0] : input1_data_[0]; | ||||
| AddQuantQrgs *ptr_args = arith_para_->in_elements_num0_ == 1 ? ¶_.in1_args_ : ¶_.in0_args_; | |||||
| AddQuantQrgs *ele_args = arith_para_->in_elements_num0_ == 1 ? ¶_.in0_args_ : ¶_.in1_args_; | |||||
| AddQuantQrgs *ptr_args = arith_para_->in_elements_num0_ == 1 ? &(para_->in1_args_) : &(para_->in0_args_); | |||||
| AddQuantQrgs *ele_args = arith_para_->in_elements_num0_ == 1 ? &(para_->in0_args_) : &(para_->in1_args_); | |||||
| #ifdef ENABLE_AVX | #ifdef ENABLE_AVX | ||||
| AddOptInt8_AVX2(ptr_in, element_in, cur_out, rest_count, ¶_, ptr_args, ele_args); | |||||
| AddOptInt8_AVX2(ptr_in, element_in, cur_out, rest_count, para_, ptr_args, ele_args); | |||||
| #else | #else | ||||
| AddOptInt8(ptr_in, element_in, cur_out, rest_count, ¶_, ptr_args, ele_args); | |||||
| AddOptInt8(ptr_in, element_in, cur_out, rest_count, para_, ptr_args, ele_args); | |||||
| #endif | #endif | ||||
| } else { | } else { | ||||
| #ifdef ENABLE_AVX | #ifdef ENABLE_AVX | ||||
| AddInt8_AVX2(cur_in0, cur_in1, cur_out, rest_count, ¶_); | |||||
| AddInt8_AVX2(cur_in0, cur_in1, cur_out, rest_count, para_); | |||||
| #else | #else | ||||
| AddInt8(cur_in0, cur_in1, cur_out, rest_count, ¶_); | |||||
| AddInt8(cur_in0, cur_in1, cur_out, rest_count, para_); | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -32,7 +32,7 @@ class QuantizedAddCPUKernel : public LiteKernel { | |||||
| : LiteKernel(parameter, inputs, outputs, ctx) { | : LiteKernel(parameter, inputs, outputs, ctx) { | ||||
| arith_para_ = reinterpret_cast<ArithmeticParameter *>(parameter); | arith_para_ = reinterpret_cast<ArithmeticParameter *>(parameter); | ||||
| } | } | ||||
| ~QuantizedAddCPUKernel() override = default; | |||||
| ~QuantizedAddCPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -43,7 +43,7 @@ class QuantizedAddCPUKernel : public LiteKernel { | |||||
| void BroadcastRun(int task_id); | void BroadcastRun(int task_id); | ||||
| private: | private: | ||||
| AddQuantParameter para_; | |||||
| AddQuantParameter *para_ = nullptr; | |||||
| ArithmeticParameter *arith_para_ = nullptr; | ArithmeticParameter *arith_para_ = nullptr; | ||||
| int in_size_ = 0; | int in_size_ = 0; | ||||
| int out_size_ = 0; | int out_size_ = 0; | ||||
| @@ -27,18 +27,39 @@ using mindspore::schema::PrimitiveType_ArgMaxFusion; | |||||
| using mindspore::schema::PrimitiveType_ArgMinFusion; | using mindspore::schema::PrimitiveType_ArgMinFusion; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| ArgMinMaxInt8CPUKernel::~ArgMinMaxInt8CPUKernel() { | |||||
| if (in_quant_arg_ != nullptr) { | |||||
| free(in_quant_arg_); | |||||
| in_quant_arg_ = nullptr; | |||||
| } | |||||
| if (out_quant_arg_ != nullptr) { | |||||
| free(out_quant_arg_); | |||||
| out_quant_arg_ = nullptr; | |||||
| } | |||||
| } | |||||
| int ArgMinMaxInt8CPUKernel::Init() { | int ArgMinMaxInt8CPUKernel::Init() { | ||||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); | auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); | ||||
| param->data_type_ = kNumberTypeInt8; | param->data_type_ = kNumberTypeInt8; | ||||
| in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); | |||||
| if (in_quant_arg_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc QuantArg for argmin or argmax int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto *input_tensor = in_tensors_.at(kInputIndex); | auto *input_tensor = in_tensors_.at(kInputIndex); | ||||
| auto in_quant_args = input_tensor->quant_params(); | auto in_quant_args = input_tensor->quant_params(); | ||||
| in_quant_arg_.scale_ = in_quant_args.front().scale; | |||||
| in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; | |||||
| in_quant_arg_->scale_ = in_quant_args.front().scale; | |||||
| in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; | |||||
| auto *out_tensor = out_tensors_.at(kOutputIndex); | auto *out_tensor = out_tensors_.at(kOutputIndex); | ||||
| auto out_quant_args = out_tensor->quant_params(); | auto out_quant_args = out_tensor->quant_params(); | ||||
| out_quant_arg_.scale_ = out_quant_args.front().scale; | |||||
| out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; | |||||
| out_quant_arg_->scale_ = out_quant_args.front().scale; | |||||
| out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; | |||||
| out_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); | |||||
| if (out_quant_arg_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc QuantArg for argmin or argmax int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -72,22 +93,22 @@ int ArgMinMaxInt8CPUKernel::Run() { | |||||
| auto in_shape = input->shape(); | auto in_shape = input->shape(); | ||||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); | auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); | ||||
| if (param->topk_ == 1) { | if (param->topk_ == 1) { | ||||
| Int8ArgMinMaxQuant(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); | |||||
| Int8ArgMinMaxQuant(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| switch (param->axis_) { | switch (param->axis_) { | ||||
| case 0: | case 0: | ||||
| Int8ArgMinMaxDim0(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); | |||||
| Int8ArgMinMaxDim0(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); | |||||
| break; | break; | ||||
| case 1: | case 1: | ||||
| Int8ArgMinMaxDim1(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); | |||||
| Int8ArgMinMaxDim1(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); | |||||
| break; | break; | ||||
| case 2: | case 2: | ||||
| Int8ArgMinMaxDim2(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); | |||||
| Int8ArgMinMaxDim2(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); | |||||
| break; | break; | ||||
| case 3: | case 3: | ||||
| Int8ArgMinMaxDim3(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); | |||||
| Int8ArgMinMaxDim3(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); | |||||
| break; | break; | ||||
| default: | default: | ||||
| MS_LOG(ERROR) << "axis is invalid"; | MS_LOG(ERROR) << "axis is invalid"; | ||||
| @@ -30,15 +30,15 @@ class ArgMinMaxInt8CPUKernel : public LiteKernel { | |||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx) {} | : LiteKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~ArgMinMaxInt8CPUKernel() = default; | |||||
| ~ArgMinMaxInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| QuantArg in_quant_arg_; | |||||
| QuantArg out_quant_arg_; | |||||
| QuantArg *in_quant_arg_ = nullptr; | |||||
| QuantArg *out_quant_arg_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -24,18 +24,38 @@ using mindspore::schema::PrimitiveType_BatchToSpace; | |||||
| using mindspore::schema::PrimitiveType_BatchToSpaceND; | using mindspore::schema::PrimitiveType_BatchToSpaceND; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| BatchToSpaceInt8CPUKernel::~BatchToSpaceInt8CPUKernel() { | |||||
| if (in_quant_arg_ != nullptr) { | |||||
| free(in_quant_arg_); | |||||
| in_quant_arg_ = nullptr; | |||||
| } | |||||
| if (out_quant_arg_ != nullptr) { | |||||
| free(out_quant_arg_); | |||||
| out_quant_arg_ = nullptr; | |||||
| } | |||||
| } | |||||
| int BatchToSpaceInt8CPUKernel::Init() { | int BatchToSpaceInt8CPUKernel::Init() { | ||||
| MS_ASSERT(in_tensors_.at(0)->format() == schema::Format::Format_NHWC); | MS_ASSERT(in_tensors_.at(0)->format() == schema::Format::Format_NHWC); | ||||
| in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); | |||||
| if (in_quant_arg_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc QuantArg for BatchToSpace int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto *input_tensor = in_tensors_.at(kInputIndex); | auto *input_tensor = in_tensors_.at(kInputIndex); | ||||
| auto in_quant_args = input_tensor->quant_params(); | auto in_quant_args = input_tensor->quant_params(); | ||||
| in_quant_arg_.scale_ = in_quant_args.front().scale; | |||||
| in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; | |||||
| in_quant_arg_->scale_ = in_quant_args.front().scale; | |||||
| in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; | |||||
| out_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); | |||||
| if (out_quant_arg_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc QuantArg for BatchToSpace int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto *out_tensor = out_tensors_.at(kOutputIndex); | auto *out_tensor = out_tensors_.at(kOutputIndex); | ||||
| auto out_quant_args = out_tensor->quant_params(); | auto out_quant_args = out_tensor->quant_params(); | ||||
| out_quant_arg_.scale_ = out_quant_args.front().scale; | |||||
| out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; | |||||
| out_quant_arg_->scale_ = out_quant_args.front().scale; | |||||
| out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -56,7 +76,7 @@ int BatchToSpaceInt8CPUKernel::Run() { | |||||
| auto out_shape = output->shape(); | auto out_shape = output->shape(); | ||||
| BatchToSpaceParameter *param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_); | BatchToSpaceParameter *param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_); | ||||
| if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { | |||||
| if (in_quant_arg_->scale_ == out_quant_arg_->scale_ && in_quant_arg_->zp_ == out_quant_arg_->zp_) { | |||||
| if (param->no_crop_) { | if (param->no_crop_) { | ||||
| BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | ||||
| sizeof(int8_t)); | sizeof(int8_t)); | ||||
| @@ -67,10 +87,10 @@ int BatchToSpaceInt8CPUKernel::Run() { | |||||
| } else { | } else { | ||||
| if (param->no_crop_) { | if (param->no_crop_) { | ||||
| BatchToSpaceNoCropForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | BatchToSpaceNoCropForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | ||||
| &in_quant_arg_, &out_quant_arg_); | |||||
| in_quant_arg_, out_quant_arg_); | |||||
| } else { | } else { | ||||
| BatchToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | BatchToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, | ||||
| param->crops_, &in_quant_arg_, &out_quant_arg_); | |||||
| param->crops_, in_quant_arg_, out_quant_arg_); | |||||
| } | } | ||||
| } | } | ||||
| @@ -30,15 +30,15 @@ class BatchToSpaceInt8CPUKernel : public LiteKernel { | |||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx) {} | : LiteKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~BatchToSpaceInt8CPUKernel() = default; | |||||
| ~BatchToSpaceInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| QuantArg in_quant_arg_; | |||||
| QuantArg out_quant_arg_; | |||||
| QuantArg *in_quant_arg_ = nullptr; | |||||
| QuantArg *out_quant_arg_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -62,7 +62,7 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int8_t *output_ptr_ = nullptr; | int8_t *output_ptr_ = nullptr; | ||||
| size_t thread_count_ = 1; | size_t thread_count_ = 1; | ||||
| size_t thread_stride_ = 0; | size_t thread_stride_ = 0; | ||||
| MATMUL_OPT_R4_FUNC matmul_func_; | |||||
| MATMUL_OPT_R4_FUNC matmul_func_ = nullptr; | |||||
| MatMulParameter *matmul_param_ = nullptr; | MatMulParameter *matmul_param_ = nullptr; | ||||
| bool support_optimize_ = true; | bool support_optimize_ = true; | ||||
| }; | }; | ||||
| @@ -25,18 +25,39 @@ using mindspore::lite::RET_PARAM_INVALID; | |||||
| using mindspore::schema::PrimitiveType_DepthToSpace; | using mindspore::schema::PrimitiveType_DepthToSpace; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| DepthToSpaceInt8CPUKernel::~DepthToSpaceInt8CPUKernel() { | |||||
| if (in_quant_arg_ != nullptr) { | |||||
| free(in_quant_arg_); | |||||
| in_quant_arg_ = nullptr; | |||||
| } | |||||
| if (out_quant_arg_ != nullptr) { | |||||
| free(out_quant_arg_); | |||||
| out_quant_arg_ = nullptr; | |||||
| } | |||||
| } | |||||
| int DepthToSpaceInt8CPUKernel::Init() { | int DepthToSpaceInt8CPUKernel::Init() { | ||||
| param_->data_type_size_ = sizeof(int8_t); | param_->data_type_size_ = sizeof(int8_t); | ||||
| in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); | |||||
| if (in_quant_arg_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc QuantArg for DepthToSpace int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto *input_tensor = in_tensors_.at(kInputIndex); | auto *input_tensor = in_tensors_.at(kInputIndex); | ||||
| auto in_quant_args = input_tensor->quant_params(); | auto in_quant_args = input_tensor->quant_params(); | ||||
| in_quant_arg_.scale_ = in_quant_args.front().scale; | |||||
| in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; | |||||
| in_quant_arg_->scale_ = in_quant_args.front().scale; | |||||
| in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; | |||||
| out_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); | |||||
| if (out_quant_arg_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc QuantArg for DepthToSpace int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto *out_tensor = out_tensors_.at(kOutputIndex); | auto *out_tensor = out_tensors_.at(kOutputIndex); | ||||
| auto out_quant_args = out_tensor->quant_params(); | auto out_quant_args = out_tensor->quant_params(); | ||||
| out_quant_arg_.scale_ = out_quant_args.front().scale; | |||||
| out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; | |||||
| out_quant_arg_->scale_ = out_quant_args.front().scale; | |||||
| out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -51,10 +72,10 @@ int DepthToSpaceInt8CPUKernel::Run() { | |||||
| const int8_t *input_data = reinterpret_cast<const int8_t *>(input->data_c()); | const int8_t *input_data = reinterpret_cast<const int8_t *>(input->data_c()); | ||||
| int8_t *output_data = reinterpret_cast<int8_t *>(output->data_c()); | int8_t *output_data = reinterpret_cast<int8_t *>(output->data_c()); | ||||
| auto in_shape = input->shape(); | auto in_shape = input->shape(); | ||||
| if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { | |||||
| if (in_quant_arg_->scale_ == out_quant_arg_->scale_ && in_quant_arg_->zp_ == out_quant_arg_->zp_) { | |||||
| DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param_); | DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param_); | ||||
| } else { | } else { | ||||
| DepthToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), param_, &in_quant_arg_, &out_quant_arg_); | |||||
| DepthToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), param_, in_quant_arg_, out_quant_arg_); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -29,15 +29,15 @@ class DepthToSpaceInt8CPUKernel : public DepthToSpaceBaseCPUKernel { | |||||
| DepthToSpaceInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | DepthToSpaceInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : DepthToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx) {} | : DepthToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~DepthToSpaceInt8CPUKernel() = default; | |||||
| ~DepthToSpaceInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| QuantArg in_quant_arg_; | |||||
| QuantArg out_quant_arg_; | |||||
| QuantArg *in_quant_arg_ = nullptr; | |||||
| QuantArg *out_quant_arg_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -28,7 +28,12 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_DivFusion; | using mindspore::schema::PrimitiveType_DivFusion; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| DivInt8CPUKernel::~DivInt8CPUKernel() { | |||||
| if (quant_args_ != nullptr) { | |||||
| free(quant_args_); | |||||
| quant_args_ = nullptr; | |||||
| } | |||||
| } | |||||
| int DivInt8CPUKernel::Init() { | int DivInt8CPUKernel::Init() { | ||||
| lite::Tensor *input0 = in_tensors_.at(0); | lite::Tensor *input0 = in_tensors_.at(0); | ||||
| lite::Tensor *input1 = in_tensors_.at(1); | lite::Tensor *input1 = in_tensors_.at(1); | ||||
| @@ -39,19 +44,25 @@ int DivInt8CPUKernel::Init() { | |||||
| broadcast_ = input0->ElementsNum() != input1->ElementsNum(); | broadcast_ = input0->ElementsNum() != input1->ElementsNum(); | ||||
| param_.in0_args_.scale_ = input0->quant_params().front().scale; | |||||
| param_.in0_args_.zp_ = -input0->quant_params().front().zeroPoint; | |||||
| param_.in1_args_.scale_ = input1->quant_params().front().scale; | |||||
| param_.in1_args_.zp_ = -input1->quant_params().front().zeroPoint; | |||||
| param_.out_args_.scale_ = output->quant_params().front().scale; | |||||
| param_.out_args_.zp_ = output->quant_params().front().zeroPoint; | |||||
| quant_args_ = reinterpret_cast<DivQuantArg *>(malloc(sizeof(DivQuantArg))); | |||||
| if (quant_args_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc DivQuantArg for Div int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| quant_args_->in0_args_.scale_ = input0->quant_params().front().scale; | |||||
| quant_args_->in0_args_.zp_ = -input0->quant_params().front().zeroPoint; | |||||
| quant_args_->in1_args_.scale_ = input1->quant_params().front().scale; | |||||
| quant_args_->in1_args_.zp_ = -input1->quant_params().front().zeroPoint; | |||||
| quant_args_->out_args_.scale_ = output->quant_params().front().scale; | |||||
| quant_args_->out_args_.zp_ = output->quant_params().front().zeroPoint; | |||||
| const double real_multiplier = param_.in0_args_.scale_ / (param_.in1_args_.scale_ * param_.out_args_.scale_); | |||||
| const double real_multiplier = | |||||
| quant_args_->in0_args_.scale_ / (quant_args_->in1_args_.scale_ * quant_args_->out_args_.scale_); | |||||
| QuantizeMultiplier(real_multiplier, ¶m_.output_multiplier_, ¶m_.output_shift_); | |||||
| QuantizeMultiplier(real_multiplier, &quant_args_->output_multiplier_, &quant_args_->output_shift_); | |||||
| param_.output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| param_.output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| quant_args_->output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| quant_args_->output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -74,10 +85,10 @@ int DivInt8CPUKernel::DoExecute(int task_id) { | |||||
| auto ret = RET_OK; | auto ret = RET_OK; | ||||
| if (broadcast_) { | if (broadcast_) { | ||||
| ret = DivInt8(tile0_data_ + task_id * count, tile1_data_ + task_id * count, output_data_ + task_id * count, count, | ret = DivInt8(tile0_data_ + task_id * count, tile1_data_ + task_id * count, output_data_ + task_id * count, count, | ||||
| ¶m_); | |||||
| quant_args_); | |||||
| } else { | } else { | ||||
| ret = DivInt8(input0_data_ + task_id * count, input1_data_ + task_id * count, output_data_ + task_id * count, count, | ret = DivInt8(input0_data_ + task_id * count, input1_data_ + task_id * count, output_data_ + task_id * count, count, | ||||
| ¶m_); | |||||
| quant_args_); | |||||
| } | } | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -27,7 +27,7 @@ class DivInt8CPUKernel : public LiteKernel { | |||||
| explicit DivInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | explicit DivInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx) {} | : LiteKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~DivInt8CPUKernel() override {} | |||||
| ~DivInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -35,7 +35,7 @@ class DivInt8CPUKernel : public LiteKernel { | |||||
| int DoExecute(int task_id); | int DoExecute(int task_id); | ||||
| private: | private: | ||||
| DivQuantArg param_; | |||||
| DivQuantArg *quant_args_ = nullptr; | |||||
| int8_t *tile0_data_ = nullptr; | int8_t *tile0_data_ = nullptr; | ||||
| int8_t *tile1_data_ = nullptr; | int8_t *tile1_data_ = nullptr; | ||||
| bool broadcast_ = false; | bool broadcast_ = false; | ||||
| @@ -32,6 +32,7 @@ class GroupConvolutionInt8CPUKernel : public GroupConvolutionBaseCPUKernel { | |||||
| : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { | : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { | ||||
| } // opParameter(in channel, out channel) in this kernel has been split to groups, if | } // opParameter(in channel, out channel) in this kernel has been split to groups, if | ||||
| // you want to get real params, multiply in channel / out channel with group num | // you want to get real params, multiply in channel / out channel with group num | ||||
| ~GroupConvolutionInt8CPUKernel() = default; | |||||
| int SeparateInput(int group_id) override; | int SeparateInput(int group_id) override; | ||||
| int PostConcat(int group_id) override; | int PostConcat(int group_id) override; | ||||
| }; | }; | ||||
| @@ -24,16 +24,28 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_L2NormalizeFusion; | using mindspore::schema::PrimitiveType_L2NormalizeFusion; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| L2NormInt8CPUKernel::~L2NormInt8CPUKernel() { | |||||
| if (quant_param_ != nullptr) { | |||||
| free(quant_param_); | |||||
| quant_param_ = nullptr; | |||||
| } | |||||
| } | |||||
| int L2NormInt8CPUKernel::Init() { | int L2NormInt8CPUKernel::Init() { | ||||
| lite::Tensor *input = in_tensors_.at(0); | lite::Tensor *input = in_tensors_.at(0); | ||||
| lite::Tensor *output = out_tensors_.at(0); | lite::Tensor *output = out_tensors_.at(0); | ||||
| MS_ASSERT(input); | MS_ASSERT(input); | ||||
| MS_ASSERT(output); | MS_ASSERT(output); | ||||
| quant_param_.in_.scale_ = input->quant_params().front().scale; | |||||
| quant_param_.in_.zp_ = input->quant_params().front().zeroPoint; | |||||
| quant_param_.out_.scale_ = output->quant_params().front().scale; | |||||
| quant_param_.out_.zp_ = output->quant_params().front().zeroPoint; | |||||
| quant_param_ = reinterpret_cast<L2NormQuantArg *>(malloc(sizeof(L2NormQuantArg))); | |||||
| if (quant_param_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc L2NormQuantArg for L2Norm int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| quant_param_->in_.scale_ = input->quant_params().front().scale; | |||||
| quant_param_->in_.zp_ = input->quant_params().front().zeroPoint; | |||||
| quant_param_->out_.scale_ = output->quant_params().front().scale; | |||||
| quant_param_->out_.zp_ = output->quant_params().front().zeroPoint; | |||||
| return ReSize(); | return ReSize(); | ||||
| } | } | ||||
| @@ -68,7 +80,7 @@ int L2NormInt8CPUKernel::DoExecute(int task_id) { | |||||
| int8_t *output_data = static_cast<int8_t *>(out_tensors().front()->MutableData()); | int8_t *output_data = static_cast<int8_t *>(out_tensors().front()->MutableData()); | ||||
| MS_ASSERT(output_data); | MS_ASSERT(output_data); | ||||
| MS_ASSERT(l2_norm_param_); | MS_ASSERT(l2_norm_param_); | ||||
| return L2NormalizationInt8(input_data, output_data, l2_norm_param_, &quant_param_, begin, end); | |||||
| return L2NormalizationInt8(input_data, output_data, l2_norm_param_, quant_param_, begin, end); | |||||
| } | } | ||||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_L2NormalizeFusion, LiteKernelCreator<L2NormInt8CPUKernel>) | REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_L2NormalizeFusion, LiteKernelCreator<L2NormInt8CPUKernel>) | ||||
| @@ -26,14 +26,14 @@ class L2NormInt8CPUKernel : public L2NormCPUKernel { | |||||
| explicit L2NormInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | explicit L2NormInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : L2NormCPUKernel(parameter, inputs, outputs, ctx) {} | : L2NormCPUKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~L2NormInt8CPUKernel() {} | |||||
| ~L2NormInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int Run() override; | int Run() override; | ||||
| int DoExecute(int tId); | int DoExecute(int tId); | ||||
| private: | private: | ||||
| L2NormQuantArg quant_param_; | |||||
| L2NormQuantArg *quant_param_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -24,6 +24,10 @@ using mindspore::schema::PrimitiveType_LayerNormFusion; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() { | LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() { | ||||
| if (quant_param_ != nullptr) { | |||||
| free(quant_param_); | |||||
| quant_param_ = nullptr; | |||||
| } | |||||
| if (gamma_ptr_ != nullptr) { | if (gamma_ptr_ != nullptr) { | ||||
| free(gamma_ptr_); | free(gamma_ptr_); | ||||
| gamma_ptr_ = nullptr; | gamma_ptr_ = nullptr; | ||||
| @@ -38,10 +42,15 @@ int LayerNormInt8CPUKernel::SetQuantArgs() { | |||||
| lite::Tensor *input = in_tensors_.at(0); | lite::Tensor *input = in_tensors_.at(0); | ||||
| lite::Tensor *output = out_tensors_.at(0); | lite::Tensor *output = out_tensors_.at(0); | ||||
| quant_param_.in_zp_ = input->quant_params().front().zeroPoint; | |||||
| quant_param_.in_scale_ = input->quant_params().front().scale; | |||||
| quant_param_.out_zp_ = output->quant_params().front().zeroPoint; | |||||
| quant_param_.out_scale_ = output->quant_params().front().scale; | |||||
| quant_param_ = reinterpret_cast<LayerNormQuantArg *>(malloc(sizeof(LayerNormQuantArg))); | |||||
| if (quant_param_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc LayerNormQuantArg for LayerNorm int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| quant_param_->in_zp_ = input->quant_params().front().zeroPoint; | |||||
| quant_param_->in_scale_ = input->quant_params().front().scale; | |||||
| quant_param_->out_zp_ = output->quant_params().front().zeroPoint; | |||||
| quant_param_->out_scale_ = output->quant_params().front().scale; | |||||
| lite::Tensor *gamma_tensor = in_tensors_.at(1); | lite::Tensor *gamma_tensor = in_tensors_.at(1); | ||||
| lite::Tensor *beta_tensor = in_tensors_.at(2); | lite::Tensor *beta_tensor = in_tensors_.at(2); | ||||
| @@ -67,7 +76,7 @@ int LayerNormInt8CPUKernel::SetQuantArgs() { | |||||
| } | } | ||||
| int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c()); | int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c()); | ||||
| for (int i = 0; i < beta_tensor->ElementsNum(); i++) { | for (int i = 0; i < beta_tensor->ElementsNum(); i++) { | ||||
| beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale; | |||||
| beta_ptr_[i] = src_beta[i] * quant_param_->in_scale_ * gamma_scale; | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -109,7 +118,7 @@ int LayerNormInt8CPUKernel::ReSize() { | |||||
| } | } | ||||
| int LayerNormInt8CPUKernel::DoExecute(int task_id) { | int LayerNormInt8CPUKernel::DoExecute(int task_id) { | ||||
| LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, &quant_param_, task_id); | |||||
| LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, quant_param_, task_id); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -44,7 +44,7 @@ class LayerNormInt8CPUKernel : public LiteKernel { | |||||
| private: | private: | ||||
| LayerNormParameter *param_ = nullptr; | LayerNormParameter *param_ = nullptr; | ||||
| LayerNormQuantArg quant_param_; | |||||
| LayerNormQuantArg *quant_param_ = nullptr; | |||||
| int8_t *src_ptr_ = nullptr; | int8_t *src_ptr_ = nullptr; | ||||
| int8_t *dst_ptr_ = nullptr; | int8_t *dst_ptr_ = nullptr; | ||||
| float *gamma_ptr_ = nullptr; | float *gamma_ptr_ = nullptr; | ||||
| @@ -41,14 +41,15 @@ int MatmulBaseInt8CPUKernel::RunImpl(int task_id) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int32_t *cur_left = filter_per_channel_ ? quant_.left_shift_ + cur_stride : quant_.left_shift_; | |||||
| int32_t *cur_right = filter_per_channel_ ? quant_.right_shift_ + cur_stride : quant_.right_shift_; | |||||
| int32_t *cur_mul = filter_per_channel_ ? quant_.quant_multiplier_ + cur_stride : quant_.quant_multiplier_; | |||||
| int32_t *cur_zp = filter_per_channel_ ? quant_.filter_zp_ + cur_stride : quant_.filter_zp_; | |||||
| int32_t *cur_left = filter_per_channel_ ? quant_param_->left_shift_ + cur_stride : quant_param_->left_shift_; | |||||
| int32_t *cur_right = filter_per_channel_ ? quant_param_->right_shift_ + cur_stride : quant_param_->right_shift_; | |||||
| int32_t *cur_mul = | |||||
| filter_per_channel_ ? quant_param_->quant_multiplier_ + cur_stride : quant_param_->quant_multiplier_; | |||||
| int32_t *cur_zp = filter_per_channel_ ? quant_param_->filter_zp_ + cur_stride : quant_param_->filter_zp_; | |||||
| MatmulInt8Opt(pack_a_ptr_, batch_b_ptr_ + cur_stride * param_->deep_16_, batch_c_ptr_ + cur_stride, param_->row_, | MatmulInt8Opt(pack_a_ptr_, batch_b_ptr_ + cur_stride * param_->deep_16_, batch_c_ptr_ + cur_stride, param_->row_, | ||||
| cur_oc, param_->deep_16_, input_sums_, weight_bias_sums_ + cur_stride, quant_.out_act_min_, | |||||
| quant_.out_act_max_, quant_.output_.zp_, cur_mul, cur_left, cur_right, param_->col_, | |||||
| cur_oc, param_->deep_16_, input_sums_, weight_bias_sums_ + cur_stride, quant_param_->out_act_min_, | |||||
| quant_param_->out_act_max_, quant_param_->output_.zp_, cur_mul, cur_left, cur_right, param_->col_, | |||||
| filter_per_channel_, cur_zp); | filter_per_channel_, cur_zp); | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -63,29 +64,32 @@ MatmulBaseInt8CPUKernel::~MatmulBaseInt8CPUKernel() { | |||||
| free(bias_ptr_); | free(bias_ptr_); | ||||
| bias_ptr_ = nullptr; | bias_ptr_ = nullptr; | ||||
| } | } | ||||
| return; | |||||
| if (quant_param_ != nullptr) { | |||||
| free(quant_param_); | |||||
| quant_param_ = nullptr; | |||||
| } | |||||
| } | } | ||||
| void MatmulBaseInt8CPUKernel::FreeQuantParam() { | void MatmulBaseInt8CPUKernel::FreeQuantParam() { | ||||
| if (quant_.filter_scale_ != nullptr) { | |||||
| free(quant_.filter_scale_); | |||||
| quant_.filter_scale_ = nullptr; | |||||
| if (quant_param_->filter_scale_ != nullptr) { | |||||
| free(quant_param_->filter_scale_); | |||||
| quant_param_->filter_scale_ = nullptr; | |||||
| } | } | ||||
| if (quant_.filter_zp_ != nullptr) { | |||||
| free(quant_.filter_zp_); | |||||
| quant_.filter_zp_ = nullptr; | |||||
| if (quant_param_->filter_zp_ != nullptr) { | |||||
| free(quant_param_->filter_zp_); | |||||
| quant_param_->filter_zp_ = nullptr; | |||||
| } | } | ||||
| if (quant_.left_shift_ != nullptr) { | |||||
| free(quant_.left_shift_); | |||||
| quant_.left_shift_ = nullptr; | |||||
| if (quant_param_->left_shift_ != nullptr) { | |||||
| free(quant_param_->left_shift_); | |||||
| quant_param_->left_shift_ = nullptr; | |||||
| } | } | ||||
| if (quant_.right_shift_ != nullptr) { | |||||
| free(quant_.right_shift_); | |||||
| quant_.right_shift_ = nullptr; | |||||
| if (quant_param_->right_shift_ != nullptr) { | |||||
| free(quant_param_->right_shift_); | |||||
| quant_param_->right_shift_ = nullptr; | |||||
| } | } | ||||
| if (quant_.quant_multiplier_ != nullptr) { | |||||
| free(quant_.quant_multiplier_); | |||||
| quant_.quant_multiplier_ = nullptr; | |||||
| if (quant_param_->quant_multiplier_ != nullptr) { | |||||
| free(quant_param_->quant_multiplier_); | |||||
| quant_param_->quant_multiplier_ = nullptr; | |||||
| } | } | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -99,24 +103,29 @@ int MatmulBaseInt8CPUKernel::MallocQuantParam() { | |||||
| int init_size = filter_per_channel_ ? col : 1; | int init_size = filter_per_channel_ ? col : 1; | ||||
| quant_.filter_scale_ = reinterpret_cast<float *>(malloc(init_size * sizeof(float))); | |||||
| if (quant_.filter_scale_ == nullptr) { | |||||
| quant_param_ = reinterpret_cast<MatmulQuantParameter *>(malloc(sizeof(MatmulQuantParameter))); | |||||
| if (quant_param_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc MatmulQuantParameter for Matmul int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| quant_param_->filter_scale_ = reinterpret_cast<float *>(malloc(init_size * sizeof(float))); | |||||
| if (quant_param_->filter_scale_ == nullptr) { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| quant_.filter_zp_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_.filter_zp_ == nullptr) { | |||||
| quant_param_->filter_zp_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_param_->filter_zp_ == nullptr) { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| quant_.left_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_.left_shift_ == nullptr) { | |||||
| quant_param_->left_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_param_->left_shift_ == nullptr) { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| quant_.right_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_.right_shift_ == nullptr) { | |||||
| quant_param_->right_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_param_->right_shift_ == nullptr) { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| quant_.quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_.quant_multiplier_ == nullptr) { | |||||
| quant_param_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t))); | |||||
| if (quant_param_->quant_multiplier_ == nullptr) { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -124,32 +133,32 @@ int MatmulBaseInt8CPUKernel::MallocQuantParam() { | |||||
| void MatmulBaseInt8CPUKernel::InitQuantParam() { | void MatmulBaseInt8CPUKernel::InitQuantParam() { | ||||
| auto in_quant_params = in_tensors_.at(0)->quant_params(); | auto in_quant_params = in_tensors_.at(0)->quant_params(); | ||||
| quant_.input_.zp_ = in_quant_params.front().zeroPoint; | |||||
| quant_.input_.scale_ = in_quant_params.front().scale; | |||||
| quant_param_->input_.zp_ = in_quant_params.front().zeroPoint; | |||||
| quant_param_->input_.scale_ = in_quant_params.front().scale; | |||||
| auto out_quant_params = out_tensors_.at(0)->quant_params(); | auto out_quant_params = out_tensors_.at(0)->quant_params(); | ||||
| quant_.output_.zp_ = out_quant_params.front().zeroPoint; | |||||
| quant_.output_.scale_ = out_quant_params.front().scale; | |||||
| quant_param_->output_.zp_ = out_quant_params.front().zeroPoint; | |||||
| quant_param_->output_.scale_ = out_quant_params.front().scale; | |||||
| auto weight_tensor = in_tensors_.at(1); | auto weight_tensor = in_tensors_.at(1); | ||||
| int weight_quant_num = filter_per_channel_ ? weight_tensor->shape().front() : 1; | int weight_quant_num = filter_per_channel_ ? weight_tensor->shape().front() : 1; | ||||
| auto weight_quant_params = weight_tensor->quant_params(); | auto weight_quant_params = weight_tensor->quant_params(); | ||||
| for (int i = 0; i < weight_quant_num; i++) { | for (int i = 0; i < weight_quant_num; i++) { | ||||
| quant_.filter_zp_[i] = weight_quant_params[i].zeroPoint; | |||||
| quant_.filter_scale_[i] = weight_quant_params[i].scale; | |||||
| quant_param_->filter_zp_[i] = weight_quant_params[i].zeroPoint; | |||||
| quant_param_->filter_scale_[i] = weight_quant_params[i].scale; | |||||
| } | } | ||||
| for (int i = 0; i < weight_quant_num; ++i) { | for (int i = 0; i < weight_quant_num; ++i) { | ||||
| const double in_scale = static_cast<double>(quant_.input_.scale_ * quant_.filter_scale_[i]); | |||||
| double real_multiplier = in_scale / static_cast<double>(quant_.output_.scale_); | |||||
| QuantizeRoundParameterWithDoublePrecision(real_multiplier, &quant_.quant_multiplier_[i], &quant_.left_shift_[i], | |||||
| &quant_.right_shift_[i]); | |||||
| const double in_scale = static_cast<double>(quant_param_->input_.scale_ * quant_param_->filter_scale_[i]); | |||||
| double real_multiplier = in_scale / static_cast<double>(quant_param_->output_.scale_); | |||||
| QuantizeRoundParameterWithDoublePrecision(real_multiplier, &quant_param_->quant_multiplier_[i], | |||||
| &quant_param_->left_shift_[i], &quant_param_->right_shift_[i]); | |||||
| } | } | ||||
| CalculateActivationRangeQuantized(param_->act_type_ == ActType_Relu, param_->act_type_ == ActType_Relu6, | CalculateActivationRangeQuantized(param_->act_type_ == ActType_Relu, param_->act_type_ == ActType_Relu6, | ||||
| quant_.output_.zp_, quant_.output_.scale_, &quant_.out_act_min_, | |||||
| &quant_.out_act_max_); | |||||
| quant_param_->output_.zp_, quant_param_->output_.scale_, | |||||
| &quant_param_->out_act_min_, &quant_param_->out_act_max_); | |||||
| } | } | ||||
| void MatmulBaseInt8CPUKernel::InitParameter() { | void MatmulBaseInt8CPUKernel::InitParameter() { | ||||
| @@ -207,16 +216,16 @@ void MatmulBaseInt8CPUKernel::TransferB() { | |||||
| #else | #else | ||||
| RowMajor2Row16x4MajorInt8(current_weight, current_b_pack, param_->col_, param_->deep_); | RowMajor2Row16x4MajorInt8(current_weight, current_b_pack, param_->col_, param_->deep_); | ||||
| #endif | #endif | ||||
| CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_.input_.zp_, quant_.filter_zp_, bias_ptr_, | |||||
| current_sums, ColMajor, filter_per_channel_); | |||||
| CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_param_->input_.zp_, | |||||
| quant_param_->filter_zp_, bias_ptr_, current_sums, ColMajor, filter_per_channel_); | |||||
| } else { | } else { | ||||
| #ifdef ENABLE_ARM32 | #ifdef ENABLE_ARM32 | ||||
| RowMajor2Col16x2MajorInt8(current_weight, current_b_pack, param_->deep_, param_->col_); | RowMajor2Col16x2MajorInt8(current_weight, current_b_pack, param_->deep_, param_->col_); | ||||
| #else | #else | ||||
| RowMajor2Col16x4MajorInt8(current_weight, param_->deep_, param_->col_, current_b_pack); | RowMajor2Col16x4MajorInt8(current_weight, param_->deep_, param_->col_, current_b_pack); | ||||
| #endif | #endif | ||||
| CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_.input_.zp_, quant_.filter_zp_, bias_ptr_, | |||||
| current_sums, RowMajor, false); | |||||
| CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_param_->input_.zp_, | |||||
| quant_param_->filter_zp_, bias_ptr_, current_sums, RowMajor, false); | |||||
| } | } | ||||
| } | } | ||||
| return; | return; | ||||
| @@ -312,7 +321,7 @@ int MatmulBaseInt8CPUKernel::Run() { | |||||
| int8_t *a_ptr = reinterpret_cast<int8_t *>(in_tensors_.at(0)->data_c()); | int8_t *a_ptr = reinterpret_cast<int8_t *>(in_tensors_.at(0)->data_c()); | ||||
| int8_t *c_ptr = reinterpret_cast<int8_t *>(out_tensors_.at(0)->data_c()); | int8_t *c_ptr = reinterpret_cast<int8_t *>(out_tensors_.at(0)->data_c()); | ||||
| int32_t tmp_weight_zp = filter_per_channel_ ? 1 : quant_.filter_zp_[0]; | |||||
| int32_t tmp_weight_zp = filter_per_channel_ ? 1 : quant_param_->filter_zp_[0]; | |||||
| for (int i = 0; i < param_->batch; i++) { | for (int i = 0; i < param_->batch; i++) { | ||||
| auto current_src_a = a_ptr + i * param_->row_ * param_->deep_; | auto current_src_a = a_ptr + i * param_->row_ * param_->deep_; | ||||
| if (param_->a_transpose_) { | if (param_->a_transpose_) { | ||||
| @@ -63,7 +63,7 @@ class MatmulBaseInt8CPUKernel : public LiteKernel { | |||||
| protected: | protected: | ||||
| MatMulParameter *param_ = nullptr; | MatMulParameter *param_ = nullptr; | ||||
| MatmulQuantParameter quant_; | |||||
| MatmulQuantParameter *quant_param_ = nullptr; | |||||
| int thread_count_ = 1; | int thread_count_ = 1; | ||||
| int thread_stride_ = 0; | int thread_stride_ = 0; | ||||
| int8_t *pack_a_ptr_ = nullptr; | int8_t *pack_a_ptr_ = nullptr; | ||||
| @@ -25,6 +25,13 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_MulFusion; | using mindspore::schema::PrimitiveType_MulFusion; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| MulInt8CPUKernel::~MulInt8CPUKernel() { | |||||
| if (quant_args_ != nullptr) { | |||||
| free(quant_args_); | |||||
| quant_args_ = nullptr; | |||||
| } | |||||
| } | |||||
| int MulInt8CPUKernel::Init() { | int MulInt8CPUKernel::Init() { | ||||
| lite::Tensor *input0 = in_tensors_.at(0); | lite::Tensor *input0 = in_tensors_.at(0); | ||||
| lite::Tensor *input1 = in_tensors_.at(1); | lite::Tensor *input1 = in_tensors_.at(1); | ||||
| @@ -33,24 +40,28 @@ int MulInt8CPUKernel::Init() { | |||||
| MS_ASSERT(input1); | MS_ASSERT(input1); | ||||
| MS_ASSERT(output); | MS_ASSERT(output); | ||||
| para_.mul_quant_arg_.in_quant_args_[0].scale_ = input0->quant_params().front().scale; | |||||
| para_.mul_quant_arg_.in_quant_args_[0].zp_ = input0->quant_params().front().zeroPoint * -1; | |||||
| para_.mul_quant_arg_.in_quant_args_[1].scale_ = input1->quant_params().front().scale; | |||||
| para_.mul_quant_arg_.in_quant_args_[1].zp_ = input1->quant_params().front().zeroPoint * -1; | |||||
| para_.mul_quant_arg_.out_quant_arg_.scale_ = output->quant_params().front().scale; | |||||
| para_.mul_quant_arg_.out_quant_arg_.zp_ = output->quant_params().front().zeroPoint; | |||||
| para_.mul_quant_arg_.output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| para_.mul_quant_arg_.output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| const double real_multiplier = | |||||
| (para_.mul_quant_arg_.in_quant_args_[0].scale_ * para_.mul_quant_arg_.in_quant_args_[1].scale_) / | |||||
| para_.mul_quant_arg_.out_quant_arg_.scale_; | |||||
| quant_args_ = reinterpret_cast<MulQuantArg *>(malloc(sizeof(MulQuantArg))); | |||||
| if (quant_args_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc MulQuantArg for Mul int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| quant_args_->in_quant_args_[0].scale_ = input0->quant_params().front().scale; | |||||
| quant_args_->in_quant_args_[0].zp_ = input0->quant_params().front().zeroPoint * -1; | |||||
| quant_args_->in_quant_args_[1].scale_ = input1->quant_params().front().scale; | |||||
| quant_args_->in_quant_args_[1].zp_ = input1->quant_params().front().zeroPoint * -1; | |||||
| quant_args_->out_quant_arg_.scale_ = output->quant_params().front().scale; | |||||
| quant_args_->out_quant_arg_.zp_ = output->quant_params().front().zeroPoint; | |||||
| quant_args_->output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| quant_args_->output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| const double real_multiplier = (quant_args_->in_quant_args_[0].scale_ * quant_args_->in_quant_args_[1].scale_) / | |||||
| quant_args_->out_quant_arg_.scale_; | |||||
| int right_shift = 0; | int right_shift = 0; | ||||
| QuantizeMultiplierSmallerThanOne(real_multiplier, ¶_.mul_quant_arg_.output_multiplier_, &right_shift); | |||||
| QuantizeMultiplierSmallerThanOne(real_multiplier, &quant_args_->output_multiplier_, &right_shift); | |||||
| para_.mul_quant_arg_.shift_left_ = right_shift < 0 ? -right_shift : 0; | |||||
| para_.mul_quant_arg_.shift_right_ = right_shift > 0 ? right_shift : 0; | |||||
| quant_args_->shift_left_ = right_shift < 0 ? -right_shift : 0; | |||||
| quant_args_->shift_right_ = right_shift > 0 ? right_shift : 0; | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -202,8 +213,7 @@ int MulInt8CPUKernel::FastDoExecute(int task_id) { | |||||
| cur_input0_data = input1_data_; | cur_input0_data = input1_data_; | ||||
| cur_input1_data = input0_data_ + task_id * count_unit_ * depth; | cur_input1_data = input0_data_ + task_id * count_unit_ * depth; | ||||
| } | } | ||||
| FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, | |||||
| para_.mul_quant_arg_); | |||||
| FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -216,7 +226,7 @@ int MulInt8CPUKernel::DoExecute(int task_id) { | |||||
| int8_t *cur_input1_data = input1_data_ + task_id * count_unit_; | int8_t *cur_input1_data = input1_data_ + task_id * count_unit_; | ||||
| int8_t *cur_output_data = output_data_ + task_id * count_unit_; | int8_t *cur_output_data = output_data_ + task_id * count_unit_; | ||||
| Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, para_.mul_quant_arg_); | |||||
| Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_); | |||||
| return lite::RET_OK; | return lite::RET_OK; | ||||
| } | } | ||||
| @@ -33,7 +33,7 @@ class MulInt8CPUKernel : public LiteKernel { | |||||
| : LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx_->thread_num_) { | : LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx_->thread_num_) { | ||||
| tile_para = reinterpret_cast<ArithmeticParameter *>(parameter); | tile_para = reinterpret_cast<ArithmeticParameter *>(parameter); | ||||
| } | } | ||||
| ~MulInt8CPUKernel() override{}; | |||||
| ~MulInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -46,7 +46,7 @@ class MulInt8CPUKernel : public LiteKernel { | |||||
| private: | private: | ||||
| const lite::InnerContext *ctx_ = nullptr; | const lite::InnerContext *ctx_ = nullptr; | ||||
| ArithmeticParameter *tile_para = nullptr; | ArithmeticParameter *tile_para = nullptr; | ||||
| MulParameter para_; | |||||
| MulQuantArg *quant_args_ = nullptr; | |||||
| bool fast_hw_broadcast_ = false; | bool fast_hw_broadcast_ = false; | ||||
| bool input1_hw_broadcast_ = false; | bool input1_hw_broadcast_ = false; | ||||
| int thread_count_ = 1; | int thread_count_ = 1; | ||||
| @@ -30,35 +30,46 @@ using mindspore::lite::RET_NULL_PTR; | |||||
| using mindspore::schema::PrimitiveType_Softmax; | using mindspore::schema::PrimitiveType_Softmax; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| SoftmaxInt8CPUKernel::~SoftmaxInt8CPUKernel() { | |||||
| if (quant_param_ != nullptr) { | |||||
| free(quant_param_); | |||||
| quant_param_ = nullptr; | |||||
| } | |||||
| } | |||||
| int SoftmaxInt8CPUKernel::Init() { | int SoftmaxInt8CPUKernel::Init() { | ||||
| auto ret = SoftmaxBaseCPUKernel::Init(); | auto ret = SoftmaxBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| quant_param_ = reinterpret_cast<SoftmaxQuantArg *>(malloc(sizeof(SoftmaxQuantArg))); | |||||
| if (quant_param_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc SoftmaxQuantArg for Softmax int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto *input_tensor = in_tensors_.at(kInputIndex); | auto *input_tensor = in_tensors_.at(kInputIndex); | ||||
| MS_ASSERT(input_tensor); | MS_ASSERT(input_tensor); | ||||
| auto in_quant_args = input_tensor->quant_params(); | auto in_quant_args = input_tensor->quant_params(); | ||||
| quant_params_.in_quant_args_.scale_ = in_quant_args.front().scale; | |||||
| quant_params_.in_quant_args_.zp_ = -in_quant_args.front().zeroPoint; | |||||
| quant_param_->in_quant_args_.scale_ = in_quant_args.front().scale; | |||||
| quant_param_->in_quant_args_.zp_ = -in_quant_args.front().zeroPoint; | |||||
| auto *out_tensor = out_tensors_.at(kOutputIndex); | auto *out_tensor = out_tensors_.at(kOutputIndex); | ||||
| MS_ASSERT(out_tensor); | MS_ASSERT(out_tensor); | ||||
| auto out_quant_args = out_tensor->quant_params(); | auto out_quant_args = out_tensor->quant_params(); | ||||
| quant_params_.out_quant_arg_.scale_ = out_quant_args.front().scale; | |||||
| quant_params_.out_quant_arg_.zp_ = -out_quant_args.front().zeroPoint; | |||||
| quant_params_.output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| quant_params_.output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| quant_param_->out_quant_arg_.scale_ = out_quant_args.front().scale; | |||||
| quant_param_->out_quant_arg_.zp_ = -out_quant_args.front().zeroPoint; | |||||
| quant_param_->output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| quant_param_->output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| const double input_real_multiplier = | const double input_real_multiplier = | ||||
| MSMIN(quant_params_.in_quant_args_.scale_ * (1 << (unsigned int)(31 - 5)), (1ll << 31) - 1.0); | |||||
| MSMIN(quant_param_->in_quant_args_.scale_ * (1 << (unsigned int)(31 - 5)), (1ll << 31) - 1.0); | |||||
| int right_shift = 0; | int right_shift = 0; | ||||
| QuantizeMultiplierSmallerThanOne(input_real_multiplier, &quant_params_.output_multiplier_, &right_shift); | |||||
| quant_params_.shift_left_ = right_shift < 0 ? -right_shift : 0; | |||||
| quant_params_.shift_right_ = right_shift > 0 ? right_shift : 0; | |||||
| QuantizeMultiplierSmallerThanOne(input_real_multiplier, &quant_param_->output_multiplier_, &right_shift); | |||||
| quant_param_->shift_left_ = right_shift < 0 ? -right_shift : 0; | |||||
| quant_param_->shift_right_ = right_shift > 0 ? right_shift : 0; | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -91,7 +102,7 @@ int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) { | |||||
| int stride_size = stride * task_id * inner_size; | int stride_size = stride * task_id * inner_size; | ||||
| auto error_code = SoftmaxInt8(input_ptr + stride_size, output_ptr + stride_size, count, exp_data_ + stride_size, | auto error_code = SoftmaxInt8(input_ptr + stride_size, output_ptr + stride_size, count, exp_data_ + stride_size, | ||||
| sum_data_, quant_params_, softmax_param_); | |||||
| sum_data_, quant_param_, softmax_param_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "DoSoftmax error task_id[" << task_id << "] error_code[" << error_code << "]"; | MS_LOG(ERROR) << "DoSoftmax error task_id[" << task_id << "] error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -27,7 +27,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel { | |||||
| SoftmaxInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | SoftmaxInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx) {} | : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~SoftmaxInt8CPUKernel() = default; | |||||
| ~SoftmaxInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -37,7 +37,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel { | |||||
| private: | private: | ||||
| int *sum_data_ = nullptr; | int *sum_data_ = nullptr; | ||||
| int *exp_data_ = nullptr; | int *exp_data_ = nullptr; | ||||
| SoftmaxQuantArg quant_params_; | |||||
| SoftmaxQuantArg *quant_param_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -25,6 +25,13 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_SubFusion; | using mindspore::schema::PrimitiveType_SubFusion; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| SubInt8CPUKernel::~SubInt8CPUKernel() { | |||||
| if (quant_param_ != nullptr) { | |||||
| free(quant_param_); | |||||
| quant_param_ = nullptr; | |||||
| } | |||||
| } | |||||
| int SubInt8CPUKernel::Init() { | int SubInt8CPUKernel::Init() { | ||||
| lite::Tensor *input0 = in_tensors_.at(0); | lite::Tensor *input0 = in_tensors_.at(0); | ||||
| lite::Tensor *input1 = in_tensors_.at(1); | lite::Tensor *input1 = in_tensors_.at(1); | ||||
| @@ -35,37 +42,45 @@ int SubInt8CPUKernel::Init() { | |||||
| broadcast_ = input0->ElementsNum() != input1->ElementsNum(); | broadcast_ = input0->ElementsNum() != input1->ElementsNum(); | ||||
| param_.in0_args_.scale_ = input0->quant_params().front().scale; | |||||
| param_.in0_args_.zp_ = -input0->quant_params().front().zeroPoint; | |||||
| param_.in1_args_.scale_ = input1->quant_params().front().scale; | |||||
| param_.in1_args_.zp_ = -input1->quant_params().front().zeroPoint; | |||||
| param_.out_args_.scale_ = output->quant_params().front().scale; | |||||
| param_.out_args_.zp_ = output->quant_params().front().zeroPoint; | |||||
| quant_param_ = reinterpret_cast<SubQuantArg *>(malloc(sizeof(SubQuantArg))); | |||||
| if (quant_param_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc SubQuantArg for Sub int8 op failed!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| quant_param_->in0_args_.scale_ = input0->quant_params().front().scale; | |||||
| quant_param_->in0_args_.zp_ = -input0->quant_params().front().zeroPoint; | |||||
| quant_param_->in1_args_.scale_ = input1->quant_params().front().scale; | |||||
| quant_param_->in1_args_.zp_ = -input1->quant_params().front().zeroPoint; | |||||
| quant_param_->out_args_.scale_ = output->quant_params().front().scale; | |||||
| quant_param_->out_args_.zp_ = output->quant_params().front().zeroPoint; | |||||
| const int left_shift = 20; | const int left_shift = 20; | ||||
| const double twice_max_input_scale = 2 * std::max(param_.in0_args_.scale_, param_.in1_args_.scale_); | |||||
| const double real_input0_multiplier = param_.in0_args_.scale_ / twice_max_input_scale; | |||||
| const double real_input1_multiplier = param_.in1_args_.scale_ / twice_max_input_scale; | |||||
| const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * param_.out_args_.scale_); | |||||
| const double twice_max_input_scale = 2 * std::max(quant_param_->in0_args_.scale_, quant_param_->in1_args_.scale_); | |||||
| const double real_input0_multiplier = quant_param_->in0_args_.scale_ / twice_max_input_scale; | |||||
| const double real_input1_multiplier = quant_param_->in1_args_.scale_ / twice_max_input_scale; | |||||
| const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * quant_param_->out_args_.scale_); | |||||
| QuantizeMultiplierSmallerThanOne(real_input0_multiplier, ¶m_.input0_multiplier_, ¶m_.input0_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(real_input1_multiplier, ¶m_.input1_multiplier_, ¶m_.input1_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(real_output_multiplier, ¶m_.output_multiplier_, ¶m_.output_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(real_input0_multiplier, &quant_param_->input0_multiplier_, | |||||
| &quant_param_->input0_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &quant_param_->input1_multiplier_, | |||||
| &quant_param_->input1_shift_); | |||||
| QuantizeMultiplierSmallerThanOne(real_output_multiplier, &quant_param_->output_multiplier_, | |||||
| &quant_param_->output_shift_); | |||||
| param_.output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| param_.output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| quant_param_->output_activation_min_ = std::numeric_limits<int8_t>::min(); | |||||
| quant_param_->output_activation_max_ = std::numeric_limits<int8_t>::max(); | |||||
| int left_shift0 = -param_.input0_shift_ > 0 ? -param_.input0_shift_ : 0; | |||||
| param_.right_shift0_ = -param_.input0_shift_ > 0 ? 0 : param_.input0_shift_; | |||||
| int left_shift0 = -quant_param_->input0_shift_ > 0 ? -quant_param_->input0_shift_ : 0; | |||||
| quant_param_->right_shift0_ = -quant_param_->input0_shift_ > 0 ? 0 : quant_param_->input0_shift_; | |||||
| int left_shift1 = -param_.input1_shift_ > 0 ? -param_.input1_shift_ : 0; | |||||
| param_.right_shift1_ = -param_.input1_shift_ > 0 ? 0 : param_.input1_shift_; | |||||
| int left_shift1 = -quant_param_->input1_shift_ > 0 ? -quant_param_->input1_shift_ : 0; | |||||
| quant_param_->right_shift1_ = -quant_param_->input1_shift_ > 0 ? 0 : quant_param_->input1_shift_; | |||||
| param_.left_shift_out_ = -param_.output_shift_ > 0 ? -param_.output_shift_ : 0; | |||||
| param_.right_shift_out_ = -param_.output_shift_ > 0 ? 0 : param_.output_shift_; | |||||
| quant_param_->left_shift_out_ = -quant_param_->output_shift_ > 0 ? -quant_param_->output_shift_ : 0; | |||||
| quant_param_->right_shift_out_ = -quant_param_->output_shift_ > 0 ? 0 : quant_param_->output_shift_; | |||||
| param_.left_shift_result0_ = (1 << left_shift) * ((1 << left_shift0)); | |||||
| param_.left_shift_result1_ = (1 << left_shift) * ((1 << left_shift1)); | |||||
| quant_param_->left_shift_result0_ = (1 << left_shift) * ((1 << left_shift0)); | |||||
| quant_param_->left_shift_result1_ = (1 << left_shift) * ((1 << left_shift1)); | |||||
| MS_ASSERT(left_shift + left_shift0 == left_shift); | MS_ASSERT(left_shift + left_shift0 == left_shift); | ||||
| MS_ASSERT(left_shift + left_shift1 == left_shift); | MS_ASSERT(left_shift + left_shift1 == left_shift); | ||||
| @@ -94,10 +109,10 @@ int SubInt8CPUKernel::DoExecute(int task_id) { | |||||
| auto ret = RET_OK; | auto ret = RET_OK; | ||||
| if (broadcast_) { | if (broadcast_) { | ||||
| ret = SubInt8(tile0_data_ + task_id * stride, tile1_data_ + task_id * stride, output_data_ + task_id * stride, | ret = SubInt8(tile0_data_ + task_id * stride, tile1_data_ + task_id * stride, output_data_ + task_id * stride, | ||||
| count, ¶m_); | |||||
| count, quant_param_); | |||||
| } else { | } else { | ||||
| ret = SubInt8(input0_data_ + task_id * stride, input1_data_ + task_id * stride, output_data_ + task_id * stride, | ret = SubInt8(input0_data_ + task_id * stride, input1_data_ + task_id * stride, output_data_ + task_id * stride, | ||||
| count, ¶m_); | |||||
| count, quant_param_); | |||||
| } | } | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -31,7 +31,7 @@ class SubInt8CPUKernel : public LiteKernel { | |||||
| explicit SubInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | explicit SubInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx) {} | : LiteKernel(parameter, inputs, outputs, ctx) {} | ||||
| ~SubInt8CPUKernel() = default; | |||||
| ~SubInt8CPUKernel() override; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -39,7 +39,7 @@ class SubInt8CPUKernel : public LiteKernel { | |||||
| int DoExecute(int task_id); | int DoExecute(int task_id); | ||||
| private: | private: | ||||
| SubQuantArg param_; | |||||
| SubQuantArg *quant_param_ = nullptr; | |||||
| int8_t *tile0_data_ = nullptr; | int8_t *tile0_data_ = nullptr; | ||||
| int8_t *tile1_data_ = nullptr; | int8_t *tile1_data_ = nullptr; | ||||
| bool broadcast_ = false; | bool broadcast_ = false; | ||||
| @@ -62,7 +62,7 @@ class TransposeInt8CPUKernel : public LiteKernel { | |||||
| int num_unit_ = 0; | int num_unit_ = 0; | ||||
| int in_shape_[8] = {0}; | int in_shape_[8] = {0}; | ||||
| int out_shape_[8] = {0}; | int out_shape_[8] = {0}; | ||||
| int nhnc_param_[3]; | |||||
| int nhnc_param_[3] = {0}; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -29,6 +29,9 @@ const schema::Primitive *SearchSubGraph::CreatePartialPrimitive(int64_t subgraph | |||||
| fbb.Finish(prim_offset); | fbb.Finish(prim_offset); | ||||
| auto tmp_buf = fbb.GetBufferPointer(); | auto tmp_buf = fbb.GetBufferPointer(); | ||||
| auto prim_buf = reinterpret_cast<char *>(malloc(fbb.GetSize())); | auto prim_buf = reinterpret_cast<char *>(malloc(fbb.GetSize())); | ||||
| if (prim_buf == nullptr) { | |||||
| return nullptr; | |||||
| } | |||||
| memcpy(prim_buf, tmp_buf, fbb.GetSize()); | memcpy(prim_buf, tmp_buf, fbb.GetSize()); | ||||
| auto primitive = flatbuffers::GetRoot<schema::Primitive>(prim_buf); | auto primitive = flatbuffers::GetRoot<schema::Primitive>(prim_buf); | ||||
| @@ -59,6 +62,7 @@ void SearchSubGraph::ConvertSubGraphToModel() { | |||||
| Model::Node *new_partial_node = new (std::nothrow) Model::Node(); | Model::Node *new_partial_node = new (std::nothrow) Model::Node(); | ||||
| if (new_partial_node == nullptr) { | if (new_partial_node == nullptr) { | ||||
| MS_LOG(ERROR) << "New partial node failed!"; | MS_LOG(ERROR) << "New partial node failed!"; | ||||
| free(new_sub_graph); | |||||
| return; | return; | ||||
| } | } | ||||
| new_partial_node->name_ = "Partial-subgraph-split-" + std::to_string(new_sub_index); | new_partial_node->name_ = "Partial-subgraph-split-" + std::to_string(new_sub_index); | ||||
| @@ -230,15 +234,17 @@ void SearchSubGraph::InitMainGraphDevice() { return; } | |||||
| void SearchSubGraph::SubgraphFusion() { | void SearchSubGraph::SubgraphFusion() { | ||||
| while (sub_graphs_.size() > 2) { | while (sub_graphs_.size() > 2) { | ||||
| size_t sub1_index = 0; | size_t sub1_index = 0; | ||||
| int sub2_index = -1; | |||||
| size_t sub2_index = 0; | |||||
| bool is_found = false; | |||||
| for (; sub1_index < sub_graphs_.size(); sub1_index++) { | for (; sub1_index < sub_graphs_.size(); sub1_index++) { | ||||
| for (size_t tmp2 = sub1_index + 1; tmp2 < sub_graphs_.size(); tmp2++) { | for (size_t tmp2 = sub1_index + 1; tmp2 < sub_graphs_.size(); tmp2++) { | ||||
| if (sub_graphs_[sub1_index].device_ == sub_graphs_[tmp2].device_) { | if (sub_graphs_[sub1_index].device_ == sub_graphs_[tmp2].device_) { | ||||
| sub2_index = tmp2; | sub2_index = tmp2; | ||||
| is_found = true; | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| if (sub2_index != -1) { | |||||
| if (!is_found) { | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||