From 875ae72ae3d34917aeeccec701784ff45f94ddae Mon Sep 17 00:00:00 2001 From: yangruoqi713 Date: Mon, 26 Apr 2021 19:28:33 +0800 Subject: [PATCH] [MSLITE][Develop] clean codex in lite op runtime module --- .../kernel_compiler/cpu/nnacl/int8/mul_int8.c | 91 ++++++++------- .../kernel_compiler/cpu/nnacl/int8/mul_int8.h | 4 +- .../cpu/nnacl/int8/softmax_int8.c | 10 +- .../cpu/nnacl/int8/softmax_int8.h | 2 +- .../kernel_compiler/cpu/nnacl/mul_parameter.h | 1 - mindspore/lite/src/runtime/allocator.h | 2 +- .../kernel/arm/base/constant_of_shape.h | 2 +- .../arm/base/group_convolution_creator.cc | 19 +++- .../src/runtime/kernel/arm/base/tile_base.h | 2 +- .../kernel/arm/fp16/group_convolution_fp16.h | 2 +- .../kernel/arm/fp32/batch_to_space_fp32.h | 4 +- .../kernel/arm/fp32/broadcast_to_fp32.cc | 24 ++-- .../kernel/arm/fp32/broadcast_to_fp32.h | 4 +- .../arm/fp32/convolution_delegate_fp32.cc | 13 ++- .../kernel/arm/fp32/convolution_fp32.cc | 6 +- .../arm/fp32/convolution_winograd_fp32.cc | 4 +- .../arm/fp32/convolution_winograd_fp32.h | 6 +- .../kernel/arm/fp32/group_convolution_fp32.h | 1 + .../runtime/kernel/arm/fp32/transpose_fp32.h | 2 +- .../kernel/arm/fp32/uniform_real_fp32.cc | 1 + .../src/runtime/kernel/arm/int8/add_int8.cc | 60 ++++++---- .../src/runtime/kernel/arm/int8/add_int8.h | 4 +- .../runtime/kernel/arm/int8/argminmax_int8.cc | 39 +++++-- .../runtime/kernel/arm/int8/argminmax_int8.h | 6 +- .../kernel/arm/int8/batch_to_space_int8.cc | 36 ++++-- .../kernel/arm/int8/batch_to_space_int8.h | 6 +- .../kernel/arm/int8/deconvolution_int8.h | 2 +- .../kernel/arm/int8/depth_to_space_int8.cc | 33 +++++- .../kernel/arm/int8/depth_to_space_int8.h | 6 +- .../src/runtime/kernel/arm/int8/div_int8.cc | 37 +++--- .../src/runtime/kernel/arm/int8/div_int8.h | 4 +- .../kernel/arm/int8/group_convolution_int8.h | 1 + .../runtime/kernel/arm/int8/l2_norm_int8.cc | 22 +++- .../runtime/kernel/arm/int8/l2_norm_int8.h | 4 +- .../kernel/arm/int8/layer_norm_int8.cc | 21 +++- .../runtime/kernel/arm/int8/layer_norm_int8.h | 2 +- .../kernel/arm/int8/matmul_base_int8.cc | 107 ++++++++++-------- .../kernel/arm/int8/matmul_base_int8.h | 2 +- .../src/runtime/kernel/arm/int8/mul_int8.cc | 46 +++++--- .../src/runtime/kernel/arm/int8/mul_int8.h | 4 +- .../runtime/kernel/arm/int8/softmax_int8.cc | 33 ++++-- .../runtime/kernel/arm/int8/softmax_int8.h | 4 +- .../src/runtime/kernel/arm/int8/sub_int8.cc | 65 +++++++---- .../src/runtime/kernel/arm/int8/sub_int8.h | 4 +- .../runtime/kernel/arm/int8/transpose_int8.h | 2 +- mindspore/lite/src/sub_graph_split.cc | 10 +- 46 files changed, 474 insertions(+), 286 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c index 7dce85d201..fbda674d0c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c @@ -28,19 +28,19 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i } void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - MulQuantArg para, int *index) { - int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_); - int32x4_t right_shift_out_vec = vdupq_n_s32(-para.shift_right_); - int16x8_t out_zp_vec = vdupq_n_s16(para.out_quant_arg_.zp_); - int8x16_t out_min_vec = vdupq_n_s8(para.output_activation_min_); - int8x16_t 
out_max_vec = vdupq_n_s8(para.output_activation_max_); - int8x8_t out_min_vec_s8 = vdup_n_s8(para.output_activation_min_); - int8x8_t out_max_vec_s8 = vdup_n_s8(para.output_activation_max_); + MulQuantArg *quant_arg, int *index) { + int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); + int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); + int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); + int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_); + int8x16_t out_max_vec = vdupq_n_s8(quant_arg->output_activation_max_); + int8x8_t out_min_vec_s8 = vdup_n_s8(quant_arg->output_activation_min_); + int8x8_t out_max_vec_s8 = vdup_n_s8(quant_arg->output_activation_max_); for (; (*index) <= real_dst_count - 16; (*index) += 16) { - int16x8_t zp1_vec = vdupq_n_s16(para.in_quant_args_[0].zp_); - int16x8_t zp2_vec = vdupq_n_s16(para.in_quant_args_[1].zp_); + int16x8_t zp1_vec = vdupq_n_s16(quant_arg->in_quant_args_[0].zp_); + int16x8_t zp2_vec = vdupq_n_s16(quant_arg->in_quant_args_[1].zp_); int8x16_t input0_vec = vld1q_s8(input0_data + *index); int8x16_t input1_vec = vld1q_s8(input1_data + *index); int16x8_t input0_low = vmovl_s8(vget_low_s8(input0_vec)); @@ -81,8 +81,8 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, output_data += 16; } for (; (*index) <= real_dst_count - 8; (*index) += 8) { - int16x8_t input0_val = LoadAndAddOffset(input0_data, *index, para.in_quant_args_[0].zp_); - int16x8_t input1_val = LoadAndAddOffset(input1_data, *index, para.in_quant_args_[1].zp_); + int16x8_t input0_val = LoadAndAddOffset(input0_data, *index, quant_arg->in_quant_args_[0].zp_); + int16x8_t input1_val = LoadAndAddOffset(input1_data, *index, quant_arg->in_quant_args_[1].zp_); int16x4_t input0_low = vget_low_s16(input0_val); int16x4_t input0_high = vget_high_s16(input0_val); @@ -105,23 +105,23 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, #endif void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, - bool input1_broad, MulQuantArg para) { + bool input1_broad, MulQuantArg *quant_arg) { // input0 need broadcast - int32_t zp1 = para.in_quant_args_[0].zp_; - int32_t zp2 = para.in_quant_args_[1].zp_; + int32_t zp1 = quant_arg->in_quant_args_[0].zp_; + int32_t zp2 = quant_arg->in_quant_args_[1].zp_; if (input1_broad) { - zp1 = para.in_quant_args_[1].zp_; - zp2 = para.in_quant_args_[0].zp_; + zp1 = quant_arg->in_quant_args_[1].zp_; + zp2 = quant_arg->in_quant_args_[0].zp_; } #ifdef ENABLE_ARM - int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_); - int32x4_t right_shift_out_vec = vdupq_n_s32(-para.shift_right_); - int16x8_t out_zp_vec = vdupq_n_s16(para.out_quant_arg_.zp_); - int8x16_t out_min_vec = vdupq_n_s8(para.output_activation_min_); - int8x16_t out_max_vec = vdupq_n_s8(para.output_activation_max_); - int8x8_t out_min_vec_s8 = vdup_n_s8(para.output_activation_min_); - int8x8_t out_max_vec_s8 = vdup_n_s8(para.output_activation_max_); + int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); + int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); + int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); + int8x16_t out_min_vec = 
vdupq_n_s8(quant_arg->output_activation_min_); + int8x16_t out_max_vec = vdupq_n_s8(quant_arg->output_activation_max_); + int8x8_t out_min_vec_s8 = vdup_n_s8(quant_arg->output_activation_min_); + int8x8_t out_max_vec_s8 = vdup_n_s8(quant_arg->output_activation_max_); int16x8_t zp1_vec = vdupq_n_s16(zp1); int16x8_t zp2_vec = vdupq_n_s16(zp2); #endif @@ -199,13 +199,14 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int for (; j < depth; ++j) { const int32_t input0_val = zp1 + input0_data[j]; const int32_t input1_val = zp2 + input1_data[0]; - int32_t mul_result = RoundingDivideByPOT( - SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << para.shift_left_), para.output_multiplier_), - para.shift_right_); - - mul_result += para.out_quant_arg_.zp_; - mul_result = mul_result < para.output_activation_max_ ? mul_result : para.output_activation_max_; - mul_result = mul_result > para.output_activation_min_ ? mul_result : para.output_activation_min_; + int32_t mul_result = + RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_), + quant_arg->output_multiplier_), + quant_arg->shift_right_); + + mul_result += quant_arg->out_quant_arg_.zp_; + mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_; + mul_result = mul_result > quant_arg->output_activation_min_ ? mul_result : quant_arg->output_activation_min_; output_data[0] = (int8_t)mul_result; input1_data++; output_data++; @@ -214,21 +215,23 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int return; } -void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg para) { +void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + MulQuantArg *quant_arg) { int index = 0; #ifdef ENABLE_NEON - MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, para, &index); + MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index); #endif for (; index < real_dst_count; ++index) { - const int32_t input0_val = para.in_quant_args_[0].zp_ + input0_data[index]; - const int32_t input1_val = para.in_quant_args_[1].zp_ + input1_data[index]; - int32_t mul_result = RoundingDivideByPOT( - SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << para.shift_left_), para.output_multiplier_), - para.shift_right_); - - mul_result += para.out_quant_arg_.zp_; - mul_result = mul_result < para.output_activation_max_ ? mul_result : para.output_activation_max_; - mul_result = mul_result > para.output_activation_min_ ? mul_result : para.output_activation_min_; + const int32_t input0_val = quant_arg->in_quant_args_[0].zp_ + input0_data[index]; + const int32_t input1_val = quant_arg->in_quant_args_[1].zp_ + input1_data[index]; + int32_t mul_result = + RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_), + quant_arg->output_multiplier_), + quant_arg->shift_right_); + + mul_result += quant_arg->out_quant_arg_.zp_; + mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_; + mul_result = mul_result > quant_arg->output_activation_min_ ? 
mul_result : quant_arg->output_activation_min_; output_data[index] = (int8_t)mul_result; } return; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h index af074cba07..f19d8e40f8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h @@ -28,9 +28,9 @@ #ifdef __cplusplus extern "C" { #endif -void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg para); +void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg *quant_arg); void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, - bool input1_broad, MulQuantArg para); + bool input1_broad, MulQuantArg *quant_arg); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.c index 58e151f453..6dbb57f172 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.c @@ -17,7 +17,7 @@ #include "nnacl/int8/softmax_int8.h" int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, - SoftmaxQuantArg quant_param, SoftmaxParameter *parameter) { + SoftmaxQuantArg *quant_param, SoftmaxParameter *parameter) { int32_t axis = parameter->axis_; int n_dim = parameter->n_dim_; int *input_shape = parameter->input_shape_; @@ -32,7 +32,7 @@ int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp int outter_offset = o * axis_shape_size * inner_size; for (int c = 0; c < inner_size; c++) { - int8_t max_row = quant_param.output_activation_min_; + int8_t max_row = quant_param->output_activation_min_; for (int i = 0; i < axis_shape_size; ++i) { int axis_offset = outter_offset + c + i * inner_size; max_row = MSMAX(max_row, input_ptr[axis_offset]); @@ -43,7 +43,7 @@ int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp int axis_offset = outter_offset + c + i * inner_size; const int32_t input_val = input_ptr[axis_offset] - max_row; const int32_t input_scaled = SaturatingRoundingDoublingHighMul( - input_val * (1 << (unsigned int)quant_param.shift_left_), quant_param.output_multiplier_); + input_val * (1 << (unsigned int)quant_param->shift_left_), quant_param->output_multiplier_); int exp_val = exp_on_negative_values(input_scaled, 5); exp_data[axis_offset] = exp_val; exp_sum = exp_sum + Rescale(exp_val, 0, 12); @@ -58,9 +58,9 @@ int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp int unsat_output = RoundingDivideByPOT( SaturatingRoundingDoublingHighMul(shifted_scale, exp_data[axis_offset + c]), num_bits_over_unit + 31 - 8); - int raw_output = unsat_output + quant_param.output_activation_min_; + int raw_output = unsat_output + quant_param->output_activation_min_; output_ptr[axis_offset + c] = - (int8_t)MSMAX(quant_param.output_activation_min_, MSMIN(raw_output, quant_param.output_activation_max_)); + (int8_t)MSMAX(quant_param->output_activation_min_, MSMIN(raw_output, quant_param->output_activation_max_)); } } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.h index dfe2213669..8bf1a36615 
100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/softmax_int8.h @@ -27,7 +27,7 @@ extern "C" { #endif int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, - SoftmaxQuantArg quant_param, SoftmaxParameter *parameter); + SoftmaxQuantArg *quant_param, SoftmaxParameter *parameter); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/mul_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/mul_parameter.h index d36daf32a2..d275594a27 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/mul_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/mul_parameter.h @@ -34,7 +34,6 @@ typedef struct MulParameter { OpParameter op_parameter_; // other parameter int thread_count_; - MulQuantArg mul_quant_arg_; } MulParameter; #endif // MINDSPORE_NNACL_MUL_PARAMETER_H_ diff --git a/mindspore/lite/src/runtime/allocator.h b/mindspore/lite/src/runtime/allocator.h index 95f7c6c8d4..c4ac18e769 100644 --- a/mindspore/lite/src/runtime/allocator.h +++ b/mindspore/lite/src/runtime/allocator.h @@ -69,7 +69,7 @@ class DefaultAllocator : public Allocator { std::unordered_map<void *, MemBuf *> allocatedList_; std::multimap<size_t, MemBuf *> freeList_; // 6 is empirical value - int shiftFactor_ = 6; + unsigned shiftFactor_ = 6; bool lockFlag_ = true; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/constant_of_shape.h b/mindspore/lite/src/runtime/kernel/arm/base/constant_of_shape.h index 0fa4174f08..514301e3be 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/constant_of_shape.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/constant_of_shape.h @@ -42,7 +42,7 @@ class ConstantOfShapeCPUKernel : public LiteKernel { private: ConstantOfShapeParameter *param_ = nullptr; void *output_ptr_ = nullptr; - int thread_stride_; + int thread_stride_ = 0; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc index c96fe1686f..400bfb6c97 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_creator.cc @@ -33,16 +33,22 @@ ConvParameter *CreateNewConvParameter(ConvParameter *parameter) { return conv_parameter; } -void FreeCurrentConv(ConvParameter *conv_param, const std::vector<lite::Tensor *> *new_inputs, - const std::vector<lite::Tensor *> *new_outputs) { +void FreeCurrentConv(ConvParameter *conv_param, std::vector<lite::Tensor *> *new_inputs, + std::vector<lite::Tensor *> *new_outputs) { if (conv_param != nullptr) { free(conv_param); } - for (auto &in_tensor : *new_inputs) { - delete in_tensor; + if (new_inputs != nullptr) { + for (auto &in_tensor : *new_inputs) { + delete in_tensor; + in_tensor = nullptr; + } } - for (auto &out_tensor : *new_outputs) { - delete out_tensor; + if (new_outputs != nullptr) { + for (auto &out_tensor : *new_outputs) { + delete out_tensor; + out_tensor = nullptr; + } } } @@ -112,6 +118,7 @@ void GroupConvCreator::FreeGroupConvs() { delete out_tensor; } delete sub_conv; + sub_conv = nullptr; } group_convs_.clear(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tile_base.h b/mindspore/lite/src/runtime/kernel/arm/base/tile_base.h index 8e6020a6fb..044ac94233 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/tile_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/tile_base.h @@ -37,7 +37,7 @@ class TileCPUKernel : public 
LiteKernel { int RunSimpleTile(); void ComputeStrides(const int *shape, int *strides, int ndim); void FillOneDimTileParam(); - bool one_dim_tile_; + bool one_dim_tile_ = false; uint8_t *input_addr_ = nullptr; uint8_t *output_addr_ = nullptr; TileParameter *tile_parameter_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.h index 8cf449cd47..22dcc87455 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.h @@ -33,7 +33,7 @@ class GroupConvolutionFP16CPUKernel : public GroupConvolutionBaseCPUKernel { : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { } // opParameter(in channel, out channel) in this kernel has been split to groups, if // you want to get real params, multiply in channel / out channel with group num - + ~GroupConvolutionFP16CPUKernel() = default; int SeparateInput(int group_id) override; int PostConcat(int group_id) override; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.h index 25a4ca020f..8e4f70a06e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.h @@ -36,8 +36,8 @@ class BatchToSpaceCPUKernel : public LiteKernel { int Processinput(); private: - int32_t block_shape_[BATCH_TO_SPACE_BLOCK_SHAPE_SIZE]; - int32_t crops_[COMM_SHAPE_SIZE]; + int32_t block_shape_[BATCH_TO_SPACE_BLOCK_SHAPE_SIZE] = {0}; + int32_t crops_[COMM_SHAPE_SIZE] = {0}; bool no_crop_ = false; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc index f1ce54d295..4c9f8251f6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc @@ -25,34 +25,44 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_BroadcastTo; namespace mindspore::kernel { +BroadcastToCPUKernel::~BroadcastToCPUKernel() { + if (shape_info_ != nullptr) { + free(shape_info_); + shape_info_ = nullptr; + } +} + int BroadcastToCPUKernel::ReSize() { auto input_shape = in_tensors_.at(0)->shape(); for (size_t i = 0; i < input_shape.size(); ++i) { - shape_info_.input_shape_[i] = input_shape[i]; + shape_info_->input_shape_[i] = input_shape[i]; } - shape_info_.input_shape_size_ = static_cast<int>(input_shape.size()); + shape_info_->input_shape_size_ = static_cast<int>(input_shape.size()); auto output_shape = out_tensors_.at(0)->shape(); for (size_t i = 0; i < output_shape.size(); ++i) { - shape_info_.output_shape_[i] = output_shape[i]; + shape_info_->output_shape_[i] = output_shape[i]; } - shape_info_.output_shape_size_ = static_cast<int>(output_shape.size()); + shape_info_->output_shape_size_ = static_cast<int>(output_shape.size()); return RET_OK; } int BroadcastToCPUKernel::Init() { + shape_info_ = reinterpret_cast<BroadcastShapeInfo *>(malloc(sizeof(BroadcastShapeInfo))); + if (shape_info_ == nullptr) { + MS_LOG(ERROR) << "Malloc BroadcastShapeInfo failed!"; + return RET_ERROR; + } if (!InferShapeDone()) { return RET_OK; } - return ReSize(); } int BroadcastToCPUKernel::Run() { const auto input_data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); auto output_data = 
reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); - - return BroadcastTo(float, input_data, &shape_info_, output_data); + return BroadcastTo(float, input_data, shape_info_, output_data); } REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BroadcastTo, LiteKernelCreator<BroadcastToCPUKernel>) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h index c54dc4407d..ef22b19676 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h @@ -27,14 +27,14 @@ class BroadcastToCPUKernel : public LiteKernel { BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) : LiteKernel(parameter, inputs, outputs, ctx) {} - ~BroadcastToCPUKernel() = default; + ~BroadcastToCPUKernel(); int Init() override; int ReSize() override; int Run() override; private: - BroadcastShapeInfo shape_info_; + BroadcastShapeInfo *shape_info_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc index a02b951366..711ee9010b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc @@ -168,9 +168,13 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() { kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, const InnerContext *ctx) { + if (opParameter == nullptr) { + MS_LOG(ERROR) << "Get null opParameter for CpuConvDwFp32KernelCreator."; + return nullptr; + } auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); kernel::LiteKernel *kernel = nullptr; - if (opParameter != nullptr && opParameter->infer_flag_) { + if (opParameter->infer_flag_) { #if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX)) if (CheckConvDw1DWinograd(conv_param, ctx->thread_num_)) { kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3CPUKernel(opParameter, inputs, outputs, ctx); @@ -209,9 +213,14 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Te group_conv_creator.get_group_conv()->emplace_back(new (std::nothrow) ConvolutionDelegateCPUKernel( reinterpret_cast<OpParameter *>(new_conv_param), new_inputs, new_outputs, ctx)); } - return new (std::nothrow) + auto group_kernel = new (std::nothrow) GroupConvolutionFp32CPUKernel(op_parameter, inputs, outputs, ctx, *(group_conv_creator.get_group_conv()), reinterpret_cast<ConvParameter *>(op_parameter)->group_); + if (group_kernel == nullptr) { + MS_LOG(ERROR) << "New GroupConvolutionFp32CPUKernel failed."; + return nullptr; + } + return group_kernel; } /* creator func */ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc index 22c29e6007..75f7f63a48 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc @@ -38,11 +38,11 @@ namespace mindspore::kernel { int ConvolutionCPUKernel::InitWeightBias() { auto filter_tensor = in_tensors_.at(kWeightIndex); - int in_channel = filter_tensor->Channel(); - int out_channel = filter_tensor->Batch(); + size_t in_channel = filter_tensor->Channel(); + size_t out_channel = filter_tensor->Batch(); conv_param_->input_channel_ = in_channel; 
conv_param_->output_channel_ = out_channel; - int kernel_plane = filter_tensor->Height() * filter_tensor->Width(); + size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width(); int oc_block_num = UP_ROUND(out_channel, OC_BLOCK); int pack_weight_size = oc_block_num * in_channel * kernel_plane; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc index cdbdbf5d4b..89a3ab945b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc @@ -25,8 +25,8 @@ using mindspore::lite::RET_MEMORY_FAILED; using mindspore::lite::RET_OK; namespace mindspore::kernel { -int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_data, float *matrix_g, float *matrix_gt, - int oc_block) { +int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_data, float *matrix_g, + const float *matrix_gt, int oc_block) { if (oc_block == 0) { MS_LOG(ERROR) << "Divide by zero"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h index 90e066ebc2..358935210f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h @@ -48,7 +48,7 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { int InitWeightBias(); int InitTmpBuffer(); int ConfigInputOutput(); - int WinogradFilterTransform(const float *weight_data, float *matrix_g, float *matrix_gt, int oc_block); + int WinogradFilterTransform(const float *weight_data, float *matrix_g, const float *matrix_gt, int oc_block); private: void FreeTmpBuffer() { @@ -82,8 +82,8 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { float *col_buffer_ = nullptr; float *trans_weight_ = nullptr; TmpBufferAddress tmp_buffer_address_list_[4]; - InputTransFunc in_func_; - OutputTransFunc out_func_; + InputTransFunc in_func_ = nullptr; + OutputTransFunc out_func_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h index c71400ed80..4795d41977 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.h @@ -32,6 +32,7 @@ class GroupConvolutionFp32CPUKernel : public GroupConvolutionBaseCPUKernel { : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) { } // opParameter(in channel, out channel) in this kernel has been split to groups, if // you want to get real params, multiply in channel / out channel with group num + ~GroupConvolutionFp32CPUKernel() = default; int SeparateInput(int group_id) override; int PostConcat(int group_id) override; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h index 9aa20507a7..c0691fb5bb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h @@ -51,7 +51,7 @@ class TransposeCPUKernel : public LiteKernel { TransposeParameter *param_ = nullptr; TransposeFunc NHNCTransposeFunc_ = nullptr; int thread_count_ = 0; 
- int nhnc_param_[3]; + int nhnc_param_[3] = {0}; int dims_ = 0; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc index 8c6eb4f7dd..bcce63a768 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc @@ -35,6 +35,7 @@ class PhiloxRandom { counter_[2] = static_cast<uint32_t>(seed_hi); counter_[3] = static_cast<uint32_t>(seed_hi >> 32); } + ~PhiloxRandom() = default; // Skip the specified number of samples of 128-bits in the current stream. void Skip(uint64_t count) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc index b40c7248a8..211e99bfc3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc @@ -27,40 +27,52 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_AddFusion; namespace mindspore::kernel { +QuantizedAddCPUKernel::~QuantizedAddCPUKernel() { + if (para_ != nullptr) { + free(para_); + para_ = nullptr; + } +} + int QuantizedAddCPUKernel::Init() { + para_ = reinterpret_cast<AddQuantParameter *>(malloc(sizeof(AddQuantParameter))); + if (para_ == nullptr) { + MS_LOG(ERROR) << "Malloc AddQuantParameter for add int8 op failed!"; + return RET_ERROR; + } auto *input0 = in_tensors_.at(0); auto *input1 = in_tensors_.at(1); auto *output = out_tensors_.at(0); - para_.in0_args_.zp_ = input0->quant_params().front().zeroPoint * -1; - para_.in1_args_.zp_ = input1->quant_params().front().zeroPoint * -1; - para_.out_zp_ = output->quant_params().front().zeroPoint; + para_->in0_args_.zp_ = input0->quant_params().front().zeroPoint * -1; + para_->in1_args_.zp_ = input1->quant_params().front().zeroPoint * -1; + para_->out_zp_ = output->quant_params().front().zeroPoint; const double in0_scale = input0->quant_params().front().scale; const double in1_scale = input1->quant_params().front().scale; const double out_scale = output->quant_params().front().scale; - para_.left_shift_ = 20; + para_->left_shift_ = 20; const double twice_max_input_scale = 2 * std::max(in0_scale, in1_scale); const double in0_multiplier = in0_scale / twice_max_input_scale; const double in1_multiplier = in1_scale / twice_max_input_scale; - const double out_multiplier = twice_max_input_scale / ((1 << para_.left_shift_) * out_scale); + const double out_multiplier = twice_max_input_scale / ((1 << para_->left_shift_) * out_scale); - QuantizeMultiplierSmallerThanOne(in0_multiplier, &para_.in0_args_.multiplier_, &para_.in0_args_.left_shift_); - QuantizeMultiplierSmallerThanOne(in1_multiplier, &para_.in1_args_.multiplier_, &para_.in1_args_.left_shift_); - QuantizeMultiplierSmallerThanOne(out_multiplier, &para_.out_multiplier_, &para_.out_left_shift_); + QuantizeMultiplierSmallerThanOne(in0_multiplier, &(para_->in0_args_.multiplier_), &(para_->in0_args_.left_shift_)); + QuantizeMultiplierSmallerThanOne(in1_multiplier, &(para_->in1_args_.multiplier_), &(para_->in1_args_.left_shift_)); + QuantizeMultiplierSmallerThanOne(out_multiplier, &(para_->out_multiplier_), &(para_->out_left_shift_)); - para_.in0_args_.right_shift_ = -para_.in0_args_.left_shift_ > 0 ? 0 : para_.in0_args_.left_shift_; - para_.in1_args_.right_shift_ = -para_.in1_args_.left_shift_ > 0 ? 0 : para_.in1_args_.left_shift_; - para_.out_right_shift_ = -para_.out_left_shift_ > 0 ? 
0 : para_.out_left_shift_; + para_->in0_args_.right_shift_ = -para_->in0_args_.left_shift_ > 0 ? 0 : para_->in0_args_.left_shift_; + para_->in1_args_.right_shift_ = -para_->in1_args_.left_shift_ > 0 ? 0 : para_->in1_args_.left_shift_; + para_->out_right_shift_ = -para_->out_left_shift_ > 0 ? 0 : para_->out_left_shift_; - para_.in0_args_.left_shift_ = -para_.in0_args_.left_shift_ > 0 ? -para_.in0_args_.left_shift_ : 0; - para_.in1_args_.left_shift_ = -para_.in1_args_.left_shift_ > 0 ? -para_.in1_args_.left_shift_ : 0; - para_.out_left_shift_ = -para_.out_left_shift_ > 0 ? -para_.out_left_shift_ : 0; + para_->in0_args_.left_shift_ = -para_->in0_args_.left_shift_ > 0 ? -para_->in0_args_.left_shift_ : 0; + para_->in1_args_.left_shift_ = -para_->in1_args_.left_shift_ > 0 ? -para_->in1_args_.left_shift_ : 0; + para_->out_left_shift_ = -para_->out_left_shift_ > 0 ? -para_->out_left_shift_ : 0; auto act = arith_para_->activation_type_; - CalculateActivationRangeQuantized(act == ActType_Relu, act == ActType_Relu6, para_.out_zp_, - static_cast<float>(out_scale), &para_.min_, &para_.max_); + CalculateActivationRangeQuantized(act == ActType_Relu, act == ActType_Relu6, para_->out_zp_, + static_cast<float>(out_scale), &(para_->min_), &(para_->max_)); if (!InferShapeDone()) { return RET_OK; @@ -154,9 +166,9 @@ void QuantizedAddCPUKernel::BroadcastRun(int task_id) { cur_out = output_data_ + task_id * stride * in_size_ + i * in_size_; } #ifdef ENABLE_AVX - AddInt8_AVX2(cur_in0, cur_in1, cur_out, in_size_, &para_); + AddInt8_AVX2(cur_in0, cur_in1, cur_out, in_size_, para_); #else - AddInt8(cur_in0, cur_in1, cur_out, in_size_, &para_); + AddInt8(cur_in0, cur_in1, cur_out, in_size_, para_); #endif } return; @@ -182,18 +194,18 @@ int QuantizedAddCPUKernel::DoExecute(int task_id) { if (support_opt_add_) { int8_t *ptr_in = arith_para_->in_elements_num0_ == 1 ? cur_in1 : cur_in0; int8_t element_in = arith_para_->in_elements_num0_ == 1 ? input0_data_[0] : input1_data_[0]; - AddQuantQrgs *ptr_args = arith_para_->in_elements_num0_ == 1 ? &para_.in1_args_ : &para_.in0_args_; - AddQuantQrgs *ele_args = arith_para_->in_elements_num0_ == 1 ? &para_.in0_args_ : &para_.in1_args_; + AddQuantQrgs *ptr_args = arith_para_->in_elements_num0_ == 1 ? &(para_->in1_args_) : &(para_->in0_args_); + AddQuantQrgs *ele_args = arith_para_->in_elements_num0_ == 1 ? 
&(para_->in0_args_) : &(para_->in1_args_); #ifdef ENABLE_AVX - AddOptInt8_AVX2(ptr_in, element_in, cur_out, rest_count, &para_, ptr_args, ele_args); + AddOptInt8_AVX2(ptr_in, element_in, cur_out, rest_count, para_, ptr_args, ele_args); #else - AddOptInt8(ptr_in, element_in, cur_out, rest_count, &para_, ptr_args, ele_args); + AddOptInt8(ptr_in, element_in, cur_out, rest_count, para_, ptr_args, ele_args); #endif } else { #ifdef ENABLE_AVX - AddInt8_AVX2(cur_in0, cur_in1, cur_out, rest_count, &para_); + AddInt8_AVX2(cur_in0, cur_in1, cur_out, rest_count, para_); #else - AddInt8(cur_in0, cur_in1, cur_out, rest_count, &para_); + AddInt8(cur_in0, cur_in1, cur_out, rest_count, para_); #endif } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h index 79be464b73..d6bed790b6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h @@ -32,7 +32,7 @@ class QuantizedAddCPUKernel : public LiteKernel { : LiteKernel(parameter, inputs, outputs, ctx) { arith_para_ = reinterpret_cast<ArithmeticParameter *>(parameter); } - ~QuantizedAddCPUKernel() override = default; + ~QuantizedAddCPUKernel() override; int Init() override; int ReSize() override; @@ -43,7 +43,7 @@ void BroadcastRun(int task_id); private: - AddQuantParameter para_; + AddQuantParameter *para_ = nullptr; ArithmeticParameter *arith_para_ = nullptr; int in_size_ = 0; int out_size_ = 0; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc index 242ec1ab67..c814f41e26 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc @@ -27,18 +27,39 @@ using mindspore::schema::PrimitiveType_ArgMaxFusion; using mindspore::schema::PrimitiveType_ArgMinFusion; namespace mindspore::kernel { +ArgMinMaxInt8CPUKernel::~ArgMinMaxInt8CPUKernel() { + if (in_quant_arg_ != nullptr) { + free(in_quant_arg_); + in_quant_arg_ = nullptr; + } + if (out_quant_arg_ != nullptr) { + free(out_quant_arg_); + out_quant_arg_ = nullptr; + } +} + int ArgMinMaxInt8CPUKernel::Init() { auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); param->data_type_ = kNumberTypeInt8; + in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); + if (in_quant_arg_ == nullptr) { + MS_LOG(ERROR) << "Malloc QuantArg for argmin or argmax int8 op failed!"; + return RET_ERROR; + } auto *input_tensor = in_tensors_.at(kInputIndex); auto in_quant_args = input_tensor->quant_params(); - in_quant_arg_.scale_ = in_quant_args.front().scale; - in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; + in_quant_arg_->scale_ = in_quant_args.front().scale; + in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; + out_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); + if (out_quant_arg_ == nullptr) { + MS_LOG(ERROR) << "Malloc QuantArg for argmin or argmax int8 op failed!"; + return RET_ERROR; + } auto *out_tensor = out_tensors_.at(kOutputIndex); auto out_quant_args = out_tensor->quant_params(); - out_quant_arg_.scale_ = out_quant_args.front().scale; - out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; + out_quant_arg_->scale_ = out_quant_args.front().scale; + out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; if (!InferShapeDone()) { return RET_OK; } @@ -72,22 +93,22 @@ int ArgMinMaxInt8CPUKernel::Run() { auto in_shape = input->shape(); auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); if 
(param->topk_ == 1) { - Int8ArgMinMaxQuant(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); + Int8ArgMinMaxQuant(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); return RET_OK; } switch (param->axis_) { case 0: - Int8ArgMinMaxDim0(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); + Int8ArgMinMaxDim0(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); break; case 1: - Int8ArgMinMaxDim1(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); + Int8ArgMinMaxDim1(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); break; case 2: - Int8ArgMinMaxDim2(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); + Int8ArgMinMaxDim2(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); break; case 3: - Int8ArgMinMaxDim3(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_); + Int8ArgMinMaxDim3(input_data, output_data, in_shape.data(), param, in_quant_arg_, out_quant_arg_); break; default: MS_LOG(ERROR) << "axis is invalid"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h index 5b5fbc6eb5..4902eb4c57 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h @@ -30,15 +30,15 @@ class ArgMinMaxInt8CPUKernel : public LiteKernel { const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) : LiteKernel(parameter, inputs, outputs, ctx) {} - ~ArgMinMaxInt8CPUKernel() = default; + ~ArgMinMaxInt8CPUKernel() override; int Init() override; int ReSize() override; int Run() override; private: - QuantArg in_quant_arg_; - QuantArg out_quant_arg_; + QuantArg *in_quant_arg_ = nullptr; + QuantArg *out_quant_arg_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc index f1ed9a205a..08f1399523 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc @@ -24,18 +24,38 @@ using mindspore::schema::PrimitiveType_BatchToSpace; using mindspore::schema::PrimitiveType_BatchToSpaceND; namespace mindspore::kernel { +BatchToSpaceInt8CPUKernel::~BatchToSpaceInt8CPUKernel() { + if (in_quant_arg_ != nullptr) { + free(in_quant_arg_); + in_quant_arg_ = nullptr; + } + if (out_quant_arg_ != nullptr) { + free(out_quant_arg_); + out_quant_arg_ = nullptr; + } +} + int BatchToSpaceInt8CPUKernel::Init() { MS_ASSERT(in_tensors_.at(0)->format() == schema::Format::Format_NHWC); - + in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); + if (in_quant_arg_ == nullptr) { + MS_LOG(ERROR) << "Malloc QuantArg for BatchToSpace int8 op failed!"; + return RET_ERROR; + } auto *input_tensor = in_tensors_.at(kInputIndex); auto in_quant_args = input_tensor->quant_params(); - in_quant_arg_.scale_ = in_quant_args.front().scale; - in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; + in_quant_arg_->scale_ = in_quant_args.front().scale; + in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; + out_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); + if (out_quant_arg_ == nullptr) { + MS_LOG(ERROR) << "Malloc QuantArg for BatchToSpace int8 op failed!"; + return RET_ERROR; + } auto *out_tensor = 
out_tensors_.at(kOutputIndex); auto out_quant_args = out_tensor->quant_params(); - out_quant_arg_.scale_ = out_quant_args.front().scale; - out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; + out_quant_arg_->scale_ = out_quant_args.front().scale; + out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; if (!InferShapeDone()) { return RET_OK; } @@ -56,7 +76,7 @@ int BatchToSpaceInt8CPUKernel::Run() { auto out_shape = output->shape(); BatchToSpaceParameter *param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_); - if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { + if (in_quant_arg_->scale_ == out_quant_arg_->scale_ && in_quant_arg_->zp_ == out_quant_arg_->zp_) { if (param->no_crop_) { BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, sizeof(int8_t)); @@ -67,10 +87,10 @@ } else { if (param->no_crop_) { BatchToSpaceNoCropForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, - &in_quant_arg_, &out_quant_arg_); + in_quant_arg_, out_quant_arg_); } else { BatchToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, - param->crops_, &in_quant_arg_, &out_quant_arg_); + param->crops_, in_quant_arg_, out_quant_arg_); } } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h index adee42a2ce..c3de0abbd0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h @@ -30,15 +30,15 @@ class BatchToSpaceInt8CPUKernel : public LiteKernel { const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) : LiteKernel(parameter, inputs, outputs, ctx) {} - ~BatchToSpaceInt8CPUKernel() = default; + ~BatchToSpaceInt8CPUKernel() override; int Init() override; int ReSize() override; int Run() override; private: - QuantArg in_quant_arg_; - QuantArg out_quant_arg_; + QuantArg *in_quant_arg_ = nullptr; + QuantArg *out_quant_arg_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h index c09f1226ee..a0f476a939 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h @@ -62,7 +62,7 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { int8_t *output_ptr_ = nullptr; size_t thread_count_ = 1; size_t thread_stride_ = 0; - MATMUL_OPT_R4_FUNC matmul_func_; + MATMUL_OPT_R4_FUNC matmul_func_ = nullptr; MatMulParameter *matmul_param_ = nullptr; bool support_optimize_ = true; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc index af90bec4c6..743d4e9caa 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc @@ -25,18 +25,39 @@ using mindspore::lite::RET_PARAM_INVALID; using mindspore::schema::PrimitiveType_DepthToSpace; namespace mindspore::kernel { +DepthToSpaceInt8CPUKernel::~DepthToSpaceInt8CPUKernel() { + if (in_quant_arg_ != nullptr) { + free(in_quant_arg_); + in_quant_arg_ = nullptr; + } + if (out_quant_arg_ != nullptr) { + free(out_quant_arg_); + out_quant_arg_ = nullptr; + } +} + int DepthToSpaceInt8CPUKernel::Init() 
{ param_->data_type_size_ = sizeof(int8_t); + in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); + if (in_quant_arg_ == nullptr) { + MS_LOG(ERROR) << "Malloc QuantArg for DepthToSpace int8 op failed!"; + return RET_ERROR; + } auto *input_tensor = in_tensors_.at(kInputIndex); auto in_quant_args = input_tensor->quant_params(); - in_quant_arg_.scale_ = in_quant_args.front().scale; - in_quant_arg_.zp_ = in_quant_args.front().zeroPoint; + in_quant_arg_->scale_ = in_quant_args.front().scale; + in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; + out_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); + if (out_quant_arg_ == nullptr) { + MS_LOG(ERROR) << "Malloc QuantArg for DepthToSpace int8 op failed!"; + return RET_ERROR; + } auto *out_tensor = out_tensors_.at(kOutputIndex); auto out_quant_args = out_tensor->quant_params(); - out_quant_arg_.scale_ = out_quant_args.front().scale; - out_quant_arg_.zp_ = out_quant_args.front().zeroPoint; + out_quant_arg_->scale_ = out_quant_args.front().scale; + out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; if (!InferShapeDone()) { return RET_OK; } @@ -51,10 +72,10 @@ int DepthToSpaceInt8CPUKernel::Run() { const int8_t *input_data = reinterpret_cast<const int8_t *>(input->data_c()); int8_t *output_data = reinterpret_cast<int8_t *>(output->data_c()); auto in_shape = input->shape(); - if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) { + if (in_quant_arg_->scale_ == out_quant_arg_->scale_ && in_quant_arg_->zp_ == out_quant_arg_->zp_) { DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param_); } else { - DepthToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), param_, &in_quant_arg_, &out_quant_arg_); + DepthToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), param_, in_quant_arg_, out_quant_arg_); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h index b986c6f057..e2a7091b8b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h @@ -29,15 +29,15 @@ class DepthToSpaceInt8CPUKernel : public DepthToSpaceBaseCPUKernel { DepthToSpaceInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) : DepthToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx) {} - ~DepthToSpaceInt8CPUKernel() = default; + ~DepthToSpaceInt8CPUKernel() override; int Init() override; int ReSize() override; int Run() override; private: - QuantArg in_quant_arg_; - QuantArg out_quant_arg_; + QuantArg *in_quant_arg_ = nullptr; + QuantArg *out_quant_arg_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc index 0e1959fbca..6827698ca7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc @@ -28,7 +28,12 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DivFusion; namespace mindspore::kernel { - +DivInt8CPUKernel::~DivInt8CPUKernel() { + if (quant_args_ != nullptr) { + free(quant_args_); + quant_args_ = nullptr; + } +} int DivInt8CPUKernel::Init() { lite::Tensor *input0 = in_tensors_.at(0); lite::Tensor *input1 = in_tensors_.at(1); @@ -39,19 +44,25 @@ int DivInt8CPUKernel::Init() { broadcast_ = input0->ElementsNum() != input1->ElementsNum(); - 
param_.in0_args_.scale_ = input0->quant_params().front().scale; - param_.in0_args_.zp_ = -input0->quant_params().front().zeroPoint; - param_.in1_args_.scale_ = input1->quant_params().front().scale; - param_.in1_args_.zp_ = -input1->quant_params().front().zeroPoint; - param_.out_args_.scale_ = output->quant_params().front().scale; - param_.out_args_.zp_ = output->quant_params().front().zeroPoint; + quant_args_ = reinterpret_cast<DivQuantArg *>(malloc(sizeof(DivQuantArg))); + if (quant_args_ == nullptr) { + MS_LOG(ERROR) << "Malloc DivQuantArg for Div int8 op failed!"; + return RET_ERROR; + } + quant_args_->in0_args_.scale_ = input0->quant_params().front().scale; + quant_args_->in0_args_.zp_ = -input0->quant_params().front().zeroPoint; + quant_args_->in1_args_.scale_ = input1->quant_params().front().scale; + quant_args_->in1_args_.zp_ = -input1->quant_params().front().zeroPoint; + quant_args_->out_args_.scale_ = output->quant_params().front().scale; + quant_args_->out_args_.zp_ = output->quant_params().front().zeroPoint; - const double real_multiplier = param_.in0_args_.scale_ / (param_.in1_args_.scale_ * param_.out_args_.scale_); + const double real_multiplier = + quant_args_->in0_args_.scale_ / (quant_args_->in1_args_.scale_ * quant_args_->out_args_.scale_); - QuantizeMultiplier(real_multiplier, &param_.output_multiplier_, &param_.output_shift_); + QuantizeMultiplier(real_multiplier, &quant_args_->output_multiplier_, &quant_args_->output_shift_); - param_.output_activation_min_ = std::numeric_limits<int8_t>::min(); - param_.output_activation_max_ = std::numeric_limits<int8_t>::max(); + quant_args_->output_activation_min_ = std::numeric_limits<int8_t>::min(); + quant_args_->output_activation_max_ = std::numeric_limits<int8_t>::max(); if (!InferShapeDone()) { return RET_OK; @@ -74,10 +85,10 @@ int DivInt8CPUKernel::DoExecute(int task_id) { auto ret = RET_OK; if (broadcast_) { ret = DivInt8(tile0_data_ + task_id * count, tile1_data_ + task_id * count, output_data_ + task_id * count, count, - &param_); + quant_args_); } else { ret = DivInt8(input0_data_ + task_id * count, input1_data_ + task_id * count, output_data_ + task_id * count, count, - &param_); + quant_args_); } if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h index d352fed35e..0606487e5e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.h @@ -27,7 +27,7 @@ class DivInt8CPUKernel : public LiteKernel { explicit DivInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) : LiteKernel(parameter, inputs, outputs, ctx) {} - ~DivInt8CPUKernel() override {} + ~DivInt8CPUKernel() override; int Init() override; int ReSize() override; @@ -35,7 +35,7 @@ int DoExecute(int task_id); private: - DivQuantArg param_; + DivQuantArg *quant_args_ = nullptr; int8_t *tile0_data_ = nullptr; int8_t *tile1_data_ = nullptr; bool broadcast_ = false; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.h index 8fc6e8c56b..84ebfc08b5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.h @@ -32,6 +32,7 @@ class GroupConvolutionInt8CPUKernel : public GroupConvolutionBaseCPUKernel { : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, 
std::move(group_convs), group_num) { } // opParameter(in channel, out channel) in this kernel has been split to groups, if // you want to get real params, multiply in channel / out channel with group num + ~GroupConvolutionInt8CPUKernel() = default; int SeparateInput(int group_id) override; int PostConcat(int group_id) override; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.cc index 1c43a02fde..47dfcac603 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.cc @@ -24,16 +24,28 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_L2NormalizeFusion; namespace mindspore::kernel { +L2NormInt8CPUKernel::~L2NormInt8CPUKernel() { + if (quant_param_ != nullptr) { + free(quant_param_); + quant_param_ = nullptr; + } +} + int L2NormInt8CPUKernel::Init() { lite::Tensor *input = in_tensors_.at(0); lite::Tensor *output = out_tensors_.at(0); MS_ASSERT(input); MS_ASSERT(output); - quant_param_.in_.scale_ = input->quant_params().front().scale; - quant_param_.in_.zp_ = input->quant_params().front().zeroPoint; - quant_param_.out_.scale_ = output->quant_params().front().scale; - quant_param_.out_.zp_ = output->quant_params().front().zeroPoint; + quant_param_ = reinterpret_cast<L2NormQuantArg *>(malloc(sizeof(L2NormQuantArg))); + if (quant_param_ == nullptr) { + MS_LOG(ERROR) << "Malloc L2NormQuantArg for L2Norm int8 op failed!"; + return RET_ERROR; + } + quant_param_->in_.scale_ = input->quant_params().front().scale; + quant_param_->in_.zp_ = input->quant_params().front().zeroPoint; + quant_param_->out_.scale_ = output->quant_params().front().scale; + quant_param_->out_.zp_ = output->quant_params().front().zeroPoint; return ReSize(); } @@ -68,7 +80,7 @@ int L2NormInt8CPUKernel::DoExecute(int task_id) { int8_t *output_data = static_cast<int8_t *>(out_tensors().front()->MutableData()); MS_ASSERT(output_data); MS_ASSERT(l2_norm_param_); - return L2NormalizationInt8(input_data, output_data, l2_norm_param_, &quant_param_, begin, end); + return L2NormalizationInt8(input_data, output_data, l2_norm_param_, quant_param_, begin, end); } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_L2NormalizeFusion, LiteKernelCreator<L2NormInt8CPUKernel>) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.h index 28df43c1f7..ca194d6307 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/l2_norm_int8.h @@ -26,14 +26,14 @@ class L2NormInt8CPUKernel : public L2NormCPUKernel { explicit L2NormInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) : L2NormCPUKernel(parameter, inputs, outputs, ctx) {} - ~L2NormInt8CPUKernel() {} + ~L2NormInt8CPUKernel() override; int Init() override; int Run() override; int DoExecute(int tId); private: - L2NormQuantArg quant_param_; + L2NormQuantArg *quant_param_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc index ed3b1316c7..0b6719b224 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc @@ -24,6 +24,10 @@ using mindspore::schema::PrimitiveType_LayerNormFusion; namespace mindspore::kernel { 
LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() { + if (quant_param_ != nullptr) { + free(quant_param_); + quant_param_ = nullptr; + } if (gamma_ptr_ != nullptr) { free(gamma_ptr_); gamma_ptr_ = nullptr; } @@ -38,10 +42,15 @@ int LayerNormInt8CPUKernel::SetQuantArgs() { lite::Tensor *input = in_tensors_.at(0); lite::Tensor *output = out_tensors_.at(0); - quant_param_.in_zp_ = input->quant_params().front().zeroPoint; - quant_param_.in_scale_ = input->quant_params().front().scale; - quant_param_.out_zp_ = output->quant_params().front().zeroPoint; - quant_param_.out_scale_ = output->quant_params().front().scale; + quant_param_ = reinterpret_cast<LayerNormQuantArg *>(malloc(sizeof(LayerNormQuantArg))); + if (quant_param_ == nullptr) { + MS_LOG(ERROR) << "Malloc LayerNormQuantArg for LayerNorm int8 op failed!"; + return RET_ERROR; + } + quant_param_->in_zp_ = input->quant_params().front().zeroPoint; + quant_param_->in_scale_ = input->quant_params().front().scale; + quant_param_->out_zp_ = output->quant_params().front().zeroPoint; + quant_param_->out_scale_ = output->quant_params().front().scale; lite::Tensor *gamma_tensor = in_tensors_.at(1); lite::Tensor *beta_tensor = in_tensors_.at(2); @@ -67,7 +76,7 @@ int LayerNormInt8CPUKernel::SetQuantArgs() { } int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c()); for (int i = 0; i < beta_tensor->ElementsNum(); i++) { - beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale; + beta_ptr_[i] = src_beta[i] * quant_param_->in_scale_ * gamma_scale; } return RET_OK; } @@ -109,7 +118,7 @@ int LayerNormInt8CPUKernel::ReSize() { } int LayerNormInt8CPUKernel::DoExecute(int task_id) { - LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, &quant_param_, task_id); + LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, quant_param_, task_id); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.h index e12d0ab06a..6591bd5c3c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.h @@ -44,7 +44,7 @@ class LayerNormInt8CPUKernel : public LiteKernel { private: LayerNormParameter *param_ = nullptr; - LayerNormQuantArg quant_param_; + LayerNormQuantArg *quant_param_ = nullptr; int8_t *src_ptr_ = nullptr; int8_t *dst_ptr_ = nullptr; float *gamma_ptr_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc index c0038c6b6f..9731626ec4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc @@ -41,14 +41,15 @@ int MatmulBaseInt8CPUKernel::RunImpl(int task_id) { return RET_OK; } - int32_t *cur_left = filter_per_channel_ ? quant_.left_shift_ + cur_stride : quant_.left_shift_; - int32_t *cur_right = filter_per_channel_ ? quant_.right_shift_ + cur_stride : quant_.right_shift_; - int32_t *cur_mul = filter_per_channel_ ? quant_.quant_multiplier_ + cur_stride : quant_.quant_multiplier_; - int32_t *cur_zp = filter_per_channel_ ? quant_.filter_zp_ + cur_stride : quant_.filter_zp_; + int32_t *cur_left = filter_per_channel_ ? quant_param_->left_shift_ + cur_stride : quant_param_->left_shift_; + int32_t *cur_right = filter_per_channel_ ? quant_param_->right_shift_ + cur_stride : quant_param_->right_shift_; + int32_t *cur_mul = + filter_per_channel_ ? 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc
index c0038c6b6f..9731626ec4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc
@@ -41,14 +41,15 @@ int MatmulBaseInt8CPUKernel::RunImpl(int task_id) {
     return RET_OK;
   }
 
-  int32_t *cur_left = filter_per_channel_ ? quant_.left_shift_ + cur_stride : quant_.left_shift_;
-  int32_t *cur_right = filter_per_channel_ ? quant_.right_shift_ + cur_stride : quant_.right_shift_;
-  int32_t *cur_mul = filter_per_channel_ ? quant_.quant_multiplier_ + cur_stride : quant_.quant_multiplier_;
-  int32_t *cur_zp = filter_per_channel_ ? quant_.filter_zp_ + cur_stride : quant_.filter_zp_;
+  int32_t *cur_left = filter_per_channel_ ? quant_param_->left_shift_ + cur_stride : quant_param_->left_shift_;
+  int32_t *cur_right = filter_per_channel_ ? quant_param_->right_shift_ + cur_stride : quant_param_->right_shift_;
+  int32_t *cur_mul =
+    filter_per_channel_ ? quant_param_->quant_multiplier_ + cur_stride : quant_param_->quant_multiplier_;
+  int32_t *cur_zp = filter_per_channel_ ? quant_param_->filter_zp_ + cur_stride : quant_param_->filter_zp_;
 
   MatmulInt8Opt(pack_a_ptr_, batch_b_ptr_ + cur_stride * param_->deep_16_, batch_c_ptr_ + cur_stride, param_->row_,
-                cur_oc, param_->deep_16_, input_sums_, weight_bias_sums_ + cur_stride, quant_.out_act_min_,
-                quant_.out_act_max_, quant_.output_.zp_, cur_mul, cur_left, cur_right, param_->col_,
+                cur_oc, param_->deep_16_, input_sums_, weight_bias_sums_ + cur_stride, quant_param_->out_act_min_,
+                quant_param_->out_act_max_, quant_param_->output_.zp_, cur_mul, cur_left, cur_right, param_->col_,
                 filter_per_channel_, cur_zp);
 
   return RET_OK;
@@ -63,29 +64,32 @@ MatmulBaseInt8CPUKernel::~MatmulBaseInt8CPUKernel() {
     free(bias_ptr_);
     bias_ptr_ = nullptr;
   }
-  return;
+  if (quant_param_ != nullptr) {
+    free(quant_param_);
+    quant_param_ = nullptr;
+  }
 }
 
 void MatmulBaseInt8CPUKernel::FreeQuantParam() {
-  if (quant_.filter_scale_ != nullptr) {
-    free(quant_.filter_scale_);
-    quant_.filter_scale_ = nullptr;
+  if (quant_param_->filter_scale_ != nullptr) {
+    free(quant_param_->filter_scale_);
+    quant_param_->filter_scale_ = nullptr;
   }
-  if (quant_.filter_zp_ != nullptr) {
-    free(quant_.filter_zp_);
-    quant_.filter_zp_ = nullptr;
+  if (quant_param_->filter_zp_ != nullptr) {
+    free(quant_param_->filter_zp_);
+    quant_param_->filter_zp_ = nullptr;
   }
-  if (quant_.left_shift_ != nullptr) {
-    free(quant_.left_shift_);
-    quant_.left_shift_ = nullptr;
+  if (quant_param_->left_shift_ != nullptr) {
+    free(quant_param_->left_shift_);
+    quant_param_->left_shift_ = nullptr;
   }
-  if (quant_.right_shift_ != nullptr) {
-    free(quant_.right_shift_);
-    quant_.right_shift_ = nullptr;
+  if (quant_param_->right_shift_ != nullptr) {
+    free(quant_param_->right_shift_);
+    quant_param_->right_shift_ = nullptr;
  }
-  if (quant_.quant_multiplier_ != nullptr) {
-    free(quant_.quant_multiplier_);
-    quant_.quant_multiplier_ = nullptr;
+  if (quant_param_->quant_multiplier_ != nullptr) {
+    free(quant_param_->quant_multiplier_);
+    quant_param_->quant_multiplier_ = nullptr;
   }
   return;
 }
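With quant_param_ now heap-allocated, FreeQuantParam() above dereferences it unconditionally. If that routine can ever run before MallocQuantParam() has succeeded, an early null check keeps it safe; a self-contained sketch of the guard, with QuantArrays standing in for the real MatmulQuantParameter:

    #include <cstdlib>

    // Stand-in for the kinds of array members freed above.
    struct QuantArrays {
      float *filter_scale_;
      int *filter_zp_;
    };

    void FreeQuantArrays(QuantArrays *q) {
      if (q == nullptr) {
        return;  // allocation never happened, nothing to release
      }
      if (q->filter_scale_ != nullptr) {
        free(q->filter_scale_);
        q->filter_scale_ = nullptr;
      }
      if (q->filter_zp_ != nullptr) {
        free(q->filter_zp_);
        q->filter_zp_ = nullptr;
      }
    }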
@@ -99,24 +103,29 @@ int MatmulBaseInt8CPUKernel::MallocQuantParam() {
   int init_size = filter_per_channel_ ? col : 1;
 
-  quant_.filter_scale_ = reinterpret_cast<float *>(malloc(init_size * sizeof(float)));
-  if (quant_.filter_scale_ == nullptr) {
+  quant_param_ = reinterpret_cast<MatmulQuantParameter *>(malloc(sizeof(MatmulQuantParameter)));
+  if (quant_param_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc MatmulQuantParameter for Matmul int8 op failed!";
+    return RET_ERROR;
+  }
+  quant_param_->filter_scale_ = reinterpret_cast<float *>(malloc(init_size * sizeof(float)));
+  if (quant_param_->filter_scale_ == nullptr) {
     return RET_ERROR;
   }
-  quant_.filter_zp_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
-  if (quant_.filter_zp_ == nullptr) {
+  quant_param_->filter_zp_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
+  if (quant_param_->filter_zp_ == nullptr) {
     return RET_ERROR;
   }
-  quant_.left_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
-  if (quant_.left_shift_ == nullptr) {
+  quant_param_->left_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
+  if (quant_param_->left_shift_ == nullptr) {
     return RET_ERROR;
   }
-  quant_.right_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
-  if (quant_.right_shift_ == nullptr) {
+  quant_param_->right_shift_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
+  if (quant_param_->right_shift_ == nullptr) {
     return RET_ERROR;
   }
-  quant_.quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
-  if (quant_.quant_multiplier_ == nullptr) {
+  quant_param_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
+  if (quant_param_->quant_multiplier_ == nullptr) {
     return RET_ERROR;
   }
   return RET_OK;
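The init_size value threaded through MallocQuantParam() above is the whole story of per-channel quantization storage: per-channel filters keep one scale, zero point, multiplier, and shift per output channel, while per-tensor filters keep a single shared entry. A compact sketch of the sizing and allocation rule (a hypothetical helper, not part of the patch):

    #include <cstdint>
    #include <cstdlib>

    // Allocate quant arrays sized per output channel (col entries) or per tensor (1 entry).
    // On failure the caller is expected to run its free routine, as the kernel above does.
    bool MallocQuantArrays(int col, bool per_channel, float **scale, int32_t **zp) {
      const int init_size = per_channel ? col : 1;
      *scale = reinterpret_cast<float *>(malloc(init_size * sizeof(float)));
      *zp = reinterpret_cast<int32_t *>(malloc(init_size * sizeof(int32_t)));
      return *scale != nullptr && *zp != nullptr;
    }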
@@ -124,32 +133,32 @@ int MatmulBaseInt8CPUKernel::MallocQuantParam() {
 
 void MatmulBaseInt8CPUKernel::InitQuantParam() {
   auto in_quant_params = in_tensors_.at(0)->quant_params();
-  quant_.input_.zp_ = in_quant_params.front().zeroPoint;
-  quant_.input_.scale_ = in_quant_params.front().scale;
+  quant_param_->input_.zp_ = in_quant_params.front().zeroPoint;
+  quant_param_->input_.scale_ = in_quant_params.front().scale;
 
   auto out_quant_params = out_tensors_.at(0)->quant_params();
-  quant_.output_.zp_ = out_quant_params.front().zeroPoint;
-  quant_.output_.scale_ = out_quant_params.front().scale;
+  quant_param_->output_.zp_ = out_quant_params.front().zeroPoint;
+  quant_param_->output_.scale_ = out_quant_params.front().scale;
 
   auto weight_tensor = in_tensors_.at(1);
   int weight_quant_num = filter_per_channel_ ? weight_tensor->shape().front() : 1;
   auto weight_quant_params = weight_tensor->quant_params();
 
   for (int i = 0; i < weight_quant_num; i++) {
-    quant_.filter_zp_[i] = weight_quant_params[i].zeroPoint;
-    quant_.filter_scale_[i] = weight_quant_params[i].scale;
+    quant_param_->filter_zp_[i] = weight_quant_params[i].zeroPoint;
+    quant_param_->filter_scale_[i] = weight_quant_params[i].scale;
   }
 
   for (int i = 0; i < weight_quant_num; ++i) {
-    const double in_scale = static_cast<double>(quant_.input_.scale_ * quant_.filter_scale_[i]);
-    double real_multiplier = in_scale / static_cast<double>(quant_.output_.scale_);
-    QuantizeRoundParameterWithDoublePrecision(real_multiplier, &quant_.quant_multiplier_[i], &quant_.left_shift_[i],
-                                              &quant_.right_shift_[i]);
+    const double in_scale = static_cast<double>(quant_param_->input_.scale_ * quant_param_->filter_scale_[i]);
+    double real_multiplier = in_scale / static_cast<double>(quant_param_->output_.scale_);
+    QuantizeRoundParameterWithDoublePrecision(real_multiplier, &quant_param_->quant_multiplier_[i],
+                                              &quant_param_->left_shift_[i], &quant_param_->right_shift_[i]);
   }
 
   CalculateActivationRangeQuantized(param_->act_type_ == ActType_Relu, param_->act_type_ == ActType_Relu6,
-                                    quant_.output_.zp_, quant_.output_.scale_, &quant_.out_act_min_,
-                                    &quant_.out_act_max_);
+                                    quant_param_->output_.zp_, quant_param_->output_.scale_,
+                                    &quant_param_->out_act_min_, &quant_param_->out_act_max_);
 }
 
 void MatmulBaseInt8CPUKernel::InitParameter() {
@@ -207,16 +216,16 @@ void MatmulBaseInt8CPUKernel::TransferB() {
 #else
       RowMajor2Row16x4MajorInt8(current_weight, current_b_pack, param_->col_, param_->deep_);
 #endif
-      CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_.input_.zp_, quant_.filter_zp_, bias_ptr_,
-                         current_sums, ColMajor, filter_per_channel_);
+      CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_param_->input_.zp_,
+                         quant_param_->filter_zp_, bias_ptr_, current_sums, ColMajor, filter_per_channel_);
     } else {
 #ifdef ENABLE_ARM32
       RowMajor2Col16x2MajorInt8(current_weight, current_b_pack, param_->deep_, param_->col_);
 #else
       RowMajor2Col16x4MajorInt8(current_weight, param_->deep_, param_->col_, current_b_pack);
 #endif
-      CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_.input_.zp_, quant_.filter_zp_, bias_ptr_,
-                         current_sums, RowMajor, false);
+      CalcWeightBiasSums(current_weight, param_->deep_, param_->col_, quant_param_->input_.zp_,
+                         quant_param_->filter_zp_, bias_ptr_, current_sums, RowMajor, false);
     }
   }
   return;
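InitQuantParam() above computes, per channel, real_multiplier = (input_scale * filter_scale) / output_scale and hands it to QuantizeRoundParameterWithDoublePrecision to split into an integer multiplier plus shifts. The frexp-based split below illustrates the idea behind that decomposition; it is a stand-in under stated assumptions, not that helper's exact implementation:

    #include <cmath>
    #include <cstdint>

    // real_multiplier ~= multiplier / 2^31 * 2^exp, applied as (x * multiplier) >> (31 - exp).
    void DecomposeMultiplier(double real_multiplier, int32_t *multiplier, int *right_shift) {
      int exp = 0;
      const double q = std::frexp(real_multiplier, &exp);  // q in [0.5, 1), real = q * 2^exp
      auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
      if (q_fixed == (1ll << 31)) {  // q rounded up to 1.0: renormalize
        q_fixed /= 2;
        ++exp;
      }
      *multiplier = static_cast<int32_t>(q_fixed);
      *right_shift = 31 - exp;  // shift the widened product back down to the output scale
    }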
@@ -312,7 +321,7 @@ int MatmulBaseInt8CPUKernel::Run() {
   int8_t *a_ptr = reinterpret_cast<int8_t *>(in_tensors_.at(0)->data_c());
   int8_t *c_ptr = reinterpret_cast<int8_t *>(out_tensors_.at(0)->data_c());
-  int32_t tmp_weight_zp = filter_per_channel_ ? 1 : quant_.filter_zp_[0];
+  int32_t tmp_weight_zp = filter_per_channel_ ? 1 : quant_param_->filter_zp_[0];
   for (int i = 0; i < param_->batch; i++) {
     auto current_src_a = a_ptr + i * param_->row_ * param_->deep_;
     if (param_->a_transpose_) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.h
index b62e7820ed..863a2dfc7b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.h
@@ -63,7 +63,7 @@ class MatmulBaseInt8CPUKernel : public LiteKernel {
 
  protected:
   MatMulParameter *param_ = nullptr;
-  MatmulQuantParameter quant_;
+  MatmulQuantParameter *quant_param_ = nullptr;
   int thread_count_ = 1;
   int thread_stride_ = 0;
   int8_t *pack_a_ptr_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc
index 3644174e13..6377e1b615 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc
@@ -25,6 +25,13 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_MulFusion;
 
 namespace mindspore::kernel {
+MulInt8CPUKernel::~MulInt8CPUKernel() {
+  if (quant_args_ != nullptr) {
+    free(quant_args_);
+    quant_args_ = nullptr;
+  }
+}
+
 int MulInt8CPUKernel::Init() {
   lite::Tensor *input0 = in_tensors_.at(0);
   lite::Tensor *input1 = in_tensors_.at(1);
@@ -33,24 +40,28 @@ int MulInt8CPUKernel::Init() {
   MS_ASSERT(input1);
   MS_ASSERT(output);
 
-  para_.mul_quant_arg_.in_quant_args_[0].scale_ = input0->quant_params().front().scale;
-  para_.mul_quant_arg_.in_quant_args_[0].zp_ = input0->quant_params().front().zeroPoint * -1;
-  para_.mul_quant_arg_.in_quant_args_[1].scale_ = input1->quant_params().front().scale;
-  para_.mul_quant_arg_.in_quant_args_[1].zp_ = input1->quant_params().front().zeroPoint * -1;
-  para_.mul_quant_arg_.out_quant_arg_.scale_ = output->quant_params().front().scale;
-  para_.mul_quant_arg_.out_quant_arg_.zp_ = output->quant_params().front().zeroPoint;
-  para_.mul_quant_arg_.output_activation_max_ = std::numeric_limits<int8_t>::max();
-  para_.mul_quant_arg_.output_activation_min_ = std::numeric_limits<int8_t>::min();
-
-  const double real_multiplier =
-    (para_.mul_quant_arg_.in_quant_args_[0].scale_ * para_.mul_quant_arg_.in_quant_args_[1].scale_) /
-    para_.mul_quant_arg_.out_quant_arg_.scale_;
+  quant_args_ = reinterpret_cast<MulQuantArg *>(malloc(sizeof(MulQuantArg)));
+  if (quant_args_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc MulQuantArg for Mul int8 op failed!";
+    return RET_ERROR;
+  }
+  quant_args_->in_quant_args_[0].scale_ = input0->quant_params().front().scale;
+  quant_args_->in_quant_args_[0].zp_ = input0->quant_params().front().zeroPoint * -1;
+  quant_args_->in_quant_args_[1].scale_ = input1->quant_params().front().scale;
+  quant_args_->in_quant_args_[1].zp_ = input1->quant_params().front().zeroPoint * -1;
+  quant_args_->out_quant_arg_.scale_ = output->quant_params().front().scale;
+  quant_args_->out_quant_arg_.zp_ = output->quant_params().front().zeroPoint;
+  quant_args_->output_activation_max_ = std::numeric_limits<int8_t>::max();
+  quant_args_->output_activation_min_ = std::numeric_limits<int8_t>::min();
+
+  const double real_multiplier = (quant_args_->in_quant_args_[0].scale_ * quant_args_->in_quant_args_[1].scale_) /
+                                 quant_args_->out_quant_arg_.scale_;
 
   int right_shift = 0;
-  QuantizeMultiplierSmallerThanOne(real_multiplier, &para_.mul_quant_arg_.output_multiplier_, &right_shift);
+  QuantizeMultiplierSmallerThanOne(real_multiplier, &quant_args_->output_multiplier_, &right_shift);
 
-  para_.mul_quant_arg_.shift_left_ = right_shift < 0 ? -right_shift : 0;
-  para_.mul_quant_arg_.shift_right_ = right_shift > 0 ? right_shift : 0;
+  quant_args_->shift_left_ = right_shift < 0 ? -right_shift : 0;
+  quant_args_->shift_right_ = right_shift > 0 ? right_shift : 0;
 
   if (!InferShapeDone()) {
     return RET_OK;
@@ -202,8 +213,7 @@ int MulInt8CPUKernel::FastDoExecute(int task_id) {
     cur_input0_data = input1_data_;
     cur_input1_data = input0_data_ + task_id * count_unit_ * depth;
   }
-  FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_,
-          para_.mul_quant_arg_);
+  FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_);
   return RET_OK;
 }
@@ -216,7 +226,7 @@ int MulInt8CPUKernel::DoExecute(int task_id) {
   int8_t *cur_input1_data = input1_data_ + task_id * count_unit_;
   int8_t *cur_output_data = output_data_ + task_id * count_unit_;
 
-  Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, para_.mul_quant_arg_);
+  Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_);
   return lite::RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h
index f10f53ae33..512b5e46ac 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h
@@ -33,7 +33,7 @@ class MulInt8CPUKernel : public LiteKernel {
       : LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx_->thread_num_) {
     tile_para = reinterpret_cast<ArithmeticParameter *>(parameter);
   }
-  ~MulInt8CPUKernel() override{};
+  ~MulInt8CPUKernel() override;
 
   int Init() override;
   int ReSize() override;
@@ -46,7 +46,7 @@ class MulInt8CPUKernel : public LiteKernel {
  private:
   const lite::InnerContext *ctx_ = nullptr;
   ArithmeticParameter *tile_para = nullptr;
-  MulParameter para_;
+  MulQuantArg *quant_args_ = nullptr;
   bool fast_hw_broadcast_ = false;
   bool input1_hw_broadcast_ = false;
   int thread_count_ = 1;
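The shift_left_/shift_right_ assignments in the mul hunks above encode one signed shift from QuantizeMultiplierSmallerThanOne as two non-negative fields: a negative right shift becomes a left shift, a positive one stays a right shift. The convention, as a tiny standalone sketch:

    // One signed shift stored as two unsigned-direction fields, as the kernels above do.
    struct ShiftPair {
      int left;
      int right;
    };

    ShiftPair SplitShift(int right_shift) {
      ShiftPair s{0, 0};
      if (right_shift < 0) {
        s.left = -right_shift;  // negative right shift means multiply by 2^|shift|
      } else {
        s.right = right_shift;  // positive means scale down by 2^shift
      }
      return s;
    }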
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc
index 8d929f240e..05757407e7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc
@@ -30,35 +30,46 @@ using mindspore::lite::RET_NULL_PTR;
 using mindspore::schema::PrimitiveType_Softmax;
 
 namespace mindspore::kernel {
+SoftmaxInt8CPUKernel::~SoftmaxInt8CPUKernel() {
+  if (quant_param_ != nullptr) {
+    free(quant_param_);
+    quant_param_ = nullptr;
+  }
+}
 
 int SoftmaxInt8CPUKernel::Init() {
   auto ret = SoftmaxBaseCPUKernel::Init();
   if (ret != RET_OK) {
     return ret;
   }
 
+  quant_param_ = reinterpret_cast<SoftmaxQuantArg *>(malloc(sizeof(SoftmaxQuantArg)));
+  if (quant_param_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc SoftmaxQuantArg for Softmax int8 op failed!";
+    return RET_ERROR;
+  }
   auto *input_tensor = in_tensors_.at(kInputIndex);
   MS_ASSERT(input_tensor);
 
   auto in_quant_args = input_tensor->quant_params();
-  quant_params_.in_quant_args_.scale_ = in_quant_args.front().scale;
-  quant_params_.in_quant_args_.zp_ = -in_quant_args.front().zeroPoint;
+  quant_param_->in_quant_args_.scale_ = in_quant_args.front().scale;
+  quant_param_->in_quant_args_.zp_ = -in_quant_args.front().zeroPoint;
 
   auto *out_tensor = out_tensors_.at(kOutputIndex);
   MS_ASSERT(out_tensor);
 
   auto out_quant_args = out_tensor->quant_params();
-  quant_params_.out_quant_arg_.scale_ = out_quant_args.front().scale;
-  quant_params_.out_quant_arg_.zp_ = -out_quant_args.front().zeroPoint;
-  quant_params_.output_activation_min_ = std::numeric_limits<int8_t>::min();
-  quant_params_.output_activation_max_ = std::numeric_limits<int8_t>::max();
+  quant_param_->out_quant_arg_.scale_ = out_quant_args.front().scale;
+  quant_param_->out_quant_arg_.zp_ = -out_quant_args.front().zeroPoint;
+  quant_param_->output_activation_min_ = std::numeric_limits<int8_t>::min();
+  quant_param_->output_activation_max_ = std::numeric_limits<int8_t>::max();
 
   const double input_real_multiplier =
-    MSMIN(quant_params_.in_quant_args_.scale_ * (1 << (unsigned int)(31 - 5)), (1ll << 31) - 1.0);
+    MSMIN(quant_param_->in_quant_args_.scale_ * (1 << (unsigned int)(31 - 5)), (1ll << 31) - 1.0);
   int right_shift = 0;
-  QuantizeMultiplierSmallerThanOne(input_real_multiplier, &quant_params_.output_multiplier_, &right_shift);
-  quant_params_.shift_left_ = right_shift < 0 ? -right_shift : 0;
-  quant_params_.shift_right_ = right_shift > 0 ? right_shift : 0;
+  QuantizeMultiplierSmallerThanOne(input_real_multiplier, &quant_param_->output_multiplier_, &right_shift);
+  quant_param_->shift_left_ = right_shift < 0 ? -right_shift : 0;
+  quant_param_->shift_right_ = right_shift > 0 ? right_shift : 0;
 
   if (!InferShapeDone()) {
     return RET_OK;
@@ -91,7 +102,7 @@ int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) {
   int stride_size = stride * task_id * inner_size;
 
   auto error_code = SoftmaxInt8(input_ptr + stride_size, output_ptr + stride_size, count, exp_data_ + stride_size,
-                                sum_data_, quant_params_, softmax_param_);
+                                sum_data_, quant_param_, softmax_param_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "DoSoftmax error task_id[" << task_id << "] error_code[" << error_code << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h
index aec062c4eb..55642c49bb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h
@@ -27,7 +27,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel {
   SoftmaxInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
       : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx) {}
-  ~SoftmaxInt8CPUKernel() = default;
+  ~SoftmaxInt8CPUKernel() override;
 
   int Init() override;
   int ReSize() override;
@@ -37,7 +37,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel {
  private:
   int *sum_data_ = nullptr;
   int *exp_data_ = nullptr;
-  SoftmaxQuantArg quant_params_;
+  SoftmaxQuantArg *quant_param_ = nullptr;
 };
 }  // namespace mindspore::kernel
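On the input_real_multiplier line above: the softmax kernel promotes the input scale into fixed point with a 2^(31-5) factor, keeping 5 integer bits of headroom for the exponent arithmetic, and clamps the result below 2^31 - 1 so it still fits a signed 32-bit multiplier. A standalone sketch of that clamp (MSMIN is the kernel's min macro; std::min stands in here):

    #include <algorithm>

    // Clamp the scaled multiplier below 2^31 - 1 so it fits a signed 32-bit fixed-point value.
    double SoftmaxInputMultiplier(double in_scale) {
      const int integer_bits = 5;  // headroom kept for the exp-table inputs
      const double scaled = in_scale * (1 << (31 - integer_bits));
      return std::min(scaled, (1ll << 31) - 1.0);
    }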
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc
index 7709631342..fe6ff0006d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc
@@ -25,6 +25,13 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_SubFusion;
 
 namespace mindspore::kernel {
+SubInt8CPUKernel::~SubInt8CPUKernel() {
+  if (quant_param_ != nullptr) {
+    free(quant_param_);
+    quant_param_ = nullptr;
+  }
+}
+
 int SubInt8CPUKernel::Init() {
   lite::Tensor *input0 = in_tensors_.at(0);
   lite::Tensor *input1 = in_tensors_.at(1);
@@ -35,37 +42,45 @@ int SubInt8CPUKernel::Init() {
 
   broadcast_ = input0->ElementsNum() != input1->ElementsNum();
 
-  param_.in0_args_.scale_ = input0->quant_params().front().scale;
-  param_.in0_args_.zp_ = -input0->quant_params().front().zeroPoint;
-  param_.in1_args_.scale_ = input1->quant_params().front().scale;
-  param_.in1_args_.zp_ = -input1->quant_params().front().zeroPoint;
-  param_.out_args_.scale_ = output->quant_params().front().scale;
-  param_.out_args_.zp_ = output->quant_params().front().zeroPoint;
+  quant_param_ = reinterpret_cast<SubQuantArg *>(malloc(sizeof(SubQuantArg)));
+  if (quant_param_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc SubQuantArg for Sub int8 op failed!";
+    return RET_ERROR;
+  }
+  quant_param_->in0_args_.scale_ = input0->quant_params().front().scale;
+  quant_param_->in0_args_.zp_ = -input0->quant_params().front().zeroPoint;
+  quant_param_->in1_args_.scale_ = input1->quant_params().front().scale;
+  quant_param_->in1_args_.zp_ = -input1->quant_params().front().zeroPoint;
+  quant_param_->out_args_.scale_ = output->quant_params().front().scale;
+  quant_param_->out_args_.zp_ = output->quant_params().front().zeroPoint;
 
   const int left_shift = 20;
-  const double twice_max_input_scale = 2 * std::max(param_.in0_args_.scale_, param_.in1_args_.scale_);
-  const double real_input0_multiplier = param_.in0_args_.scale_ / twice_max_input_scale;
-  const double real_input1_multiplier = param_.in1_args_.scale_ / twice_max_input_scale;
-  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * param_.out_args_.scale_);
+  const double twice_max_input_scale = 2 * std::max(quant_param_->in0_args_.scale_, quant_param_->in1_args_.scale_);
+  const double real_input0_multiplier = quant_param_->in0_args_.scale_ / twice_max_input_scale;
+  const double real_input1_multiplier = quant_param_->in1_args_.scale_ / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * quant_param_->out_args_.scale_);
 
-  QuantizeMultiplierSmallerThanOne(real_input0_multiplier, &param_.input0_multiplier_, &param_.input0_shift_);
-  QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &param_.input1_multiplier_, &param_.input1_shift_);
-  QuantizeMultiplierSmallerThanOne(real_output_multiplier, &param_.output_multiplier_, &param_.output_shift_);
+  QuantizeMultiplierSmallerThanOne(real_input0_multiplier, &quant_param_->input0_multiplier_,
+                                   &quant_param_->input0_shift_);
+  QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &quant_param_->input1_multiplier_,
+                                   &quant_param_->input1_shift_);
+  QuantizeMultiplierSmallerThanOne(real_output_multiplier, &quant_param_->output_multiplier_,
+                                   &quant_param_->output_shift_);
 
-  param_.output_activation_min_ = std::numeric_limits<int8_t>::min();
-  param_.output_activation_max_ = std::numeric_limits<int8_t>::max();
+  quant_param_->output_activation_min_ = std::numeric_limits<int8_t>::min();
+  quant_param_->output_activation_max_ = std::numeric_limits<int8_t>::max();
 
-  int left_shift0 = -param_.input0_shift_ > 0 ? -param_.input0_shift_ : 0;
-  param_.right_shift0_ = -param_.input0_shift_ > 0 ? 0 : param_.input0_shift_;
+  int left_shift0 = -quant_param_->input0_shift_ > 0 ? -quant_param_->input0_shift_ : 0;
+  quant_param_->right_shift0_ = -quant_param_->input0_shift_ > 0 ? 0 : quant_param_->input0_shift_;
 
-  int left_shift1 = -param_.input1_shift_ > 0 ? -param_.input1_shift_ : 0;
-  param_.right_shift1_ = -param_.input1_shift_ > 0 ? 0 : param_.input1_shift_;
+  int left_shift1 = -quant_param_->input1_shift_ > 0 ? -quant_param_->input1_shift_ : 0;
+  quant_param_->right_shift1_ = -quant_param_->input1_shift_ > 0 ? 0 : quant_param_->input1_shift_;
-  param_.left_shift_out_ = -param_.output_shift_ > 0 ? -param_.output_shift_ : 0;
-  param_.right_shift_out_ = -param_.output_shift_ > 0 ? 0 : param_.output_shift_;
+  quant_param_->left_shift_out_ = -quant_param_->output_shift_ > 0 ? -quant_param_->output_shift_ : 0;
+  quant_param_->right_shift_out_ = -quant_param_->output_shift_ > 0 ? 0 : quant_param_->output_shift_;
 
-  param_.left_shift_result0_ = (1 << left_shift) * ((1 << left_shift0));
-  param_.left_shift_result1_ = (1 << left_shift) * ((1 << left_shift1));
+  quant_param_->left_shift_result0_ = (1 << left_shift) * ((1 << left_shift0));
+  quant_param_->left_shift_result1_ = (1 << left_shift) * ((1 << left_shift1));
 
   MS_ASSERT(left_shift + left_shift0 == left_shift);
   MS_ASSERT(left_shift + left_shift1 == left_shift);
@@ -94,10 +109,10 @@ int SubInt8CPUKernel::DoExecute(int task_id) {
   auto ret = RET_OK;
   if (broadcast_) {
     ret = SubInt8(tile0_data_ + task_id * stride, tile1_data_ + task_id * stride, output_data_ + task_id * stride,
-                  count, &param_);
+                  count, quant_param_);
   } else {
     ret = SubInt8(input0_data_ + task_id * stride, input1_data_ + task_id * stride, output_data_ + task_id * stride,
-                  count, &param_);
+                  count, quant_param_);
   }
 
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h
index 0ecb6e6ca4..3958467acf 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h
@@ -31,7 +31,7 @@ class SubInt8CPUKernel : public LiteKernel {
   explicit SubInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
       : LiteKernel(parameter, inputs, outputs, ctx) {}
-  ~SubInt8CPUKernel() = default;
+  ~SubInt8CPUKernel() override;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExecute(int task_id);
 
  private:
-  SubQuantArg param_;
+  SubQuantArg *quant_param_ = nullptr;
   int8_t *tile0_data_ = nullptr;
   int8_t *tile1_data_ = nullptr;
   bool broadcast_ = false;
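The sub kernel's Init above follows the standard int8 add/sub recipe: both inputs are first rescaled onto a common scale (twice the larger input scale), the subtraction happens there, and the result is rescaled to the output, with a fixed 20-bit left shift carrying extra precision through the integer math. Where the three real multipliers come from, shown in float form only:

    #include <algorithm>

    // The three real multipliers behind int8 sub, before fixed-point conversion.
    struct SubMultipliers {
      double in0;
      double in1;
      double out;
    };

    SubMultipliers ComputeSubMultipliers(double s0, double s1, double s_out) {
      const int left_shift = 20;                   // extra precision carried through the intermediate
      const double common = 2 * std::max(s0, s1);  // common scale both inputs map onto
      return {s0 / common, s1 / common, common / ((1 << left_shift) * s_out)};
    }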
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
index a0f6366ea5..a0b4db3ec9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
@@ -62,7 +62,7 @@ class TransposeInt8CPUKernel : public LiteKernel {
   int num_unit_ = 0;
   int in_shape_[8] = {0};
   int out_shape_[8] = {0};
-  int nhnc_param_[3];
+  int nhnc_param_[3] = {0};
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/sub_graph_split.cc b/mindspore/lite/src/sub_graph_split.cc
index e8654d81e9..c7592316d8 100644
--- a/mindspore/lite/src/sub_graph_split.cc
+++ b/mindspore/lite/src/sub_graph_split.cc
@@ -29,6 +29,9 @@ const schema::Primitive *SearchSubGraph::CreatePartialPrimitive(int64_t subgraph
   fbb.Finish(prim_offset);
   auto tmp_buf = fbb.GetBufferPointer();
   auto prim_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
+  if (prim_buf == nullptr) {
+    return nullptr;
+  }
   memcpy(prim_buf, tmp_buf, fbb.GetSize());
 
   auto primitive = flatbuffers::GetRoot<schema::Primitive>(prim_buf);
@@ -59,6 +62,7 @@ void SearchSubGraph::ConvertSubGraphToModel() {
     Model::Node *new_partial_node = new (std::nothrow) Model::Node();
     if (new_partial_node == nullptr) {
       MS_LOG(ERROR) << "New partial node failed!";
+      free(new_sub_graph);
       return;
     }
     new_partial_node->name_ = "Partial-subgraph-split-" + std::to_string(new_sub_index);
@@ -230,15 +234,17 @@ void SearchSubGraph::InitMainGraphDevice() {
   return;
 }
 
 void SearchSubGraph::SubgraphFusion() {
   while (sub_graphs_.size() > 2) {
     size_t sub1_index = 0;
-    int sub2_index = -1;
+    size_t sub2_index = 0;
+    bool is_found = false;
     for (; sub1_index < sub_graphs_.size(); sub1_index++) {
       for (size_t tmp2 = sub1_index + 1; tmp2 < sub_graphs_.size(); tmp2++) {
         if (sub_graphs_[sub1_index].device_ == sub_graphs_[tmp2].device_) {
           sub2_index = tmp2;
+          is_found = true;
           break;
         }
       }
-      if (sub2_index != -1) {
+      if (is_found) {
         break;
       }
     }
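SubgraphFusion() above looks for the first two subgraphs scheduled on the same device and stops scanning as soon as a pair turns up; the is_found flag replaces the old sentinel comparison that stopped working once sub2_index became a size_t. The same search, extracted as a standalone helper for clarity (SubGraphLike is a placeholder for the patch's subgraph type):

    #include <cstddef>
    #include <vector>

    struct SubGraphLike {
      int device_;
    };

    // Find the first pair (i, j), i < j, on the same device; false when no pair exists.
    bool FindFusablePair(const std::vector<SubGraphLike> &subs, size_t *i, size_t *j) {
      for (*i = 0; *i < subs.size(); ++*i) {
        for (*j = *i + 1; *j < subs.size(); ++*j) {
          if (subs[*i].device_ == subs[*j].device_) {
            return true;  // plays the role of the is_found flag above
          }
        }
      }
      return false;
    }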