From: @fuzhiye Reviewed-by: @zhanghaibo5,@hangangqiang Signed-off-by: @hangangqiang tags/v1.2.0-rc1
| @@ -101,7 +101,7 @@ mov x9, x4 | |||||
| ldrsb w14, [x7], #1 | ldrsb w14, [x7], #1 | ||||
| ldrsh w15, [x8], #2 | ldrsh w15, [x8], #2 | ||||
| ldr w16, [x0], #4 | ldr w16, [x0], #4 | ||||
| add w14, w14, w6 | |||||
| sub w14, w14, w6 | |||||
| sxth w14, w14 | sxth w14, w14 | ||||
| madd w14, w14, w15, w16 | madd w14, w14, w15, w16 | ||||
| @@ -112,7 +112,9 @@ int ElementNegativeFp16(float16_t *input, float16_t *output, int element_size) { | |||||
| int ElementReciprocalFp16(float16_t *input, float16_t *output, int element_size) { | int ElementReciprocalFp16(float16_t *input, float16_t *output, int element_size) { | ||||
| for (int i = 0; i < element_size; ++i) { | for (int i = 0; i < element_size; ++i) { | ||||
| assert(input[i] != 0.0f); | |||||
| if (input[i] == 0.0f) { | |||||
| return NNACL_ERR; | |||||
| } | |||||
| output[i] = 1.f / input[i]; | output[i] = 1.f / input[i]; | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| @@ -132,7 +132,9 @@ int ElementNegative(const float *input, float *output, const int element_size) { | |||||
| int ElementReciprocal(const float *input, float *output, const int element_size) { | int ElementReciprocal(const float *input, float *output, const int element_size) { | ||||
| for (int i = 0; i < element_size; ++i) { | for (int i = 0; i < element_size; ++i) { | ||||
| assert(input[i] != 0.0f); | |||||
| if (input[i] == 0.0f) { | |||||
| return NNACL_ERR; | |||||
| } | |||||
| output[i] = 1.f / input[i]; | output[i] = 1.f / input[i]; | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| @@ -183,7 +183,7 @@ void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, i | |||||
| const int32x4_t ele_left_vec = vdupq_n_s32(ele_left_shift); | const int32x4_t ele_left_vec = vdupq_n_s32(ele_left_shift); | ||||
| const int32x4_t ptr_right_vec = vdupq_n_s32(-ptr_args->right_shift_); | const int32x4_t ptr_right_vec = vdupq_n_s32(-ptr_args->right_shift_); | ||||
| const int32x4_t ele_right_vec = vdupq_n_s32(-ptr_args->right_shift_); | |||||
| const int32x4_t ele_right_vec = vdupq_n_s32(-ele_args->right_shift_); | |||||
| const int32x4_t out_left_vec = vdupq_n_s32(params->out_left_shift_); | const int32x4_t out_left_vec = vdupq_n_s32(params->out_left_shift_); | ||||
| const int32x4_t out_right_vec = vdupq_n_s32(-params->out_right_shift_); | const int32x4_t out_right_vec = vdupq_n_s32(-params->out_right_shift_); | ||||
| @@ -288,7 +288,9 @@ int Int8ElementReciprocal(int8_t *input, int8_t *output, int element_size, Arith | |||||
| float bias = in_zp * in_scale; | float bias = in_zp * in_scale; | ||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| float input_f32 = input[i] * in_scale + bias; | float input_f32 = input[i] * in_scale + bias; | ||||
| assert(input_f32 != 0.0f); | |||||
| if (input_f32 == 0.0f) { | |||||
| return NNACL_ERR; | |||||
| } | |||||
| int32_t output_tmp = round(1.f / (input_f32 * out_scale)) + out_zp; | int32_t output_tmp = round(1.f / (input_f32 * out_scale)) + out_zp; | ||||
| if (output_tmp > para.output_activation_max_) { | if (output_tmp > para.output_activation_max_) { | ||||
| output[i] = para.output_activation_max_; | output[i] = para.output_activation_max_; | ||||
| @@ -24,7 +24,7 @@ void ConvDwFp32Avx5x5(float *output, float **input, const float *weights, const | |||||
| input_stride /= sizeof(float *); | input_stride /= sizeof(float *); | ||||
| size_t c8 = UP_DIV(channels, C8NUM) * C8NUM; | size_t c8 = UP_DIV(channels, C8NUM) * C8NUM; | ||||
| size_t c8_mod = channels % C8NUM; | size_t c8_mod = channels % C8NUM; | ||||
| int kernel = 25; | |||||
| const int kernel = 25; | |||||
| for (int i = 0; i < output_width; ++i) { | for (int i = 0; i < output_width; ++i) { | ||||
| float *in[kernel]; | float *in[kernel]; | ||||
| for (int k = 0; k < kernel; k++) { | for (int k = 0; k < kernel; k++) { | ||||
| @@ -146,7 +146,7 @@ void PadCPUKernel::InitMirrorPadBlock() { | |||||
| std::vector<int> pad_cord(pad_region.size()); | std::vector<int> pad_cord(pad_region.size()); | ||||
| for (int pos = 0; pos < remain_size; ++pos) { | for (int pos = 0; pos < remain_size; ++pos) { | ||||
| int dst_basic_offset = 0; | |||||
| const int dst_basic_offset = 0; | |||||
| for (int index = 1; index < region_size; ++index) { | for (int index = 1; index < region_size; ++index) { | ||||
| int dst_offset = dst_basic_offset; | int dst_offset = dst_basic_offset; | ||||
| @@ -34,8 +34,8 @@ int SigmoidCrossEntropyWithLogitsGradCPUKernel::Execute(int task_id) { | |||||
| auto labels = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | auto labels = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | ||||
| auto dloss = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData()); | auto dloss = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData()); | ||||
| auto *out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | auto *out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | ||||
| float zero = 0.0f; | |||||
| float one = 1.0f; | |||||
| const float zero = 0.0f; | |||||
| const float one = 1.0f; | |||||
| size_t tensor_len = in_tensors_.at(0)->ElementsNum(); | size_t tensor_len = in_tensors_.at(0)->ElementsNum(); | ||||
| for (uint64_t i = 0; i < tensor_len; ++i) { | for (uint64_t i = 0; i < tensor_len; ++i) { | ||||
| @@ -143,7 +143,9 @@ void QuantizedAddCPUKernel::BroadcastRun(int task_id) { | |||||
| if (real_out_count <= 0) { | if (real_out_count <= 0) { | ||||
| return; | return; | ||||
| } | } | ||||
| int8_t *cur_in0, *cur_in1, *cur_out; | |||||
| int8_t *cur_in0 = nullptr; | |||||
| int8_t *cur_in1 = nullptr; | |||||
| int8_t *cur_out = nullptr; | |||||
| for (int i = 0; i < real_out_count; i++) { | for (int i = 0; i < real_out_count; i++) { | ||||
| if (arith_para_->in_elements_num0_ == arith_para_->out_elements_num_) { | if (arith_para_->in_elements_num0_ == arith_para_->out_elements_num_) { | ||||
| cur_in0 = input0_data_ + task_id * stride * in_size_ + i * in_size_; | cur_in0 = input0_data_ + task_id * stride * in_size_ + i * in_size_; | ||||
| @@ -68,7 +68,7 @@ ge::Operator *CreateOperator(const std::vector<ge::Operator *> &npu_inputs, cons | |||||
| int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, | const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| ge::Operator *op; | |||||
| ge::Operator *op = nullptr; | |||||
| switch (primitive_->Type()) { | switch (primitive_->Type()) { | ||||
| case PrimitiveType_Mul: | case PrimitiveType_Mul: | ||||
| op = CreateOperator<hiai::op::Mul>(npu_inputs, name_); | op = CreateOperator<hiai::op::Mul>(npu_inputs, name_); | ||||
| @@ -54,7 +54,7 @@ ge::Operator *CreateOperator(ge::Operator *input, const std::string &name) { | |||||
| int ArithmeticSelfNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | int ArithmeticSelfNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, | const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| ge::Operator *op; | |||||
| ge::Operator *op = nullptr; | |||||
| switch (primitive_->Type()) { | switch (primitive_->Type()) { | ||||
| case PrimitiveType_Cos: | case PrimitiveType_Cos: | ||||
| op = CreateOperator<hiai::op::Cos>(npu_inputs[0], name_); | op = CreateOperator<hiai::op::Cos>(npu_inputs[0], name_); | ||||