| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| #include "nnacl/nnacl_utils.h" | |||||
| void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | ||||
| int *outStrides, int *multiple) { | int *outStrides, int *multiple) { | ||||
| @@ -61,6 +62,8 @@ void ComputeStrides(const int *shape, int *strides, const int ndim) { | |||||
| } | } | ||||
| void CalcMultiplesAndStrides(ArithmeticParameter *param) { | void CalcMultiplesAndStrides(ArithmeticParameter *param) { | ||||
| NNACL_ASSERT(param->in_shape0_[i] != 0); | |||||
| NNACL_ASSERT(param->in_shape1_[i] != 0); | |||||
| for (size_t i = 0; i < param->ndim_; i++) { | for (size_t i = 0; i < param->ndim_; i++) { | ||||
| param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i]; | param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i]; | ||||
| param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i]; | param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i]; | ||||
| @@ -17,6 +17,7 @@ | |||||
| #include "nnacl/fp16/arithmetic_fp16.h" | #include "nnacl/fp16/arithmetic_fp16.h" | ||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| #include "nnacl/nnacl_utils.h" | |||||
| void TileOneDimensionFp16(float16_t *inData, float16_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, | void TileOneDimensionFp16(float16_t *inData, float16_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, | ||||
| int *outStrides, int *multiple) { | int *outStrides, int *multiple) { | ||||
| @@ -544,6 +545,7 @@ int ElementDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int | |||||
| } | } | ||||
| #endif | #endif | ||||
| for (; index < element_size; index++) { | for (; index < element_size; index++) { | ||||
| NNACL_ASSERT(input1[index] != 0); | |||||
| output[index] = input0[index] / input1[index]; | output[index] = input0[index] / input1[index]; | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| @@ -565,6 +567,7 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i | |||||
| } | } | ||||
| #endif | #endif | ||||
| for (; index < element_size; index++) { | for (; index < element_size; index++) { | ||||
| NNACL_ASSERT(input1[index] != 0); | |||||
| output[index] = input0[0] / input1[index]; | output[index] = input0[0] / input1[index]; | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -601,6 +604,7 @@ int ElementDivReluFp16(float16_t *input0, float16_t *input1, float16_t *output, | |||||
| if (input1[index] == 0) { | if (input1[index] == 0) { | ||||
| return NNACL_ERRCODE_DIVISOR_ZERO; | return NNACL_ERRCODE_DIVISOR_ZERO; | ||||
| } | } | ||||
| NNACL_ASSERT(input1[index] != 0); | |||||
| float16_t res = input0[index] / input1[index]; | float16_t res = input0[index] / input1[index]; | ||||
| output[index] = res > 0 ? res : 0; | output[index] = res > 0 ? res : 0; | ||||
| } | } | ||||
| @@ -627,6 +631,7 @@ int ElementOptDivReluFp16(float16_t *input0, float16_t *input1, float16_t *outpu | |||||
| if (input1[index] == 0) { | if (input1[index] == 0) { | ||||
| return NNACL_ERRCODE_DIVISOR_ZERO; | return NNACL_ERRCODE_DIVISOR_ZERO; | ||||
| } | } | ||||
| NNACL_ASSERT(input1[index] != 0); | |||||
| output[index] = MSMAX(input0[0] / input1[index], 0); | output[index] = MSMAX(input0[0] / input1[index], 0); | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -724,10 +729,12 @@ int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *outp | |||||
| ArithmeticParameter *param) { | ArithmeticParameter *param) { | ||||
| if (param->in_elements_num1_ == 1) { | if (param->in_elements_num1_ == 1) { | ||||
| for (int i = 0; i < element_size; ++i) { | for (int i = 0; i < element_size; ++i) { | ||||
| NNACL_ASSERT(input1[0] != 0); | |||||
| output[i] = input0[i] - floorf(input0[i] / input1[0]) * input1[0]; | output[i] = input0[i] - floorf(input0[i] / input1[0]) * input1[0]; | ||||
| } | } | ||||
| } else { | } else { | ||||
| for (int i = 0; i < element_size; ++i) { | for (int i = 0; i < element_size; ++i) { | ||||
| NNACL_ASSERT(input1[i] != 0); | |||||
| output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; | output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; | ||||
| } | } | ||||
| } | } | ||||
| @@ -736,6 +743,7 @@ int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *outp | |||||
| int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size) { | int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size) { | ||||
| for (int i = 0; i < element_size; ++i) { | for (int i = 0; i < element_size; ++i) { | ||||
| NNACL_ASSERT(input1[i] != 0); | |||||
| output[i] = floorf(input0[i] / input1[i]); | output[i] = floorf(input0[i] / input1[i]); | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| @@ -744,10 +752,12 @@ int ElementOptFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *outp | |||||
| ArithmeticParameter *param) { | ArithmeticParameter *param) { | ||||
| if (param->in_elements_num1_ == 1) { | if (param->in_elements_num1_ == 1) { | ||||
| for (int i = 0; i < element_size; ++i) { | for (int i = 0; i < element_size; ++i) { | ||||
| NNACL_ASSERT(input1[0] != 0); | |||||
| output[i] = floorf(input0[i] / input1[0]); | output[i] = floorf(input0[i] / input1[0]); | ||||
| } | } | ||||
| } else { | } else { | ||||
| for (int i = 0; i < element_size; ++i) { | for (int i = 0; i < element_size; ++i) { | ||||
| NNACL_ASSERT(input1[i] != 0); | |||||
| output[i] = floorf(input0[i] / input1[i]); | output[i] = floorf(input0[i] / input1[i]); | ||||
| } | } | ||||
| } | } | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/nnacl_utils.h" | |||||
| float IntersectionOverUnion(const BboxCorner *a, const BboxCorner *b) { | float IntersectionOverUnion(const BboxCorner *a, const BboxCorner *b) { | ||||
| const float area_a = (a->ymax - a->ymin) * (a->xmax - a->xmin); | const float area_a = (a->ymax - a->ymin) * (a->xmax - a->xmin); | ||||
| @@ -147,11 +148,8 @@ int DetectionPostProcessFast(const int num_boxes, const int num_classes_with_bg, | |||||
| for (int j = 0; j < max_classes_per_anchor; ++j) { | for (int j = 0; j < max_classes_per_anchor; ++j) { | ||||
| *((BboxCorner *)(output_boxes) + out_num) = *box; | *((BboxCorner *)(output_boxes) + out_num) = *box; | ||||
| output_scores[out_num] = input_scores[indexes[j]]; | output_scores[out_num] = input_scores[indexes[j]]; | ||||
| if (num_classes_with_bg != 0) { | |||||
| output_classes[out_num++] = (float)(indexes[j] % num_classes_with_bg - first_class_index); | |||||
| } else { | |||||
| return NNACL_ERRCODE_DIVISOR_ZERO; | |||||
| } | |||||
| NNACL_ASSERT(num_classes_with_bg != 0); | |||||
| output_classes[out_num++] = (float)(indexes[j] % num_classes_with_bg - first_class_index); | |||||
| } | } | ||||
| } | } | ||||
| *output_num = (float)out_num; | *output_num = (float)out_num; | ||||
| @@ -214,6 +212,7 @@ int DetectionPostProcessRegular(const int num_boxes, const int num_classes_with_ | |||||
| } | } | ||||
| for (int i = 0; i < param->max_detections_ * param->max_classes_per_detection_; ++i) { | for (int i = 0; i < param->max_detections_ * param->max_classes_per_detection_; ++i) { | ||||
| if (i < all_classes_output_num) { | if (i < all_classes_output_num) { | ||||
| NNACL_ASSERT(num_classes_with_bg != 0); | |||||
| const int box_index = all_indexes[i] / num_classes_with_bg; | const int box_index = all_indexes[i] / num_classes_with_bg; | ||||
| const int class_index = all_indexes[i] % num_classes_with_bg - first_class_index; | const int class_index = all_indexes[i] % num_classes_with_bg - first_class_index; | ||||
| *((BboxCorner *)(output_boxes) + i) = *((BboxCorner *)(decoded_boxes) + box_index); | *((BboxCorner *)(output_boxes) + i) = *((BboxCorner *)(decoded_boxes) + box_index); | ||||
| @@ -33,11 +33,11 @@ int OneHot(const int *indices, float *output, const OneHotParameter *one_hot_par | |||||
| float *output_ptr = output + i * depth * inner_size; | float *output_ptr = output + i * depth * inner_size; | ||||
| for (k = 0; k < inner_size; k++) { | for (k = 0; k < inner_size; k++) { | ||||
| int index = indices[i * inner_size + k]; | int index = indices[i * inner_size + k]; | ||||
| if (index >= depth) { | |||||
| return NNACL_ERRCODE_INDEX_OUT_OF_RANGE; | |||||
| } | |||||
| for (j = 0; j < depth; j++) { | for (j = 0; j < depth; j++) { | ||||
| *output_ptr = off_value; | *output_ptr = off_value; | ||||
| if (index >= depth) { | |||||
| return NNACL_ERRCODE_INDEX_OUT_OF_RANGE; | |||||
| } | |||||
| if (index == j) { | if (index == j) { | ||||
| *output_ptr = on_value; | *output_ptr = on_value; | ||||
| } | } | ||||
| @@ -65,12 +65,9 @@ int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, in | |||||
| hend = MSMIN(MSMAX(hend + roi_start_h, 0), height_); | hend = MSMIN(MSMAX(hend + roi_start_h, 0), height_); | ||||
| wstart = MSMIN(MSMAX(wstart + roi_start_w, 0), width_); | wstart = MSMIN(MSMAX(wstart + roi_start_w, 0), width_); | ||||
| wend = MSMIN(MSMAX(wend + roi_start_w, 0), width_); | wend = MSMIN(MSMAX(wend + roi_start_w, 0), width_); | ||||
| bool is_empty = (hend <= hstart) || (wend <= wstart); | |||||
| for (int j = 0; j < channels_; ++j) { | for (int j = 0; j < channels_; ++j) { | ||||
| max_c[j] = -__FLT_MAX__; | |||||
| bool is_empty = (hend <= hstart) || (wend <= wstart); | |||||
| if (is_empty) { | |||||
| max_c[j] = 0; | |||||
| } | |||||
| max_c[j] = is_empty ? 0 : -__FLT_MAX__; | |||||
| } | } | ||||
| int pooled_index = i * param->out_strides_[0] + ph * param->out_strides_[1] + pw * param->out_strides_[2]; | int pooled_index = i * param->out_strides_[0] + ph * param->out_strides_[1] + pw * param->out_strides_[2]; | ||||
| int bd_index = hstart * param->in_strides_[1]; | int bd_index = hstart * param->in_strides_[1]; | ||||
| @@ -189,8 +189,6 @@ int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_si | |||||
| const int32_t *inner_src = outer_src + k; | const int32_t *inner_src = outer_src + k; | ||||
| int32_t *inner_dst = outer_dst + k; | int32_t *inner_dst = outer_dst + k; | ||||
| int32_t sum = 0; | int32_t sum = 0; | ||||
| // (x - zp_in) * scale_in = mean[(item - zp_in) * scale_in] | |||||
| // x = mean(item-zp_in) + zp_in | |||||
| for (i = 0; i < axis_size; i++) { | for (i = 0; i < axis_size; i++) { | ||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | ||||
| if (isAddOverflow(sum, tmp)) { | if (isAddOverflow(sum, tmp)) { | ||||
| @@ -226,14 +224,12 @@ int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axi | |||||
| int8_t *inner_dst = outer_dst + k; | int8_t *inner_dst = outer_dst + k; | ||||
| int32_t sum = 0; | int32_t sum = 0; | ||||
| for (i = 0; i < axis_size; i++) { | for (i = 0; i < axis_size; i++) { | ||||
| // y = mean(x-zp_in) * scale + zp_out | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | ||||
| if (isAddOverflow(tmp, sum)) { | if (isAddOverflow(tmp, sum)) { | ||||
| return NNACL_ERRCODE_ADD_OVERFLOW; | return NNACL_ERRCODE_ADD_OVERFLOW; | ||||
| } | } | ||||
| sum += tmp; | sum += tmp; | ||||
| } | } | ||||
| // sum / num | |||||
| int32_t mean = RoundingDivideByPOT( | int32_t mean = RoundingDivideByPOT( | ||||
| SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->mean_left_shift_), quant->mean_multiplier_), | SaturatingRoundingDoublingHighMul(sum * (1 << (unsigned int)quant->mean_left_shift_), quant->mean_multiplier_), | ||||
| quant->mean_right_shift_); | quant->mean_right_shift_); | ||||
| @@ -466,7 +462,6 @@ int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axi | |||||
| int8_t *inner_dst = outer_dst + k; | int8_t *inner_dst = outer_dst + k; | ||||
| int32_t prod = 1; | int32_t prod = 1; | ||||
| for (i = 0; i < axis_size; i++) { | for (i = 0; i < axis_size; i++) { | ||||
| // quant_out = prod(quant_in-zp) * (scale_in^num/scale_out) + zp_out | |||||
| int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; | ||||
| if (isMulOverflow(prod, tmp)) { | if (isMulOverflow(prod, tmp)) { | ||||
| return NNACL_ERRCODE_MUL_OVERFLOW; | return NNACL_ERRCODE_MUL_OVERFLOW; | ||||
| @@ -541,7 +536,6 @@ int ReduceSumSquareLastAxis(const int outer_size, const int inner_size, const in | |||||
| const int32_t *inner_src = outer_src + k; | const int32_t *inner_src = outer_src + k; | ||||
| int8_t *inner_dst = outer_dst + k; | int8_t *inner_dst = outer_dst + k; | ||||
| int32_t sum = 0; | int32_t sum = 0; | ||||
| // quant_out = sum((quant_in - zp)^2) * scale_in^2 / scale_out + zp_out | |||||
| for (i = 0; i < axis_size; i++) { | for (i = 0; i < axis_size; i++) { | ||||
| int32_t tmp; | int32_t tmp; | ||||
| if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) { | if (isMulOverflow(inner_src[i * inner_size] - quant->in_zp_, inner_src[i * inner_size] - quant->in_zp_)) { | ||||
| @@ -23,7 +23,7 @@ void ReluXInt8(const int8_t *src, int length, int8_t *dst, ReluXQuantArg *arg) { | |||||
| } | } | ||||
| const int32_t input_val = src[i] - arg->input_arg.zp_; | const int32_t input_val = src[i] - arg->input_arg.zp_; | ||||
| const int32_t scaled_input = SaturatingRoundingDoublingHighMul(input_val, arg->input_multiplier_); | const int32_t scaled_input = SaturatingRoundingDoublingHighMul(input_val, arg->input_multiplier_); | ||||
| const int32_t shifted_input = RoundingDivideByPOT(scaled_input * (1 << arg->left_shift_), -arg->right_shift_); | |||||
| const int32_t shifted_input = RoundingDivideByPOT(scaled_input * (1U << arg->left_shift_), -arg->right_shift_); | |||||
| const int32_t output = shifted_input + arg->output_arg.zp_; | const int32_t output = shifted_input + arg->output_arg.zp_; | ||||
| dst[i] = (int8_t)MSMIN(output, arg->quantized_output_max); | dst[i] = (int8_t)MSMIN(output, arg->quantized_output_max); | ||||
| } | } | ||||
| @@ -183,7 +183,7 @@ void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32 | |||||
| return; | return; | ||||
| } | } | ||||
| *nearest = (in_size * pos) / new_size; | *nearest = (in_size * pos) / new_size; | ||||
| if (align_corners) { | |||||
| if (align_corners && new_size != 1) { | |||||
| *nearest = ((in_size - 1) * pos + (new_size - 1) / 2) / (new_size - 1); | *nearest = ((in_size - 1) * pos + (new_size - 1) / 2) / (new_size - 1); | ||||
| } | } | ||||
| *nearest = *nearest < in_size ? *nearest : in_size - 1; | *nearest = *nearest < in_size ? *nearest : in_size - 1; | ||||
| @@ -166,13 +166,15 @@ FusionPattern &FusionPattern::Finish() { | |||||
| } | } | ||||
| this->outputOpId = ids.front(); | this->outputOpId = ids.front(); | ||||
| auto outputNode = GetPatternOp(this->outputOpId); | auto outputNode = GetPatternOp(this->outputOpId); | ||||
| MS_ASSERT(outputNode != nullptr); | |||||
| outputNode->isTail = true; | |||||
| if (outputNode != nullptr) { | |||||
| outputNode->isTail = true; | |||||
| } | |||||
| for (auto inputNodeId : inputNodeIds) { | for (auto inputNodeId : inputNodeIds) { | ||||
| auto inputNode = GetPatternOp(inputNodeId); | auto inputNode = GetPatternOp(inputNodeId); | ||||
| MS_ASSERT(inputNode != nullptr); | |||||
| inputNode->isHead = true; | |||||
| if (inputNode != nullptr) { | |||||
| inputNode->isHead = true; | |||||
| } | |||||
| } | } | ||||
| return *this; | return *this; | ||||
| } | } | ||||
| @@ -550,6 +550,10 @@ bool IsMultiOutputTensors(const FuncGraphPtr &graph, const AnfNodePtr &node) { | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| auto output_node_list = GetRealNodeUsedList(graph, node); | auto output_node_list = GetRealNodeUsedList(graph, node); | ||||
| if (output_node_list == nullptr) { | |||||
| MS_LOG(ERROR) << "output node list is nullptr"; | |||||
| return false; | |||||
| } | |||||
| if (output_node_list->size() != 1) { | if (output_node_list->size() != 1) { | ||||
| MS_LOG(DEBUG) << "fusion node has multi output nodes"; | MS_LOG(DEBUG) << "fusion node has multi output nodes"; | ||||
| return true; | return true; | ||||
| @@ -47,23 +47,23 @@ bool NodePass::Run(const FuncGraphPtr &func_graph) { | |||||
| if (seen_node.count(node) > 0 || !manager->all_nodes().contains(node)) { | if (seen_node.count(node) > 0 || !manager->all_nodes().contains(node)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| (void) seen_node.insert(node); | |||||
| (void)seen_node.insert(node); | |||||
| AnfNodePtr new_node = Run(func_graph, node); | AnfNodePtr new_node = Run(func_graph, node); | ||||
| bool change = (new_node != nullptr); | bool change = (new_node != nullptr); | ||||
| if (new_node != nullptr && new_node != node) { | if (new_node != nullptr && new_node != node) { | ||||
| (void) manager->Replace(node, new_node); | |||||
| (void) seen_node.erase(node); | |||||
| (void)manager->Replace(node, new_node); | |||||
| (void)seen_node.erase(node); | |||||
| } else if (new_node == nullptr) { | } else if (new_node == nullptr) { | ||||
| new_node = node; | new_node = node; | ||||
| } | } | ||||
| if (new_node && IsValueNode<FuncGraph>(new_node)) { | |||||
| if (IsValueNode<FuncGraph>(new_node)) { | |||||
| auto const_func_graph = GetValueNode<FuncGraphPtr>(new_node); | auto const_func_graph = GetValueNode<FuncGraphPtr>(new_node); | ||||
| if (const_func_graph == nullptr) { | if (const_func_graph == nullptr) { | ||||
| lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR); | lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR); | ||||
| return false; | return false; | ||||
| } | } | ||||
| to_process.push_back(const_func_graph->output()); | to_process.push_back(const_func_graph->output()); | ||||
| } else if (new_node && new_node->isa<CNode>()) { | |||||
| } else if (new_node->isa<CNode>()) { | |||||
| if (IsGraphKernel(new_node)) { | if (IsGraphKernel(new_node)) { | ||||
| to_process.push_back(new_node); | to_process.push_back(new_node); | ||||
| } | } | ||||
| @@ -73,7 +73,7 @@ bool NodePass::Run(const FuncGraphPtr &func_graph) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| auto inputs = cnode->inputs(); | auto inputs = cnode->inputs(); | ||||
| (void) to_process.insert(to_process.end(), inputs.begin(), inputs.end()); | |||||
| (void)to_process.insert(to_process.end(), inputs.begin(), inputs.end()); | |||||
| } | } | ||||
| changes = changes || change; | changes = changes || change; | ||||
| if (changes) { | if (changes) { | ||||