@@ -38,17 +38,13 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax
   int axis_count = 1;
   int after_axis_count = 1;
   GetCalcParameter(shape, param->dims_size_, param->axis_, &pre_axis_count, &axis_count, &after_axis_count);
-  switch (param->data_type_) {
-    case FLOAT_DATA_TYPE: {
-      if (param->get_max_) {
-        ArgMax(input, output, param, pre_axis_count, axis_count, after_axis_count);
-      } else {
-        ArgMin(input, output, param, pre_axis_count, axis_count, after_axis_count);
-      }
-      break;
-    }
-    default:
-      break;
-  }
+  if (param->data_type_ != FLOAT_DATA_TYPE) {
+    return;
+  }
+  if (param->get_max_) {
+    ArgMax(input, output, param, pre_axis_count, axis_count, after_axis_count);
+  } else {
+    ArgMin(input, output, param, pre_axis_count, axis_count, after_axis_count);
+  }
 }
@@ -35,6 +35,8 @@ void Fp16Crop(const float16_t *input, float16_t *output, int task_id, CropParame
     case 4:
       Fp16Crop4D(input, output, task_id, para);
      break;
+    default:
+      break;
   }
 }
@@ -16,16 +16,16 @@
 #include "nnacl/fp16/matmul_fp16.h"
 
-void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16) {
+void ColMajor2Row8MajorFp16(const void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16) {
   int row_c8 = row / C8NUM * C8NUM;
   int col_c8 = col / C8NUM * C8NUM;
   int ci = 0;
   if (src_float16) {
-    float16_t *src = (float16_t *)src_ptr;
+    const float16_t *src = (const float16_t *)src_ptr;
     for (; ci < col_c8; ci += C8NUM) {
       int ri = 0;
       for (; ri < row_c8; ri += C8NUM) {
-        float16_t *src_ptr1 = src + ci * row + ri;
+        const float16_t *src_ptr1 = src + ci * row + ri;
         float16_t *dst_ptr1 = dst_ptr + ci * row + ri * C8NUM;
 #ifdef ENABLE_ARM64
         size_t strid_row = row * 2;
@@ -93,7 +93,7 @@ void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_
 #endif
       }
       for (; ri < row; ++ri) {
-        float16_t *src_ptr1 = src + ci * row;
+        const float16_t *src_ptr1 = src + ci * row;
         float16_t *dst_ptr1 = dst_ptr + ci * row;
         for (int tc = 0; tc < C8NUM; ++tc) {
           dst_ptr1[ri * C8NUM + tc] = src_ptr1[tc * row + ri];
@@ -108,11 +108,11 @@ void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_
       }
     }
   } else {
-    float *src = (float *)src_ptr;
+    const float *src = (const float *)src_ptr;
     for (; ci < col_c8; ci += C8NUM) {
       int ri = 0;
       for (; ri < row_c8; ri += C8NUM) {
-        float *src_ptr1 = src + ci * row + ri;
+        const float *src_ptr1 = src + ci * row + ri;
         float16_t *dst_ptr1 = dst_ptr + ci * row + ri * C8NUM;
 #ifdef ENABLE_ARM64
         size_t strid_row = row * 4;
@@ -197,7 +197,7 @@ void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_
 #endif
       }
       for (; ri < row; ++ri) {
-        float *src_ptr1 = src + ci * row;
+        const float *src_ptr1 = src + ci * row;
         float16_t *dst_ptr1 = dst_ptr + ci * row;
         for (int tc = 0; tc < C8NUM; ++tc) {
           dst_ptr1[ri * C8NUM + tc] = (float16_t)(src_ptr1[tc * row + ri]);
@@ -274,18 +274,18 @@ void MatVecMulFp16(const float16_t *a, const float16_t *b, float16_t *c, const f
   MatVecMulFp16Neon64(a, b, c, bias, (int)act_type, depth, col);
 }
 
-void RowMajor2Col16MajorFp16Opt(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col) {
+void RowMajor2Col16MajorFp16Opt(const float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col) {
   size_t row_up_16 = UP_ROUND(row, C16NUM);
   size_t row16 = row / C16NUM * C16NUM;
   size_t col8 = col / C8NUM * C8NUM;
-  float16_t *src_r = src_ptr;
+  const float16_t *src_r = src_ptr;
   float16_t *dst_r = dst_ptr;
   size_t ri = 0;
   for (; ri < row16; ri += C16NUM) {
     size_t ci = 0;
     for (; ci < col8; ci += C8NUM) {
-      float16_t *src_c = src_r + ci;
+      const float16_t *src_c = src_r + ci;
       float16_t *dst_c = dst_r + ci * C16NUM;
 #ifdef ENABLE_ARM64
@@ -403,7 +403,7 @@ void RowMajor2Col16MajorFp16Opt(float16_t *src_ptr, float16_t *dst_ptr, size_t r
 #endif
     }
     for (; ci < col; ci++) {
-      float16_t *src_c = src_r + ci;
+      const float16_t *src_c = src_r + ci;
       float16_t *dst_c = dst_r + ci * C16NUM;
       for (size_t i = 0; i < C16NUM; i++) {
         dst_c[i] = src_c[i * col];
@@ -428,57 +428,57 @@ void RowMajor2Col16MajorFp16Opt(float16_t *src_ptr, float16_t *dst_ptr, size_t r
   return;
 }
 
-void RowMajor2Col16MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
+void RowMajor2Col16MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
   for (int r = 0; r < row; r++) {
     for (int c = 0; c < col; c++) {
       int r_div16 = r / 16;
       int r_mod16 = r % 16;
       if (is_fp32_src) {
-        dst[r_div16 * 16 * col + c * 16 + r_mod16] = (float16_t)(((float *)src)[r * col + c]);
+        dst[r_div16 * 16 * col + c * 16 + r_mod16] = (float16_t)(((const float *)src)[r * col + c]);
       } else {
-        dst[r_div16 * 16 * col + c * 16 + r_mod16] = ((float16_t *)src)[r * col + c];
+        dst[r_div16 * 16 * col + c * 16 + r_mod16] = ((const float16_t *)src)[r * col + c];
       }
     }
   }
 }
 
-void RowMajor2Row16MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
+void RowMajor2Row16MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
   for (int r = 0; r < row; r++) {
     for (int c = 0; c < col; c++) {
       int c_div16 = c / 16;
       int c_mod16 = c % 16;
       if (is_fp32_src) {
-        dst[c_div16 * 16 * row + r * 16 + c_mod16] = (float16_t)(((float *)src)[r * col + c]);
+        dst[c_div16 * 16 * row + r * 16 + c_mod16] = (float16_t)(((const float *)src)[r * col + c]);
       } else {
-        dst[c_div16 * 16 * row + r * 16 + c_mod16] = ((float16_t *)src)[r * col + c];
+        dst[c_div16 * 16 * row + r * 16 + c_mod16] = ((const float16_t *)src)[r * col + c];
       }
     }
   }
 }
 
-void RowMajor2Row8MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
+void RowMajor2Row8MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
   for (int r = 0; r < row; r++) {
     for (int c = 0; c < col; c++) {
       int c_div8 = c / 8;
       int c_mod8 = c % 8;
       if (is_fp32_src) {
-        dst[c_div8 * 8 * row + r * 8 + c_mod8] = (float16_t)(((float *)src)[r * col + c]);
+        dst[c_div8 * 8 * row + r * 8 + c_mod8] = (float16_t)(((const float *)src)[r * col + c]);
       } else {
-        dst[c_div8 * 8 * row + r * 8 + c_mod8] = ((float16_t *)src)[r * col + c];
+        dst[c_div8 * 8 * row + r * 8 + c_mod8] = ((const float16_t *)src)[r * col + c];
       }
     }
   }
 }
 
-void RowMajor2Col8MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
+void RowMajor2Col8MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src) {
   for (int r = 0; r < row; r++) {
     for (int c = 0; c < col; c++) {
       int r_div8 = r / 8;
       int r_mod8 = r % 8;
       if (is_fp32_src) {
-        dst[r_div8 * 8 * col + c * 8 + r_mod8] = (float16_t)(((float *)src)[r * col + c]);
+        dst[r_div8 * 8 * col + c * 8 + r_mod8] = (float16_t)(((const float *)src)[r * col + c]);
       } else {
-        dst[r_div8 * 8 * col + c * 8 + r_mod8] = ((float16_t *)src)[r * col + c];
+        dst[r_div8 * 8 * col + c * 8 + r_mod8] = ((const float16_t *)src)[r * col + c];
       }
     }
   }
@@ -38,9 +38,9 @@ void MatMulFp16(const float16_t *a, const float16_t *b, float16_t *c, const floa
 void MatVecMulFp16(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, ActType act_type,
                    int depth, int col);
 
-void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16);
+void ColMajor2Row8MajorFp16(const void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16);
 
-void RowMajor2Col16MajorFp16Opt(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col);
+void RowMajor2Col16MajorFp16Opt(const float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col);
 
 void MatmulFp16Neon64(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, int act_type,
                       size_t depth, size_t row, size_t col, size_t stride, bool write_nhwc);
@@ -51,13 +51,13 @@ void MatmulFp16Neon64Opt(const float16_t *a, const float16_t *b, float16_t *c, c
 void MatVecMulFp16Neon64(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, int act_type,
                          int depth, int col);
 
-void RowMajor2Col16MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src);
+void RowMajor2Col16MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src);
 
-void RowMajor2Row16MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src);
+void RowMajor2Row16MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src);
 
-void RowMajor2Row8MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src);
+void RowMajor2Row8MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src);
 
-void RowMajor2Col8MajorFp16(void *src, float16_t *dst, int row, int col, bool is_fp32_src);
+void RowMajor2Col8MajorFp16(const void *src, float16_t *dst, int row, int col, bool is_fp32_src);
 
 #ifdef __cplusplus
 }
@@ -16,6 +16,7 @@
 #include "nnacl/fp32/arithmetic_fp32.h"
 #include <math.h>
+#include <float.h>
 
 #define ACCURACY_DATA 0.00000001
@@ -964,7 +965,7 @@ int ElementNotEqual(const float *input0, const float *input1, float *output, con
   }
 #endif
   for (; index < element_size; index++) {
-    output[index] = (float)(input0[index] != input1[index]);
+    output[index] = (float)(fabsf(input0[index] - input1[index]) > FLT_EPSILON);
   }
   return NNACL_OK;
 }
@@ -996,7 +997,7 @@ int ElementEqual(const float *input0, const float *input1, float *output, const
   }
 #endif
   for (; index < element_size; index++) {
-    output[index] = (float)(input0[index] == input1[index]);
+    output[index] = (float)(fabsf(input0[index] - input1[index]) <= FLT_EPSILON);
   }
   return NNACL_OK;
 }
@@ -17,6 +17,7 @@
 #include "nnacl/fp32/broadcast_to_fp32.h"
 #include <string.h>
 #include "nnacl/op_base.h"
+#include "nnacl/errorcode.h"
 
 void PadBroadcastShapeInfo(BroadcastShapeInfo *shape_info) {
   if (shape_info->input_shape_size_ < DIMENSION_4D) {
@@ -51,7 +52,7 @@ void PadBroadcastShapeInfo(BroadcastShapeInfo *shape_info) {
 
 int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *output) {
   if (shape_info->input_shape_size_ > DIMENSION_4D || shape_info->output_shape_size_ > DIMENSION_4D) {
-    return -1;
+    return NNACL_ERR;
   }
   PadBroadcastShapeInfo(shape_info);
   size_t input_dim_offset[DIMENSION_4D - 1];
@@ -98,5 +99,5 @@ int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *outpu
       memcpy(out_base + output_dim_offset[0] * dim0, out_base, output_dim_offset[0]);
     }
   }
-  return 0;
+  return NNACL_OK;
 }
@@ -116,11 +116,10 @@ void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size
 }
 #endif
 
-union float32_bits {
+typedef union float32_bits {
   unsigned int u;
   float f;
-};
-typedef union float32_bits float32_bits;
+} float32_bits;
 
 float ShortToFloat32(uint16_t src_value) {
   const float32_bits magic = {113 << 23};
@@ -93,19 +93,21 @@ void InitSlidingParam(SlidingWindowParam *sliding, const ConvParameter *conv_par
   int top = 0;
   int bottom = conv_param->output_h_;
-  for (; left * conv_param->stride_w_ < conv_param->pad_l_; left++) {
+  while (left * conv_param->stride_w_ < conv_param->pad_l_) {
+    left++;
   }
-  for (; (right - 1) * conv_param->stride_w_ - conv_param->pad_l_ + conv_param->kernel_w_ * conv_param->dilation_w_ >
            conv_param->input_w_ &&
-         right > left;
-       right--) {
+  while ((right - 1) * conv_param->stride_w_ - conv_param->pad_l_ + conv_param->kernel_w_ * conv_param->dilation_w_ >
+         right > left) {
+    right--;
   }
-  for (; top * conv_param->stride_h_ < conv_param->pad_u_; top++) {
+  while (top * conv_param->stride_h_ < conv_param->pad_u_) {
+    top++;
   }
-  for (; (bottom - 1) * conv_param->stride_h_ - conv_param->pad_u_ + conv_param->kernel_h_ * conv_param->dilation_h_ >
            conv_param->input_h_ &&
-         bottom > top;
-       bottom--) {
+  while ((bottom - 1) * conv_param->stride_h_ - conv_param->pad_u_ + conv_param->kernel_h_ * conv_param->dilation_h_ >
+         bottom > top) {
+    bottom--;
   }
 
   sliding->left_ = left;
   sliding->right_ = right;
@@ -28,11 +28,10 @@ inline int Stride(const int *shape, int rank, int index) {
 
 int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
            float *output) {
-  int i, m;
-  for (m = 0; m < outer_size; ++m) {
+  for (int m = 0; m < outer_size; ++m) {
     float *inputm = input + inner_size * m * limit;
     float *outputm = output + inner_size * m * indices_element_size;
-    for (i = 0; i < indices_element_size; ++i) {
+    for (int i = 0; i < indices_element_size; ++i) {
       if (indices[i] < 0 || indices[i] > limit) {
         return NNACL_ERR;
       }
@@ -44,11 +43,10 @@ int Gather(float *input, int outer_size, int inner_size, int limit, const int *i
 
 int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices,
                 int indices_element_size, int32_t *output) {
-  int i, m;
-  for (m = 0; m < outer_size; ++m) {
+  for (int m = 0; m < outer_size; ++m) {
     const int32_t *inputm = input + inner_size * m * limit;
     int32_t *outputm = output + inner_size * m * indices_element_size;
-    for (i = 0; i < indices_element_size; ++i) {
+    for (int i = 0; i < indices_element_size; ++i) {
       if (indices[i] < 0 || indices[i] > limit) {
         return NNACL_ERR;
       }
@@ -24,14 +24,13 @@ int InstanceNorm(const int outer_size, const int inner_size, const float *src_da
   if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) {
     return NNACL_NULL_PTR;
   }
-  int i, j;
-  for (j = task_id; j < outer_size; j += thread_num) {
+  for (int j = task_id; j < outer_size; j += thread_num) {
     int offset = (j / param->channel_) * inner_size * param->channel_;
     const float *src = src_data + offset;
     float *dst = dst_data + offset;
     float mean = 0.0f;
     float square_mean = 0.0f;
-    for (i = 0; i < inner_size; i++) {
+    for (int i = 0; i < inner_size; i++) {
       int idx = j % param->channel_ + i * param->channel_;
       mean += src[idx];
       square_mean += src[idx] * src[idx];
@@ -39,7 +38,7 @@ int InstanceNorm(const int outer_size, const int inner_size, const float *src_da
     mean /= (float)inner_size;
     square_mean /= (float)inner_size;
     const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
-    for (i = 0; i < inner_size; ++i) {
+    for (int i = 0; i < inner_size; ++i) {
       int idx = j % param->channel_ + i * param->channel_;
       int scale_idx = (j / param->channel_) * param->channel_ + j % param->channel_;
       dst[idx] = ((src[idx] - mean) * deno) * scale_data[scale_idx] + bias_data[scale_idx];
@@ -27,20 +27,19 @@ int LayerNorm(const int outer_size, const int inner_size, const float *src_data,
   if (affine && (gamma_data == NULL || beta_data == NULL)) {
     return NNACL_NULL_PTR;
   }
-  int i, j;
-  for (j = tid; j < outer_size; j += thread_num) {
+  for (int j = tid; j < outer_size; j += thread_num) {
     const float *src = src_data + j * inner_size;
     float *dst = dst_data + j * inner_size;
     float mean = 0.0f;
     float square_mean = 0.0f;
-    for (i = 0; i < inner_size; i++) {
+    for (int i = 0; i < inner_size; i++) {
      mean += src[i];
      square_mean += src[i] * src[i];
    }
     mean /= (float)inner_size;
     square_mean /= (float)inner_size;
     const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon);
-    for (i = 0; i < inner_size; ++i) {
+    for (int i = 0; i < inner_size; ++i) {
       dst[i] = (src[i] - mean) * deno;
       if (affine) {
         dst[i] = dst[i] * gamma_data[i] + beta_data[i];
@@ -19,24 +19,21 @@
 
 int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr,
                       LocalResponseNormParameter *param) {
-  int i, j, k;
-  int left, right;
   int depth_radius = param->depth_radius_;
   float bias = param->bias_;
   float alpha = param->alpha_;
   float beta = param->beta_;
-  for (i = 0; i < out_size; i++) {
+  for (int i = 0; i < out_size; i++) {
     float *in_data = input_ptr + i * channel;
     float *out_data = output_ptr + i * channel;
-    for (j = 0; j < channel; j++) {
-      left = MSMAX(0, j - depth_radius);
-      right = MSMIN(channel - 1, j + depth_radius);
+    for (int j = 0; j < channel; j++) {
+      int left = MSMAX(0, j - depth_radius);
+      int right = MSMIN(channel - 1, j + depth_radius);
       float sum = 0.0;
-      for (k = left; k <= right; k++) {
+      for (int k = left; k <= right; k++) {
         const float in_val = in_data[k];
         sum += in_val * in_val;
       }
@@ -148,7 +148,7 @@ void TransposeCommInt8(const int8_t *in_data, int8_t *out_data, const int *strid
   }
 }
 
-int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, int *input_shape, const int *output_shape,
+int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
                     TransposeParameter *transpose_param, int h_start, int h_end, int *dim_size, int *position) {
   if (in_data == NULL || out_data == NULL) {
     return NNACL_NULL_PTR;
@@ -25,7 +25,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, int *input_shape, const int *output_shape,
+int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
                     TransposeParameter *transpose_param, int h_start, int h_end, int *dim_size, int *position);
 #ifdef __cplusplus
 }
@@ -32,6 +32,9 @@ int ThreadDivSqrtSum(const float *input_ptr, float *output_ptr, const L2NormPara
   bool is_relu = param->act_type_ == ActType_Relu;
   bool is_relu6 = param->act_type_ == ActType_Relu6;
   int i;
+  if (sqrt_sum == 0) {
+    return NNACL_ERRCODE_DIVISOR_ZERO;
+  }
   for (i = begin; i < end; i++) {
     float tmp = input_ptr[i] / sqrt_sum;
     if (is_relu) {
@@ -23,7 +23,9 @@ void Polynomial(const float *interval, float *m, int degree) {
   for (int i = 0; i < degree; ++i) {
     float mul = 1;
     for (int j = 0; j < degree; ++j) {
-      if (i == j) continue;
+      if (i == j) {
+        continue;
+      }
       mul *= (interval[i] - interval[j]);
     }
     m[i] = mul;
@@ -35,7 +37,9 @@ void DiagonalPlusMatrix(const float *matrix, float *diagonal_matrix, int degree)
   memset(diagonal_matrix, 0, data_num * sizeof(float));
   for (int i = 0; i < degree; ++i) {
     for (int j = 0; j < degree; ++j) {
-      if (j == i) diagonal_matrix[i * (degree + 1) + j] = matrix[i];
+      if (j == i) {
+        diagonal_matrix[i * (degree + 1) + j] = matrix[i];
+      }
     }
   }
   diagonal_matrix[data_num - 1] = 1;
@@ -207,7 +211,10 @@ int CookToomFilter(float *matrix_a, float *matrix_at, float *matrix_b, float *ma
   MatrixTranspose(matrix_a, matrix_at, in_unit, out_unit);
 
   // get matrix B
-  B(interval, matrix_bt, in_unit);
+  int ret = B(interval, matrix_bt, in_unit);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
   MatrixTranspose(matrix_bt, matrix_b, in_unit, in_unit);
   MatrixMultiply(diagonal_matrix, matrix_b, matrix_bt, in_unit, in_unit, in_unit);
   MatrixTranspose(matrix_bt, matrix_b, in_unit, in_unit);
@@ -17,4 +17,4 @@
 #include <stdio.h>
 #include <string.h>
 
-void ApproximateZerosLike(float *input, float *output, int number) { memset(output, 0.0, number * sizeof(float)); }
+void ApproximateZerosLike(float *output, int number) { memset(output, 0.0, number * sizeof(float)); }
@@ -21,7 +21,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void ApproximateZerosLike(float *input, float *output, int number);
+void ApproximateZerosLike(float *output, int number);
 #ifdef __cplusplus
 }
 #endif
@@ -79,7 +79,7 @@ Registry ApplyMomentumRegistry(schema::PrimitiveType_ApplyMomentum, ApplyMomentu
 #endif
 
 int ApplyMomentum::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
-  if (5 != inputs.size()) {
+  if (inputs.size() != 5) {
     MS_LOG(ERROR) << "ApplyMomentum should have at least 5 input tensors";
     return RET_ERROR;
   }
@@ -54,10 +54,6 @@ int BiasAdd::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &in
       attr->axis = CastToInt(prim.GetAttr("axis"), true);
     }
     this->primitive_->value.value = attr;
-    if (this->primitive_->value.value == nullptr) {
-      MS_LOG(ERROR) << "primitive value is nullptr";
-      return RET_ERROR;
-    }
   }
   return RET_OK;
 }
@@ -52,10 +52,6 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &i
       attr->axis = CastToInt(prim.GetAttr("axis"), true);
     }
     this->primitive_->value.value = attr;
-    if (this->primitive_->value.value == nullptr) {
-      MS_LOG(ERROR) << "primitive value is nullptr";
-      return RET_ERROR;
-    }
   }
   return RET_OK;
 }
@@ -91,11 +87,11 @@ Registry BiasGradRegistry(schema::PrimitiveType_BiasGrad, BiasGradCreator);
 #endif
 
 int BiasGrad::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) {
-  if (1 != inputs.size()) {
+  if (inputs.size() != 1) {
     MS_LOG(ERROR) << "BiasGrad should have one input";
     return RET_ERROR;
   }
-  if (1 != outputs.size()) {
+  if (outputs.size() != 1) {
     MS_LOG(ERROR) << "BiasGrad should have one output";
     return RET_ERROR;
   }
@@ -56,10 +56,6 @@ int BNGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp
       attr->eps = GetValue<float>(prim.GetAttr("epsilon"));
     }
     this->primitive_->value.value = attr;
-    if (this->primitive_->value.value == nullptr) {
-      MS_LOG(ERROR) << "primitive value is nullptr";
-      return RET_ERROR;
-    }
   }
   return RET_OK;
 }
@@ -85,11 +81,11 @@ float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps()
 float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); }
 #endif
 
 int BNGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
-  if (6 != inputs.size()) {
+  if (inputs.size() != 6) {
     MS_LOG(ERROR) << "BNGrad should have five inputs";
     return RET_ERROR;
   }
-  if (3 != outputs.size()) {
+  if (outputs.size() != 3) {
     MS_LOG(ERROR) << "BNGrad should have three outputs";
     return RET_ERROR;
   }
@@ -53,10 +53,6 @@ int Cast::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &input
     attr->srcT = srcAnf->number_type();
     attr->dstT = dstAnf->number_type();
     this->primitive_->value.value = attr;
-    if (this->primitive_->value.value == nullptr) {
-      MS_LOG(ERROR) << "primitive value is nullptr";
-      return RET_ERROR;
-    }
   }
   return RET_OK;
@@ -131,6 +131,10 @@ void ConvertConvWeight(const ParameterPtr &param_node) {
 void Conv2D::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group,
                                        const std::vector<AnfNodePtr> &inputs) {
   auto attr = std::make_unique<schema::DepthwiseConv2DT>();
+  if (attr.get() == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return;
+  }
   auto format = GetValue<std::string>(prim.GetAttr("data_format"));
   if (format == "NCHW") {
     attr->format = schema::Format::Format_NCHW;
@@ -203,6 +207,10 @@ void Conv2D::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT
 
 void Conv2D::PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group) {
   auto attr = std::make_unique<schema::Conv2DT>();
+  if (attr.get() == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return;
+  }
   attr->group = group;
   auto format = GetValue<std::string>(prim.GetAttr("data_format"));
   if (format == "NCHW") {
@@ -124,6 +124,10 @@ void ConvertConvWeight(const ParameterPtr &param_node) {
 void DeConv2D::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group,
                                          const std::vector<AnfNodePtr> &inputs) {
   auto attr = std::make_unique<schema::DeDepthwiseConv2DT>();
+  if (attr.get() == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return;
+  }
   auto format = GetValue<std::string>(prim.GetAttr("data_format"));
   if (format == "NCHW") {
     attr->format = schema::Format::Format_NCHW;
@@ -186,6 +190,10 @@ void DeConv2D::PopulaterConv2DMultiGroup(const Primitive &prim, schema::Primitiv
 
 void DeConv2D::PopulaterDeConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group) {
   auto attr = std::make_unique<schema::DeConv2DT>();
+  if (attr.get() == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return;
+  }
   attr->group = group;
   auto format = GetValue<std::string>(prim.GetAttr("data_format"));
   if (format == "NCHW") {
@@ -45,7 +45,7 @@ int DeDepthwiseConv2D::GetActivationType() const {
 }
 
 void DeDepthwiseConv2D::SetFormat(int format) {
-  this->primitive_->value.AsDeDepthwiseConv2D()->format = (schema::Format)format;
+  this->primitive_->value.AsDeDepthwiseConv2D()->format = static_cast<schema::Format>(format);
 }
 void DeDepthwiseConv2D::SetChannelIn(int channel_in) {
   this->primitive_->value.AsDeDepthwiseConv2D()->channelIn = channel_in;
@@ -58,7 +58,7 @@ void DeDepthwiseConv2D::SetKernelH(int kernel_h) { this->primitive_->value.AsDeD
 void DeDepthwiseConv2D::SetStrideW(int stride_w) { this->primitive_->value.AsDeDepthwiseConv2D()->strideW = stride_w; }
 void DeDepthwiseConv2D::SetStrideH(int stride_h) { this->primitive_->value.AsDeDepthwiseConv2D()->strideH = stride_h; }
 void DeDepthwiseConv2D::SetPadMode(int pad_mode) {
-  this->primitive_->value.AsDeDepthwiseConv2D()->padMode = (schema::PadMode)pad_mode;
+  this->primitive_->value.AsDeDepthwiseConv2D()->padMode = static_cast<schema::PadMode>(pad_mode);
 }
 void DeDepthwiseConv2D::SetPadUp(int pad_up) { this->primitive_->value.AsDeDepthwiseConv2D()->padUp = pad_up; }
 void DeDepthwiseConv2D::SetPadDown(int pad_down) { this->primitive_->value.AsDeDepthwiseConv2D()->padDown = pad_down; }
@@ -70,7 +70,7 @@ void DeDepthwiseConv2D::SetDilateW(int dilate_w) { this->primitive_->value.AsDeD
 void DeDepthwiseConv2D::SetDilateH(int dilate_h) { this->primitive_->value.AsDeDepthwiseConv2D()->dilateH = dilate_h; }
 void DeDepthwiseConv2D::SetHasBias(bool has_bias) { this->primitive_->value.AsDeDepthwiseConv2D()->hasBias = has_bias; }
 void DeDepthwiseConv2D::SetActivationType(int activation_type) {
-  this->primitive_->value.AsDeDepthwiseConv2D()->activationType = (schema::ActivationType)activation_type;
+  this->primitive_->value.AsDeDepthwiseConv2D()->activationType = static_cast<schema::ActivationType>(activation_type);
 }
 
 #else
@@ -48,7 +48,7 @@ bool DepthwiseConv2D::GetHasBias() const { return this->primitive_->value.AsDept
 int DepthwiseConv2D::GetActivationType() const { return this->primitive_->value.AsDepthwiseConv2D()->activationType; }
 
 void DepthwiseConv2D::SetFormat(int format) {
-  this->primitive_->value.AsDepthwiseConv2D()->format = (schema::Format)format;
+  this->primitive_->value.AsDepthwiseConv2D()->format = static_cast<schema::Format>(format);
 }
 void DepthwiseConv2D::SetChannelIn(int channel_in) {
   this->primitive_->value.AsDepthwiseConv2D()->channelIn = channel_in;
@@ -61,7 +61,7 @@ void DepthwiseConv2D::SetKernelH(int kernel_h) { this->primitive_->value.AsDepth
 void DepthwiseConv2D::SetStrideW(int stride_w) { this->primitive_->value.AsDepthwiseConv2D()->strideW = stride_w; }
 void DepthwiseConv2D::SetStrideH(int stride_h) { this->primitive_->value.AsDepthwiseConv2D()->strideH = stride_h; }
 void DepthwiseConv2D::SetPadMode(int pad_mode) {
-  this->primitive_->value.AsDepthwiseConv2D()->padMode = (schema::PadMode)pad_mode;
+  this->primitive_->value.AsDepthwiseConv2D()->padMode = static_cast<schema::PadMode>(pad_mode);
 }
 void DepthwiseConv2D::SetPadUp(int pad_up) { this->primitive_->value.AsDepthwiseConv2D()->padUp = pad_up; }
 void DepthwiseConv2D::SetPadDown(int pad_down) { this->primitive_->value.AsDepthwiseConv2D()->padDown = pad_down; }
@@ -71,7 +71,7 @@ void DepthwiseConv2D::SetDilateW(int dilate_w) { this->primitive_->value.AsDepth
 void DepthwiseConv2D::SetDilateH(int dilate_h) { this->primitive_->value.AsDepthwiseConv2D()->dilateH = dilate_h; }
 void DepthwiseConv2D::SetHasBias(bool has_bias) { this->primitive_->value.AsDepthwiseConv2D()->hasBias = has_bias; }
 void DepthwiseConv2D::SetActivationType(int activation_type) {
-  this->primitive_->value.AsDepthwiseConv2D()->activationType = (schema::ActivationType)activation_type;
+  this->primitive_->value.AsDepthwiseConv2D()->activationType = static_cast<schema::ActivationType>(activation_type);
 }
 
 int DepthwiseConv2D::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
@@ -40,10 +40,6 @@ int Dequant::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &in
       return RET_ERROR;
     }
     this->primitive_->value.value = attr;
-    if (this->primitive_->value.value == nullptr) {
-      MS_LOG(ERROR) << "primitive value is nullptr";
-      return RET_ERROR;
-    }
   }
   return RET_OK;
 }
@@ -32,7 +32,7 @@ void FullConnection::SetHasBias(bool has_bias) { this->primitive_->value.AsFullC
 void FullConnection::SetAxis(int axis) { this->primitive_->value.AsFullConnection()->axis = axis; }
 void FullConnection::SetUseAxis(bool use_axis) { this->primitive_->value.AsFullConnection()->useAxis = use_axis; }
 void FullConnection::SetActivationType(int activationType) {
-  this->primitive_->value.AsFullConnection()->activationType = (schema::ActivationType)activationType;
+  this->primitive_->value.AsFullConnection()->activationType = static_cast<schema::ActivationType>(activationType);
 }
 #else
 
 int FullConnection::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
@@ -53,10 +53,6 @@ int FusedBatchNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodeP
     attr->epsilon = GetValue<float>(prim.GetAttr("epsilon"));
     attr->momentum = GetValue<float>(prim.GetAttr("momentum"));
     this->primitive_->value.value = attr;
-    if (this->primitive_->value.value == nullptr) {
-      MS_LOG(ERROR) << "new primitiveT value failed";
-      return RET_ERROR;
-    }
   }
   return RET_OK;
 }
@@ -88,7 +84,9 @@ Registry FusedBatchNormRegistry(schema::PrimitiveType_FusedBatchNorm, FusedBatch
 
 int FusedBatchNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {
   for (size_t i = 0; i < inputs_.size(); i++) {
-    if (outputs_.size() <= i) break;
+    if (outputs_.size() <= i) {
+      break;
+    }
     outputs_.at(i)->set_shape(inputs_.at(i)->shape());
     outputs_.at(i)->set_data_type(inputs_.at(i)->data_type());
     outputs_.at(i)->set_format(inputs_.at(i)->format());
@@ -64,8 +64,7 @@ class PrimitiveC : public mindspore::Primitive {
   // Argument primitive is deliverd into PrimitiveC and will be deleted in ~PrimitiveC().
   // Caller should not delete primitive.
-  explicit PrimitiveC(const std::string &name, schema::PrimitiveT *primitive)
-      : Primitive(name), primitive_(primitive) {}
+  PrimitiveC(const std::string &name, schema::PrimitiveT *primitive) : Primitive(name), primitive_(primitive) {}
 
   PrimitiveC() : Primitive(""), primitive_(nullptr) {}
@@ -175,7 +174,7 @@ class PrimitiveC {
 
   template <typename T, typename = std::enable_if<std::is_base_of<PrimitiveC, T>::value>>
   static PrimitiveC *NewPrimitiveC(const schema::Primitive *primitive) {
-    auto primc = new T();
+    auto primc = new (std::nothrow) T();
     if (primc == nullptr) {
       MS_LOG(ERROR) << "new PrimitiveC failed";
       return nullptr;
@@ -30,10 +30,8 @@ namespace mindspore::kernel {
 int ZerosLikeCPUKernel::Init() { return RET_OK; }
 
 int ZerosLikeCPUKernel::Run() {
-  auto input = in_tensors_.at(0);
-  auto input_data = reinterpret_cast<float *>(input->MutableData());
   auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-  ApproximateZerosLike(input_data, output_data, input->ElementsNum());
+  ApproximateZerosLike(output_data, in_tensors_.at(0)->ElementsNum());
   return RET_OK;
 }
@@ -118,7 +118,7 @@ int TransposeInt8CPUKernel::DoTranspose(int task_id) {
     position = position_ + task_id * transpose_param_->num_axes_;
   }
 
-  auto ret = DoTransposeInt8(in_ptr_, out_ptr_, in_shape_, out_shape_, transpose_param_, thread_offset,
+  auto ret = DoTransposeInt8(in_ptr_, out_ptr_, out_shape_, transpose_param_, thread_offset,
                              thread_offset + num_unit_thread, dim_size, position);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Transpose error task_id[" << task_id << "] error_code[" << ret << "]";
@@ -59,6 +59,7 @@ STATUS ReadProtoFromText(const char *file, google::protobuf::Message *message) {
   bool status = google::protobuf::TextFormat::Parse(&input, message);
   if (!status) {
     MS_LOG(ERROR) << "call [google::protobuf::TextFormat::Parse] func status fail, please check your text file.";
+    fs.close();
     return RET_ERROR;
   }
 
@@ -74,7 +74,7 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p
   // biasadd node the second tensor is not constant tensor, don't fusion
   auto baNodeInputIndex = baNode->inputIndex;
   if (baNodeInputIndex.size() != BIASADD_OP_INPUT_NUM) {
-    MS_LOG(ERROR) << "%s node tensors number is invalid! ";  // baNode->name.c_str());
+    MS_LOG(ERROR) << "input num is invalid! node: " << baNode->name.c_str();
     return RET_ERROR;
   }
   MS_ASSERT(graph->allTensors.size() > baNodeInputIndex.at(BIASADD_OP_BIAS_INDEX));
@@ -88,7 +88,7 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p
   // 1. add biasTensor for matMul
   auto status = AddFullConnectionBiasTensor(matMulPath, baPath, graph);
   if (RET_OK != status) {
-    MS_LOG(ERROR) << "AddFullConnectionBiasTensor failed, %d";  // status);
+    MS_LOG(ERROR) << "AddFullConnectionBiasTensor failed, ret: " << status;
     return status;
   }
 
@@ -113,17 +113,16 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p
   MergeNodeAttrFromPost(matMulNode, baNode);
   status = IsolateOneWayNode(graph, baPath->nodeIdx);
   if (status != RET_OK) {
-    MS_LOG(ERROR) << "IsolateOneWayNode failed, subGraph: %zu, node: %zu, error: %d";
-    // baPath->subGraphIdx, baPath->nodeIdx, status);
+    MS_LOG(ERROR) << "IsolateOneWayNode failed, subGraph: " << baPath->subGraphIdx << ", node: " << baPath->nodeIdx
+                  << ", ret: " << status;
     return status;
   }
 
   // 4. addTranspose node
   status = InsertTransposeNode(graph, matMulPath);
   if (status != RET_OK) {
-    MS_LOG(ERROR)
-      << "InsertTransposeNode failed, subGraph: %zu, node: %zu, error: %d";  // matMulPath->subGraphIdx,
-    // matMulPath->nodeIdx, status);
+    MS_LOG(ERROR) << "InsertTransposeNode failed, subGraph: " << matMulPath->subGraphIdx
+                  << ", node: " << matMulPath->nodeIdx << ", ret: " << status;
     return status;
   }
   return RET_OK;
@@ -162,7 +161,7 @@ STATUS MatMulBiasAddFusionPass::InsertTransposeNode(MetaGraphT *graph, const std
     matmulOpIter =
       InsertNode(graph, matmulOpIter, kBefore, needInsertIdx, std::move(transNode), &errorCode, TransposeOpCopyer);
     if (errorCode != RET_OK) {
-      MS_LOG(ERROR) << "InsertNode failed: %d";  // errorCode);
+      MS_LOG(ERROR) << "InsertNode failed: " << errorCode;
      return errorCode;
    }
  }
@@ -187,7 +186,7 @@ STATUS MatMulBiasAddFusionPass::AddFullConnectionBiasTensor(const std::shared_pt
   // check biasTensor
   auto baWeightTensorIdxes = baNode->inputIndex;
   if (baWeightTensorIdxes.size() != BIASADD_OP_INPUT_NUM) {
-    MS_LOG(ERROR) << "%s node tensors number is invalid! ";  // baNode->name.c_str());
+    MS_LOG(ERROR) << "input number is invalid! node: " << baNode->name.c_str();
     return RET_ERROR;
   }
   MS_ASSERT(graph->allTensors.size() > baWeightTensorIdxes.at(BIASADD_OP_BIAS_INDEX));
@@ -196,7 +195,7 @@ STATUS MatMulBiasAddFusionPass::AddFullConnectionBiasTensor(const std::shared_pt
   auto biasDims = biasTensor->dims;
   // if biasTensor is a scaler
   if (biasDims.empty() && biasTensor->data.data() == nullptr) {
-    MS_LOG(ERROR) << "BiasAdd node %s bias tensor is invalid";  // baNode->name.c_str());
+    MS_LOG(ERROR) << "bias tensor is invalid, node: " << baNode->name.c_str();
     return RET_ERROR;
   }
   if (!biasDims.empty() && biasDims.size() != BIASADD_WEIGHT_SHAPE_SIZE) {
@@ -142,7 +142,7 @@ STATUS TransOpInsertPass::Run(schema::MetaGraphT *graph) {
     changed = false;
     for (auto iter = graph->nodes.begin(); iter != graph->nodes.end(); iter++) {
       auto &node = *iter;
-      if (node == nullptr && node->primitive == nullptr) {
+      if (node == nullptr || node->primitive == nullptr) {
        MS_LOG(ERROR) << "node or primitive null";
        return RET_NULL_PTR;
      }
@@ -53,6 +53,7 @@ STATUS CaffeReduceParser::Parse(const caffe::LayerParameter &proto, const caffe:
       break;
     case caffe::ReductionParameter_ReductionOp_ASUM:
       attr->mode = schema::ReduceMode_ReduceASum;
+      break;
     default:
       MS_LOG(ERROR) << "reduce parse params fail, unsupported opration: " << reduce_param.operation();
       return RET_ERROR;
@@ -174,7 +174,7 @@ STATUS OnnxConvParser::Parse(const onnx::GraphProto &onnx_graph, const onnx::Nod
     attr->activationType = schema::ActivationType_NO_ACTIVATION;
   }
 
-  if (attr != nullptr && attr->group > kSingleGroup && attr->group == attr->channelIn) {
+  if (attr->group > kSingleGroup && attr->group == attr->channelIn) {
     if (!ParseGroupConvolution(attr, op)) {
       MS_LOG(ERROR) << "Convert Convolution to Depthwise failed";
       return RET_ERROR;
@@ -43,7 +43,7 @@ class Quantizer {
  public:
   explicit Quantizer(FuncGraphPtr graph) : funcGraph(std::move(graph)) {}
 
-  ~Quantizer() = default;
+  virtual ~Quantizer() = default;
 
   virtual STATUS RemoveFakeQuant();
| virtual STATUS RemoveFakeQuant(); | virtual STATUS RemoveFakeQuant(); | ||||