Merge pull request !7928 from liuwenhao/mastertags/v1.1.0
| @@ -22,9 +22,9 @@ | |||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| #include "nnacl/fp32/arithmetic.h" | #include "nnacl/fp32/arithmetic.h" | ||||
| typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size); | |||||
| typedef int (*ArithmeticOptRun)(float *input0, float *input1, float *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| typedef int (*ArithmeticRun)(const float *input0, const float *input1, float *output, const int element_size); | |||||
| typedef int (*ArithmeticOptRun)(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int BroadcastRun(float *input0, float *input1, float *output, int dim, int out_count, int break_pos, | int BroadcastRun(float *input0, float *input1, float *output, int dim, int out_count, int break_pos, | ||||
| ArithmeticRun arithmetic_run, ArithmeticParameter *params) { | ArithmeticRun arithmetic_run, ArithmeticParameter *params) { | ||||
| @@ -52,7 +52,7 @@ void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t nd | |||||
| } | } | ||||
| } | } | ||||
| void ComputeStrides(int *shape, int *strides, int ndim) { | |||||
| void ComputeStrides(const int *shape, int *strides, const int ndim) { | |||||
| int stride = 1; | int stride = 1; | ||||
| for (int i = ndim - 1; i >= 0; i--) { | for (int i = ndim - 1; i >= 0; i--) { | ||||
| strides[i] = stride; | strides[i] = stride; | ||||
| @@ -49,7 +49,7 @@ extern "C" { | |||||
| #endif | #endif | ||||
| void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | ||||
| int *outStrides, int *multiple); | int *outStrides, int *multiple); | ||||
| void ComputeStrides(int *shape, int *strides, int ndim); | |||||
| void ComputeStrides(const int *shape, int *strides, const int ndim); | |||||
| void CalcMultiplesAndStrides(ArithmeticParameter *param); | void CalcMultiplesAndStrides(ArithmeticParameter *param); | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "nnacl/depth_to_space.h" | #include "nnacl/depth_to_space.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param) { | |||||
| void DepthToSpaceForNHWC(const void *input, void *output, const int *in_shape, const DepthToSpaceParameter *param) { | |||||
| int32_t block_size = param->block_size_; | int32_t block_size = param->block_size_; | ||||
| int32_t in_shape_dim2 = in_shape[2]; | int32_t in_shape_dim2 = in_shape[2]; | ||||
| int32_t in_shape_dim1 = in_shape[1]; | int32_t in_shape_dim1 = in_shape[1]; | ||||
| @@ -20,7 +20,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param); | |||||
| void DepthToSpaceForNHWC(const void *input, void *output, const int *in_shape, const DepthToSpaceParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -19,7 +19,8 @@ | |||||
| #define ACCURACY_DATA 0.00000001 | #define ACCURACY_DATA 0.00000001 | ||||
| int ElementOptMul(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptMul(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -51,7 +52,8 @@ int ElementOptMul(float *input0, float *input1, float *output, int element_size, | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptMulRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptMulRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -84,7 +86,8 @@ int ElementOptMulRelu(float *input0, float *input1, float *output, int element_s | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptMulRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptMulRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -118,7 +121,8 @@ int ElementOptMulRelu6(float *input0, float *input1, float *output, int element_ | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptMulInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptMulInt(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | ||||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | ||||
| @@ -150,7 +154,8 @@ int ElementOptMulInt(int *input0, int *input1, int *output, int element_size, Ar | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptMulReluInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptMulReluInt(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | ||||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | ||||
| @@ -183,7 +188,8 @@ int ElementOptMulReluInt(int *input0, int *input1, int *output, int element_size | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptMulRelu6Int(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | ||||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | ||||
| @@ -217,7 +223,8 @@ int ElementOptMulRelu6Int(int *input0, int *input1, int *output, int element_siz | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptSub(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptSub(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -249,7 +256,8 @@ int ElementOptSub(float *input0, float *input1, float *output, int element_size, | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptSubRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptSubRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -282,7 +290,8 @@ int ElementOptSubRelu(float *input0, float *input1, float *output, int element_s | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptSubRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptSubRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -316,7 +325,8 @@ int ElementOptSubRelu6(float *input0, float *input1, float *output, int element_ | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptAdd(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptAdd(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -348,7 +358,8 @@ int ElementOptAdd(float *input0, float *input1, float *output, int element_size, | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptAddInt(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | ||||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | ||||
| @@ -380,7 +391,8 @@ int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, Ar | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptAddRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -413,7 +425,8 @@ int ElementOptAddRelu(float *input0, float *input1, float *output, int element_s | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptAddRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | ||||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | ||||
| @@ -448,7 +461,8 @@ int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_ | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptDiv(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptDiv(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| if (param->in_elements_num0_ == 1) { | if (param->in_elements_num0_ == 1) { | ||||
| for (int index = 0; index < element_size; index++) { | for (int index = 0; index < element_size; index++) { | ||||
| output[index] = input0[0] / input1[index]; | output[index] = input0[0] / input1[index]; | ||||
| @@ -464,7 +478,8 @@ int ElementOptDiv(float *input0, float *input1, float *output, int element_size, | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptDivRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptDivRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| if (param->in_elements_num0_ == 1) { | if (param->in_elements_num0_ == 1) { | ||||
| for (int index = 0; index < element_size; index++) { | for (int index = 0; index < element_size; index++) { | ||||
| output[index] = input0[0] / input1[index]; | output[index] = input0[0] / input1[index]; | ||||
| @@ -479,7 +494,8 @@ int ElementOptDivRelu(float *input0, float *input1, float *output, int element_s | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||||
| int ElementOptDivRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param) { | |||||
| if (param->in_elements_num0_ == 1) { | if (param->in_elements_num0_ == 1) { | ||||
| for (int index = 0; index < element_size; index++) { | for (int index = 0; index < element_size; index++) { | ||||
| output[index] = MSMIN(MSMAX(input0[0] / input1[index], 0), 6); | output[index] = MSMIN(MSMAX(input0[0] / input1[index], 0), 6); | ||||
| @@ -492,7 +508,7 @@ int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_ | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementMul(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementMul(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| for (; index <= element_size - 4; index += C4NUM) { | for (; index <= element_size - 4; index += C4NUM) { | ||||
| @@ -508,7 +524,7 @@ int ElementMul(float *input0, float *input1, float *output, int element_size) { | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementMulRelu(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementMulRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t zeros = vdupq_n_f32(0.0f); | float32x4_t zeros = vdupq_n_f32(0.0f); | ||||
| @@ -527,7 +543,7 @@ int ElementMulRelu(float *input0, float *input1, float *output, int element_size | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementMulRelu6(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementMulRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t zeros = vdupq_n_f32(0.0f); | float32x4_t zeros = vdupq_n_f32(0.0f); | ||||
| @@ -545,7 +561,7 @@ int ElementMulRelu6(float *input0, float *input1, float *output, int element_siz | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementMulInt(int *input0, int *input1, int *output, int element_size) { | |||||
| int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| for (; index <= element_size - 4; index += C4NUM) { | for (; index <= element_size - 4; index += C4NUM) { | ||||
| @@ -561,7 +577,7 @@ int ElementMulInt(int *input0, int *input1, int *output, int element_size) { | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementMulReluInt(int *input0, int *input1, int *output, int element_size) { | |||||
| int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| int32x4_t zeros = vdupq_n_s32(0); | int32x4_t zeros = vdupq_n_s32(0); | ||||
| @@ -580,7 +596,7 @@ int ElementMulReluInt(int *input0, int *input1, int *output, int element_size) { | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementMulRelu6Int(int *input0, int *input1, int *output, int element_size) { | |||||
| int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| int32x4_t zeros = vdupq_n_s32(0); | int32x4_t zeros = vdupq_n_s32(0); | ||||
| @@ -604,7 +620,7 @@ int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_i | |||||
| return ElementMul(tile_input0, tile_input1, output, element_size); | return ElementMul(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementAdd(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementAdd(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| for (; index <= element_size - 4; index += C4NUM) { | for (; index <= element_size - 4; index += C4NUM) { | ||||
| @@ -620,7 +636,7 @@ int ElementAdd(float *input0, float *input1, float *output, int element_size) { | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementAddRelu(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t zeros = vdupq_n_f32(0.0f); | float32x4_t zeros = vdupq_n_f32(0.0f); | ||||
| @@ -639,7 +655,7 @@ int ElementAddRelu(float *input0, float *input1, float *output, int element_size | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementAddRelu6(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t zeros = vdupq_n_f32(0.0f); | float32x4_t zeros = vdupq_n_f32(0.0f); | ||||
| @@ -657,7 +673,7 @@ int ElementAddRelu6(float *input0, float *input1, float *output, int element_siz | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementAddInt(int *input0, int *input1, int *output, int element_size) { | |||||
| int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| for (; index <= element_size - 4; index += C4NUM) { | for (; index <= element_size - 4; index += C4NUM) { | ||||
| @@ -692,7 +708,7 @@ int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t | |||||
| return ElementAddInt8(tile_input0, tile_input1, output, element_size); | return ElementAddInt8(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementSub(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementSub(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| for (; index <= element_size - 4; index += C4NUM) { | for (; index <= element_size - 4; index += C4NUM) { | ||||
| @@ -708,7 +724,7 @@ int ElementSub(float *input0, float *input1, float *output, int element_size) { | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementSubRelu(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t zeros = vdupq_n_f32(0.0f); | float32x4_t zeros = vdupq_n_f32(0.0f); | ||||
| @@ -727,7 +743,7 @@ int ElementSubRelu(float *input0, float *input1, float *output, int element_size | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementSubRelu6(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t zeros = vdupq_n_f32(0.0f); | float32x4_t zeros = vdupq_n_f32(0.0f); | ||||
| @@ -752,14 +768,14 @@ int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_i | |||||
| return ElementSub(tile_input0, tile_input1, output, element_size); | return ElementSub(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementDiv(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementDiv(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| output[i] = input0[i] / input1[i]; | output[i] = input0[i] / input1[i]; | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementDivRelu(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| float res = input0[i] / input1[i]; | float res = input0[i] / input1[i]; | ||||
| output[i] = res > 0 ? res : 0; | output[i] = res > 0 ? res : 0; | ||||
| @@ -767,7 +783,7 @@ int ElementDivRelu(float *input0, float *input1, float *output, int element_size | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementDivRelu6(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| output[i] = MSMIN(MSMAX(input0[i] / input1[i], 0), 6); | output[i] = MSMIN(MSMAX(input0[i] / input1[i], 0), 6); | ||||
| } | } | ||||
| @@ -780,14 +796,14 @@ int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_i | |||||
| return ElementDiv(tile_input0, tile_input1, output, element_size); | return ElementDiv(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementFloorMod(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; | output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementFloorModInt(int *input0, int *input1, int *output, int element_size) { | |||||
| int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| output[i] = input0[i] - (input0[i] / input1[i]) * input1[i]; | output[i] = input0[i] - (input0[i] / input1[i]) * input1[i]; | ||||
| } | } | ||||
| @@ -800,14 +816,14 @@ int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *t | |||||
| return ElementFloorMod(tile_input0, tile_input1, output, element_size); | return ElementFloorMod(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementFloorDiv(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| output[i] = floorf(input0[i] / input1[i]); | output[i] = floorf(input0[i] / input1[i]); | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size) { | |||||
| int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| output[i] = input0[i] / input1[i]; | output[i] = input0[i] / input1[i]; | ||||
| } | } | ||||
| @@ -820,7 +836,7 @@ int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *t | |||||
| return ElementFloorDiv(tile_input0, tile_input1, output, element_size); | return ElementFloorDiv(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementLogicalAnd(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -840,7 +856,7 @@ int ElementLogicalAnd(float *input0, float *input1, float *output, int element_s | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementSquaredDifference(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| ElementSub(input0, input1, output, element_size); | ElementSub(input0, input1, output, element_size); | ||||
| return ElementMul(output, output, output, element_size); | return ElementMul(output, output, output, element_size); | ||||
| } | } | ||||
| @@ -857,7 +873,7 @@ int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float | |||||
| return ElementLogicalAnd(tile_input0, tile_input1, output, element_size); | return ElementLogicalAnd(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementLogicalOr(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -883,7 +899,7 @@ int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float * | |||||
| return ElementLogicalOr(tile_input0, tile_input1, output, element_size); | return ElementLogicalOr(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementMaximum(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| for (; index <= element_size - 4; index += C4NUM) { | for (; index <= element_size - 4; index += C4NUM) { | ||||
| @@ -905,7 +921,7 @@ int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *ti | |||||
| return ElementMaximum(tile_input0, tile_input1, output, element_size); | return ElementMaximum(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementMinimum(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| for (; index <= element_size - 4; index += C4NUM) { | for (; index <= element_size - 4; index += C4NUM) { | ||||
| @@ -935,7 +951,7 @@ float FloatNotEqualCheck(float in0, float in1) { | |||||
| return (float)true; | return (float)true; | ||||
| } | } | ||||
| int ElementNotEqual(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -967,7 +983,7 @@ float FloatEqualCheck(float in0, float in1) { | |||||
| return (float)false; | return (float)false; | ||||
| } | } | ||||
| int ElementEqual(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -991,7 +1007,7 @@ int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile | |||||
| return ElementEqual(tile_input0, tile_input1, output, element_size); | return ElementEqual(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementLess(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementLess(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -1015,7 +1031,7 @@ int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_ | |||||
| return ElementLess(tile_input0, tile_input1, output, element_size); | return ElementLess(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementLessEqual(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -1039,7 +1055,7 @@ int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float * | |||||
| return ElementLessEqual(tile_input0, tile_input1, output, element_size); | return ElementLessEqual(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementGreater(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementGreater(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -1063,7 +1079,7 @@ int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *ti | |||||
| return ElementGreater(tile_input0, tile_input1, output, element_size); | return ElementGreater(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int ElementGreaterEqual(float *input0, float *input1, float *output, int element_size) { | |||||
| int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| float32x4_t vtrue = vdupq_n_f32(1); | float32x4_t vtrue = vdupq_n_f32(1); | ||||
| @@ -26,105 +26,121 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ElementOptAdd(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptSub(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptSubRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptSubRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptMul(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptMulRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptMulRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptMulInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptMulReluInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptMulRelu6Int(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptDiv(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptDivRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementMul(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementMulRelu(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementMulRelu6(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementMulInt(int *input0, int *input1, int *output, int element_size); | |||||
| int ElementMulReluInt(int *input0, int *input1, int *output, int element_size); | |||||
| int ElementMulRelu6Int(int *input0, int *input1, int *output, int element_size); | |||||
| int ElementOptAdd(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptAddInt(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptAddRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptAddRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptSub(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptSubRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptSubRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptMul(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptMulRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptMulRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptMulInt(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptMulReluInt(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptDiv(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptDivRelu(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementOptDivRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| int ElementMul(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementMulRelu(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementMulRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size); | |||||
| int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size); | |||||
| int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size); | |||||
| int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | ||||
| ArithmeticParameter *param); | ArithmeticParameter *param); | ||||
| int ElementAdd(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementAddRelu(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementAddRelu6(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementAddInt(int *input0, int *input1, int *output, int element_size); | |||||
| int ElementAdd(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size); | |||||
| int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | ||||
| ArithmeticParameter *param); | ArithmeticParameter *param); | ||||
| int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output, | int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementSub(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementSubRelu(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementSubRelu6(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementSub(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | ||||
| ArithmeticParameter *param); | ArithmeticParameter *param); | ||||
| int ElementDiv(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementDivRelu(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementDivRelu6(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementDiv(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | ||||
| ArithmeticParameter *param); | ArithmeticParameter *param); | ||||
| int ElementLogicalAnd(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementLogicalOr(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementMaximum(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementMinimum(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementFloorDiv(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size); | |||||
| int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size); | |||||
| int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementFloorMod(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementFloorModInt(int *input0, int *input1, int *output, int element_size); | |||||
| int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size); | |||||
| int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementSquaredDifference(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementNotEqual(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementEqual(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementEqual(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementLess(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementLess(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | ||||
| ArithmeticParameter *param); | ArithmeticParameter *param); | ||||
| int ElementLessEqual(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementGreater(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementGreater(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementGreaterEqual(float *input0, float *input1, float *output, int element_size); | |||||
| int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size); | |||||
| int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | ||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/batchnorm_parameter.h" | #include "nnacl/batchnorm_parameter.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | ||||
| void *output) { | void *output) { | ||||
| @@ -21,7 +21,7 @@ | |||||
| #include "nnacl/fp32/matmul.h" | #include "nnacl/fp32/matmul.h" | ||||
| // fp32 conv common | // fp32 conv common | ||||
| void ConvFp32(float *input_data, float *packed_input, float *packed_weight, const float *bias_data, | |||||
| void ConvFp32(float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | |||||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param) { | float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param) { | ||||
| int kernel_h = conv_param->kernel_h_; | int kernel_h = conv_param->kernel_h_; | ||||
| int kernel_w = conv_param->kernel_w_; | int kernel_w = conv_param->kernel_w_; | ||||
| @@ -70,7 +70,7 @@ void ConvFp32(float *input_data, float *packed_input, float *packed_weight, cons | |||||
| } | } | ||||
| // fp32 conv winograd | // fp32 conv winograd | ||||
| void ConvWinogardFp32(float *input_data, float *trans_weight, const float *bias_data, float *output_data, | |||||
| void ConvWinogardFp32(float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | |||||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | ||||
| OutputTransFunc out_func) { | OutputTransFunc out_func) { | ||||
| int thread_num = conv_param->thread_num_; | int thread_num = conv_param->thread_num_; | ||||
| @@ -34,11 +34,11 @@ extern "C" { | |||||
| #endif | #endif | ||||
| // fp32 convolution common (im2col+gemm) | // fp32 convolution common (im2col+gemm) | ||||
| void ConvFp32(float *input_data, float *packed_input, float *packed_weight, const float *bias_data, | |||||
| void ConvFp32(float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | |||||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param); | float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param); | ||||
| // fp32 convolution winograd | // fp32 convolution winograd | ||||
| void ConvWinogardFp32(float *input_data, float *trans_weight, const float *bias_data, float *output_data, | |||||
| void ConvWinogardFp32(float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | |||||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | ||||
| OutputTransFunc out_func); | OutputTransFunc out_func); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| @@ -56,7 +56,9 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| /* winograd AT */ | /* winograd AT */ | ||||
| unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | ||||
| if (unit->winograd_.AT_ == NULL) { | if (unit->winograd_.AT_ == NULL) { | ||||
| free(current_unit_weight); | |||||
| if (current_unit_weight != NULL) { | |||||
| free(current_unit_weight); | |||||
| } | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| memcpy(unit->winograd_.AT_, matrix_at, unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | memcpy(unit->winograd_.AT_, matrix_at, unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | ||||
| @@ -64,8 +66,12 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| /* winograd BT */ | /* winograd BT */ | ||||
| unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | ||||
| if (unit->winograd_.BT_ == NULL) { | if (unit->winograd_.BT_ == NULL) { | ||||
| free(current_unit_weight); | |||||
| free(unit->winograd_.AT_); | |||||
| if (current_unit_weight != NULL) { | |||||
| free(current_unit_weight); | |||||
| } | |||||
| if (unit->winograd_.AT_ != NULL) { | |||||
| free(unit->winograd_.AT_); | |||||
| } | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| memcpy(unit->winograd_.BT_, matrix_bt, unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | memcpy(unit->winograd_.BT_, matrix_bt, unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | ||||
| @@ -74,9 +80,15 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| size = conv_param->input_channel_ * conv_param->output_channel_ * unit->winograd_.kh_ * unit->winograd_.kw_; | size = conv_param->input_channel_ * conv_param->output_channel_ * unit->winograd_.kh_ * unit->winograd_.kw_; | ||||
| float *winograd_unit_weight = (float *)malloc(size * sizeof(float)); | float *winograd_unit_weight = (float *)malloc(size * sizeof(float)); | ||||
| if (winograd_unit_weight == NULL) { | if (winograd_unit_weight == NULL) { | ||||
| free(current_unit_weight); | |||||
| free(unit->winograd_.AT_); | |||||
| free(unit->winograd_.BT_); | |||||
| if (current_unit_weight != NULL) { | |||||
| free(current_unit_weight); | |||||
| } | |||||
| if (unit->winograd_.AT_ != NULL) { | |||||
| free(unit->winograd_.AT_); | |||||
| } | |||||
| if (unit->winograd_.BT_ != NULL) { | |||||
| free(unit->winograd_.BT_); | |||||
| } | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| WinogradWeightTransform(current_unit_weight, winograd_unit_weight, matrix_g, matrix_gt, C4NUM, unit->winograd_.kh_, | WinogradWeightTransform(current_unit_weight, winograd_unit_weight, matrix_g, matrix_gt, C4NUM, unit->winograd_.kh_, | ||||
| @@ -105,7 +117,9 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| } | } | ||||
| } | } | ||||
| free(current_unit_weight); | |||||
| if (current_unit_weight != NULL) { | |||||
| free(current_unit_weight); | |||||
| } | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| @@ -317,7 +331,7 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s | |||||
| return; | return; | ||||
| } | } | ||||
| void _deConvWinograd(float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, float *at_buf, | |||||
| void _deConvWinograd(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, float *at_buf, | |||||
| float *a_mid_buf, float *trans_a_buf, bool *transfered, float *bt_buf, float *b_tmp_buf, | float *a_mid_buf, float *trans_a_buf, bool *transfered, float *bt_buf, float *b_tmp_buf, | ||||
| int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) { | int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) { | ||||
| int winograd_plane = unit_size * unit_size; | int winograd_plane = unit_size * unit_size; | ||||
| @@ -357,8 +371,8 @@ void _deConvWinograd(float *tile_in, float *tile_out, float *weight_buf, float * | |||||
| return; | return; | ||||
| } | } | ||||
| void _deConvCommon(float *tile_in, float *tile_out, float *weight, float *tmp_buf, int h_start, int w_start, int h_size, | |||||
| int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| void _deConvCommon(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start, | |||||
| int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| int count = deconv_param->oc_div4_ * w_size * h_size; | int count = deconv_param->oc_div4_ * w_size * h_size; | ||||
| int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | ||||
| int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | ||||
| @@ -274,9 +274,9 @@ int NmsMultiClassesFast(const int num_boxes, const int num_classes_with_bg, cons | |||||
| return output_num; | return output_num; | ||||
| } | } | ||||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, float *input_scores, | |||||
| float *input_anchors, float *output_boxes, float *output_classes, float *output_scores, | |||||
| float *output_num, DetectionPostProcessParameter *param) { | |||||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, | |||||
| const float *input_scores, float *input_anchors, float *output_boxes, float *output_classes, | |||||
| float *output_scores, float *output_num, DetectionPostProcessParameter *param) { | |||||
| BboxCenter scaler; | BboxCenter scaler; | ||||
| scaler.y = param->y_scale_; | scaler.y = param->y_scale_; | ||||
| scaler.x = param->x_scale_; | scaler.x = param->x_scale_; | ||||
| @@ -43,9 +43,9 @@ typedef struct { | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, float *input_scores, | |||||
| float *input_anchors, float *output_boxes, float *output_classes, float *output_scores, | |||||
| float *output_num, DetectionPostProcessParameter *param); | |||||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, | |||||
| const float *input_scores, float *input_anchors, float *output_boxes, float *output_classes, | |||||
| float *output_scores, float *output_num, DetectionPostProcessParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| void Calculate_Data(float *input_data, float *output_data, int num, EluParameter *parameter) { | |||||
| void Calculate_Data(const float *input_data, float *output_data, int num, EluParameter *parameter) { | |||||
| output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num]; | output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num]; | ||||
| } | } | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int Exp(float *input_data, float *output_data, ExpParameter *parameter, int task_id) { | |||||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id) { | |||||
| if (parameter->scale_ == 1) { | if (parameter->scale_ == 1) { | ||||
| for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) { | for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) { | ||||
| output_data[i] = expf(input_data[i]); | output_data[i] = expf(input_data[i]); | ||||
| @@ -33,7 +33,7 @@ typedef struct ExpParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int Exp(float *input_data, float *output_data, ExpParameter *parameter, int task_id); | |||||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int ExpandDims(void *input_ptr, void *output_ptr, size_t data_size) { | |||||
| int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) { | |||||
| memcpy(output_ptr, input_ptr, data_size); | memcpy(output_ptr, input_ptr, data_size); | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| @@ -27,7 +27,7 @@ typedef struct ExpandDimsParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ExpandDims(void *input_ptr, void *output_ptr, size_t data_size); | |||||
| int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| inline int Stride(int *shape, int rank, int index) { | |||||
| inline int Stride(const int *shape, int rank, int index) { | |||||
| int i, stride = 1; | int i, stride = 1; | ||||
| for (i = index + 1; i < rank; ++i) { | for (i = index + 1; i < rank; ++i) { | ||||
| stride *= shape[i]; | stride *= shape[i]; | ||||
| @@ -26,7 +26,7 @@ inline int Stride(int *shape, int rank, int index) { | |||||
| return stride; | return stride; | ||||
| } | } | ||||
| int Gather(float *input, int outer_size, int inner_size, int limit, int *indices, int indices_element_size, | |||||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| float *output) { | float *output) { | ||||
| int i, m; | int i, m; | ||||
| for (m = 0; m < outer_size; ++m) { | for (m = 0; m < outer_size; ++m) { | ||||
| @@ -42,7 +42,7 @@ int Gather(float *input, int outer_size, int inner_size, int limit, int *indices | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, int *indices, | |||||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | |||||
| int indices_element_size, int32_t *output) { | int indices_element_size, int32_t *output) { | ||||
| int i, m; | int i, m; | ||||
| for (m = 0; m < outer_size; ++m) { | for (m = 0; m < outer_size; ++m) { | ||||
| @@ -22,10 +22,10 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int Gather(float *input, int outer_size, int inner_size, int limit, int *indices, int indices_element_size, | |||||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| float *output); | float *output); | ||||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, int *indices, int indices_element_size, | |||||
| int32_t *output); | |||||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | |||||
| int indices_element_size, int32_t *output); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int GatherNd(float *input, float *output, int *in_offset, int area, int count) { | |||||
| int GatherNd(const float *input, float *output, int *in_offset, int area, int count) { | |||||
| int i = 0; | int i = 0; | ||||
| for (i = 0; i < count; i++) { | for (i = 0; i < count; i++) { | ||||
| (void)memcpy(output + area * i, input + in_offset[i], area * sizeof(float)); | (void)memcpy(output + area * i, input + in_offset[i], area * sizeof(float)); | ||||
| @@ -27,7 +27,7 @@ typedef struct GatherNdParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int GatherNd(float *input, float *output, int *in_offset, int area, int count); | |||||
| int GatherNd(const float *input, float *output, int *in_offset, int area, int count); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -79,13 +79,13 @@ void ElementMulAcc(const float *input0, const float *input1, float *output, int | |||||
| } | } | ||||
| } | } | ||||
| void UpdataState(float *cell_state, float *forget_gate, float *input_gate, float *cell_gate, int batch, | |||||
| void UpdataState(float *cell_state, float *forget_gate, const float *input_gate, float *cell_gate, int batch, | |||||
| int hidden_size) { | int hidden_size) { | ||||
| ElementMul(forget_gate, cell_state, cell_state, batch * hidden_size); | ElementMul(forget_gate, cell_state, cell_state, batch * hidden_size); | ||||
| ElementMulAcc(input_gate, cell_gate, cell_state, batch * hidden_size); | ElementMulAcc(input_gate, cell_gate, cell_state, batch * hidden_size); | ||||
| } | } | ||||
| void UpdataOutput(float *cell_state, float *output_gate, float *hidden_state, int batch, int hidden_size) { | |||||
| void UpdataOutput(const float *cell_state, float *output_gate, float *hidden_state, int batch, int hidden_size) { | |||||
| Tanh(cell_state, batch * hidden_size, hidden_state); | Tanh(cell_state, batch * hidden_size, hidden_state); | ||||
| ElementMul(hidden_state, output_gate, hidden_state, batch * hidden_size); | ElementMul(hidden_state, output_gate, hidden_state, batch * hidden_size); | ||||
| } | } | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "nnacl/fp32/matmul.h" | #include "nnacl/fp32/matmul.h" | ||||
| void RowMajor2ColMajor(float *src_ptr, float *dst_ptr, int row, int col) { | |||||
| void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col) { | |||||
| for (int r = 0; r < row; ++r) { | for (int r = 0; r < row; ++r) { | ||||
| for (int c = 0; c < col; ++c) { | for (int c = 0; c < col; ++c) { | ||||
| dst_ptr[c * row + r] = src_ptr[r * col + c]; | dst_ptr[c * row + r] = src_ptr[r * col + c]; | ||||
| @@ -29,7 +29,7 @@ extern "C" { | |||||
| void MatMulOpt(const float *a, const float *b, float *c, const float *bias, ActType act_type, int deep, int row, | void MatMulOpt(const float *a, const float *b, float *c, const float *bias, ActType act_type, int deep, int row, | ||||
| int col, size_t stride, int out_type); | int col, size_t stride, int out_type); | ||||
| void MatVecMul(const float *a, const float *b, float *c, const float *bias, ActType act_type, int depth, int col); | void MatVecMul(const float *a, const float *b, float *c, const float *bias, ActType act_type, int depth, int col); | ||||
| void RowMajor2ColMajor(float *src_ptr, float *dst_ptr, int row, int col); | |||||
| void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col); | |||||
| void RowMajor2Row4Major(float *src_ptr, float *dst_ptr, int row, int col); | void RowMajor2Row4Major(float *src_ptr, float *dst_ptr, int row, int col); | ||||
| void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col); | void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col); | ||||
| void RowMajor2Row12Major(float *src_ptr, float *dst_ptr, int row, int col); | void RowMajor2Row12Major(float *src_ptr, float *dst_ptr, int row, int col); | ||||
| @@ -65,7 +65,7 @@ int PrepareResizeBilinear(const int *input_shape, const int *output_shape, bool | |||||
| } | } | ||||
| int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | ||||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| float *x_left_weights, int n_h_begin, int n_h_end) { | float *x_left_weights, int n_h_begin, int n_h_end) { | ||||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL || | if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL || | ||||
| y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) { | y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) { | ||||
| @@ -155,7 +155,7 @@ int ResizeBilinear(const float *input_data, float *output_data, const int *input | |||||
| } | } | ||||
| int InterpRow(const float *src_line, float *linear_output, int new_width, float *x_left_weights, int *x_lefts, | int InterpRow(const float *src_line, float *linear_output, int new_width, float *x_left_weights, int *x_lefts, | ||||
| int *x_rights, int in_c) { | |||||
| const int *x_rights, int in_c) { | |||||
| int w; | int w; | ||||
| for (w = 0; w < new_width; w++) { | for (w = 0; w < new_width; w++) { | ||||
| int c = 0; | int c = 0; | ||||
| @@ -208,7 +208,7 @@ int InterpCol(const float *bottom_line, const float *top_line, float *output, in | |||||
| } | } | ||||
| int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | ||||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end) { | float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end) { | ||||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL || | if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL || | ||||
| y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) { | y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) { | ||||
| @@ -30,11 +30,11 @@ int PrepareResizeBilinear(const int *input_shape, const int *output_shape, bool | |||||
| int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, float *x_left_weights); | int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, float *x_left_weights); | ||||
| int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | ||||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| float *x_left_weights, int n_h_begin, int n_h_end); | float *x_left_weights, int n_h_begin, int n_h_end); | ||||
| int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | ||||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||||
| float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end); | float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end); | ||||
| int ResizeNearestNeighbor(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | int ResizeNearestNeighbor(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | ||||
| @@ -20,7 +20,7 @@ | |||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| int ROIPooling(float *in_ptr, float *out_ptr, float *roi, float *max_c, int tid, ROIPoolingParameter *param) { | |||||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param) { | |||||
| int num_rois = param->output_n_; | int num_rois = param->output_n_; | ||||
| int units = UP_DIV(num_rois, param->thread_num_); | int units = UP_DIV(num_rois, param->thread_num_); | ||||
| int roi_st = tid * units; | int roi_st = tid * units; | ||||
| @@ -40,7 +40,7 @@ typedef struct ROIPoolingParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ROIPooling(float *in_ptr, float *out_ptr, float *roi, float *max_c, int tid, ROIPoolingParameter *param); | |||||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #ifdef ENABLE_ARM | #ifdef ENABLE_ARM | ||||
| #include <arm_neon.h> | #include <arm_neon.h> | ||||
| #endif | #endif | ||||
| void ScaleInner(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||||
| void ScaleInner(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||||
| int axis_size, int inner_size) { | int axis_size, int inner_size) { | ||||
| for (int out = outer_start; out < outer_end; out++) { | for (int out = outer_start; out < outer_end; out++) { | ||||
| int out_offset = out * axis_size * inner_size; | int out_offset = out * axis_size * inner_size; | ||||
| @@ -43,7 +43,7 @@ void ScaleInner(float *in_data, float *out_data, float *scale, float *offset, in | |||||
| } | } | ||||
| } | } | ||||
| void ScaleAxis(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||||
| void ScaleAxis(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||||
| int axis_size) { | int axis_size) { | ||||
| for (int out = outer_start; out < outer_end; out++) { | for (int out = outer_start; out < outer_end; out++) { | ||||
| int out_offset = out * axis_size; | int out_offset = out * axis_size; | ||||
| @@ -78,7 +78,7 @@ void DoScale(float *in_data, float *out_data, float *scale, float *offset, int t | |||||
| } | } | ||||
| } | } | ||||
| void ScaleInnerRelu(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||||
| void ScaleInnerRelu(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||||
| int axis_size, int inner_size) { | int axis_size, int inner_size) { | ||||
| #ifdef ENABLE_ARM64 | #ifdef ENABLE_ARM64 | ||||
| float32x4_t zeros = {0, 0, 0, 0}; | float32x4_t zeros = {0, 0, 0, 0}; | ||||
| @@ -108,7 +108,7 @@ void ScaleInnerRelu(float *in_data, float *out_data, float *scale, float *offset | |||||
| } | } | ||||
| } | } | ||||
| void ScaleAxisRelu(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||||
| void ScaleAxisRelu(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||||
| int axis_size) { | int axis_size) { | ||||
| #ifdef ENABLE_ARM64 | #ifdef ENABLE_ARM64 | ||||
| float32x4_t zeros = {0, 0, 0, 0}; | float32x4_t zeros = {0, 0, 0, 0}; | ||||
| @@ -149,7 +149,7 @@ void DoScaleRelu(float *in_data, float *out_data, float *scale, float *offset, i | |||||
| } | } | ||||
| } | } | ||||
| void ScaleInnerRelu6(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||||
| void ScaleInnerRelu6(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||||
| int axis_size, int inner_size) { | int axis_size, int inner_size) { | ||||
| #ifdef ENABLE_ARM64 | #ifdef ENABLE_ARM64 | ||||
| float32x4_t zeros = {0, 0, 0, 0}; | float32x4_t zeros = {0, 0, 0, 0}; | ||||
| @@ -180,7 +180,7 @@ void ScaleInnerRelu6(float *in_data, float *out_data, float *scale, float *offse | |||||
| } | } | ||||
| } | } | ||||
| void ScaleAxisRelu6(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||||
| void ScaleAxisRelu6(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||||
| int axis_size) { | int axis_size) { | ||||
| #ifdef ENABLE_ARM64 | #ifdef ENABLE_ARM64 | ||||
| float32x4_t zeros = {0, 0, 0, 0}; | float32x4_t zeros = {0, 0, 0, 0}; | ||||
| @@ -17,7 +17,6 @@ | |||||
| #include "nnacl/fp32/slice.h" | #include "nnacl/fp32/slice.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| void PadSliceParameterTo4D(SliceParameter *param) { | void PadSliceParameterTo4D(SliceParameter *param) { | ||||
| int32_t begin[DIMENSION_4D]; | int32_t begin[DIMENSION_4D]; | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "nnacl/fp32/space_to_batch.h" | #include "nnacl/fp32/space_to_batch.h" | ||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int *in_shape, int *out_shape) { | |||||
| void DoSpaceToBatchNHWC(const float *input, float *output, const int *block_sizes, int *in_shape, int *out_shape) { | |||||
| int out_dim0 = out_shape[0]; | int out_dim0 = out_shape[0]; | ||||
| int out_dim1 = out_shape[1]; | int out_dim1 = out_shape[1]; | ||||
| int out_dim2 = out_shape[2]; | int out_dim2 = out_shape[2]; | ||||
| @@ -45,7 +45,7 @@ void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int | |||||
| } | } | ||||
| } | } | ||||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape) { | |||||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, const int *padding, int *out_shape) { | |||||
| int in_h = in_shape[1]; | int in_h = in_shape[1]; | ||||
| int in_w = in_shape[2]; | int in_w = in_shape[2]; | ||||
| int in_c = in_shape[3]; | int in_c = in_shape[3]; | ||||
| @@ -63,8 +63,8 @@ void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, | |||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| size_t in_offset0 = i * in_strides[0]; | size_t in_offset0 = i * in_strides[0]; | ||||
| for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) { | for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) { | ||||
| memset(output + out_offset, 0, ped_h_size); | |||||
| out_offset += ped_h_num; | |||||
| memset(output + out_offset, 0, ped_h_size); | |||||
| out_offset += ped_h_num; | |||||
| } | } | ||||
| for (int j = 0; j < in_h; ++j) { | for (int j = 0; j < in_h; ++j) { | ||||
| size_t in_offset1 = in_offset0 + j * in_strides[1]; | size_t in_offset1 = in_offset0 + j * in_strides[1]; | ||||
| @@ -30,8 +30,8 @@ typedef struct SpaceToBatchParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int *in_shape, int *out_shape); | |||||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape); | |||||
| void DoSpaceToBatchNHWC(const float *input, float *output, const int *block_sizes, int *in_shape, int *out_shape); | |||||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, const int *padding, int *out_shape); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -15,9 +15,8 @@ | |||||
| */ | */ | ||||
| #include "nnacl/fp32/sparse_to_dense.h" | #include "nnacl/fp32/sparse_to_dense.h" | ||||
| void SparseToDense(int **sparse_indices, int *output_shape, | |||||
| float *sparse_values, float default_value, float *output, | |||||
| bool isScalar, int index_start, int index_end, int out_width) { | |||||
| void SparseToDense(int **sparse_indices, int *output_shape, const float *sparse_values, float default_value, | |||||
| float *output, bool isScalar, int index_start, int index_end, int out_width) { | |||||
| for (int i = index_start; i < index_end; i++) { | for (int i = index_start; i < index_end; i++) { | ||||
| for (int j = 0; j < out_width; j++) { | for (int j = 0; j < out_width; j++) { | ||||
| output[i * out_width + j] = default_value; | output[i * out_width + j] = default_value; | ||||
| @@ -31,14 +30,12 @@ void SparseToDense(int **sparse_indices, int *output_shape, | |||||
| int index; | int index; | ||||
| if (isScalar == true) { | if (isScalar == true) { | ||||
| for (int i = index_start; i < index_end; i++) { | for (int i = index_start; i < index_end; i++) { | ||||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + | |||||
| d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||||
| output[index] = sparse_values[0]; | output[index] = sparse_values[0]; | ||||
| } | } | ||||
| } else { | } else { | ||||
| for (int i = index_start; i < index_end; i++) { | for (int i = index_start; i < index_end; i++) { | ||||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + | |||||
| d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||||
| output[index] = sparse_values[i]; | output[index] = sparse_values[i]; | ||||
| } | } | ||||
| } | } | ||||
| @@ -21,9 +21,8 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void SparseToDense(int **sparse_indices_vect, int *output_shape, | |||||
| float *sparse_values, float default_value, float *output, | |||||
| bool isScalar, int index_start, int index_end, int out_width); | |||||
| void SparseToDense(int **sparse_indices_vect, int *output_shape, const float *sparse_values, float default_value, | |||||
| float *output, bool isScalar, int index_start, int index_end, int out_width); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -17,7 +17,7 @@ | |||||
| #include "nnacl/fp32/tile.h" | #include "nnacl/fp32/tile.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void DoCopyData(float *input_data, float *output_data, size_t size, size_t multiple) { | |||||
| void DoCopyData(const float *input_data, float *output_data, size_t size, size_t multiple) { | |||||
| float *out_data = output_data; | float *out_data = output_data; | ||||
| for (size_t i = 0; i < multiple; ++i) { | for (size_t i = 0; i < multiple; ++i) { | ||||
| (void)memcpy(out_data, input_data, size * sizeof(float)); | (void)memcpy(out_data, input_data, size * sizeof(float)); | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "nnacl/fp32/unique.h" | #include "nnacl/fp32/unique.h" | ||||
| int Find(float *array, int len, float target) { | |||||
| int Find(const float *array, int len, float target) { | |||||
| for (int i = 0; i < len; ++i) { | for (int i = 0; i < len; ++i) { | ||||
| if (array[i] == target) { | if (array[i] == target) { | ||||
| return i; | return i; | ||||
| @@ -25,7 +25,7 @@ int Find(float *array, int len, float target) { | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| void Unique(float *input, int input_len, float *output0, int *output0_len, int *output1) { | |||||
| void Unique(const float *input, int input_len, float *output0, int *output0_len, int *output1) { | |||||
| *output0_len = 0; | *output0_len = 0; | ||||
| for (int i = 0; i < input_len; i++) { | for (int i = 0; i < input_len; i++) { | ||||
| int idx = Find(output0, *output0_len, input[i]); | int idx = Find(output0, *output0_len, input[i]); | ||||
| @@ -26,7 +26,7 @@ typedef struct UniqueParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Unique(float *input, int input_len, float *output0, int *output0_len, int *output1); | |||||
| void Unique(const float *input, int input_len, float *output0, int *output0_len, int *output1); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -491,8 +491,8 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16 | |||||
| /*conv depthwise sliding window perchannel int8 begin*/ | /*conv depthwise sliding window perchannel int8 begin*/ | ||||
| void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, | void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, | ||||
| int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, | int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, | ||||
| int32_t *out_zp, int *out_multiplier, int *left_shift, int *right_shift, int32_t *acc_min, | |||||
| int32_t *acc_max) { | |||||
| int32_t *out_zp, int *out_multiplier, int *left_shift, const int *right_shift, | |||||
| int32_t *acc_min, int32_t *acc_max) { | |||||
| int tmp_buffer[C8NUM]; | int tmp_buffer[C8NUM]; | ||||
| for (int i = 0; i < C8NUM; i++) { | for (int i = 0; i < C8NUM; i++) { | ||||
| tmp_buffer[i] = 0; | tmp_buffer[i] = 0; | ||||
| @@ -94,7 +94,7 @@ void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, in | |||||
| unit_size = UP_ROUND(kernel_plane * in_channel, C16NUM); | unit_size = UP_ROUND(kernel_plane * in_channel, C16NUM); | ||||
| } | } | ||||
| #endif | #endif | ||||
| bool per_channel; | |||||
| bool per_channel = false; | |||||
| if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) { | if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) { | ||||
| input_sum_offset = tile_n * up_round_oc; | input_sum_offset = tile_n * up_round_oc; | ||||
| per_channel = true; | per_channel = true; | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "nnacl/int8/depth_to_space_int8.h" | #include "nnacl/int8/depth_to_space_int8.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, | |||||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, DepthToSpaceParameter *param, | |||||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | ||||
| int32_t block_size = param->block_size_; | int32_t block_size = param->block_size_; | ||||
| int32_t in_shape_dim2 = in_shape[2]; | int32_t in_shape_dim2 = in_shape[2]; | ||||
| @@ -22,7 +22,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, | |||||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, DepthToSpaceParameter *param, | |||||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg); | QuantArg *in_quant_arg, QuantArg *out_quant_arg); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int GatherNdInt8(int8_t *input, int8_t *output, int *in_offset, int area, int count, GatherQuantArg param) { | |||||
| int GatherNdInt8(int8_t *input, int8_t *output, const int *in_offset, int area, int count, GatherQuantArg param) { | |||||
| double alpha = param.alpha_; | double alpha = param.alpha_; | ||||
| int z1 = param.zp_in_; | int z1 = param.zp_in_; | ||||
| int z2 = param.zp_out_; | int z2 = param.zp_out_; | ||||
| @@ -23,7 +23,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int GatherNdInt8(int8_t *in_data, int8_t *out_data, int *in_offset, int area, int count, GatherQuantArg param); | |||||
| int GatherNdInt8(int8_t *in_data, int8_t *out_data, const int *in_offset, int area, int count, GatherQuantArg param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include "nnacl/quantization/quantize.h" | #include "nnacl/quantization/quantize.h" | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, int *indices, | |||||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, const int *indices, | |||||
| int indices_element_size, GatherQuantArg para) { | int indices_element_size, GatherQuantArg para) { | ||||
| double alpha = para.alpha_; | double alpha = para.alpha_; | ||||
| int z1 = para.zp_in_; | int z1 = para.zp_in_; | ||||
| @@ -23,7 +23,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, int *indices, | |||||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, const int *indices, | |||||
| int indices_element_size, GatherQuantArg para); | int indices_element_size, GatherQuantArg para); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -301,7 +301,7 @@ void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, Dat | |||||
| } | } | ||||
| // dst: bias + depth*input_zp*weight_zp - input_zp*weight_col_sums | // dst: bias + depth*input_zp*weight_zp - input_zp*weight_col_sums | ||||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, int *bias, int *dst, | |||||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, const int *bias, int *dst, | |||||
| DataOrder order) { | DataOrder order) { | ||||
| for (int c = 0; c < col; ++c) { | for (int c = 0; c < col; ++c) { | ||||
| int sum = 0; | int sum = 0; | ||||
| @@ -35,7 +35,7 @@ void MatMulInt8_16x4_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row | |||||
| void RowMajor2Row16x4MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col); | void RowMajor2Row16x4MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col); | ||||
| void RowMajor2Col16x4MajorInt8(int8_t *src, int row, int col, int8_t *dst); | void RowMajor2Col16x4MajorInt8(int8_t *src, int row, int col, int8_t *dst); | ||||
| void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, DataOrder order); | void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, DataOrder order); | ||||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, int *bias, int *dst, | |||||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, const int *bias, int *dst, | |||||
| DataOrder order); | DataOrder order); | ||||
| /* 8x4 4x8 -> 8x8 */ | /* 8x4 4x8 -> 8x8 */ | ||||
| @@ -17,7 +17,6 @@ | |||||
| #include "nnacl/int8/slice_int8.h" | #include "nnacl/int8/slice_int8.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/quantization/fixed_point.h" | #include "nnacl/quantization/fixed_point.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| int SliceInt8NoParallel(const int8_t *input, int8_t *output, SliceParameter *param) { | int SliceInt8NoParallel(const int8_t *input, int8_t *output, SliceParameter *param) { | ||||
| double input_scale = param->quant_arg_.in_args_.scale_; | double input_scale = param->quant_arg_.in_args_.scale_; | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/quantization/fixed_point.h" | #include "nnacl/quantization/fixed_point.h" | ||||
| #include "nnacl/quantization/quantize.h" | #include "nnacl/quantization/quantize.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, | int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, | ||||
| SoftmaxQuantArg quant_param, SoftmaxParameter *parameter) { | SoftmaxQuantArg quant_param, SoftmaxParameter *parameter) { | ||||
| @@ -16,7 +16,8 @@ | |||||
| #include "nnacl/int8/space_to_batch_int8.h" | #include "nnacl/int8/space_to_batch_int8.h" | ||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_sizes, int *in_shape, int *out_shape) { | |||||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, int *in_shape, | |||||
| int *out_shape) { | |||||
| int out_dim0 = out_shape[0]; | int out_dim0 = out_shape[0]; | ||||
| int out_dim1 = out_shape[1]; | int out_dim1 = out_shape[1]; | ||||
| int out_dim2 = out_shape[2]; | int out_dim2 = out_shape[2]; | ||||
| @@ -45,8 +46,8 @@ void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_size | |||||
| } | } | ||||
| } | } | ||||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, int *padding, int *out_shape, | |||||
| int32_t zp) { | |||||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, const int *padding, | |||||
| int *out_shape, int32_t zp) { | |||||
| int in_h = in_shape[1]; | int in_h = in_shape[1]; | ||||
| int in_w = in_shape[2]; | int in_w = in_shape[2]; | ||||
| int in_c = in_shape[3]; | int in_c = in_shape[3]; | ||||
| @@ -21,9 +21,9 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_sizes, int *in_shape, int *out_shape); | |||||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, int *padding, int *out_shape, | |||||
| int32_t zp); | |||||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, int *in_shape, int *out_shape); | |||||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, const int *padding, | |||||
| int *out_shape, int32_t zp); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -17,7 +17,6 @@ | |||||
| #include "nnacl/unsqueeze_parameter.h" | #include "nnacl/unsqueeze_parameter.h" | ||||
| #include "nnacl/int8/unsqueeze_int8.h" | #include "nnacl/int8/unsqueeze_int8.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | |||||
| int Int8Unsqueeze(int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParameter *para_, size_t data_size, int task_id) { | int Int8Unsqueeze(int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParameter *para_, size_t data_size, int task_id) { | ||||
| float output_scale = para_->quant_arg.out_quant_args_.scale_; | float output_scale = para_->quant_arg.out_quant_args_.scale_; | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include "nnacl/winograd_utils.h" | #include "nnacl/winograd_utils.h" | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| void Polynomial(float *interval, float *m, int degree) { | |||||
| void Polynomial(const float *interval, float *m, int degree) { | |||||
| for (int i = 0; i < degree; ++i) { | for (int i = 0; i < degree; ++i) { | ||||
| float mul = 1; | float mul = 1; | ||||
| for (int j = 0; j < degree; ++j) { | for (int j = 0; j < degree; ++j) { | ||||
| @@ -30,7 +30,7 @@ void Polynomial(float *interval, float *m, int degree) { | |||||
| } | } | ||||
| } | } | ||||
| void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree) { | |||||
| void DiagonalPlusMatrix(const float *matrix, float *diagonal_matrix, int degree) { | |||||
| int data_num = (degree + 1) * (degree + 1); | int data_num = (degree + 1) * (degree + 1); | ||||
| memset(diagonal_matrix, 0, data_num * sizeof(float)); | memset(diagonal_matrix, 0, data_num * sizeof(float)); | ||||
| for (int i = 0; i < degree; ++i) { | for (int i = 0; i < degree; ++i) { | ||||
| @@ -41,7 +41,7 @@ void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree) { | |||||
| diagonal_matrix[data_num - 1] = 1; | diagonal_matrix[data_num - 1] = 1; | ||||
| } | } | ||||
| void ResidueMatrix(float *interval, float *b, int row, int col) { | |||||
| void ResidueMatrix(const float *interval, float *b, int row, int col) { | |||||
| // row : input unit, col : output_unit | // row : input unit, col : output_unit | ||||
| // result : matrix b | // result : matrix b | ||||
| int len = row * col; | int len = row * col; | ||||
| @@ -87,7 +87,7 @@ int LT(float *poly_array, float *matrix_lt, int n) { | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| void T(float *poly_array, float *matrix_t, int n) { | |||||
| void T(const float *poly_array, float *matrix_t, int n) { | |||||
| memset(matrix_t, 0, n * (n + 1) * sizeof(float)); | memset(matrix_t, 0, n * (n + 1) * sizeof(float)); | ||||
| for (int i = 0; i < n; ++i) { | for (int i = 0; i < n; ++i) { | ||||
| for (int j = 0; j < n + 1; ++j) { | for (int j = 0; j < n + 1; ++j) { | ||||
| @@ -148,7 +148,7 @@ void GenerateIntervalArray(float *array, float interval, int degree) { | |||||
| } | } | ||||
| } | } | ||||
| void MatrixTranspose(float *matrix, float *trans_matrix, int row, int col) { | |||||
| void MatrixTranspose(const float *matrix, float *trans_matrix, int row, int col) { | |||||
| for (int i = 0; i < col; ++i) { | for (int i = 0; i < col; ++i) { | ||||
| for (int j = 0; j < row; ++j) { | for (int j = 0; j < row; ++j) { | ||||
| trans_matrix[i * row + j] = matrix[j * col + i]; | trans_matrix[i * row + j] = matrix[j * col + i]; | ||||
| @@ -255,7 +255,7 @@ void MatrixMultiplyVec(const float32x4_t *matrix_a, const float32x4_t *matrix_b, | |||||
| } | } | ||||
| #endif | #endif | ||||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, float *matrix_gt, | |||||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, const float *matrix_gt, | |||||
| int oc_block, int input_unit, int kernel_unit, int channel, int batch, bool pack) { | int oc_block, int input_unit, int kernel_unit, int channel, int batch, bool pack) { | ||||
| // original weight format : ohwi | // original weight format : ohwi | ||||
| int oc_block_num = UP_DIV(batch, oc_block); | int oc_block_num = UP_DIV(batch, oc_block); | ||||
| @@ -26,21 +26,21 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Polynomial(float *interval, float *m, int degree); | |||||
| void Polynomial(const float *interval, float *m, int degree); | |||||
| void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree); | |||||
| void DiagonalPlusMatrix(const float *matrix, float *diagonal_matrix, int degree); | |||||
| void ResidueMatrix(float *interval, float *b, int row, int col); | |||||
| void ResidueMatrix(const float *interval, float *b, int row, int col); | |||||
| int LT(float *poly_array, float *matrix_lt, int n); | int LT(float *poly_array, float *matrix_lt, int n); | ||||
| void T(float *poly_array, float *matrix_t, int n); | |||||
| void T(const float *poly_array, float *matrix_t, int n); | |||||
| int B(float *poly_array, float *matrix_b, int in_unit); | int B(float *poly_array, float *matrix_b, int in_unit); | ||||
| void GenerateIntervalArray(float *array, float interval, int degree); | void GenerateIntervalArray(float *array, float interval, int degree); | ||||
| void MatrixTranspose(float *matrix, float *trans_matrix, int row, int col); | |||||
| void MatrixTranspose(const float *matrix, float *trans_matrix, int row, int col); | |||||
| void MatrixMultiply(const float *matrix_a, const float *matrix_b, float *matrix_c, int m, int k, int n); | void MatrixMultiply(const float *matrix_a, const float *matrix_b, float *matrix_c, int m, int k, int n); | ||||
| @@ -49,7 +49,7 @@ int CookToomFilter(float *matrix_a, float *matrix_at, float *matrix_b, float *ma | |||||
| void MatrixMultiplyWinograd(const float *matix_a, const float *matrix_b, float *matrix_c, int m, int k, int n, | void MatrixMultiplyWinograd(const float *matix_a, const float *matrix_b, float *matrix_c, int m, int k, int n, | ||||
| int in_channel, int c4_channel); | int in_channel, int c4_channel); | ||||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, float *matrix_gt, | |||||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, const float *matrix_gt, | |||||
| int oc_block, int input_unit_, int kernel_unit_, int channel, int batch, bool pack); | int oc_block, int input_unit_, int kernel_unit_, int channel, int batch, bool pack); | ||||
| #ifdef ENABLE_ARM | #ifdef ENABLE_ARM | ||||
| @@ -36,6 +36,9 @@ float StdPowerImpl(float x, float exponent) { return pow(x, exponent); } | |||||
| void Power(const float *input, const float *exponent, float *output, int len, float scale, float shift, | void Power(const float *input, const float *exponent, float *output, int len, float scale, float shift, | ||||
| bool broadcast) { | bool broadcast) { | ||||
| if (input == NULL || exponent == NULL) { | |||||
| return; | |||||
| } | |||||
| if (broadcast) { | if (broadcast) { | ||||
| if (CheckInteger(*exponent)) { | if (CheckInteger(*exponent)) { | ||||
| for (int i = 0; i < len; ++i) { | for (int i = 0; i < len; ++i) { | ||||
| @@ -71,7 +71,7 @@ void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, | |||||
| } | } | ||||
| // quantize from float to int8 | // quantize from float to int8 | ||||
| void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) { | |||||
| void Quantize(const float *input_data, int length, float scale, int zero_point, int8_t *output_data) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| int q = (int)round(input_data[i] / scale + zero_point); | int q = (int)round(input_data[i] / scale + zero_point); | ||||
| q = q > SCHAR_MAX ? SCHAR_MAX : q; | q = q > SCHAR_MAX ? SCHAR_MAX : q; | ||||
| @@ -276,7 +276,7 @@ int32_t QuantizeToInt8(float real_value, float scale, int32_t zp); | |||||
| void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, int *maxi); | void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, int *maxi); | ||||
| // quantize from float to int8 | // quantize from float to int8 | ||||
| void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data); | |||||
| void Quantize(const float *input_data, int length, float scale, int zero_point, int8_t *output_data); | |||||
| // dequantize from int8 to float | // dequantize from int8 to float | ||||
| void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data); | void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data); | ||||
| @@ -17,4 +17,4 @@ | |||||
| #include "nnacl/reshape.h" | #include "nnacl/reshape.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void Reshape(void *input_ptr, void *output_ptr, size_t data_size) { memcpy(output_ptr, input_ptr, data_size); } | |||||
| void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { memcpy(output_ptr, input_ptr, data_size); } | |||||
| @@ -21,7 +21,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Reshape(void *input_ptr, void *output_ptr, size_t data_size); | |||||
| void Reshape(const void *input_ptr, void *output_ptr, size_t data_size); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| void ReverseSequence(float *input0, void *input1, float *output, ReverseSequenceParameter *para) { | |||||
| void ReverseSequence(float *input0, const void *input1, float *output, ReverseSequenceParameter *para) { | |||||
| (void)memcpy(output, input0, para->total_data_size_); | (void)memcpy(output, input0, para->total_data_size_); | ||||
| ComputeStrides(para->input_shape0_, para->input_stride_, para->ndim_); | ComputeStrides(para->input_shape0_, para->input_stride_, para->ndim_); | ||||
| ComputeStrides(para->output_shape_, para->output_stride_, para->ndim_); | ComputeStrides(para->output_shape_, para->output_stride_, para->ndim_); | ||||
| @@ -40,7 +40,7 @@ typedef struct ReverseSequenceParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void ReverseSequence(float *input0, void *input1, float *output, ReverseSequenceParameter *para); | |||||
| void ReverseSequence(float *input0, const void *input1, float *output, ReverseSequenceParameter *para); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int DoScatterND(float *output_ptr, float *update, int *output_unit_offsets, int unit_size, int num_units) { | |||||
| int DoScatterND(float *output_ptr, const float *update, int *output_unit_offsets, int unit_size, int num_units) { | |||||
| if (output_ptr == NULL || update == NULL || output_unit_offsets == NULL || unit_size <= 0 || num_units < 0) { | if (output_ptr == NULL || update == NULL || output_unit_offsets == NULL || unit_size <= 0 || num_units < 0) { | ||||
| return NNACL_ERR; | return NNACL_ERR; | ||||
| } | } | ||||
| @@ -26,7 +26,7 @@ typedef struct ScatterNDParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int DoScatterND(float *output_ptr, float *update, int *output_unit_offsets, int unit_size, int num_units); | |||||
| int DoScatterND(float *output_ptr, const float *update, int *output_unit_offsets, int unit_size, int num_units); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int DoSqueeze(float *in_data, float *out_data, size_t data_size) { | |||||
| int DoSqueeze(const float *in_data, float *out_data, size_t data_size) { | |||||
| if (in_data == NULL || out_data == NULL) { | if (in_data == NULL || out_data == NULL) { | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| @@ -27,7 +27,7 @@ typedef struct SqueezeParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int DoSqueeze(float *input_ptr, float *output_ptr, size_t data_size); | |||||
| int DoSqueeze(const float *input_ptr, float *output_ptr, size_t data_size); | |||||
| int DoSqueezeInt32(int32_t *in_data, int32_t *out_data, size_t data_size); | int DoSqueezeInt32(int32_t *in_data, int32_t *out_data, size_t data_size); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim2(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end) { | int h_start, int h_end) { | ||||
| const int stride0 = strides[perm[0]]; | const int stride0 = strides[perm[0]]; | ||||
| const int stride1 = strides[perm[1]]; | const int stride1 = strides[perm[1]]; | ||||
| @@ -33,7 +33,7 @@ void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strid | |||||
| } | } | ||||
| } | } | ||||
| void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim3(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end) { | int h_start, int h_end) { | ||||
| const int stride0 = strides[perm[0]]; | const int stride0 = strides[perm[0]]; | ||||
| const int stride1 = strides[perm[1]]; | const int stride1 = strides[perm[1]]; | ||||
| @@ -56,7 +56,7 @@ void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strid | |||||
| } | } | ||||
| } | } | ||||
| void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim4(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end) { | int h_start, int h_end) { | ||||
| const int stride0 = strides[perm[0]]; | const int stride0 = strides[perm[0]]; | ||||
| const int stride1 = strides[perm[1]]; | const int stride1 = strides[perm[1]]; | ||||
| @@ -88,7 +88,7 @@ void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strid | |||||
| } | } | ||||
| } | } | ||||
| void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim5(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end) { | int h_start, int h_end) { | ||||
| const int stride0 = strides[perm[0]]; | const int stride0 = strides[perm[0]]; | ||||
| const int stride1 = strides[perm[1]]; | const int stride1 = strides[perm[1]]; | ||||
| @@ -127,7 +127,7 @@ void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strid | |||||
| } | } | ||||
| } | } | ||||
| void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDims(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end, int dims, int *size, int *position) { | int h_start, int h_end, int dims, int *size, int *position) { | ||||
| *(size + dims - 1) = 1; | *(size + dims - 1) = 1; | ||||
| for (int i = dims - 1; i > 0; --i) { | for (int i = dims - 1; i > 0; --i) { | ||||
| @@ -34,15 +34,15 @@ extern "C" { | |||||
| #endif | #endif | ||||
| int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, | int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, | ||||
| TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position); | TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position); | ||||
| void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim2(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end); | int h_start, int h_end); | ||||
| void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim3(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end); | int h_start, int h_end); | ||||
| void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim4(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end); | int h_start, int h_end); | ||||
| void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDim5(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end); | int h_start, int h_end); | ||||
| void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| void TransposeDims(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||||
| int h_start, int h_end, int dims, int *size, int *position); | int h_start, int h_end, int dims, int *size, int *position); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -17,7 +17,7 @@ | |||||
| #include "nnacl/unstack.h" | #include "nnacl/unstack.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void Unistack(float *input, float **output, UnstackParameter *para) { | |||||
| void Unistack(const float *input, float **output, UnstackParameter *para) { | |||||
| for (int j = 0; j < para->num_; j++) { | for (int j = 0; j < para->num_; j++) { | ||||
| float *out_addr = output[j]; | float *out_addr = output[j]; | ||||
| int out_offset = 0; | int out_offset = 0; | ||||
| @@ -31,7 +31,7 @@ typedef struct UnstackParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Unistack(float *input, float **output, UnstackParameter *para); | |||||
| void Unistack(const float *input, float **output, UnstackParameter *para); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -15,7 +15,7 @@ | |||||
| */ | */ | ||||
| #include "nnacl/where.h" | #include "nnacl/where.h" | ||||
| void Where(bool *input, float *input1, float *input2, float *output, WhereParameter *where_param_, int task_id) { | |||||
| void Where(bool *input, float *input1, const float *input2, float *output, WhereParameter *where_param_, int task_id) { | |||||
| for (int i = task_id; i < where_param_->number_; i += where_param_->op_parameter_.thread_num_) { | for (int i = task_id; i < where_param_->number_; i += where_param_->op_parameter_.thread_num_) { | ||||
| if (input[where_param_->num_ > 1 ? i : 0] == true) { | if (input[where_param_->num_ > 1 ? i : 0] == true) { | ||||
| output[i] = input1[where_param_->num1_ > 1 ? i : 0]; | output[i] = input1[where_param_->num1_ > 1 ? i : 0]; | ||||
| @@ -30,7 +30,7 @@ typedef struct WhereParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Where(bool *input, float *input1, float *input2, float *output, WhereParameter *where_param_, int task_id); | |||||
| void Where(bool *input, float *input1, const float *input2, float *output, WhereParameter *where_param_, int task_id); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -75,8 +75,8 @@ static OutputTransFunc OutputTransFuncRelu6List8[] = {NULL, | |||||
| OutputTransform8x6Relu6Unit, | OutputTransform8x6Relu6Unit, | ||||
| OutputTransform8x7Relu6Unit}; | OutputTransform8x7Relu6Unit}; | ||||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, float *matrix_bt, int src_step, | |||||
| int dst_step, int in_unit) { | |||||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, const float *matrix_bt, | |||||
| int src_step, int dst_step, int in_unit) { | |||||
| int len = in_unit * in_unit; | int len = in_unit * in_unit; | ||||
| if (len > MAX_LEN) return; | if (len > MAX_LEN) return; | ||||
| #ifdef ENABLE_ARM | #ifdef ENABLE_ARM | ||||
| @@ -113,7 +113,7 @@ void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *ma | |||||
| } | } | ||||
| void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, float *matrix_a, | void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, float *matrix_a, | ||||
| float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit) { | |||||
| const float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit) { | |||||
| int src_len = in_unit * in_unit; | int src_len = in_unit * in_unit; | ||||
| if (src_len > MAX_LEN) { | if (src_len > MAX_LEN) { | ||||
| return; | return; | ||||
| @@ -33,11 +33,11 @@ typedef void (*InputTransFunc)(const float *src_data, float *dst_data, int src_s | |||||
| typedef void (*OutputTransFunc)(const float *src_data, float *dst_data, const float *bias_data, int src_step, | typedef void (*OutputTransFunc)(const float *src_data, float *dst_data, const float *bias_data, int src_step, | ||||
| int dst_step, int out_c, int r_w, int r_h, int r_c); | int dst_step, int out_c, int r_w, int r_h, int r_c); | ||||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, float *matrix_bt, int src_step, | |||||
| int dst_step, int in_unit); | |||||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, const float *matrix_bt, | |||||
| int src_step, int dst_step, int in_unit); | |||||
| void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, float *matrix_a, | void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, float *matrix_a, | ||||
| float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit); | |||||
| const float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit); | |||||
| #define Load16Data \ | #define Load16Data \ | ||||
| src[0] = vld1q_f32(src_data + 0 * src_step); \ | src[0] = vld1q_f32(src_data + 0 * src_step); \ | ||||
| @@ -85,7 +85,7 @@ std::string RealPath(const char *path) { | |||||
| return res; | return res; | ||||
| } | } | ||||
| int CompareOutputData(float *output_data, size_t output_size, float *correct_data, size_t data_size) { | |||||
| int CompareOutputData(const float *output_data, size_t output_size, float *correct_data, size_t data_size) { | |||||
| if (output_size != data_size) { | if (output_size != data_size) { | ||||
| printf("compare failed, output_size %zu isn't equal to data_size %zu.\n", output_size, data_size); | printf("compare failed, output_size %zu isn't equal to data_size %zu.\n", output_size, data_size); | ||||
| return 0; | return 0; | ||||
| @@ -58,7 +58,7 @@ inline int WriteToBin(const std::string &file_path, void *data, size_t size) { | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| int CompareOutputData(float *output_data, size_t output_num, float *correct_data, size_t data_size); | |||||
| int CompareOutputData(const float *output_data, size_t output_num, float *correct_data, size_t data_size); | |||||
| int CompareOutput(float *output_data, size_t output_num, std::string file_path); | int CompareOutput(float *output_data, size_t output_num, std::string file_path); | ||||
| std::string GetAndroidPackageName(); | std::string GetAndroidPackageName(); | ||||
| @@ -21,7 +21,7 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| static float CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) { | |||||
| static float CompareOutputRelativeData(const float *output_data, float *correct_data, int data_size) { | |||||
| float error = 0; | float error = 0; | ||||
| // relative error | // relative error | ||||
| @@ -45,6 +45,7 @@ int BatchNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> & | |||||
| if (attr == nullptr) { | if (attr == nullptr) { | ||||
| MS_LOG(ERROR) << "new FusedBatchNormT failed"; | MS_LOG(ERROR) << "new FusedBatchNormT failed"; | ||||
| delete this->primitive_; | delete this->primitive_; | ||||
| this->primitive_ = nullptr; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | ||||
| @@ -46,6 +46,10 @@ int FusedBatchNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodeP | |||||
| } | } | ||||
| if (this->primitive_->value.value == nullptr) { | if (this->primitive_->value.value == nullptr) { | ||||
| auto attr = new (std::nothrow) schema::FusedBatchNormT(); | auto attr = new (std::nothrow) schema::FusedBatchNormT(); | ||||
| if (attr == nullptr) { | |||||
| MS_LOG(ERROR) << "new attr value failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | ||||
| attr->momentum = GetValue<float>(prim.GetAttr("momentum")); | attr->momentum = GetValue<float>(prim.GetAttr("momentum")); | ||||
| this->primitive_->value.value = attr; | this->primitive_->value.value = attr; | ||||
| @@ -46,6 +46,7 @@ int InstanceNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr | |||||
| if (attr == nullptr) { | if (attr == nullptr) { | ||||
| MS_LOG(ERROR) << "new InstanceNormT failed"; | MS_LOG(ERROR) << "new InstanceNormT failed"; | ||||
| delete this->primitive_; | delete this->primitive_; | ||||
| this->primitive_ = nullptr; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | ||||
| @@ -33,6 +33,10 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi | |||||
| auto normalized_shape = param->GetNormalizedShape(); | auto normalized_shape = param->GetNormalizedShape(); | ||||
| layer_norm_parameter->normalized_dims_ = normalized_shape.size(); | layer_norm_parameter->normalized_dims_ = normalized_shape.size(); | ||||
| layer_norm_parameter->normalized_shape_ = reinterpret_cast<int *>(malloc(normalized_shape.size() * sizeof(int))); | layer_norm_parameter->normalized_shape_ = reinterpret_cast<int *>(malloc(normalized_shape.size() * sizeof(int))); | ||||
| if (layer_norm_parameter->normalized_shape_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc layer_norm_parameter->normalized_shape_ failed."; | |||||
| return nullptr; | |||||
| } | |||||
| for (size_t i = 0; i < normalized_shape.size(); i++) { | for (size_t i = 0; i < normalized_shape.size(); i++) { | ||||
| layer_norm_parameter->normalized_shape_[i] = normalized_shape[i]; | layer_norm_parameter->normalized_shape_[i] = normalized_shape[i]; | ||||
| } | } | ||||
| @@ -54,11 +54,18 @@ int Resize::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp | |||||
| } | } | ||||
| if (this->primitive_->value.value == nullptr) { | if (this->primitive_->value.value == nullptr) { | ||||
| auto attr = new (std::nothrow) schema::ResizeT(); | auto attr = new (std::nothrow) schema::ResizeT(); | ||||
| if (attr == nullptr) { | |||||
| MS_LOG(ERROR) << "new attr value failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (prim.instance_name() == "ResizeNearestNeighbor") { | if (prim.instance_name() == "ResizeNearestNeighbor") { | ||||
| attr->method = schema::ResizeMethod_NEAREST; | attr->method = schema::ResizeMethod_NEAREST; | ||||
| } else if (prim.instance_name() == "ResizeBilinear") { | } else if (prim.instance_name() == "ResizeBilinear") { | ||||
| attr->method = schema::ResizeMethod_LINEAR; | attr->method = schema::ResizeMethod_LINEAR; | ||||
| } else { | } else { | ||||
| if (attr != nullptr) { | |||||
| delete attr; | |||||
| } | |||||
| MS_LOG(ERROR) << "wrong resize type"; | MS_LOG(ERROR) << "wrong resize type"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -69,6 +76,9 @@ int Resize::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp | |||||
| this->primitive_->value.value = attr; | this->primitive_->value.value = attr; | ||||
| if (this->primitive_->value.value == nullptr) { | if (this->primitive_->value.value == nullptr) { | ||||
| if (attr != nullptr) { | |||||
| delete attr; | |||||
| } | |||||
| MS_LOG(ERROR) << "new primitiveT value failed"; | MS_LOG(ERROR) << "new primitiveT value failed"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -40,7 +40,7 @@ class BatchToSpaceBaseCPUKernel : public LiteKernel { | |||||
| bool IsNoCrop() const { return no_crop_; } | bool IsNoCrop() const { return no_crop_; } | ||||
| private: | private: | ||||
| bool no_crop_; | |||||
| bool no_crop_ = false; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -43,9 +43,9 @@ class ConcatBaseCPUKernel : public LiteKernel { | |||||
| int Run() override { return 0; } | int Run() override { return 0; } | ||||
| protected: | protected: | ||||
| int axis_; | |||||
| const InnerContext *ctx_; | |||||
| int thread_count_; | |||||
| int axis_ = 0; | |||||
| const InnerContext *ctx_ = nullptr; | |||||
| int thread_count_ = 1; | |||||
| ConcatParameter *concat_param_ = nullptr; | ConcatParameter *concat_param_ = nullptr; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -60,11 +60,11 @@ class ConvolutionBaseCPUKernel : public LiteKernel { | |||||
| protected: | protected: | ||||
| void *bias_data_ = nullptr; | void *bias_data_ = nullptr; | ||||
| const InnerContext *ctx_; | |||||
| ConvParameter *conv_param_; | |||||
| ConvQuantArg *conv_quant_arg_; | |||||
| int tile_num_; | |||||
| int thread_count_; | |||||
| const InnerContext *ctx_ = nullptr; | |||||
| ConvParameter *conv_param_ = nullptr; | |||||
| ConvQuantArg *conv_quant_arg_ = nullptr; | |||||
| int tile_num_ = 0; | |||||
| int thread_count_ = 1; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -38,8 +38,8 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel { | |||||
| int Run() override; | int Run() override; | ||||
| protected: | protected: | ||||
| float *input_boxes; | |||||
| float *input_scores; | |||||
| float *input_boxes = nullptr; | |||||
| float *input_scores = nullptr; | |||||
| virtual int GetInputData() = 0; | virtual int GetInputData() = 0; | ||||
| }; | }; | ||||
| @@ -40,10 +40,10 @@ class FullconnectionBaseCPUKernel : public LiteKernel { | |||||
| int Run() override { return 0; } | int Run() override { return 0; } | ||||
| protected: | protected: | ||||
| MatMulParameter *fc_param_; | |||||
| int thread_stride_; | |||||
| const InnerContext *ctx_; | |||||
| int thread_count_; | |||||
| MatMulParameter *fc_param_ = nullptr; | |||||
| int thread_stride_ = 0; | |||||
| const InnerContext *ctx_ = nullptr; | |||||
| int thread_count_ = 1; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -40,10 +40,10 @@ class MatmulBaseCPUKernel : public LiteKernel { | |||||
| int Run() override { return 0; } | int Run() override { return 0; } | ||||
| protected: | protected: | ||||
| MatMulParameter *params_; | |||||
| int thread_stride_; | |||||
| const InnerContext *ctx_; | |||||
| int thread_count_; | |||||
| MatMulParameter *params_ = nullptr; | |||||
| int thread_stride_ = 0; | |||||
| const InnerContext *ctx_ = nullptr; | |||||
| int thread_count_ = 0; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -71,6 +71,7 @@ void PoolingBaseCPUKernel::FreeQuantParam() { | |||||
| } | } | ||||
| } | } | ||||
| free(pooling_quant_arg_); | free(pooling_quant_arg_); | ||||
| pooling_quant_arg_ = nullptr; | |||||
| } | } | ||||
| } | } | ||||
| @@ -37,12 +37,12 @@ class ResizeBaseCPUKernel : public LiteKernel { | |||||
| int ReSize() override { return 0; }; | int ReSize() override { return 0; }; | ||||
| protected: | protected: | ||||
| int method_; | |||||
| int64_t new_height_; | |||||
| int64_t new_width_; | |||||
| bool align_corners_; | |||||
| bool preserve_aspect_ratio; | |||||
| bool const_shape_; | |||||
| int method_ = 0; | |||||
| int64_t new_height_ = 0; | |||||
| int64_t new_width_ = 0; | |||||
| bool align_corners_ = false; | |||||
| bool preserve_aspect_ratio = false; | |||||
| bool const_shape_ = false; | |||||
| private: | private: | ||||
| int CheckParameters(); | int CheckParameters(); | ||||
| @@ -39,12 +39,12 @@ class SplitBaseCPUKernel : public LiteKernel { | |||||
| int Run() override { return 0; } | int Run() override { return 0; } | ||||
| protected: | protected: | ||||
| const InnerContext *ctx_; | |||||
| int thread_count_; | |||||
| int thread_n_stride_; | |||||
| int thread_n_num_; | |||||
| int num_unit_; | |||||
| SplitParameter *param; | |||||
| const InnerContext *ctx_ = nullptr; | |||||
| int thread_count_ = 1; | |||||
| int thread_n_stride_ = 0; | |||||
| int thread_n_num_ = 0; | |||||
| int num_unit_ = 0; | |||||
| SplitParameter *param = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -43,12 +43,12 @@ using mindspore::schema::PrimitiveType_Sub; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class ArithmeticCPUKernel : public LiteKernel { | class ArithmeticCPUKernel : public LiteKernel { | ||||
| typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size); | |||||
| typedef int (*ArithmeticOptRun)(float *input0, float *input1, float *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| typedef int (*ArithmeticIntRun)(int *input0, int *input1, int *output, int element_size); | |||||
| typedef int (*ArithmeticOptIntRun)(int *input0, int *input1, int *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| typedef int (*ArithmeticRun)(const float *input0, const float *input1, float *output, const int element_size); | |||||
| typedef int (*ArithmeticOptRun)(const float *input0, const float *input1, float *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| typedef int (*ArithmeticIntRun)(const int *input0, const int *input1, int *output, const int element_size); | |||||
| typedef int (*ArithmeticOptIntRun)(const int *input0, const int *input1, int *output, const int element_size, | |||||
| const ArithmeticParameter *param); | |||||
| public: | public: | ||||
| ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -40,8 +40,8 @@ class ConstantOfShapeCPUKernel : public LiteKernel { | |||||
| int DoExecute(int task_id); | int DoExecute(int task_id); | ||||
| private: | private: | ||||
| ConstantOfShapeParameter *param_; | |||||
| void *out_ptr_; | |||||
| ConstantOfShapeParameter *param_ = nullptr; | |||||
| void *out_ptr_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -198,7 +198,12 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Tensor | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); | auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); | ||||
| int out_channel = inputs.at(kWeightIndex)->Batch(); | int out_channel = inputs.at(kWeightIndex)->Batch(); | ||||
| int new_in_channel = inputs.at(kWeightIndex)->Channel(); | int new_in_channel = inputs.at(kWeightIndex)->Channel(); | ||||
| int new_out_channel = out_channel / group; | |||||
| int new_out_channel = 0; | |||||
| if (group == 0) { | |||||
| MS_LOG(ERROR) << "Divisor 'group' cannot be 0."; | |||||
| } else { | |||||
| new_out_channel = out_channel / group; | |||||
| } | |||||
| int kernel_h = conv_param->kernel_h_; | int kernel_h = conv_param->kernel_h_; | ||||
| int kernel_w = conv_param->kernel_w_; | int kernel_w = conv_param->kernel_w_; | ||||
| int input_num = inputs.size(); | int input_num = inputs.size(); | ||||
| @@ -52,11 +52,11 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| private: | private: | ||||
| MatMulParameter *matmul_param_ = nullptr; | MatMulParameter *matmul_param_ = nullptr; | ||||
| int input_plane_; | |||||
| int kernel_plane_; | |||||
| int output_plane_; | |||||
| int thread_count_; | |||||
| int thread_stride_; | |||||
| int input_plane_ = 0; | |||||
| int kernel_plane_ = 0; | |||||
| int output_plane_ = 0; | |||||
| int thread_count_ = 1; | |||||
| int thread_stride_ = 0; | |||||
| float *weight_ptr_ = nullptr; | float *weight_ptr_ = nullptr; | ||||
| float *pack_input_ = nullptr; | float *pack_input_ = nullptr; | ||||
| float *pack_output_ = nullptr; | float *pack_output_ = nullptr; | ||||
| @@ -58,15 +58,15 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| void FreeRunBuf(); | void FreeRunBuf(); | ||||
| private: | private: | ||||
| DeConvParam *deconv_param_; | |||||
| DeConvParam *deconv_param_ = nullptr; | |||||
| float *nhwc_input_ = nullptr; | float *nhwc_input_ = nullptr; | ||||
| float *nhwc_output_ = nullptr; | float *nhwc_output_ = nullptr; | ||||
| float *nc4hw4_output_ = nullptr; | float *nc4hw4_output_ = nullptr; | ||||
| float *tile_input_ = nullptr; | float *tile_input_ = nullptr; | ||||
| float *tile_output_ = nullptr; | float *tile_output_ = nullptr; | ||||
| std::mutex lock_; | std::mutex lock_; | ||||
| int thread_num_hw_; | |||||
| int thread_stride_hw_; | |||||
| int thread_num_hw_ = 0; | |||||
| int thread_stride_hw_ = 0; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_WINOGRAD_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_WINOGRAD_H_ | ||||
| @@ -36,13 +36,13 @@ class EluCPUKernel : public LiteKernel { | |||||
| int DoExcute(int task_id); | int DoExcute(int task_id); | ||||
| protected: | protected: | ||||
| const lite::InnerContext *ctx_; | |||||
| int thread_count_; | |||||
| EluParameter *elu_parameter_; | |||||
| const lite::InnerContext *ctx_ = nullptr; | |||||
| int thread_count_ = 1; | |||||
| EluParameter *elu_parameter_ = nullptr; | |||||
| private: | private: | ||||
| float *input_addr; | |||||
| float *output_addr; | |||||
| float *input_addr = nullptr; | |||||
| float *output_addr = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -43,14 +43,14 @@ class EmbeddingLookupCPUKernel : public LiteKernel { | |||||
| int DoExcute(int task_id); | int DoExcute(int task_id); | ||||
| protected: | protected: | ||||
| const lite::InnerContext *ctx_; | |||||
| int thread_count_; | |||||
| EmbeddingLookupParameter *embedding_lookup_parameter_; | |||||
| const lite::InnerContext *ctx_ = nullptr; | |||||
| int thread_count_ = 1; | |||||
| EmbeddingLookupParameter *embedding_lookup_parameter_ = nullptr; | |||||
| private: | private: | ||||
| float *input_addr_; | |||||
| float *output_addr_; | |||||
| int *ids_addr_; | |||||
| float *input_addr_ = nullptr; | |||||
| float *output_addr_ = nullptr; | |||||
| int *ids_addr_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -36,13 +36,13 @@ class ExpCPUKernel : public LiteKernel { | |||||
| int DoExcute(int task_id); | int DoExcute(int task_id); | ||||
| protected: | protected: | ||||
| const lite::InnerContext *ctx_; | |||||
| int thread_count_; | |||||
| ExpParameter *exp_parameter_; | |||||
| const lite::InnerContext *ctx_ = nullptr; | |||||
| int thread_count_ = 1; | |||||
| ExpParameter *exp_parameter_ = nullptr; | |||||
| private: | private: | ||||
| float *input_addr_; | |||||
| float *output_addr_; | |||||
| float *input_addr_ = nullptr; | |||||
| float *output_addr_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -64,6 +64,10 @@ int FullconnectionCPUKernel::ReSize() { | |||||
| if (in_tensors_.size() == 3) { | if (in_tensors_.size() == 3) { | ||||
| int col_tmp = is_vector_input_ ? fc_param_->col_ : fc_param_->col_8_; | int col_tmp = is_vector_input_ ? fc_param_->col_ : fc_param_->col_8_; | ||||
| bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float))); | bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float))); | ||||
| if (bias_ptr_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc bias_ptr_ failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| memcpy(bias_ptr_, in_tensors_[2]->MutableData(), fc_param_->col_ * sizeof(float)); | memcpy(bias_ptr_, in_tensors_[2]->MutableData(), fc_param_->col_ * sizeof(float)); | ||||
| } | } | ||||