Merge pull request !7928 from liuwenhao/mastertags/v1.1.0
| @@ -22,9 +22,9 @@ | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/fp32/arithmetic.h" | |||
| typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size); | |||
| typedef int (*ArithmeticOptRun)(float *input0, float *input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| typedef int (*ArithmeticRun)(const float *input0, const float *input1, float *output, const int element_size); | |||
| typedef int (*ArithmeticOptRun)(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int BroadcastRun(float *input0, float *input1, float *output, int dim, int out_count, int break_pos, | |||
| ArithmeticRun arithmetic_run, ArithmeticParameter *params) { | |||
| @@ -52,7 +52,7 @@ void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t nd | |||
| } | |||
| } | |||
| void ComputeStrides(int *shape, int *strides, int ndim) { | |||
| void ComputeStrides(const int *shape, int *strides, const int ndim) { | |||
| int stride = 1; | |||
| for (int i = ndim - 1; i >= 0; i--) { | |||
| strides[i] = stride; | |||
| @@ -49,7 +49,7 @@ extern "C" { | |||
| #endif | |||
| void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||
| int *outStrides, int *multiple); | |||
| void ComputeStrides(int *shape, int *strides, int ndim); | |||
| void ComputeStrides(const int *shape, int *strides, const int ndim); | |||
| void CalcMultiplesAndStrides(ArithmeticParameter *param); | |||
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/depth_to_space.h" | |||
| #include <string.h> | |||
| void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param) { | |||
| void DepthToSpaceForNHWC(const void *input, void *output, const int *in_shape, const DepthToSpaceParameter *param) { | |||
| int32_t block_size = param->block_size_; | |||
| int32_t in_shape_dim2 = in_shape[2]; | |||
| int32_t in_shape_dim1 = in_shape[1]; | |||
| @@ -20,7 +20,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param); | |||
| void DepthToSpaceForNHWC(const void *input, void *output, const int *in_shape, const DepthToSpaceParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,8 @@ | |||
| #define ACCURACY_DATA 0.00000001 | |||
| int ElementOptMul(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptMul(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -51,7 +52,8 @@ int ElementOptMul(float *input0, float *input1, float *output, int element_size, | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptMulRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptMulRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -84,7 +86,8 @@ int ElementOptMulRelu(float *input0, float *input1, float *output, int element_s | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptMulRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptMulRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -118,7 +121,8 @@ int ElementOptMulRelu6(float *input0, float *input1, float *output, int element_ | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptMulInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptMulInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | |||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | |||
| @@ -150,7 +154,8 @@ int ElementOptMulInt(int *input0, int *input1, int *output, int element_size, Ar | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptMulReluInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptMulReluInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | |||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | |||
| @@ -183,7 +188,8 @@ int ElementOptMulReluInt(int *input0, int *input1, int *output, int element_size | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptMulRelu6Int(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | |||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | |||
| @@ -217,7 +223,8 @@ int ElementOptMulRelu6Int(int *input0, int *input1, int *output, int element_siz | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptSub(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptSub(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -249,7 +256,8 @@ int ElementOptSub(float *input0, float *input1, float *output, int element_size, | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptSubRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptSubRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -282,7 +290,8 @@ int ElementOptSubRelu(float *input0, float *input1, float *output, int element_s | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptSubRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptSubRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -316,7 +325,8 @@ int ElementOptSubRelu6(float *input0, float *input1, float *output, int element_ | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptAdd(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptAdd(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -348,7 +358,8 @@ int ElementOptAdd(float *input0, float *input1, float *output, int element_size, | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptAddInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| int32x4_t vin0_opt = vdupq_n_s32(input0[0]); | |||
| int32x4_t vin1_opt = vdupq_n_s32(input1[0]); | |||
| @@ -380,7 +391,8 @@ int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, Ar | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptAddRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -413,7 +425,8 @@ int ElementOptAddRelu(float *input0, float *input1, float *output, int element_s | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptAddRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vin0_opt = vdupq_n_f32(input0[0]); | |||
| float32x4_t vin1_opt = vdupq_n_f32(input1[0]); | |||
| @@ -448,7 +461,8 @@ int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_ | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptDiv(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptDiv(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| if (param->in_elements_num0_ == 1) { | |||
| for (int index = 0; index < element_size; index++) { | |||
| output[index] = input0[0] / input1[index]; | |||
| @@ -464,7 +478,8 @@ int ElementOptDiv(float *input0, float *input1, float *output, int element_size, | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptDivRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptDivRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| if (param->in_elements_num0_ == 1) { | |||
| for (int index = 0; index < element_size; index++) { | |||
| output[index] = input0[0] / input1[index]; | |||
| @@ -479,7 +494,8 @@ int ElementOptDivRelu(float *input0, float *input1, float *output, int element_s | |||
| return NNACL_OK; | |||
| } | |||
| int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { | |||
| int ElementOptDivRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param) { | |||
| if (param->in_elements_num0_ == 1) { | |||
| for (int index = 0; index < element_size; index++) { | |||
| output[index] = MSMIN(MSMAX(input0[0] / input1[index], 0), 6); | |||
| @@ -492,7 +508,7 @@ int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_ | |||
| return NNACL_OK; | |||
| } | |||
| int ElementMul(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementMul(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| for (; index <= element_size - 4; index += C4NUM) { | |||
| @@ -508,7 +524,7 @@ int ElementMul(float *input0, float *input1, float *output, int element_size) { | |||
| return NNACL_OK; | |||
| } | |||
| int ElementMulRelu(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementMulRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t zeros = vdupq_n_f32(0.0f); | |||
| @@ -527,7 +543,7 @@ int ElementMulRelu(float *input0, float *input1, float *output, int element_size | |||
| return NNACL_OK; | |||
| } | |||
| int ElementMulRelu6(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementMulRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t zeros = vdupq_n_f32(0.0f); | |||
| @@ -545,7 +561,7 @@ int ElementMulRelu6(float *input0, float *input1, float *output, int element_siz | |||
| return NNACL_OK; | |||
| } | |||
| int ElementMulInt(int *input0, int *input1, int *output, int element_size) { | |||
| int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| for (; index <= element_size - 4; index += C4NUM) { | |||
| @@ -561,7 +577,7 @@ int ElementMulInt(int *input0, int *input1, int *output, int element_size) { | |||
| return NNACL_OK; | |||
| } | |||
| int ElementMulReluInt(int *input0, int *input1, int *output, int element_size) { | |||
| int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| int32x4_t zeros = vdupq_n_s32(0); | |||
| @@ -580,7 +596,7 @@ int ElementMulReluInt(int *input0, int *input1, int *output, int element_size) { | |||
| return NNACL_OK; | |||
| } | |||
| int ElementMulRelu6Int(int *input0, int *input1, int *output, int element_size) { | |||
| int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| int32x4_t zeros = vdupq_n_s32(0); | |||
| @@ -604,7 +620,7 @@ int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_i | |||
| return ElementMul(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementAdd(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementAdd(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| for (; index <= element_size - 4; index += C4NUM) { | |||
| @@ -620,7 +636,7 @@ int ElementAdd(float *input0, float *input1, float *output, int element_size) { | |||
| return NNACL_OK; | |||
| } | |||
| int ElementAddRelu(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t zeros = vdupq_n_f32(0.0f); | |||
| @@ -639,7 +655,7 @@ int ElementAddRelu(float *input0, float *input1, float *output, int element_size | |||
| return NNACL_OK; | |||
| } | |||
| int ElementAddRelu6(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t zeros = vdupq_n_f32(0.0f); | |||
| @@ -657,7 +673,7 @@ int ElementAddRelu6(float *input0, float *input1, float *output, int element_siz | |||
| return NNACL_OK; | |||
| } | |||
| int ElementAddInt(int *input0, int *input1, int *output, int element_size) { | |||
| int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| for (; index <= element_size - 4; index += C4NUM) { | |||
| @@ -692,7 +708,7 @@ int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t | |||
| return ElementAddInt8(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementSub(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementSub(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| for (; index <= element_size - 4; index += C4NUM) { | |||
| @@ -708,7 +724,7 @@ int ElementSub(float *input0, float *input1, float *output, int element_size) { | |||
| return NNACL_OK; | |||
| } | |||
| int ElementSubRelu(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t zeros = vdupq_n_f32(0.0f); | |||
| @@ -727,7 +743,7 @@ int ElementSubRelu(float *input0, float *input1, float *output, int element_size | |||
| return NNACL_OK; | |||
| } | |||
| int ElementSubRelu6(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t zeros = vdupq_n_f32(0.0f); | |||
| @@ -752,14 +768,14 @@ int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_i | |||
| return ElementSub(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementDiv(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementDiv(const float *input0, const float *input1, float *output, const int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| output[i] = input0[i] / input1[i]; | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ElementDivRelu(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| float res = input0[i] / input1[i]; | |||
| output[i] = res > 0 ? res : 0; | |||
| @@ -767,7 +783,7 @@ int ElementDivRelu(float *input0, float *input1, float *output, int element_size | |||
| return NNACL_OK; | |||
| } | |||
| int ElementDivRelu6(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| output[i] = MSMIN(MSMAX(input0[i] / input1[i], 0), 6); | |||
| } | |||
| @@ -780,14 +796,14 @@ int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_i | |||
| return ElementDiv(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementFloorMod(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ElementFloorModInt(int *input0, int *input1, int *output, int element_size) { | |||
| int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| output[i] = input0[i] - (input0[i] / input1[i]) * input1[i]; | |||
| } | |||
| @@ -800,14 +816,14 @@ int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *t | |||
| return ElementFloorMod(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementFloorDiv(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| output[i] = floorf(input0[i] / input1[i]); | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size) { | |||
| int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| output[i] = input0[i] / input1[i]; | |||
| } | |||
| @@ -820,7 +836,7 @@ int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *t | |||
| return ElementFloorDiv(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementLogicalAnd(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -840,7 +856,7 @@ int ElementLogicalAnd(float *input0, float *input1, float *output, int element_s | |||
| return NNACL_OK; | |||
| } | |||
| int ElementSquaredDifference(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size) { | |||
| ElementSub(input0, input1, output, element_size); | |||
| return ElementMul(output, output, output, element_size); | |||
| } | |||
| @@ -857,7 +873,7 @@ int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float | |||
| return ElementLogicalAnd(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementLogicalOr(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -883,7 +899,7 @@ int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float * | |||
| return ElementLogicalOr(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementMaximum(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| for (; index <= element_size - 4; index += C4NUM) { | |||
| @@ -905,7 +921,7 @@ int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *ti | |||
| return ElementMaximum(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementMinimum(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| for (; index <= element_size - 4; index += C4NUM) { | |||
| @@ -935,7 +951,7 @@ float FloatNotEqualCheck(float in0, float in1) { | |||
| return (float)true; | |||
| } | |||
| int ElementNotEqual(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -967,7 +983,7 @@ float FloatEqualCheck(float in0, float in1) { | |||
| return (float)false; | |||
| } | |||
| int ElementEqual(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -991,7 +1007,7 @@ int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile | |||
| return ElementEqual(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementLess(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementLess(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -1015,7 +1031,7 @@ int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_ | |||
| return ElementLess(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementLessEqual(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -1039,7 +1055,7 @@ int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float * | |||
| return ElementLessEqual(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementGreater(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementGreater(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -1063,7 +1079,7 @@ int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *ti | |||
| return ElementGreater(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int ElementGreaterEqual(float *input0, float *input1, float *output, int element_size) { | |||
| int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t vtrue = vdupq_n_f32(1); | |||
| @@ -26,105 +26,121 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ElementOptAdd(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptSub(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptSubRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptSubRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptMul(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptMulRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptMulRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptMulInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptMulReluInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptMulRelu6Int(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptDiv(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptDivRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementMul(float *input0, float *input1, float *output, int element_size); | |||
| int ElementMulRelu(float *input0, float *input1, float *output, int element_size); | |||
| int ElementMulRelu6(float *input0, float *input1, float *output, int element_size); | |||
| int ElementMulInt(int *input0, int *input1, int *output, int element_size); | |||
| int ElementMulReluInt(int *input0, int *input1, int *output, int element_size); | |||
| int ElementMulRelu6Int(int *input0, int *input1, int *output, int element_size); | |||
| int ElementOptAdd(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptAddInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptAddRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptAddRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptSub(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptSubRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptSubRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMul(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulReluInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptDiv(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptDivRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptDivRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementMul(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementMulRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementMulRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int ElementAdd(float *input0, float *input1, float *output, int element_size); | |||
| int ElementAddRelu(float *input0, float *input1, float *output, int element_size); | |||
| int ElementAddRelu6(float *input0, float *input1, float *output, int element_size); | |||
| int ElementAddInt(int *input0, int *input1, int *output, int element_size); | |||
| int ElementAdd(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementSub(float *input0, float *input1, float *output, int element_size); | |||
| int ElementSubRelu(float *input0, float *input1, float *output, int element_size); | |||
| int ElementSubRelu6(float *input0, float *input1, float *output, int element_size); | |||
| int ElementSub(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int ElementDiv(float *input0, float *input1, float *output, int element_size); | |||
| int ElementDivRelu(float *input0, float *input1, float *output, int element_size); | |||
| int ElementDivRelu6(float *input0, float *input1, float *output, int element_size); | |||
| int ElementDiv(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int ElementLogicalAnd(float *input0, float *input1, float *output, int element_size); | |||
| int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLogicalOr(float *input0, float *input1, float *output, int element_size); | |||
| int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementMaximum(float *input0, float *input1, float *output, int element_size); | |||
| int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementMinimum(float *input0, float *input1, float *output, int element_size); | |||
| int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementFloorDiv(float *input0, float *input1, float *output, int element_size); | |||
| int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size); | |||
| int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementFloorMod(float *input0, float *input1, float *output, int element_size); | |||
| int ElementFloorModInt(int *input0, int *input1, int *output, int element_size); | |||
| int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementSquaredDifference(float *input0, float *input1, float *output, int element_size); | |||
| int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementNotEqual(float *input0, float *input1, float *output, int element_size); | |||
| int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementEqual(float *input0, float *input1, float *output, int element_size); | |||
| int ElementEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLess(float *input0, float *input1, float *output, int element_size); | |||
| int ElementLess(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int ElementLessEqual(float *input0, float *input1, float *output, int element_size); | |||
| int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementGreater(float *input0, float *input1, float *output, int element_size); | |||
| int ElementGreater(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementGreaterEqual(float *input0, float *input1, float *output, int element_size); | |||
| int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| @@ -18,7 +18,6 @@ | |||
| #include <math.h> | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/errorcode.h" | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||
| void *output) { | |||
| @@ -21,7 +21,7 @@ | |||
| #include "nnacl/fp32/matmul.h" | |||
| // fp32 conv common | |||
| void ConvFp32(float *input_data, float *packed_input, float *packed_weight, const float *bias_data, | |||
| void ConvFp32(float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | |||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param) { | |||
| int kernel_h = conv_param->kernel_h_; | |||
| int kernel_w = conv_param->kernel_w_; | |||
| @@ -70,7 +70,7 @@ void ConvFp32(float *input_data, float *packed_input, float *packed_weight, cons | |||
| } | |||
| // fp32 conv winograd | |||
| void ConvWinogardFp32(float *input_data, float *trans_weight, const float *bias_data, float *output_data, | |||
| void ConvWinogardFp32(float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | |||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | |||
| OutputTransFunc out_func) { | |||
| int thread_num = conv_param->thread_num_; | |||
| @@ -34,11 +34,11 @@ extern "C" { | |||
| #endif | |||
| // fp32 convolution common (im2col+gemm) | |||
| void ConvFp32(float *input_data, float *packed_input, float *packed_weight, const float *bias_data, | |||
| void ConvFp32(float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | |||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param); | |||
| // fp32 convolution winograd | |||
| void ConvWinogardFp32(float *input_data, float *trans_weight, const float *bias_data, float *output_data, | |||
| void ConvWinogardFp32(float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | |||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | |||
| OutputTransFunc out_func); | |||
| #ifdef __cplusplus | |||
| @@ -56,7 +56,9 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| /* winograd AT */ | |||
| unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | |||
| if (unit->winograd_.AT_ == NULL) { | |||
| free(current_unit_weight); | |||
| if (current_unit_weight != NULL) { | |||
| free(current_unit_weight); | |||
| } | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| memcpy(unit->winograd_.AT_, matrix_at, unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float)); | |||
| @@ -64,8 +66,12 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| /* winograd BT */ | |||
| unit->winograd_.BT_ = malloc(unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | |||
| if (unit->winograd_.BT_ == NULL) { | |||
| free(current_unit_weight); | |||
| free(unit->winograd_.AT_); | |||
| if (current_unit_weight != NULL) { | |||
| free(current_unit_weight); | |||
| } | |||
| if (unit->winograd_.AT_ != NULL) { | |||
| free(unit->winograd_.AT_); | |||
| } | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| memcpy(unit->winograd_.BT_, matrix_bt, unit->winograd_.o_ * unit->winograd_.o_ * sizeof(float)); | |||
| @@ -74,9 +80,15 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| size = conv_param->input_channel_ * conv_param->output_channel_ * unit->winograd_.kh_ * unit->winograd_.kw_; | |||
| float *winograd_unit_weight = (float *)malloc(size * sizeof(float)); | |||
| if (winograd_unit_weight == NULL) { | |||
| free(current_unit_weight); | |||
| free(unit->winograd_.AT_); | |||
| free(unit->winograd_.BT_); | |||
| if (current_unit_weight != NULL) { | |||
| free(current_unit_weight); | |||
| } | |||
| if (unit->winograd_.AT_ != NULL) { | |||
| free(unit->winograd_.AT_); | |||
| } | |||
| if (unit->winograd_.BT_ != NULL) { | |||
| free(unit->winograd_.BT_); | |||
| } | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| WinogradWeightTransform(current_unit_weight, winograd_unit_weight, matrix_g, matrix_gt, C4NUM, unit->winograd_.kh_, | |||
| @@ -105,7 +117,9 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| } | |||
| } | |||
| free(current_unit_weight); | |||
| if (current_unit_weight != NULL) { | |||
| free(current_unit_weight); | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| @@ -317,7 +331,7 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s | |||
| return; | |||
| } | |||
| void _deConvWinograd(float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, float *at_buf, | |||
| void _deConvWinograd(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, float *at_buf, | |||
| float *a_mid_buf, float *trans_a_buf, bool *transfered, float *bt_buf, float *b_tmp_buf, | |||
| int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||
| int winograd_plane = unit_size * unit_size; | |||
| @@ -357,8 +371,8 @@ void _deConvWinograd(float *tile_in, float *tile_out, float *weight_buf, float * | |||
| return; | |||
| } | |||
| void _deConvCommon(float *tile_in, float *tile_out, float *weight, float *tmp_buf, int h_start, int w_start, int h_size, | |||
| int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||
| void _deConvCommon(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start, | |||
| int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||
| int count = deconv_param->oc_div4_ * w_size * h_size; | |||
| int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | |||
| int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | |||
| @@ -274,9 +274,9 @@ int NmsMultiClassesFast(const int num_boxes, const int num_classes_with_bg, cons | |||
| return output_num; | |||
| } | |||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, float *input_scores, | |||
| float *input_anchors, float *output_boxes, float *output_classes, float *output_scores, | |||
| float *output_num, DetectionPostProcessParameter *param) { | |||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, | |||
| const float *input_scores, float *input_anchors, float *output_boxes, float *output_classes, | |||
| float *output_scores, float *output_num, DetectionPostProcessParameter *param) { | |||
| BboxCenter scaler; | |||
| scaler.y = param->y_scale_; | |||
| scaler.x = param->x_scale_; | |||
| @@ -43,9 +43,9 @@ typedef struct { | |||
| extern "C" { | |||
| #endif | |||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, float *input_scores, | |||
| float *input_anchors, float *output_boxes, float *output_classes, float *output_scores, | |||
| float *output_num, DetectionPostProcessParameter *param); | |||
| int DetectionPostProcess(const int num_boxes, const int num_classes_with_bg, float *input_boxes, | |||
| const float *input_scores, float *input_anchors, float *output_boxes, float *output_classes, | |||
| float *output_scores, float *output_num, DetectionPostProcessParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include <math.h> | |||
| #include "nnacl/errorcode.h" | |||
| void Calculate_Data(float *input_data, float *output_data, int num, EluParameter *parameter) { | |||
| void Calculate_Data(const float *input_data, float *output_data, int num, EluParameter *parameter) { | |||
| output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num]; | |||
| } | |||
| @@ -18,7 +18,7 @@ | |||
| #include <math.h> | |||
| #include "nnacl/errorcode.h" | |||
| int Exp(float *input_data, float *output_data, ExpParameter *parameter, int task_id) { | |||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id) { | |||
| if (parameter->scale_ == 1) { | |||
| for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) { | |||
| output_data[i] = expf(input_data[i]); | |||
| @@ -33,7 +33,7 @@ typedef struct ExpParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int Exp(float *input_data, float *output_data, ExpParameter *parameter, int task_id); | |||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| int ExpandDims(void *input_ptr, void *output_ptr, size_t data_size) { | |||
| int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) { | |||
| memcpy(output_ptr, input_ptr, data_size); | |||
| return NNACL_OK; | |||
| } | |||
| @@ -27,7 +27,7 @@ typedef struct ExpandDimsParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ExpandDims(void *input_ptr, void *output_ptr, size_t data_size); | |||
| int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| inline int Stride(int *shape, int rank, int index) { | |||
| inline int Stride(const int *shape, int rank, int index) { | |||
| int i, stride = 1; | |||
| for (i = index + 1; i < rank; ++i) { | |||
| stride *= shape[i]; | |||
| @@ -26,7 +26,7 @@ inline int Stride(int *shape, int rank, int index) { | |||
| return stride; | |||
| } | |||
| int Gather(float *input, int outer_size, int inner_size, int limit, int *indices, int indices_element_size, | |||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||
| float *output) { | |||
| int i, m; | |||
| for (m = 0; m < outer_size; ++m) { | |||
| @@ -42,7 +42,7 @@ int Gather(float *input, int outer_size, int inner_size, int limit, int *indices | |||
| return NNACL_OK; | |||
| } | |||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, int *indices, | |||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | |||
| int indices_element_size, int32_t *output) { | |||
| int i, m; | |||
| for (m = 0; m < outer_size; ++m) { | |||
| @@ -22,10 +22,10 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int Gather(float *input, int outer_size, int inner_size, int limit, int *indices, int indices_element_size, | |||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||
| float *output); | |||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, int *indices, int indices_element_size, | |||
| int32_t *output); | |||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | |||
| int indices_element_size, int32_t *output); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| int GatherNd(float *input, float *output, int *in_offset, int area, int count) { | |||
| int GatherNd(const float *input, float *output, int *in_offset, int area, int count) { | |||
| int i = 0; | |||
| for (i = 0; i < count; i++) { | |||
| (void)memcpy(output + area * i, input + in_offset[i], area * sizeof(float)); | |||
| @@ -27,7 +27,7 @@ typedef struct GatherNdParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int GatherNd(float *input, float *output, int *in_offset, int area, int count); | |||
| int GatherNd(const float *input, float *output, int *in_offset, int area, int count); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -79,13 +79,13 @@ void ElementMulAcc(const float *input0, const float *input1, float *output, int | |||
| } | |||
| } | |||
| void UpdataState(float *cell_state, float *forget_gate, float *input_gate, float *cell_gate, int batch, | |||
| void UpdataState(float *cell_state, float *forget_gate, const float *input_gate, float *cell_gate, int batch, | |||
| int hidden_size) { | |||
| ElementMul(forget_gate, cell_state, cell_state, batch * hidden_size); | |||
| ElementMulAcc(input_gate, cell_gate, cell_state, batch * hidden_size); | |||
| } | |||
| void UpdataOutput(float *cell_state, float *output_gate, float *hidden_state, int batch, int hidden_size) { | |||
| void UpdataOutput(const float *cell_state, float *output_gate, float *hidden_state, int batch, int hidden_size) { | |||
| Tanh(cell_state, batch * hidden_size, hidden_state); | |||
| ElementMul(hidden_state, output_gate, hidden_state, batch * hidden_size); | |||
| } | |||
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/fp32/matmul.h" | |||
| void RowMajor2ColMajor(float *src_ptr, float *dst_ptr, int row, int col) { | |||
| void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col) { | |||
| for (int r = 0; r < row; ++r) { | |||
| for (int c = 0; c < col; ++c) { | |||
| dst_ptr[c * row + r] = src_ptr[r * col + c]; | |||
| @@ -29,7 +29,7 @@ extern "C" { | |||
| void MatMulOpt(const float *a, const float *b, float *c, const float *bias, ActType act_type, int deep, int row, | |||
| int col, size_t stride, int out_type); | |||
| void MatVecMul(const float *a, const float *b, float *c, const float *bias, ActType act_type, int depth, int col); | |||
| void RowMajor2ColMajor(float *src_ptr, float *dst_ptr, int row, int col); | |||
| void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col); | |||
| void RowMajor2Row4Major(float *src_ptr, float *dst_ptr, int row, int col); | |||
| void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col); | |||
| void RowMajor2Row12Major(float *src_ptr, float *dst_ptr, int row, int col); | |||
| @@ -65,7 +65,7 @@ int PrepareResizeBilinear(const int *input_shape, const int *output_shape, bool | |||
| } | |||
| int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | |||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| float *x_left_weights, int n_h_begin, int n_h_end) { | |||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL || | |||
| y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) { | |||
| @@ -155,7 +155,7 @@ int ResizeBilinear(const float *input_data, float *output_data, const int *input | |||
| } | |||
| int InterpRow(const float *src_line, float *linear_output, int new_width, float *x_left_weights, int *x_lefts, | |||
| int *x_rights, int in_c) { | |||
| const int *x_rights, int in_c) { | |||
| int w; | |||
| for (w = 0; w < new_width; w++) { | |||
| int c = 0; | |||
| @@ -208,7 +208,7 @@ int InterpCol(const float *bottom_line, const float *top_line, float *output, in | |||
| } | |||
| int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | |||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end) { | |||
| if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL || | |||
| y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) { | |||
| @@ -30,11 +30,11 @@ int PrepareResizeBilinear(const int *input_shape, const int *output_shape, bool | |||
| int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, float *x_left_weights); | |||
| int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | |||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| float *x_left_weights, int n_h_begin, int n_h_end); | |||
| int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | |||
| int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| int *y_bottoms, const int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, | |||
| float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end); | |||
| int ResizeNearestNeighbor(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | |||
| @@ -20,7 +20,7 @@ | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| int ROIPooling(float *in_ptr, float *out_ptr, float *roi, float *max_c, int tid, ROIPoolingParameter *param) { | |||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param) { | |||
| int num_rois = param->output_n_; | |||
| int units = UP_DIV(num_rois, param->thread_num_); | |||
| int roi_st = tid * units; | |||
| @@ -40,7 +40,7 @@ typedef struct ROIPoolingParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ROIPooling(float *in_ptr, float *out_ptr, float *roi, float *max_c, int tid, ROIPoolingParameter *param); | |||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #ifdef ENABLE_ARM | |||
| #include <arm_neon.h> | |||
| #endif | |||
| void ScaleInner(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||
| void ScaleInner(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||
| int axis_size, int inner_size) { | |||
| for (int out = outer_start; out < outer_end; out++) { | |||
| int out_offset = out * axis_size * inner_size; | |||
| @@ -43,7 +43,7 @@ void ScaleInner(float *in_data, float *out_data, float *scale, float *offset, in | |||
| } | |||
| } | |||
| void ScaleAxis(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||
| void ScaleAxis(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||
| int axis_size) { | |||
| for (int out = outer_start; out < outer_end; out++) { | |||
| int out_offset = out * axis_size; | |||
| @@ -78,7 +78,7 @@ void DoScale(float *in_data, float *out_data, float *scale, float *offset, int t | |||
| } | |||
| } | |||
| void ScaleInnerRelu(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||
| void ScaleInnerRelu(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||
| int axis_size, int inner_size) { | |||
| #ifdef ENABLE_ARM64 | |||
| float32x4_t zeros = {0, 0, 0, 0}; | |||
| @@ -108,7 +108,7 @@ void ScaleInnerRelu(float *in_data, float *out_data, float *scale, float *offset | |||
| } | |||
| } | |||
| void ScaleAxisRelu(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||
| void ScaleAxisRelu(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||
| int axis_size) { | |||
| #ifdef ENABLE_ARM64 | |||
| float32x4_t zeros = {0, 0, 0, 0}; | |||
| @@ -149,7 +149,7 @@ void DoScaleRelu(float *in_data, float *out_data, float *scale, float *offset, i | |||
| } | |||
| } | |||
| void ScaleInnerRelu6(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||
| void ScaleInnerRelu6(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||
| int axis_size, int inner_size) { | |||
| #ifdef ENABLE_ARM64 | |||
| float32x4_t zeros = {0, 0, 0, 0}; | |||
| @@ -180,7 +180,7 @@ void ScaleInnerRelu6(float *in_data, float *out_data, float *scale, float *offse | |||
| } | |||
| } | |||
| void ScaleAxisRelu6(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end, | |||
| void ScaleAxisRelu6(float *in_data, float *out_data, const float *scale, float *offset, int outer_start, int outer_end, | |||
| int axis_size) { | |||
| #ifdef ENABLE_ARM64 | |||
| float32x4_t zeros = {0, 0, 0, 0}; | |||
| @@ -17,7 +17,6 @@ | |||
| #include "nnacl/fp32/slice.h" | |||
| #include <string.h> | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/errorcode.h" | |||
| void PadSliceParameterTo4D(SliceParameter *param) { | |||
| int32_t begin[DIMENSION_4D]; | |||
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/fp32/space_to_batch.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int *in_shape, int *out_shape) { | |||
| void DoSpaceToBatchNHWC(const float *input, float *output, const int *block_sizes, int *in_shape, int *out_shape) { | |||
| int out_dim0 = out_shape[0]; | |||
| int out_dim1 = out_shape[1]; | |||
| int out_dim2 = out_shape[2]; | |||
| @@ -45,7 +45,7 @@ void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int | |||
| } | |||
| } | |||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape) { | |||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, const int *padding, int *out_shape) { | |||
| int in_h = in_shape[1]; | |||
| int in_w = in_shape[2]; | |||
| int in_c = in_shape[3]; | |||
| @@ -63,8 +63,8 @@ void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_offset0 = i * in_strides[0]; | |||
| for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) { | |||
| memset(output + out_offset, 0, ped_h_size); | |||
| out_offset += ped_h_num; | |||
| memset(output + out_offset, 0, ped_h_size); | |||
| out_offset += ped_h_num; | |||
| } | |||
| for (int j = 0; j < in_h; ++j) { | |||
| size_t in_offset1 = in_offset0 + j * in_strides[1]; | |||
| @@ -30,8 +30,8 @@ typedef struct SpaceToBatchParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int *in_shape, int *out_shape); | |||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape); | |||
| void DoSpaceToBatchNHWC(const float *input, float *output, const int *block_sizes, int *in_shape, int *out_shape); | |||
| void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, const int *padding, int *out_shape); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -15,9 +15,8 @@ | |||
| */ | |||
| #include "nnacl/fp32/sparse_to_dense.h" | |||
| void SparseToDense(int **sparse_indices, int *output_shape, | |||
| float *sparse_values, float default_value, float *output, | |||
| bool isScalar, int index_start, int index_end, int out_width) { | |||
| void SparseToDense(int **sparse_indices, int *output_shape, const float *sparse_values, float default_value, | |||
| float *output, bool isScalar, int index_start, int index_end, int out_width) { | |||
| for (int i = index_start; i < index_end; i++) { | |||
| for (int j = 0; j < out_width; j++) { | |||
| output[i * out_width + j] = default_value; | |||
| @@ -31,14 +30,12 @@ void SparseToDense(int **sparse_indices, int *output_shape, | |||
| int index; | |||
| if (isScalar == true) { | |||
| for (int i = index_start; i < index_end; i++) { | |||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + | |||
| d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||
| output[index] = sparse_values[0]; | |||
| } | |||
| } else { | |||
| for (int i = index_start; i < index_end; i++) { | |||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + | |||
| d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||
| index = d1 * sparse_indices[i][0] + d2 * sparse_indices[i][1] + d3 * sparse_indices[i][2] + sparse_indices[i][3]; | |||
| output[index] = sparse_values[i]; | |||
| } | |||
| } | |||
| @@ -21,9 +21,8 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void SparseToDense(int **sparse_indices_vect, int *output_shape, | |||
| float *sparse_values, float default_value, float *output, | |||
| bool isScalar, int index_start, int index_end, int out_width); | |||
| void SparseToDense(int **sparse_indices_vect, int *output_shape, const float *sparse_values, float default_value, | |||
| float *output, bool isScalar, int index_start, int index_end, int out_width); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -17,7 +17,7 @@ | |||
| #include "nnacl/fp32/tile.h" | |||
| #include <string.h> | |||
| void DoCopyData(float *input_data, float *output_data, size_t size, size_t multiple) { | |||
| void DoCopyData(const float *input_data, float *output_data, size_t size, size_t multiple) { | |||
| float *out_data = output_data; | |||
| for (size_t i = 0; i < multiple; ++i) { | |||
| (void)memcpy(out_data, input_data, size * sizeof(float)); | |||
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/fp32/unique.h" | |||
| int Find(float *array, int len, float target) { | |||
| int Find(const float *array, int len, float target) { | |||
| for (int i = 0; i < len; ++i) { | |||
| if (array[i] == target) { | |||
| return i; | |||
| @@ -25,7 +25,7 @@ int Find(float *array, int len, float target) { | |||
| return -1; | |||
| } | |||
| void Unique(float *input, int input_len, float *output0, int *output0_len, int *output1) { | |||
| void Unique(const float *input, int input_len, float *output0, int *output0_len, int *output1) { | |||
| *output0_len = 0; | |||
| for (int i = 0; i < input_len; i++) { | |||
| int idx = Find(output0, *output0_len, input[i]); | |||
| @@ -26,7 +26,7 @@ typedef struct UniqueParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Unique(float *input, int input_len, float *output0, int *output0_len, int *output1); | |||
| void Unique(const float *input, int input_len, float *output0, int *output0_len, int *output1); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -491,8 +491,8 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16 | |||
| /*conv depthwise sliding window perchannel int8 begin*/ | |||
| void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, | |||
| int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, | |||
| int32_t *out_zp, int *out_multiplier, int *left_shift, int *right_shift, int32_t *acc_min, | |||
| int32_t *acc_max) { | |||
| int32_t *out_zp, int *out_multiplier, int *left_shift, const int *right_shift, | |||
| int32_t *acc_min, int32_t *acc_max) { | |||
| int tmp_buffer[C8NUM]; | |||
| for (int i = 0; i < C8NUM; i++) { | |||
| tmp_buffer[i] = 0; | |||
| @@ -94,7 +94,7 @@ void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, in | |||
| unit_size = UP_ROUND(kernel_plane * in_channel, C16NUM); | |||
| } | |||
| #endif | |||
| bool per_channel; | |||
| bool per_channel = false; | |||
| if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) { | |||
| input_sum_offset = tile_n * up_round_oc; | |||
| per_channel = true; | |||
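The `bool per_channel = false;` change above guards against reading an uninitialized local when neither branch of the following quant check assigns it. A small hypothetical sketch of the hazard (not the ConvInt8 code itself):

    #include <cstdio>

    // If 'per_channel' were left uninitialized and the condition below were false,
    // reading it afterwards would be undefined behavior in C and C++.
    bool SelectPerChannel(unsigned quant_flags) {
      bool per_channel = false;            // defined default, as in the patched code
      if (quant_flags & 0x1u) {
        per_channel = true;
      }
      return per_channel;                  // always well-defined
    }

    int main() {
      std::printf("%d\n", SelectPerChannel(0) ? 1 : 0);  // prints 0
      return 0;
    }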
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/int8/depth_to_space_int8.h" | |||
| #include <string.h> | |||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, | |||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, DepthToSpaceParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| int32_t block_size = param->block_size_; | |||
| int32_t in_shape_dim2 = in_shape[2]; | |||
| @@ -22,7 +22,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param, | |||
| void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, DepthToSpaceParameter *param, | |||
| QuantArg *in_quant_arg, QuantArg *out_quant_arg); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -18,7 +18,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| int GatherNdInt8(int8_t *input, int8_t *output, int *in_offset, int area, int count, GatherQuantArg param) { | |||
| int GatherNdInt8(int8_t *input, int8_t *output, const int *in_offset, int area, int count, GatherQuantArg param) { | |||
| double alpha = param.alpha_; | |||
| int z1 = param.zp_in_; | |||
| int z2 = param.zp_out_; | |||
| @@ -23,7 +23,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int GatherNdInt8(int8_t *in_data, int8_t *out_data, int *in_offset, int area, int count, GatherQuantArg param); | |||
| int GatherNdInt8(int8_t *in_data, int8_t *out_data, const int *in_offset, int area, int count, GatherQuantArg param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include "nnacl/quantization/quantize.h" | |||
| #include "nnacl/errorcode.h" | |||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, int *indices, | |||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, const int *indices, | |||
| int indices_element_size, GatherQuantArg para) { | |||
| double alpha = para.alpha_; | |||
| int z1 = para.zp_in_; | |||
| @@ -23,7 +23,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, int *indices, | |||
| int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, const int *indices, | |||
| int indices_element_size, GatherQuantArg para); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -301,7 +301,7 @@ void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, Dat | |||
| } | |||
| // dst: bias + depth*input_zp*weight_zp - input_zp*weight_col_sums | |||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, int *bias, int *dst, | |||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, const int *bias, int *dst, | |||
| DataOrder order) { | |||
| for (int c = 0; c < col; ++c) { | |||
| int sum = 0; | |||
| @@ -35,7 +35,7 @@ void MatMulInt8_16x4_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row | |||
| void RowMajor2Row16x4MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col); | |||
| void RowMajor2Col16x4MajorInt8(int8_t *src, int row, int col, int8_t *dst); | |||
| void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, DataOrder order); | |||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, int *bias, int *dst, | |||
| void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int weight_zp, const int *bias, int *dst, | |||
| DataOrder order); | |||
| /* 8x4 4x8 -> 8x8 */ | |||
| @@ -17,7 +17,6 @@ | |||
| #include "nnacl/int8/slice_int8.h" | |||
| #include <string.h> | |||
| #include "nnacl/quantization/fixed_point.h" | |||
| #include "nnacl/errorcode.h" | |||
| int SliceInt8NoParallel(const int8_t *input, int8_t *output, SliceParameter *param) { | |||
| double input_scale = param->quant_arg_.in_args_.scale_; | |||
| @@ -18,7 +18,6 @@ | |||
| #include <math.h> | |||
| #include "nnacl/quantization/fixed_point.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| #include "nnacl/errorcode.h" | |||
| int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data, | |||
| SoftmaxQuantArg quant_param, SoftmaxParameter *parameter) { | |||
| @@ -16,7 +16,8 @@ | |||
| #include "nnacl/int8/space_to_batch_int8.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_sizes, int *in_shape, int *out_shape) { | |||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, int *in_shape, | |||
| int *out_shape) { | |||
| int out_dim0 = out_shape[0]; | |||
| int out_dim1 = out_shape[1]; | |||
| int out_dim2 = out_shape[2]; | |||
| @@ -45,8 +46,8 @@ void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_size | |||
| } | |||
| } | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, int *padding, int *out_shape, | |||
| int32_t zp) { | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, const int *padding, | |||
| int *out_shape, int32_t zp) { | |||
| int in_h = in_shape[1]; | |||
| int in_w = in_shape[2]; | |||
| int in_c = in_shape[3]; | |||
| @@ -21,9 +21,9 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_sizes, int *in_shape, int *out_shape); | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, int *padding, int *out_shape, | |||
| int32_t zp); | |||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, int *in_shape, int *out_shape); | |||
| void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, const int *padding, | |||
| int *out_shape, int32_t zp); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -17,7 +17,6 @@ | |||
| #include "nnacl/unsqueeze_parameter.h" | |||
| #include "nnacl/int8/unsqueeze_int8.h" | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| int Int8Unsqueeze(int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParameter *para_, size_t data_size, int task_id) { | |||
| float output_scale = para_->quant_arg.out_quant_args_.scale_; | |||
| @@ -19,7 +19,7 @@ | |||
| #include "nnacl/winograd_utils.h" | |||
| #include "nnacl/errorcode.h" | |||
| void Polynomial(float *interval, float *m, int degree) { | |||
| void Polynomial(const float *interval, float *m, int degree) { | |||
| for (int i = 0; i < degree; ++i) { | |||
| float mul = 1; | |||
| for (int j = 0; j < degree; ++j) { | |||
| @@ -30,7 +30,7 @@ void Polynomial(float *interval, float *m, int degree) { | |||
| } | |||
| } | |||
| void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree) { | |||
| void DiagonalPlusMatrix(const float *matrix, float *diagonal_matrix, int degree) { | |||
| int data_num = (degree + 1) * (degree + 1); | |||
| memset(diagonal_matrix, 0, data_num * sizeof(float)); | |||
| for (int i = 0; i < degree; ++i) { | |||
| @@ -41,7 +41,7 @@ void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree) { | |||
| diagonal_matrix[data_num - 1] = 1; | |||
| } | |||
| void ResidueMatrix(float *interval, float *b, int row, int col) { | |||
| void ResidueMatrix(const float *interval, float *b, int row, int col) { | |||
| // row : input unit, col : output_unit | |||
| // result : matrix b | |||
| int len = row * col; | |||
| @@ -87,7 +87,7 @@ int LT(float *poly_array, float *matrix_lt, int n) { | |||
| return NNACL_OK; | |||
| } | |||
| void T(float *poly_array, float *matrix_t, int n) { | |||
| void T(const float *poly_array, float *matrix_t, int n) { | |||
| memset(matrix_t, 0, n * (n + 1) * sizeof(float)); | |||
| for (int i = 0; i < n; ++i) { | |||
| for (int j = 0; j < n + 1; ++j) { | |||
| @@ -148,7 +148,7 @@ void GenerateIntervalArray(float *array, float interval, int degree) { | |||
| } | |||
| } | |||
| void MatrixTranspose(float *matrix, float *trans_matrix, int row, int col) { | |||
| void MatrixTranspose(const float *matrix, float *trans_matrix, int row, int col) { | |||
| for (int i = 0; i < col; ++i) { | |||
| for (int j = 0; j < row; ++j) { | |||
| trans_matrix[i * row + j] = matrix[j * col + i]; | |||
| @@ -255,7 +255,7 @@ void MatrixMultiplyVec(const float32x4_t *matrix_a, const float32x4_t *matrix_b, | |||
| } | |||
| #endif | |||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, float *matrix_gt, | |||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, const float *matrix_gt, | |||
| int oc_block, int input_unit, int kernel_unit, int channel, int batch, bool pack) { | |||
| // original weight format : ohwi | |||
| int oc_block_num = UP_DIV(batch, oc_block); | |||
| @@ -26,21 +26,21 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Polynomial(float *interval, float *m, int degree); | |||
| void Polynomial(const float *interval, float *m, int degree); | |||
| void DiagonalPlusMatrix(float *matrix, float *diagonal_matrix, int degree); | |||
| void DiagonalPlusMatrix(const float *matrix, float *diagonal_matrix, int degree); | |||
| void ResidueMatrix(float *interval, float *b, int row, int col); | |||
| void ResidueMatrix(const float *interval, float *b, int row, int col); | |||
| int LT(float *poly_array, float *matrix_lt, int n); | |||
| void T(float *poly_array, float *matrix_t, int n); | |||
| void T(const float *poly_array, float *matrix_t, int n); | |||
| int B(float *poly_array, float *matrix_b, int in_unit); | |||
| void GenerateIntervalArray(float *array, float interval, int degree); | |||
| void MatrixTranspose(float *matrix, float *trans_matrix, int row, int col); | |||
| void MatrixTranspose(const float *matrix, float *trans_matrix, int row, int col); | |||
| void MatrixMultiply(const float *matrix_a, const float *matrix_b, float *matrix_c, int m, int k, int n); | |||
| @@ -49,7 +49,7 @@ int CookToomFilter(float *matrix_a, float *matrix_at, float *matrix_b, float *ma | |||
| void MatrixMultiplyWinograd(const float *matix_a, const float *matrix_b, float *matrix_c, int m, int k, int n, | |||
| int in_channel, int c4_channel); | |||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, float *matrix_gt, | |||
| int WinogradWeightTransform(const float *weight_data, float *winograd_data, float *matrix_g, const float *matrix_gt, | |||
| int oc_block, int input_unit_, int kernel_unit_, int channel, int batch, bool pack); | |||
| #ifdef ENABLE_ARM | |||
| @@ -36,6 +36,9 @@ float StdPowerImpl(float x, float exponent) { return pow(x, exponent); } | |||
| void Power(const float *input, const float *exponent, float *output, int len, float scale, float shift, | |||
| bool broadcast) { | |||
| if (input == NULL || exponent == NULL) { | |||
| return; | |||
| } | |||
| if (broadcast) { | |||
| if (CheckInteger(*exponent)) { | |||
| for (int i = 0; i < len; ++i) { | |||
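The guard added to Power returns early when either input pointer is NULL instead of dereferencing it inside the loop. A hedged sketch of the same defensive pattern, with a hypothetical function rather than the nnacl one:

    // Early-return guard: refuse to dereference NULL inputs instead of crashing.
    void AddBias(const float *src, const float *bias, float *dst, int len) {
      if (src == nullptr || bias == nullptr || dst == nullptr || len <= 0) {
        return;  // mirrors the early return added to Power()
      }
      for (int i = 0; i < len; ++i) {
        dst[i] = src[i] + bias[i];
      }
    }

Because Power() returns void, an early return is the only option without changing its signature; functions with an int return can report an error code instead.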
| @@ -71,7 +71,7 @@ void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, | |||
| } | |||
| // quantize from float to int8 | |||
| void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) { | |||
| void Quantize(const float *input_data, int length, float scale, int zero_point, int8_t *output_data) { | |||
| for (int i = 0; i < length; ++i) { | |||
| int q = (int)round(input_data[i] / scale + zero_point); | |||
| q = q > SCHAR_MAX ? SCHAR_MAX : q; | |||
| @@ -276,7 +276,7 @@ int32_t QuantizeToInt8(float real_value, float scale, int32_t zp); | |||
| void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, int *maxi); | |||
| // quantize from float to int8 | |||
| void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data); | |||
| void Quantize(const float *input_data, int length, float scale, int zero_point, int8_t *output_data); | |||
| // dequantize from int8 to float | |||
| void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data); | |||
| @@ -17,4 +17,4 @@ | |||
| #include "nnacl/reshape.h" | |||
| #include <string.h> | |||
| void Reshape(void *input_ptr, void *output_ptr, size_t data_size) { memcpy(output_ptr, input_ptr, data_size); } | |||
| void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { memcpy(output_ptr, input_ptr, data_size); } | |||
| @@ -21,7 +21,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Reshape(void *input_ptr, void *output_ptr, size_t data_size); | |||
| void Reshape(const void *input_ptr, void *output_ptr, size_t data_size); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/arithmetic_common.h" | |||
| void ReverseSequence(float *input0, void *input1, float *output, ReverseSequenceParameter *para) { | |||
| void ReverseSequence(float *input0, const void *input1, float *output, ReverseSequenceParameter *para) { | |||
| (void)memcpy(output, input0, para->total_data_size_); | |||
| ComputeStrides(para->input_shape0_, para->input_stride_, para->ndim_); | |||
| ComputeStrides(para->output_shape_, para->output_stride_, para->ndim_); | |||
| @@ -40,7 +40,7 @@ typedef struct ReverseSequenceParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void ReverseSequence(float *input0, void *input1, float *output, ReverseSequenceParameter *para); | |||
| void ReverseSequence(float *input0, const void *input1, float *output, ReverseSequenceParameter *para); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include <stdio.h> | |||
| #include "nnacl/errorcode.h" | |||
| int DoScatterND(float *output_ptr, float *update, int *output_unit_offsets, int unit_size, int num_units) { | |||
| int DoScatterND(float *output_ptr, const float *update, int *output_unit_offsets, int unit_size, int num_units) { | |||
| if (output_ptr == NULL || update == NULL || output_unit_offsets == NULL || unit_size <= 0 || num_units < 0) { | |||
| return NNACL_ERR; | |||
| } | |||
| @@ -26,7 +26,7 @@ typedef struct ScatterNDParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int DoScatterND(float *output_ptr, float *update, int *output_unit_offsets, int unit_size, int num_units); | |||
| int DoScatterND(float *output_ptr, const float *update, int *output_unit_offsets, int unit_size, int num_units); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| int DoSqueeze(float *in_data, float *out_data, size_t data_size) { | |||
| int DoSqueeze(const float *in_data, float *out_data, size_t data_size) { | |||
| if (in_data == NULL || out_data == NULL) { | |||
| return -1; | |||
| } | |||
| @@ -27,7 +27,7 @@ typedef struct SqueezeParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int DoSqueeze(float *input_ptr, float *output_ptr, size_t data_size); | |||
| int DoSqueeze(const float *input_ptr, float *output_ptr, size_t data_size); | |||
| int DoSqueezeInt32(int32_t *in_data, int32_t *out_data, size_t data_size); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -18,7 +18,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim2(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end) { | |||
| const int stride0 = strides[perm[0]]; | |||
| const int stride1 = strides[perm[1]]; | |||
| @@ -33,7 +33,7 @@ void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strid | |||
| } | |||
| } | |||
| void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim3(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end) { | |||
| const int stride0 = strides[perm[0]]; | |||
| const int stride1 = strides[perm[1]]; | |||
| @@ -56,7 +56,7 @@ void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strid | |||
| } | |||
| } | |||
| void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim4(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end) { | |||
| const int stride0 = strides[perm[0]]; | |||
| const int stride1 = strides[perm[1]]; | |||
| @@ -88,7 +88,7 @@ void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strid | |||
| } | |||
| } | |||
| void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim5(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end) { | |||
| const int stride0 = strides[perm[0]]; | |||
| const int stride1 = strides[perm[1]]; | |||
| @@ -127,7 +127,7 @@ void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strid | |||
| } | |||
| } | |||
| void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDims(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end, int dims, int *size, int *position) { | |||
| *(size + dims - 1) = 1; | |||
| for (int i = dims - 1; i > 0; --i) { | |||
| @@ -34,15 +34,15 @@ extern "C" { | |||
| #endif | |||
| int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, | |||
| TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position); | |||
| void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim2(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end); | |||
| void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim3(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end); | |||
| void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim4(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end); | |||
| void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDim5(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end); | |||
| void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, | |||
| void TransposeDims(float *in_data, float *out_data, const int *strides, int *out_strides, int *perm, int *output_shape, | |||
| int h_start, int h_end, int dims, int *size, int *position); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -17,7 +17,7 @@ | |||
| #include "nnacl/unstack.h" | |||
| #include <string.h> | |||
| void Unistack(float *input, float **output, UnstackParameter *para) { | |||
| void Unistack(const float *input, float **output, UnstackParameter *para) { | |||
| for (int j = 0; j < para->num_; j++) { | |||
| float *out_addr = output[j]; | |||
| int out_offset = 0; | |||
| @@ -31,7 +31,7 @@ typedef struct UnstackParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Unistack(float *input, float **output, UnstackParameter *para); | |||
| void Unistack(const float *input, float **output, UnstackParameter *para); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -15,7 +15,7 @@ | |||
| */ | |||
| #include "nnacl/where.h" | |||
| void Where(bool *input, float *input1, float *input2, float *output, WhereParameter *where_param_, int task_id) { | |||
| void Where(bool *input, float *input1, const float *input2, float *output, WhereParameter *where_param_, int task_id) { | |||
| for (int i = task_id; i < where_param_->number_; i += where_param_->op_parameter_.thread_num_) { | |||
| if (input[where_param_->num_ > 1 ? i : 0] == true) { | |||
| output[i] = input1[where_param_->num1_ > 1 ? i : 0]; | |||
| @@ -30,7 +30,7 @@ typedef struct WhereParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Where(bool *input, float *input1, float *input2, float *output, WhereParameter *where_param_, int task_id); | |||
| void Where(bool *input, float *input1, const float *input2, float *output, WhereParameter *where_param_, int task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -75,8 +75,8 @@ static OutputTransFunc OutputTransFuncRelu6List8[] = {NULL, | |||
| OutputTransform8x6Relu6Unit, | |||
| OutputTransform8x7Relu6Unit}; | |||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, float *matrix_bt, int src_step, | |||
| int dst_step, int in_unit) { | |||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, const float *matrix_bt, | |||
| int src_step, int dst_step, int in_unit) { | |||
| int len = in_unit * in_unit; | |||
| if (len > MAX_LEN) return; | |||
| #ifdef ENABLE_ARM | |||
| @@ -113,7 +113,7 @@ void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *ma | |||
| } | |||
| void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, float *matrix_a, | |||
| float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit) { | |||
| const float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit) { | |||
| int src_len = in_unit * in_unit; | |||
| if (src_len > MAX_LEN) { | |||
| return; | |||
| @@ -33,11 +33,11 @@ typedef void (*InputTransFunc)(const float *src_data, float *dst_data, int src_s | |||
| typedef void (*OutputTransFunc)(const float *src_data, float *dst_data, const float *bias_data, int src_step, | |||
| int dst_step, int out_c, int r_w, int r_h, int r_c); | |||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, float *matrix_bt, int src_step, | |||
| int dst_step, int in_unit); | |||
| void GeneralInputTransformUnit(const float *src_data, float *dst_data, float *matrix_b, const float *matrix_bt, | |||
| int src_step, int dst_step, int in_unit); | |||
| void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, float *matrix_a, | |||
| float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit); | |||
| const float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit); | |||
| #define Load16Data \ | |||
| src[0] = vld1q_f32(src_data + 0 * src_step); \ | |||
| @@ -85,7 +85,7 @@ std::string RealPath(const char *path) { | |||
| return res; | |||
| } | |||
| int CompareOutputData(float *output_data, size_t output_size, float *correct_data, size_t data_size) { | |||
| int CompareOutputData(const float *output_data, size_t output_size, float *correct_data, size_t data_size) { | |||
| if (output_size != data_size) { | |||
| printf("compare failed, output_size %zu isn't equal to data_size %zu.\n", output_size, data_size); | |||
| return 0; | |||
| @@ -58,7 +58,7 @@ inline int WriteToBin(const std::string &file_path, void *data, size_t size) { | |||
| return 0; | |||
| } | |||
| int CompareOutputData(float *output_data, size_t output_num, float *correct_data, size_t data_size); | |||
| int CompareOutputData(const float *output_data, size_t output_num, float *correct_data, size_t data_size); | |||
| int CompareOutput(float *output_data, size_t output_num, std::string file_path); | |||
| std::string GetAndroidPackageName(); | |||
| @@ -21,7 +21,7 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| static float CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) { | |||
| static float CompareOutputRelativeData(const float *output_data, float *correct_data, int data_size) { | |||
| float error = 0; | |||
| // relative error | |||
| @@ -45,6 +45,7 @@ int BatchNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> & | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new FusedBatchNormT failed"; | |||
| delete this->primitive_; | |||
| this->primitive_ = nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | |||
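Resetting `this->primitive_` to nullptr right after `delete` keeps a later destructor or cleanup path from deleting the same pointer twice or using it while dangling. A minimal hypothetical sketch of why the reset matters:

    struct PrimitiveT {};  // hypothetical stand-in for the schema primitive type

    class Holder {
     public:
      ~Holder() { delete primitive_; }   // destructor releases whatever is left
      void OnAttrAllocFailure() {
        delete primitive_;
        primitive_ = nullptr;            // without the reset, the destructor would
      }                                  // delete the same pointer a second time
     private:
      PrimitiveT *primitive_ = new PrimitiveT();
    };

    int main() {
      Holder h;
      h.OnAttrAllocFailure();            // safe: 'delete nullptr' in ~Holder is a no-op
      return 0;
    }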
| @@ -46,6 +46,10 @@ int FusedBatchNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodeP | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::FusedBatchNormT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new attr value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | |||
| attr->momentum = GetValue<float>(prim.GetAttr("momentum")); | |||
| this->primitive_->value.value = attr; | |||
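`new (std::nothrow)` reports allocation failure by returning nullptr rather than throwing, so the added check is what makes the failure observable before `attr` is dereferenced. A hedged sketch with a hypothetical attr type standing in for schema::FusedBatchNormT:

    #include <new>
    #include <iostream>

    struct BatchNormAttr { float epsilon = 1e-5f; float momentum = 0.9f; };  // stand-in

    int BuildAttr(BatchNormAttr **out) {
      auto attr = new (std::nothrow) BatchNormAttr();
      if (attr == nullptr) {                       // nothrow new yields nullptr, not bad_alloc
        std::cerr << "new attr value failed\n";    // mirrors the added MS_LOG(ERROR)
        return -1;
      }
      *out = attr;
      return 0;
    }

    int main() {
      BatchNormAttr *attr = nullptr;
      int ret = BuildAttr(&attr);
      delete attr;                                 // safe even if attr stayed nullptr
      return ret;
    }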
| @@ -46,6 +46,7 @@ int InstanceNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new InstanceNormT failed"; | |||
| delete this->primitive_; | |||
| this->primitive_ = nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| attr->epsilon = GetValue<float>(prim.GetAttr("epsilon")); | |||
| @@ -33,6 +33,10 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi | |||
| auto normalized_shape = param->GetNormalizedShape(); | |||
| layer_norm_parameter->normalized_dims_ = normalized_shape.size(); | |||
| layer_norm_parameter->normalized_shape_ = reinterpret_cast<int *>(malloc(normalized_shape.size() * sizeof(int))); | |||
| if (layer_norm_parameter->normalized_shape_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc layer_norm_parameter->normalized_shape_ failed."; | |||
| return nullptr; | |||
| } | |||
| for (size_t i = 0; i < normalized_shape.size(); i++) { | |||
| layer_norm_parameter->normalized_shape_[i] = normalized_shape[i]; | |||
| } | |||
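malloc can return NULL, and before this hunk the normalized shape was written unconditionally through the returned pointer. A hedged sketch of the guarded version, with hypothetical names:

    #include <cstdio>
    #include <cstdlib>

    // Returns a heap copy of 'shape', or nullptr on allocation failure
    // instead of writing through a NULL pointer.
    int *CopyShape(const int *shape, size_t dims) {
      int *out = static_cast<int *>(std::malloc(dims * sizeof(int)));
      if (out == nullptr) {
        std::fprintf(stderr, "malloc normalized_shape_ failed.\n");
        return nullptr;                  // caller must treat this as an error
      }
      for (size_t i = 0; i < dims; ++i) {
        out[i] = shape[i];
      }
      return out;
    }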
| @@ -54,11 +54,18 @@ int Resize::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp | |||
| } | |||
| if (this->primitive_->value.value == nullptr) { | |||
| auto attr = new (std::nothrow) schema::ResizeT(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new attr value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| if (prim.instance_name() == "ResizeNearestNeighbor") { | |||
| attr->method = schema::ResizeMethod_NEAREST; | |||
| } else if (prim.instance_name() == "ResizeBilinear") { | |||
| attr->method = schema::ResizeMethod_LINEAR; | |||
| } else { | |||
| if (attr != nullptr) { | |||
| delete attr; | |||
| } | |||
| MS_LOG(ERROR) << "wrong resize type"; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -69,6 +76,9 @@ int Resize::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp | |||
| this->primitive_->value.value = attr; | |||
| if (this->primitive_->value.value == nullptr) { | |||
| if (attr != nullptr) { | |||
| delete attr; | |||
| } | |||
| MS_LOG(ERROR) << "new primitiveT value failed"; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -40,7 +40,7 @@ class BatchToSpaceBaseCPUKernel : public LiteKernel { | |||
| bool IsNoCrop() const { return no_crop_; } | |||
| private: | |||
| bool no_crop_; | |||
| bool no_crop_ = false; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -43,9 +43,9 @@ class ConcatBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| int axis_; | |||
| const InnerContext *ctx_; | |||
| int thread_count_; | |||
| int axis_ = 0; | |||
| const InnerContext *ctx_ = nullptr; | |||
| int thread_count_ = 1; | |||
| ConcatParameter *concat_param_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
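Several kernel headers in this patch give their raw-pointer and scalar members defaults at the point of declaration. With C++11 in-class initializers, every constructor that does not assign the member explicitly still gets the default, so nothing reads an indeterminate pointer or int if Init()/ReSize() is skipped or fails early. A small hypothetical sketch of the idiom:

    #include <cassert>

    struct InnerContext;       // opaque forward declarations, as in the real headers
    struct ConcatParameter;

    class ConcatLikeKernel {   // hypothetical kernel, not the ConcatBaseCPUKernel itself
     public:
      ConcatLikeKernel() = default;
      bool Ready() const { return concat_param_ != nullptr; }

     protected:
      int axis_ = 0;                        // was: int axis_;            (indeterminate)
      const InnerContext *ctx_ = nullptr;   // was: const InnerContext *ctx_;
      int thread_count_ = 1;                // was: int thread_count_;
      ConcatParameter *concat_param_ = nullptr;
    };

    int main() {
      ConcatLikeKernel k;
      assert(!k.Ready());                   // well-defined even before any setup
      return 0;
    }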
| @@ -60,11 +60,11 @@ class ConvolutionBaseCPUKernel : public LiteKernel { | |||
| protected: | |||
| void *bias_data_ = nullptr; | |||
| const InnerContext *ctx_; | |||
| ConvParameter *conv_param_; | |||
| ConvQuantArg *conv_quant_arg_; | |||
| int tile_num_; | |||
| int thread_count_; | |||
| const InnerContext *ctx_ = nullptr; | |||
| ConvParameter *conv_param_ = nullptr; | |||
| ConvQuantArg *conv_quant_arg_ = nullptr; | |||
| int tile_num_ = 0; | |||
| int thread_count_ = 1; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -38,8 +38,8 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel { | |||
| int Run() override; | |||
| protected: | |||
| float *input_boxes; | |||
| float *input_scores; | |||
| float *input_boxes = nullptr; | |||
| float *input_scores = nullptr; | |||
| virtual int GetInputData() = 0; | |||
| }; | |||
| @@ -40,10 +40,10 @@ class FullconnectionBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| MatMulParameter *fc_param_; | |||
| int thread_stride_; | |||
| const InnerContext *ctx_; | |||
| int thread_count_; | |||
| MatMulParameter *fc_param_ = nullptr; | |||
| int thread_stride_ = 0; | |||
| const InnerContext *ctx_ = nullptr; | |||
| int thread_count_ = 1; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -40,10 +40,10 @@ class MatmulBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| MatMulParameter *params_; | |||
| int thread_stride_; | |||
| const InnerContext *ctx_; | |||
| int thread_count_; | |||
| MatMulParameter *params_ = nullptr; | |||
| int thread_stride_ = 0; | |||
| const InnerContext *ctx_ = nullptr; | |||
| int thread_count_ = 0; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -71,6 +71,7 @@ void PoolingBaseCPUKernel::FreeQuantParam() { | |||
| } | |||
| } | |||
| free(pooling_quant_arg_); | |||
| pooling_quant_arg_ = nullptr; | |||
| } | |||
| } | |||
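As with the delete/nullptr changes above, resetting `pooling_quant_arg_` after `free()` keeps a repeated call to FreeQuantParam from releasing the same block twice. A hedged sketch with hypothetical stand-in types:

    #include <cstdlib>

    struct QuantArg { float scale_; int zp_; };  // stand-in, not the nnacl QuantArg

    struct PoolingQuantHolder {
      QuantArg **pooling_quant_arg_ = nullptr;

      void FreeQuantParam() {
        if (pooling_quant_arg_ != nullptr) {
          std::free(pooling_quant_arg_);
          pooling_quant_arg_ = nullptr;    // a repeated call now falls through harmlessly
        }
      }
    };

    int main() {
      PoolingQuantHolder h;
      h.pooling_quant_arg_ = static_cast<QuantArg **>(std::malloc(4 * sizeof(QuantArg *)));
      h.FreeQuantParam();
      h.FreeQuantParam();                  // safe: second call sees nullptr
      return 0;
    }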
| @@ -37,12 +37,12 @@ class ResizeBaseCPUKernel : public LiteKernel { | |||
| int ReSize() override { return 0; }; | |||
| protected: | |||
| int method_; | |||
| int64_t new_height_; | |||
| int64_t new_width_; | |||
| bool align_corners_; | |||
| bool preserve_aspect_ratio; | |||
| bool const_shape_; | |||
| int method_ = 0; | |||
| int64_t new_height_ = 0; | |||
| int64_t new_width_ = 0; | |||
| bool align_corners_ = false; | |||
| bool preserve_aspect_ratio = false; | |||
| bool const_shape_ = false; | |||
| private: | |||
| int CheckParameters(); | |||
| @@ -39,12 +39,12 @@ class SplitBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| const InnerContext *ctx_; | |||
| int thread_count_; | |||
| int thread_n_stride_; | |||
| int thread_n_num_; | |||
| int num_unit_; | |||
| SplitParameter *param; | |||
| const InnerContext *ctx_ = nullptr; | |||
| int thread_count_ = 1; | |||
| int thread_n_stride_ = 0; | |||
| int thread_n_num_ = 0; | |||
| int num_unit_ = 0; | |||
| SplitParameter *param = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -43,12 +43,12 @@ using mindspore::schema::PrimitiveType_Sub; | |||
| namespace mindspore::kernel { | |||
| class ArithmeticCPUKernel : public LiteKernel { | |||
| typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size); | |||
| typedef int (*ArithmeticOptRun)(float *input0, float *input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| typedef int (*ArithmeticIntRun)(int *input0, int *input1, int *output, int element_size); | |||
| typedef int (*ArithmeticOptIntRun)(int *input0, int *input1, int *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| typedef int (*ArithmeticRun)(const float *input0, const float *input1, float *output, const int element_size); | |||
| typedef int (*ArithmeticOptRun)(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| typedef int (*ArithmeticIntRun)(const int *input0, const int *input1, int *output, const int element_size); | |||
| typedef int (*ArithmeticOptIntRun)(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| public: | |||
| ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
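These typedefs are the element types of the kernel's dispatch function pointers, so they have to change in lockstep with the nnacl signatures updated earlier: in C++ a pointer to a function taking `float *` does not convert to a pointer to one taking `const float *`. A minimal hypothetical illustration of the mismatch:

    // Hypothetical illustration, not the kernel code: the typedef used for the
    // dispatch pointer must agree exactly with the nnacl function signature.
    typedef int (*ArithmeticRun)(const float *in0, const float *in1, float *out, int n);

    int ElementAdd(const float *in0, const float *in1, float *out, int n) {
      for (int i = 0; i < n; ++i) {
        out[i] = in0[i] + in1[i];
      }
      return 0;
    }

    // int ElementAddOld(float *in0, float *in1, float *out, int n);
    // ArithmeticRun fn_old = ElementAddOld;   // ill-formed: parameter types differ

    int main() {
      float a[2] = {1.0f, 2.0f}, b[2] = {3.0f, 4.0f}, c[2] = {0.0f, 0.0f};
      ArithmeticRun fn = ElementAdd;           // legal: types match exactly
      return fn(a, b, c, 2);
    }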
| @@ -40,8 +40,8 @@ class ConstantOfShapeCPUKernel : public LiteKernel { | |||
| int DoExecute(int task_id); | |||
| private: | |||
| ConstantOfShapeParameter *param_; | |||
| void *out_ptr_; | |||
| ConstantOfShapeParameter *param_ = nullptr; | |||
| void *out_ptr_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -198,7 +198,12 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Tensor | |||
| auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); | |||
| int out_channel = inputs.at(kWeightIndex)->Batch(); | |||
| int new_in_channel = inputs.at(kWeightIndex)->Channel(); | |||
| int new_out_channel = out_channel / group; | |||
| int new_out_channel = 0; | |||
| if (group == 0) { | |||
| MS_LOG(ERROR) << "Divisor 'group' cannot be 0."; | |||
| } else { | |||
| new_out_channel = out_channel / group; | |||
| } | |||
| int kernel_h = conv_param->kernel_h_; | |||
| int kernel_w = conv_param->kernel_w_; | |||
| int input_num = inputs.size(); | |||
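The added branch avoids an integer division by zero when `group` is 0, though as written it only logs and continues with `new_out_channel = 0`; whether the creator should bail out instead is a design choice left to the author. A hedged sketch of the guarded computation as a hypothetical helper:

    #include <iostream>

    // Returns out_channel / group, or 0 after logging when group is 0,
    // mirroring the guard added to CpuGroupConvFp32KernelCreator.
    int SplitOutChannel(int out_channel, int group) {
      if (group == 0) {
        std::cerr << "Divisor 'group' cannot be 0.\n";
        return 0;
      }
      return out_channel / group;
    }

    int main() {
      std::cout << SplitOutChannel(64, 4) << "\n";  // 16
      std::cout << SplitOutChannel(64, 0) << "\n";  // logs, then prints 0
      return 0;
    }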
| @@ -52,11 +52,11 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | |||
| private: | |||
| MatMulParameter *matmul_param_ = nullptr; | |||
| int input_plane_; | |||
| int kernel_plane_; | |||
| int output_plane_; | |||
| int thread_count_; | |||
| int thread_stride_; | |||
| int input_plane_ = 0; | |||
| int kernel_plane_ = 0; | |||
| int output_plane_ = 0; | |||
| int thread_count_ = 1; | |||
| int thread_stride_ = 0; | |||
| float *weight_ptr_ = nullptr; | |||
| float *pack_input_ = nullptr; | |||
| float *pack_output_ = nullptr; | |||
| @@ -58,15 +58,15 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | |||
| void FreeRunBuf(); | |||
| private: | |||
| DeConvParam *deconv_param_; | |||
| DeConvParam *deconv_param_ = nullptr; | |||
| float *nhwc_input_ = nullptr; | |||
| float *nhwc_output_ = nullptr; | |||
| float *nc4hw4_output_ = nullptr; | |||
| float *tile_input_ = nullptr; | |||
| float *tile_output_ = nullptr; | |||
| std::mutex lock_; | |||
| int thread_num_hw_; | |||
| int thread_stride_hw_; | |||
| int thread_num_hw_ = 0; | |||
| int thread_stride_hw_ = 0; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_WINOGRAD_H_ | |||
| @@ -36,13 +36,13 @@ class EluCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| const lite::InnerContext *ctx_; | |||
| int thread_count_; | |||
| EluParameter *elu_parameter_; | |||
| const lite::InnerContext *ctx_ = nullptr; | |||
| int thread_count_ = 1; | |||
| EluParameter *elu_parameter_ = nullptr; | |||
| private: | |||
| float *input_addr; | |||
| float *output_addr; | |||
| float *input_addr = nullptr; | |||
| float *output_addr = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -43,14 +43,14 @@ class EmbeddingLookupCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| const lite::InnerContext *ctx_; | |||
| int thread_count_; | |||
| EmbeddingLookupParameter *embedding_lookup_parameter_; | |||
| const lite::InnerContext *ctx_ = nullptr; | |||
| int thread_count_ = 1; | |||
| EmbeddingLookupParameter *embedding_lookup_parameter_ = nullptr; | |||
| private: | |||
| float *input_addr_; | |||
| float *output_addr_; | |||
| int *ids_addr_; | |||
| float *input_addr_ = nullptr; | |||
| float *output_addr_ = nullptr; | |||
| int *ids_addr_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -36,13 +36,13 @@ class ExpCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| const lite::InnerContext *ctx_; | |||
| int thread_count_; | |||
| ExpParameter *exp_parameter_; | |||
| const lite::InnerContext *ctx_ = nullptr; | |||
| int thread_count_ = 1; | |||
| ExpParameter *exp_parameter_ = nullptr; | |||
| private: | |||
| float *input_addr_; | |||
| float *output_addr_; | |||
| float *input_addr_ = nullptr; | |||
| float *output_addr_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -64,6 +64,10 @@ int FullconnectionCPUKernel::ReSize() { | |||
| if (in_tensors_.size() == 3) { | |||
| int col_tmp = is_vector_input_ ? fc_param_->col_ : fc_param_->col_8_; | |||
| bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float))); | |||
| if (bias_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc bias_ptr_ failed"; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(bias_ptr_, in_tensors_[2]->MutableData(), fc_param_->col_ * sizeof(float)); | |||
| } | |||