From: @sunsuodong Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tong tags/v1.1.0
| @@ -33,7 +33,7 @@ void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinMaxTopk1(const void *input, void *output, const int *shape, const ArgMinMaxParameter *param) { | |||||
| int pre_axis_count = 1; | int pre_axis_count = 1; | ||||
| int axis_count = 1; | int axis_count = 1; | ||||
| int after_axis_count = 1; | int after_axis_count = 1; | ||||
| @@ -48,7 +48,7 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->get_max_) { | if (param->get_max_) { | ||||
| switch (param->axis_) { | switch (param->axis_) { | ||||
| case 0: | case 0: | ||||
| @@ -82,7 +82,7 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->topk_ == 1) { | if (param->topk_ == 1) { | ||||
| ArgMinMaxTopk1(input, output, in_shape, param); | ArgMinMaxTopk1(input, output, in_shape, param); | ||||
| return; | return; | ||||
| @@ -21,7 +21,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -17,8 +17,8 @@ | |||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| #include "nnacl/nnacl_utils.h" | #include "nnacl/nnacl_utils.h" | ||||
| void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||||
| int *outStrides, int *multiple) { | |||||
| void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | |||||
| const int *inStrides, const int *outStrides, const int *multiple) { | |||||
| int srcDimSize = inShape[dim]; | int srcDimSize = inShape[dim]; | ||||
| if (dim == ndim - 1) { | if (dim == ndim - 1) { | ||||
| for (int i = 0; i < multiple[dim]; i++) { | for (int i = 0; i < multiple[dim]; i++) { | ||||
| @@ -35,8 +35,8 @@ void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int * | |||||
| } | } | ||||
| } | } | ||||
| void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||||
| int *outStrides, int *multiple) { | |||||
| void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape, | |||||
| const int *inStrides, const int *outStrides, const int *multiple) { | |||||
| int srcDimSize = inShape[dim]; | int srcDimSize = inShape[dim]; | ||||
| if (dim == ndim - 1) { | if (dim == ndim - 1) { | ||||
| for (int i = 0; i < multiple[dim]; i++) { | for (int i = 0; i < multiple[dim]; i++) { | ||||
| @@ -74,7 +74,8 @@ void CalcMultiplesAndStrides(ArithmeticParameter *param) { | |||||
| ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_); | ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_); | ||||
| } | } | ||||
| void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param) { | |||||
| void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, | |||||
| ArithmeticParameter *param) { | |||||
| CalcMultiplesAndStrides(param); | CalcMultiplesAndStrides(param); | ||||
| TileOneDimension(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | TileOneDimension(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | ||||
| param->multiples0_); | param->multiples0_); | ||||
| @@ -82,7 +83,7 @@ void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_d | |||||
| param->multiples1_); | param->multiples1_); | ||||
| } | } | ||||
| void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||||
| void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||||
| ArithmeticParameter *param) { | ArithmeticParameter *param) { | ||||
| CalcMultiplesAndStrides(param); | CalcMultiplesAndStrides(param); | ||||
| TileOneDimensionUint8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | TileOneDimensionUint8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | ||||
| @@ -91,7 +92,7 @@ void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, ui | |||||
| param->multiples1_); | param->multiples1_); | ||||
| } | } | ||||
| void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||||
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||||
| ArithmeticParameter *param) { | ArithmeticParameter *param) { | ||||
| CalcMultiplesAndStrides(param); | CalcMultiplesAndStrides(param); | ||||
| TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_, | TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_, | ||||
| @@ -47,18 +47,19 @@ typedef struct ArithmeticParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||||
| int *outStrides, int *multiple); | |||||
| void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | |||||
| const int *inStrides, const int *outStrides, const int *multiple); | |||||
| void ComputeStrides(const int *shape, int *strides, const int ndim); | void ComputeStrides(const int *shape, int *strides, const int ndim); | ||||
| void CalcMultiplesAndStrides(ArithmeticParameter *param); | void CalcMultiplesAndStrides(ArithmeticParameter *param); | ||||
| void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||||
| int *outStrides, int *multiple); | |||||
| void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param); | |||||
| void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||||
| void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape, | |||||
| const int *inStrides, const int *outStrides, const int *multiple); | |||||
| void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, | |||||
| ArithmeticParameter *param); | |||||
| void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||||
| ArithmeticParameter *param); | ArithmeticParameter *param); | ||||
| void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||||
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||||
| ArithmeticParameter *param); | ArithmeticParameter *param); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -16,6 +16,6 @@ | |||||
| #include "nnacl/flatten.h" | #include "nnacl/flatten.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void Flatten(const void *input, void *output, FlattenParameter *flatten_param) { | |||||
| void Flatten(const void *input, void *output, const FlattenParameter *flatten_param) { | |||||
| memcpy(output, input, flatten_param->size); | memcpy(output, input, flatten_param->size); | ||||
| } | } | ||||
| @@ -25,7 +25,7 @@ typedef struct FlattenParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Flatten(const void *input, void *output, FlattenParameter *flatten_param); | |||||
| void Flatten(const void *input, void *output, const FlattenParameter *flatten_param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -43,7 +43,7 @@ int ArgCompareDescFp32(const void *a, const void *b) { | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | ||||
| for (int j = 0; j < in_shape[0]; ++j) { | for (int j = 0; j < in_shape[0]; ++j) { | ||||
| size_t offset = param->in_strides_[0] * j + i; | size_t offset = param->in_strides_[0] * j + i; | ||||
| @@ -58,7 +58,7 @@ void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | ||||
| for (int j = 0; j < in_shape[0]; ++j) { | for (int j = 0; j < in_shape[0]; ++j) { | ||||
| size_t offset = param->in_strides_[0] * j + i; | size_t offset = param->in_strides_[0] * j + i; | ||||
| @@ -73,7 +73,7 @@ void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | ||||
| for (int j = 0; j < in_shape[0]; ++j) { | for (int j = 0; j < in_shape[0]; ++j) { | ||||
| size_t offset = param->in_strides_[0] * j + i; | size_t offset = param->in_strides_[0] * j + i; | ||||
| @@ -88,7 +88,7 @@ void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | ||||
| for (int j = 0; j < in_shape[0]; ++j) { | for (int j = 0; j < in_shape[0]; ++j) { | ||||
| size_t offset = param->in_strides_[0] * j + i; | size_t offset = param->in_strides_[0] * j + i; | ||||
| @@ -103,7 +103,7 @@ void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| size_t in_dim0_offset = i * param->in_strides_[0]; | size_t in_dim0_offset = i * param->in_strides_[0]; | ||||
| @@ -123,7 +123,7 @@ void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| size_t in_dim0_offset = i * param->in_strides_[0]; | size_t in_dim0_offset = i * param->in_strides_[0]; | ||||
| @@ -143,7 +143,7 @@ void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| size_t in_dim0_offset = i * param->in_strides_[0]; | size_t in_dim0_offset = i * param->in_strides_[0]; | ||||
| @@ -163,7 +163,7 @@ void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| size_t in_dim0_offset = i * param->in_strides_[0]; | size_t in_dim0_offset = i * param->in_strides_[0]; | ||||
| @@ -183,7 +183,7 @@ void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| @@ -208,7 +208,7 @@ void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| @@ -233,7 +233,7 @@ void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| @@ -258,7 +258,7 @@ void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| for (int i = 0; i < in_shape[0]; ++i) { | for (int i = 0; i < in_shape[0]; ++i) { | ||||
| @@ -283,7 +283,7 @@ void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| int in_shape3 = in_shape[3]; | int in_shape3 = in_shape[3]; | ||||
| @@ -311,7 +311,7 @@ void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| int in_shape3 = in_shape[3]; | int in_shape3 = in_shape[3]; | ||||
| @@ -339,7 +339,7 @@ void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| int in_shape3 = in_shape[3]; | int in_shape3 = in_shape[3]; | ||||
| @@ -367,7 +367,7 @@ void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| int in_shape1 = in_shape[1]; | int in_shape1 = in_shape[1]; | ||||
| int in_shape2 = in_shape[2]; | int in_shape2 = in_shape[2]; | ||||
| int in_shape3 = in_shape[3]; | int in_shape3 = in_shape[3]; | ||||
| @@ -395,7 +395,7 @@ void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMaxDim0OutValue(input, output, in_shape, param); | ArgMaxDim0OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -403,7 +403,7 @@ void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMinDim0OutValue(input, output, in_shape, param); | ArgMinDim0OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -411,7 +411,7 @@ void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMaxDim1OutValue(input, output, in_shape, param); | ArgMaxDim1OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -419,7 +419,7 @@ void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMinDim1OutValue(input, output, in_shape, param); | ArgMinDim1OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -427,7 +427,7 @@ void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMaxDim2OutValue(input, output, in_shape, param); | ArgMaxDim2OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -435,7 +435,7 @@ void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMinDim2OutValue(input, output, in_shape, param); | ArgMinDim2OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -443,7 +443,7 @@ void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMaxDim3OutValue(input, output, in_shape, param); | ArgMaxDim3OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -451,7 +451,7 @@ void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||||
| if (param->out_value_) { | if (param->out_value_) { | ||||
| ArgMinDim3OutValue(input, output, in_shape, param); | ArgMinDim3OutValue(input, output, in_shape, param); | ||||
| } else { | } else { | ||||
| @@ -459,7 +459,7 @@ void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMa | |||||
| } | } | ||||
| } | } | ||||
| void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| int after_axis_count) { | int after_axis_count) { | ||||
| bool out_value = param->out_value_; | bool out_value = param->out_value_; | ||||
| for (int i = 0; i < pre_axis_count; ++i) { | for (int i = 0; i < pre_axis_count; ++i) { | ||||
| @@ -480,7 +480,7 @@ void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pr | |||||
| } | } | ||||
| } | } | ||||
| void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| int after_axis_count) { | int after_axis_count) { | ||||
| bool out_value = param->out_value_; | bool out_value = param->out_value_; | ||||
| for (int i = 0; i < pre_axis_count; ++i) { | for (int i = 0; i < pre_axis_count; ++i) { | ||||
| @@ -21,18 +21,18 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| int after_axis_count); | int after_axis_count); | ||||
| void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||||
| int after_axis_count); | int after_axis_count); | ||||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -615,8 +615,8 @@ int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param) { | |||||
| int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementMul(tile_input0, tile_input1, output, element_size); | return ElementMul(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| @@ -690,21 +690,21 @@ int ElementAddInt(const int *input0, const int *input1, int *output, const int e | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ElementAddInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||||
| int ElementAddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int element_size) { | |||||
| for (int i = 0; i < element_size; i++) { | for (int i = 0; i < element_size; i++) { | ||||
| output[i] = input0[i] + input1[i]; | output[i] = input0[i] + input1[i]; | ||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param) { | |||||
| int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementAdd(tile_input0, tile_input1, output, element_size); | return ElementAdd(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, | |||||
| int8_t *output, int element_size, ArithmeticParameter *param) { | |||||
| TileDimensionsInt8(input0, input1, tile_input0, tile_input1, param); | TileDimensionsInt8(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementAddInt8(tile_input0, tile_input1, output, element_size); | return ElementAddInt8(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| @@ -763,8 +763,8 @@ int ElementSubRelu6(const float *input0, const float *input1, float *output, con | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param) { | |||||
| int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementSub(tile_input0, tile_input1, output, element_size); | return ElementSub(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| @@ -791,8 +791,8 @@ int ElementDivRelu6(const float *input0, const float *input1, float *output, con | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param) { | |||||
| int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementDiv(tile_input0, tile_input1, output, element_size); | return ElementDiv(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| @@ -811,7 +811,7 @@ int ElementFloorModInt(const int *input0, const int *input1, int *output, const | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementFloorMod(tile_input0, tile_input1, output, element_size); | return ElementFloorMod(tile_input0, tile_input1, output, element_size); | ||||
| @@ -831,7 +831,7 @@ int ElementFloorDivInt(const int *input0, const int *input1, int *output, const | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementFloorDiv(tile_input0, tile_input1, output, element_size); | return ElementFloorDiv(tile_input0, tile_input1, output, element_size); | ||||
| @@ -862,13 +862,13 @@ int ElementSquaredDifference(const float *input0, const float *input1, float *ou | |||||
| return ElementMul(output, output, output, element_size); | return ElementMul(output, output, output, element_size); | ||||
| } | } | ||||
| int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||||
| float *output, int element_size, ArithmeticParameter *param) { | |||||
| BroadcastSub(input0, input1, tile_input0, tile_input1, output, element_size, param); | BroadcastSub(input0, input1, tile_input0, tile_input1, output, element_size, param); | ||||
| return ElementMul(output, output, output, element_size); | return ElementMul(output, output, output, element_size); | ||||
| } | } | ||||
| int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementLogicalAnd(tile_input0, tile_input1, output, element_size); | return ElementLogicalAnd(tile_input0, tile_input1, output, element_size); | ||||
| @@ -894,7 +894,7 @@ int ElementLogicalOr(const float *input0, const float *input1, float *output, co | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementLogicalOr(tile_input0, tile_input1, output, element_size); | return ElementLogicalOr(tile_input0, tile_input1, output, element_size); | ||||
| @@ -916,7 +916,7 @@ int ElementMaximum(const float *input0, const float *input1, float *output, cons | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementMaximum(tile_input0, tile_input1, output, element_size); | return ElementMaximum(tile_input0, tile_input1, output, element_size); | ||||
| @@ -938,7 +938,7 @@ int ElementMinimum(const float *input0, const float *input1, float *output, cons | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementMinimum(tile_input0, tile_input1, output, element_size); | return ElementMinimum(tile_input0, tile_input1, output, element_size); | ||||
| @@ -970,7 +970,7 @@ int ElementNotEqual(const float *input0, const float *input1, float *output, con | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementNotEqual(tile_input0, tile_input1, output, element_size); | return ElementNotEqual(tile_input0, tile_input1, output, element_size); | ||||
| @@ -1002,7 +1002,7 @@ int ElementEqual(const float *input0, const float *input1, float *output, const | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementEqual(tile_input0, tile_input1, output, element_size); | return ElementEqual(tile_input0, tile_input1, output, element_size); | ||||
| @@ -1026,8 +1026,8 @@ int ElementLess(const float *input0, const float *input1, float *output, const i | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param) { | |||||
| int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementLess(tile_input0, tile_input1, output, element_size); | return ElementLess(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| @@ -1050,7 +1050,7 @@ int ElementLessEqual(const float *input0, const float *input1, float *output, co | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementLessEqual(tile_input0, tile_input1, output, element_size); | return ElementLessEqual(tile_input0, tile_input1, output, element_size); | ||||
| @@ -1074,7 +1074,7 @@ int ElementGreater(const float *input0, const float *input1, float *output, cons | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | int element_size, ArithmeticParameter *param) { | ||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementGreater(tile_input0, tile_input1, output, element_size); | return ElementGreater(tile_input0, tile_input1, output, element_size); | ||||
| @@ -1098,8 +1098,8 @@ int ElementGreaterEqual(const float *input0, const float *input1, float *output, | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param) { | |||||
| int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||||
| float *output, int element_size, ArithmeticParameter *param) { | |||||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | TileDimensions(input0, input1, tile_input0, tile_input1, param); | ||||
| return ElementGreaterEqual(tile_input0, tile_input1, output, element_size); | return ElementGreaterEqual(tile_input0, tile_input1, output, element_size); | ||||
| } | } | ||||
| @@ -1111,7 +1111,7 @@ int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int * | |||||
| int *in_datatype, int *out_datatype, OpParameter *param) { | int *in_datatype, int *out_datatype, OpParameter *param) { | ||||
| *out_format = in_format[0]; | *out_format = in_format[0]; | ||||
| *out_datatype = in_datatype[0]; | *out_datatype = in_datatype[0]; | ||||
| ArithmeticParameter *arithmetic_parameter = (ArithmeticParameter *)param; | |||||
| const ArithmeticParameter *arithmetic_parameter = (const ArithmeticParameter *)param; | |||||
| int ndim0 = dim_size[0]; | int ndim0 = dim_size[0]; | ||||
| int ndim1 = dim_size[1]; | int ndim1 = dim_size[1]; | ||||
| int *in_shape0 = in_shape[0]; | int *in_shape0 = in_shape[0]; | ||||
| @@ -64,85 +64,85 @@ int ElementMulRelu6(const float *input0, const float *input1, float *output, con | |||||
| int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size); | int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size); | ||||
| int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size); | int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size); | ||||
| int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size); | int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size); | ||||
| int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int ElementAdd(const float *input0, const float *input1, float *output, const int element_size); | int ElementAdd(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size); | int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size); | int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size); | int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size); | ||||
| int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, | |||||
| int8_t *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementSub(const float *input0, const float *input1, float *output, const int element_size); | int ElementSub(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size); | int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size); | int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int ElementDiv(const float *input0, const float *input1, float *output, const int element_size); | int ElementDiv(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size); | int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size); | int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); | int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size); | int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size); | int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size); | int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size); | int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size); | int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size); | ||||
| int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size); | int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size); | int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size); | ||||
| int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size); | int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||||
| float *output, int element_size, ArithmeticParameter *param); | |||||
| int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size); | int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementEqual(const float *input0, const float *input1, float *output, const int element_size); | int ElementEqual(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementLess(const float *input0, const float *input1, float *output, const int element_size); | int ElementLess(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||||
| ArithmeticParameter *param); | |||||
| int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size); | int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementGreater(const float *input0, const float *input1, float *output, const int element_size); | int ElementGreater(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | int element_size, ArithmeticParameter *param); | ||||
| int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size); | int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size); | ||||
| int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||||
| int element_size, ArithmeticParameter *param); | |||||
| int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||||
| float *output, int element_size, ArithmeticParameter *param); | |||||
| #ifdef ENABLE_NNACL_INFER_SHAPE | #ifdef ENABLE_NNACL_INFER_SHAPE | ||||
| int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | ||||
| @@ -19,8 +19,8 @@ | |||||
| #include "nnacl/batchnorm_parameter.h" | #include "nnacl/batchnorm_parameter.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||||
| void *output) { | |||||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param, | |||||
| int task_id, void *output) { | |||||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | ||||
| int completed_units = task_id * units_per_thread; | int completed_units = task_id * units_per_thread; | ||||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | ||||
| @@ -37,7 +37,7 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba | |||||
| } | } | ||||
| void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | ||||
| const void *variance, BatchNormParameter *param, int task_id, void *output) { | |||||
| const void *variance, const BatchNormParameter *param, int task_id, void *output) { | |||||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | ||||
| int completed_units = task_id * units_per_thread; | int completed_units = task_id * units_per_thread; | ||||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | ||||
| @@ -53,7 +53,7 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset | |||||
| } | } | ||||
| } | } | ||||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, | |||||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param, | |||||
| float *save_mean, float *save_var) { | float *save_mean, float *save_var) { | ||||
| const float N = (float)param->unit_; | const float N = (float)param->unit_; | ||||
| const float VN = N; | const float VN = N; | ||||
| @@ -23,12 +23,12 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||||
| void *output); | |||||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param, | |||||
| int task_id, void *output); | |||||
| void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | ||||
| const void *variance, BatchNormParameter *param, int task_id, void *output); | |||||
| const void *variance, const BatchNormParameter *param, int task_id, void *output); | |||||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, | |||||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param, | |||||
| float *save_mean, float *save_var); | float *save_mean, float *save_var); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -17,8 +17,8 @@ | |||||
| #include "nnacl/fp32/concat_fp32.h" | #include "nnacl/fp32/concat_fp32.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||||
| int task_id, int thread_num) { | |||||
| void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, | |||||
| void *output, int task_id, int thread_num) { | |||||
| int before_axis_size = 1; | int before_axis_size = 1; | ||||
| for (int i = 0; i < axis; ++i) { | for (int i = 0; i < axis; ++i) { | ||||
| before_axis_size *= inputs_output_shape[0][i]; | before_axis_size *= inputs_output_shape[0][i]; | ||||
| @@ -32,12 +32,12 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si | |||||
| uint8_t *dst_base = (output); | uint8_t *dst_base = (output); | ||||
| size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis]; | size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis]; | ||||
| for (int i = 0; i < input_num; ++i) { | for (int i = 0; i < input_num; ++i) { | ||||
| uint8_t *src_base = (input[i]); | |||||
| const uint8_t *src_base = (input[i]); | |||||
| size_t input_stride = after_axis_size * inputs_output_shape[i][axis]; | size_t input_stride = after_axis_size * inputs_output_shape[i][axis]; | ||||
| int offset = UP_DIV(input_stride, thread_num); | int offset = UP_DIV(input_stride, thread_num); | ||||
| int count = MSMIN(offset, input_stride - offset * task_id); | int count = MSMIN(offset, input_stride - offset * task_id); | ||||
| for (int j = 0; j < before_axis_size; j++) { | for (int j = 0; j < before_axis_size; j++) { | ||||
| uint8_t *src = src_base + j * input_stride + task_id * offset; | |||||
| const uint8_t *src = src_base + j * input_stride + task_id * offset; | |||||
| uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size + task_id * offset; | uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size + task_id * offset; | ||||
| memcpy(dst, src, count); | memcpy(dst, src, count); | ||||
| } | } | ||||
| @@ -22,8 +22,8 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||||
| int task_id, int thread_num); | |||||
| void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, | |||||
| void *output, int task_id, int thread_num); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "nnacl/fp32/constant_of_shape_fp32.h" | #include "nnacl/fp32/constant_of_shape_fp32.h" | ||||
| int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) { | |||||
| int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param) { | |||||
| int size = param->unit_; | int size = param->unit_; | ||||
| float data = param->value_; | float data = param->value_; | ||||
| int ind_st = MSMIN(tid * size, param->element_sz_); | int ind_st = MSMIN(tid * size, param->element_sz_); | ||||
| @@ -27,7 +27,7 @@ int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) { | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param) { | |||||
| int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param) { | |||||
| int size = param->unit_; | int size = param->unit_; | ||||
| float data = param->value_; | float data = param->value_; | ||||
| int ind_st = MSMIN(tid * size, param->element_sz_); | int ind_st = MSMIN(tid * size, param->element_sz_); | ||||
| @@ -33,8 +33,8 @@ typedef struct ConstantOfShapeParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param); | |||||
| int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param); | |||||
| int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param); | |||||
| int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -22,7 +22,7 @@ | |||||
| // fp32 conv common | // fp32 conv common | ||||
| void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | ||||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param) { | |||||
| float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) { | |||||
| int out_channel = conv_param->output_channel_; | int out_channel = conv_param->output_channel_; | ||||
| int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; | int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; | ||||
| int output_count = conv_param->output_h_ * conv_param->output_w_; | int output_count = conv_param->output_h_ * conv_param->output_w_; | ||||
| @@ -61,8 +61,8 @@ void ConvFp32(const float *input_data, float *packed_input, const float *packed_ | |||||
| // fp32 conv winograd | // fp32 conv winograd | ||||
| void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | ||||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | |||||
| OutputTransFunc out_func) { | |||||
| TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param, | |||||
| InputTransFunc in_func, OutputTransFunc out_func) { | |||||
| int in_channel = conv_param->input_channel_; | int in_channel = conv_param->input_channel_; | ||||
| int out_w_block = UP_DIV(conv_param->output_w_, conv_param->output_unit_); | int out_w_block = UP_DIV(conv_param->output_w_, conv_param->output_unit_); | ||||
| int out_h_block = UP_DIV(conv_param->output_h_, conv_param->output_unit_); | int out_h_block = UP_DIV(conv_param->output_h_, conv_param->output_unit_); | ||||
| @@ -35,12 +35,12 @@ extern "C" { | |||||
| // fp32 convolution common (im2col+gemm) | // fp32 convolution common (im2col+gemm) | ||||
| void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | ||||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param); | |||||
| float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param); | |||||
| // fp32 convolution winograd | // fp32 convolution winograd | ||||
| void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | ||||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | |||||
| OutputTransFunc out_func); | |||||
| TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param, | |||||
| InputTransFunc in_func, OutputTransFunc out_func); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/crop_parameter.h" | #include "nnacl/crop_parameter.h" | ||||
| void Pad4DOffset(CropParameter *crop_param, int64_t *offset) { | |||||
| void Pad4DOffset(const CropParameter *crop_param, int64_t *offset) { | |||||
| int axis = crop_param->axis_; | int axis = crop_param->axis_; | ||||
| for (int i = DIMENSION_4D - 1; i >= 0; --i) { | for (int i = DIMENSION_4D - 1; i >= 0; --i) { | ||||
| int offset_index = i - axis; | int offset_index = i - axis; | ||||
| @@ -30,8 +30,8 @@ void Pad4DOffset(CropParameter *crop_param, int64_t *offset) { | |||||
| } | } | ||||
| } | } | ||||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param, | |||||
| int thread_id) { | |||||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, | |||||
| const CropParameter *crop_param, int thread_id) { | |||||
| int64_t offset_pad[DIMENSION_4D]; | int64_t offset_pad[DIMENSION_4D]; | ||||
| Pad4DOffset(crop_param, offset_pad); | Pad4DOffset(crop_param, offset_pad); | ||||
| int out_shape1 = out_shape[1]; | int out_shape1 = out_shape[1]; | ||||
| @@ -66,7 +66,7 @@ void Crop4D(const float *input, float *output, const int *in_shape, const int *o | |||||
| } | } | ||||
| void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, | void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, | ||||
| CropParameter *crop_param) { | |||||
| const CropParameter *crop_param) { | |||||
| int64_t offset_pad[DIMENSION_4D]; | int64_t offset_pad[DIMENSION_4D]; | ||||
| Pad4DOffset(crop_param, offset_pad); | Pad4DOffset(crop_param, offset_pad); | ||||
| size_t in_dim2_stride = in_shape[3]; | size_t in_dim2_stride = in_shape[3]; | ||||
| @@ -23,10 +23,10 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param, | |||||
| int thread_id); | |||||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, | |||||
| const CropParameter *crop_param, int thread_id); | |||||
| void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, | void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, | ||||
| CropParameter *crop_param); | |||||
| const CropParameter *crop_param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -34,7 +34,7 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in | |||||
| } | } | ||||
| void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *dst, int output_channel, | void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *dst, int output_channel, | ||||
| ConvParameter *conv_param) { | |||||
| const ConvParameter *conv_param) { | |||||
| /* arm64 row12x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */ | /* arm64 row12x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */ | ||||
| /* arm32 row4x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */ | /* arm32 row4x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */ | ||||
| size_t input_plane = conv_param->input_w_ * conv_param->input_h_; | size_t input_plane = conv_param->input_w_ * conv_param->input_h_; | ||||
| @@ -30,7 +30,7 @@ extern "C" { | |||||
| #endif | #endif | ||||
| void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, int plane); | void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, int plane); | ||||
| void DeConvPostFp32C8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel, | void DeConvPostFp32C8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel, | ||||
| ConvParameter *conv_param); | |||||
| const ConvParameter *conv_param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -16,8 +16,8 @@ | |||||
| #include "nnacl/fp32/deconv_winograd_fp32.h" | #include "nnacl/fp32/deconv_winograd_fp32.h" | ||||
| int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param, | |||||
| DeConvParam *deconv_param) { | |||||
| int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param, | |||||
| const DeConvParam *deconv_param) { | |||||
| int tmp_kernel_plane = unit->w_size_ * unit->h_size_; | int tmp_kernel_plane = unit->w_size_ * unit->h_size_; | ||||
| int size = conv_param->input_channel_ * conv_param->output_channel_ * tmp_kernel_plane; | int size = conv_param->input_channel_ * conv_param->output_channel_ * tmp_kernel_plane; | ||||
| float *current_unit_weight = (float *)malloc(size * sizeof(float)); | float *current_unit_weight = (float *)malloc(size * sizeof(float)); | ||||
| @@ -25,13 +25,14 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| for (int ic = 0; ic < conv_param->input_channel_; ic++) { | for (int ic = 0; ic < conv_param->input_channel_; ic++) { | ||||
| float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic; | |||||
| const float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic; | |||||
| float *dst_ic = current_unit_weight + tmp_kernel_plane * conv_param->output_channel_ * ic; | float *dst_ic = current_unit_weight + tmp_kernel_plane * conv_param->output_channel_ * ic; | ||||
| for (int uhi = 0; uhi < unit->h_size_; uhi++) { | for (int uhi = 0; uhi < unit->h_size_; uhi++) { | ||||
| for (int uwi = 0; uwi < unit->w_size_; uwi++) { | for (int uwi = 0; uwi < unit->w_size_; uwi++) { | ||||
| int src_h_offset = unit->h_start_ + uhi * conv_param->stride_h_; | int src_h_offset = unit->h_start_ + uhi * conv_param->stride_h_; | ||||
| int src_w_offset = unit->w_start_ + uwi * conv_param->stride_w_; | int src_w_offset = unit->w_start_ + uwi * conv_param->stride_w_; | ||||
| float *src_hw = src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_; | |||||
| const float *src_hw = | |||||
| src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_; | |||||
| float *dst_hw = dst_ic + (uhi * unit->w_size_ + uwi) * conv_param->output_channel_; | float *dst_hw = dst_ic + (uhi * unit->w_size_ + uwi) * conv_param->output_channel_; | ||||
| memcpy(dst_hw, src_hw, conv_param->output_channel_ * sizeof(float)); | memcpy(dst_hw, src_hw, conv_param->output_channel_ * sizeof(float)); | ||||
| } | } | ||||
| @@ -132,10 +133,10 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| void DeConvWgInputPack(float *src_ptr, float *dst_ptr, int channel, int stride) { | |||||
| void DeConvWgInputPack(const float *src_ptr, float *dst_ptr, int channel, int stride) { | |||||
| int ic4div = channel / C4NUM; | int ic4div = channel / C4NUM; | ||||
| int ic4mod = channel % C4NUM; | int ic4mod = channel % C4NUM; | ||||
| float *src = src_ptr; | |||||
| const float *src = src_ptr; | |||||
| float *dst = dst_ptr; | float *dst = dst_ptr; | ||||
| for (int ic = 0; ic < ic4div; ic++) { | for (int ic = 0; ic < ic4div; ic++) { | ||||
| @@ -340,9 +341,10 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s | |||||
| return; | return; | ||||
| } | } | ||||
| void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, const float *at_buf, | |||||
| float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf, | |||||
| int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, const float *weight_buf, float *tmp_buf, | |||||
| const float *at_buf, float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, | |||||
| float *b_tmp_buf, int unit_size, int w_start, int h_start, const ConvParameter *conv_param, | |||||
| const DeConvParam *deconv_param) { | |||||
| int winograd_plane = unit_size * unit_size; | int winograd_plane = unit_size * unit_size; | ||||
| if (!transfered[unit_size]) { | if (!transfered[unit_size]) { | ||||
| WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT, | WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT, | ||||
| @@ -355,7 +357,7 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, | |||||
| for (int index = 0; index < winograd_plane; index++) { | for (int index = 0; index < winograd_plane; index++) { | ||||
| float *src = trans_a_buf + index * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | float *src = trans_a_buf + index * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | ||||
| float *dst = tmp_buf + index * deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | float *dst = tmp_buf + index * deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | ||||
| float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_; | |||||
| const float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_; | |||||
| TiledC4MatmulFp32(dst, src, weight, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM, deconv_param->ic_div4_, | TiledC4MatmulFp32(dst, src, weight, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM, deconv_param->ic_div4_, | ||||
| deconv_param->oc_div4_); | deconv_param->oc_div4_); | ||||
| } | } | ||||
| @@ -380,15 +382,16 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, | |||||
| return; | return; | ||||
| } | } | ||||
| void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start, | |||||
| int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||||
| void DeConvWgCalCommFp32(const float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, | |||||
| int w_start, int h_size, int w_size, const ConvParameter *conv_param, | |||||
| const DeConvParam *deconv_param) { | |||||
| int count = deconv_param->oc_div4_ * w_size * h_size; | int count = deconv_param->oc_div4_ * w_size * h_size; | ||||
| int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | ||||
| int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | ||||
| for (int hi = 0; hi < DECONV_WINOGRAD_DEFAULT_UNIT; hi++) { | for (int hi = 0; hi < DECONV_WINOGRAD_DEFAULT_UNIT; hi++) { | ||||
| for (int wi = 0; wi < DECONV_WINOGRAD_DEFAULT_UNIT; wi++) { | for (int wi = 0; wi < DECONV_WINOGRAD_DEFAULT_UNIT; wi++) { | ||||
| float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride; | |||||
| const float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride; | |||||
| TiledC4MatmulFp32(tmp_buf, src_in, weight, DECONV_WINOGRAD_DEFAULT_TILE * 4, deconv_param->ic_div4_, count); | TiledC4MatmulFp32(tmp_buf, src_in, weight, DECONV_WINOGRAD_DEFAULT_TILE * 4, deconv_param->ic_div4_, count); | ||||
| for (int uhi = 0; uhi < h_size; uhi++) { | for (int uhi = 0; uhi < h_size; uhi++) { | ||||
| @@ -406,8 +409,8 @@ void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, f | |||||
| return; | return; | ||||
| } | } | ||||
| void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||||
| ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) { | |||||
| void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||||
| const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) { | |||||
| /* pack tile input */ | /* pack tile input */ | ||||
| int tile_in_unit_stride = deconv_param->ic_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | int tile_in_unit_stride = deconv_param->ic_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | ||||
| #ifdef ENABLE_ARM | #ifdef ENABLE_ARM | ||||
| @@ -439,7 +442,7 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind | |||||
| continue; | continue; | ||||
| } | } | ||||
| float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_; | |||||
| const float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_; | |||||
| DeConvWgInputPack(src, dst, conv_param->input_channel_, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM); | DeConvWgInputPack(src, dst, conv_param->input_channel_, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM); | ||||
| } | } | ||||
| } | } | ||||
| @@ -474,8 +477,8 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind | |||||
| return; | return; | ||||
| } | } | ||||
| void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param, | |||||
| int calculate_count, int tile_index) { | |||||
| void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param, | |||||
| const DeConvParam *deconv_param, int calculate_count, int tile_index) { | |||||
| /* merge */ | /* merge */ | ||||
| int src_unit_stride = deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | int src_unit_stride = deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | ||||
| @@ -483,7 +486,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par | |||||
| int dst_stride = conv_param->output_w_ * conv_param->output_h_ * C4NUM; | int dst_stride = conv_param->output_w_ * conv_param->output_h_ * C4NUM; | ||||
| for (int index = 0; index < calculate_count; ++index) { | for (int index = 0; index < calculate_count; ++index) { | ||||
| float *src_start = tile_out + index * C4NUM; | |||||
| const float *src_start = tile_out + index * C4NUM; | |||||
| int plane_index = tile_index * DECONV_WINOGRAD_DEFAULT_TILE + index; | int plane_index = tile_index * DECONV_WINOGRAD_DEFAULT_TILE + index; | ||||
| int w_unit_index = plane_index % deconv_param->in_tile_w_count_; | int w_unit_index = plane_index % deconv_param->in_tile_w_count_; | ||||
| @@ -499,7 +502,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par | |||||
| for (int hi = merge_h_start; hi < merge_h_end; hi++) { | for (int hi = merge_h_start; hi < merge_h_end; hi++) { | ||||
| for (int wi = merge_w_start; wi < merge_w_end; wi++) { | for (int wi = merge_w_start; wi < merge_w_end; wi++) { | ||||
| float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride; | |||||
| const float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride; | |||||
| float *dst = dst_start + (hi * conv_param->output_w_ + wi) * C4NUM; | float *dst = dst_start + (hi * conv_param->output_w_ + wi) * C4NUM; | ||||
| DeConvWgMerge(src, dst, src_stride, dst_stride, deconv_param->oc_div4_); | DeConvWgMerge(src, dst, src_stride, dst_stride, deconv_param->oc_div4_); | ||||
| } | } | ||||
| @@ -28,12 +28,12 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param, | |||||
| DeConvParam *deconv_param); | |||||
| void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||||
| ConvParameter *conv_param, DeConvParam *deconv_param, int task_id); | |||||
| void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param, | |||||
| int calculate_count, int tile_index); | |||||
| int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param, | |||||
| const DeConvParam *deconv_param); | |||||
| void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||||
| const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id); | |||||
| void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param, | |||||
| const DeConvParam *deconv_param, int calculate_count, int tile_index); | |||||
| void TiledC4MatmulFp32(float *dst, const float *src, const float *weight, size_t ic4, size_t cal_num, size_t oc4); | void TiledC4MatmulFp32(float *dst, const float *src, const float *weight, size_t ic4, size_t cal_num, size_t oc4); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| @@ -36,8 +36,8 @@ float IntersectionOverUnion(const BboxCorner *a, const BboxCorner *b) { | |||||
| return inter / (area_a + area_b - inter); | return inter / (area_a + area_b - inter); | ||||
| } | } | ||||
| int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors, | |||||
| DetectionPostProcessParameter *param) { | |||||
| int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors, | |||||
| const DetectionPostProcessParameter *param) { | |||||
| if (input_boxes == NULL || anchors == NULL || param == NULL) { | if (input_boxes == NULL || anchors == NULL || param == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -37,8 +37,8 @@ typedef struct { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors, | |||||
| DetectionPostProcessParameter *param); | |||||
| int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors, | |||||
| const DetectionPostProcessParameter *param); | |||||
| int NmsMultiClassesFastCore(const int num_boxes, const int num_classes_with_bg, const float *input_scores, | int NmsMultiClassesFastCore(const int num_boxes, const int num_classes_with_bg, const float *input_scores, | ||||
| void (*)(const float *, int *, int, int), const DetectionPostProcessParameter *param, | void (*)(const float *, int *, int, int), const DetectionPostProcessParameter *param, | ||||
| @@ -18,11 +18,11 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| void Calculate_Data(const float *input_data, float *output_data, int num, EluParameter *parameter) { | |||||
| void Calculate_Data(const float *input_data, float *output_data, int num, const EluParameter *parameter) { | |||||
| output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num]; | output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num]; | ||||
| } | } | ||||
| int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) { | |||||
| int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id) { | |||||
| for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) { | for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) { | ||||
| Calculate_Data(input_data, output_data, i, parameter); | Calculate_Data(input_data, output_data, i, parameter); | ||||
| } | } | ||||
| @@ -28,7 +28,7 @@ typedef struct EluParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id); | |||||
| int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -31,7 +31,8 @@ void l2_regulate(float *data, int size, float max_norm) { | |||||
| return; | return; | ||||
| } | } | ||||
| int CopyData(float *input_data, int *ids, float *output_data, int num, EmbeddingLookupParameter *parameter) { | |||||
| int CopyData(float *input_data, const int *ids, float *output_data, int num, | |||||
| const EmbeddingLookupParameter *parameter) { | |||||
| if (ids[num] >= parameter->layer_num_ || ids[num] < 0) { | if (ids[num] >= parameter->layer_num_ || ids[num] < 0) { | ||||
| return NNACL_ERRCODE_INDEX_OUT_OF_RANGE; | return NNACL_ERRCODE_INDEX_OUT_OF_RANGE; | ||||
| } | } | ||||
| @@ -46,7 +47,8 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) { | |||||
| int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter, | |||||
| int task_id) { | |||||
| for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) { | for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) { | ||||
| int ret = CopyData(input_data, ids, output_data, i, parameter); | int ret = CopyData(input_data, ids, output_data, i, parameter); | ||||
| if (ret != NNACL_OK) { | if (ret != NNACL_OK) { | ||||
| @@ -31,7 +31,8 @@ typedef struct EmbeddingLookupParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id); | |||||
| int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter, | |||||
| int task_id); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id) { | |||||
| int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id) { | |||||
| if (parameter->scale_ == 1) { | if (parameter->scale_ == 1) { | ||||
| for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) { | for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) { | ||||
| output_data[i] = expf(input_data[i]); | output_data[i] = expf(input_data[i]); | ||||
| @@ -33,7 +33,7 @@ typedef struct ExpParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id); | |||||
| int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id); | |||||
| void ExpFp32(const float *src, float *dst, int num); | void ExpFp32(const float *src, float *dst, int num); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -26,10 +26,10 @@ inline int Stride(const int *shape, int rank, int index) { | |||||
| return stride; | return stride; | ||||
| } | } | ||||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| float *output) { | float *output) { | ||||
| for (int m = 0; m < outer_size; ++m) { | for (int m = 0; m < outer_size; ++m) { | ||||
| float *inputm = input + inner_size * m * limit; | |||||
| const float *inputm = input + inner_size * m * limit; | |||||
| float *outputm = output + inner_size * m * indices_element_size; | float *outputm = output + inner_size * m * indices_element_size; | ||||
| for (int i = 0; i < indices_element_size; ++i) { | for (int i = 0; i < indices_element_size; ++i) { | ||||
| if (indices[i] < 0 || indices[i] > limit) { | if (indices[i] < 0 || indices[i] > limit) { | ||||
| @@ -22,7 +22,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| float *output); | float *output); | ||||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | ||||
| int indices_element_size, int32_t *output); | int indices_element_size, int32_t *output); | ||||
| @@ -18,9 +18,8 @@ | |||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data, | |||||
| const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id, | |||||
| const int thread_num) { | |||||
| int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data, | |||||
| const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) { | if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -23,9 +23,8 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data, | |||||
| const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id, | |||||
| const int thread_num); | |||||
| int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data, | |||||
| const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,9 +18,8 @@ | |||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data, | |||||
| const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid, | |||||
| const int thread_num) { | |||||
| int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data, | |||||
| bool affine, float epsilon, float *dst_data, int tid, int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -23,9 +23,8 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data, | |||||
| const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid, | |||||
| const int thread_num); | |||||
| int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data, | |||||
| bool affine, float epsilon, float *dst_data, int tid, int thread_num); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -17,15 +17,15 @@ | |||||
| #include "nnacl/fp32/local_response_norm_fp32.h" | #include "nnacl/fp32/local_response_norm_fp32.h" | ||||
| #include <math.h> | #include <math.h> | ||||
| int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr, | |||||
| LocalResponseNormParameter *param) { | |||||
| int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr, | |||||
| const LocalResponseNormParameter *param) { | |||||
| int depth_radius = param->depth_radius_; | int depth_radius = param->depth_radius_; | ||||
| float bias = param->bias_; | float bias = param->bias_; | ||||
| float alpha = param->alpha_; | float alpha = param->alpha_; | ||||
| float beta = param->beta_; | float beta = param->beta_; | ||||
| for (int i = 0; i < out_size; i++) { | for (int i = 0; i < out_size; i++) { | ||||
| float *in_data = input_ptr + i * channel; | |||||
| const float *in_data = input_ptr + i * channel; | |||||
| float *out_data = output_ptr + i * channel; | float *out_data = output_ptr + i * channel; | ||||
| for (int j = 0; j < channel; j++) { | for (int j = 0; j < channel; j++) { | ||||
| @@ -30,8 +30,8 @@ typedef struct LocalResponseNormParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr, | |||||
| LocalResponseNormParameter *param); | |||||
| int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr, | |||||
| const LocalResponseNormParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include "nnacl/fp32/activation_fp32.h" | #include "nnacl/fp32/activation_fp32.h" | ||||
| #include "nnacl/fp32/arithmetic_fp32.h" | #include "nnacl/fp32/arithmetic_fp32.h" | ||||
| void InitGate(float *gate_buffer, const float *bias, LstmParameter *lstm_parm) { | |||||
| void InitGate(float *gate_buffer, const float *bias, const LstmParameter *lstm_parm) { | |||||
| int gate_offest = 0; | int gate_offest = 0; | ||||
| for (int l = 0; l < 4; l++) { | for (int l = 0; l < 4; l++) { | ||||
| int batch_offest = gate_offest; | int batch_offest = gate_offest; | ||||
| @@ -94,7 +94,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we | |||||
| const float *input_cell_weight, const float *input_output_weight, const float *state_input_weight, | const float *input_cell_weight, const float *input_output_weight, const float *state_input_weight, | ||||
| const float *state_forget_weight, const float *state_cell_weight, const float *state_output_weight, | const float *state_forget_weight, const float *state_cell_weight, const float *state_output_weight, | ||||
| const float *bias, float *hidden_state, float *cell_state, float *gate_buffer, | const float *bias, float *hidden_state, float *cell_state, float *gate_buffer, | ||||
| LstmParameter *lstm_parm) { | |||||
| const LstmParameter *lstm_parm) { | |||||
| InitGate(gate_buffer, bias, lstm_parm); | InitGate(gate_buffer, bias, lstm_parm); | ||||
| float *input_gate = gate_buffer; | float *input_gate = gate_buffer; | ||||
| @@ -139,7 +139,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we | |||||
| } | } | ||||
| void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias, | void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias, | ||||
| float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm) { | |||||
| float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm) { | |||||
| // forward | // forward | ||||
| const float *input_input_weight = weight_i; | const float *input_input_weight = weight_i; | ||||
| const float *input_forget_weight = weight_i + lstm_parm->input_size_ * lstm_parm->hidden_size_ * 2; | const float *input_forget_weight = weight_i + lstm_parm->input_size_ * lstm_parm->hidden_size_ * 2; | ||||
| @@ -34,7 +34,7 @@ typedef struct LstmParameter { | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias, | void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias, | ||||
| float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm); | |||||
| float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include "nnacl/common_func.h" | #include "nnacl/common_func.h" | ||||
| void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | ||||
| const int *paddings, const int tid, const int thread_num) { | |||||
| const int *paddings, int tid, int thread_num) { | |||||
| int in[4], out[4]; | int in[4], out[4]; | ||||
| for (in[0] = 0; in[0] < input_shape[0]; in[0]++) { | for (in[0] = 0; in[0] < input_shape[0]; in[0]++) { | ||||
| out[0] = in[0] + paddings[0]; | out[0] = in[0] + paddings[0]; | ||||
| @@ -28,7 +28,7 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | ||||
| const int *paddings, const int tid, const int thread_num); | |||||
| const int *paddings, int tid, int thread_num); | |||||
| void MirrorPad(const float *input_data, float *output_data, const int *input_shape, const PadParameter *pad_param, | void MirrorPad(const float *input_data, float *output_data, const int *input_shape, const PadParameter *pad_param, | ||||
| int begin, int end); | int begin, int end); | ||||
| @@ -18,8 +18,8 @@ | |||||
| #include <float.h> | #include <float.h> | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||||
| float maxf) { | |||||
| int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||||
| float minf, float maxf) { | |||||
| int win_w = pooling_param->window_w_; | int win_w = pooling_param->window_w_; | ||||
| int win_h = pooling_param->window_h_; | int win_h = pooling_param->window_h_; | ||||
| int channel = pooling_param->input_channel_; | int channel = pooling_param->input_channel_; | ||||
| @@ -144,8 +144,8 @@ int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pool | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||||
| float maxf) { | |||||
| void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||||
| float minf, float maxf) { | |||||
| int win_w = pooling_param->window_w_; | int win_w = pooling_param->window_w_; | ||||
| int win_h = pooling_param->window_h_; | int win_h = pooling_param->window_h_; | ||||
| int channel = pooling_param->input_channel_; | int channel = pooling_param->input_channel_; | ||||
| @@ -27,10 +27,10 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||||
| float maxf); | |||||
| void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||||
| float maxf); | |||||
| int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||||
| float minf, float maxf); | |||||
| void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||||
| float minf, float maxf); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <arm_neon.h> | #include <arm_neon.h> | ||||
| #endif | #endif | ||||
| void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id) { | |||||
| void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id) { | |||||
| float *negetive_slope_value = prelu_param_->slope_; | float *negetive_slope_value = prelu_param_->slope_; | ||||
| int c4 = prelu_param_->channel_num_ / C4NUM; | int c4 = prelu_param_->channel_num_ / C4NUM; | ||||
| int channel_num = prelu_param_->channel_num_; | int channel_num = prelu_param_->channel_num_; | ||||
| @@ -81,7 +81,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i | |||||
| int c4_offset = tile_offset + k * C4NUM; | int c4_offset = tile_offset + k * C4NUM; | ||||
| int slope_offset = k * C4NUM; | int slope_offset = k * C4NUM; | ||||
| for (int l = 0; l < C4NUM; ++l) { | for (int l = 0; l < C4NUM; ++l) { | ||||
| float in_data = input_ptr[c4_offset + l]; | |||||
| const float in_data = input_ptr[c4_offset + l]; | |||||
| output_ptr[c4_offset + l] = | output_ptr[c4_offset + l] = | ||||
| (in_data < 0 ? in_data : 0) * negetive_slope_value[slope_offset + l] + (in_data > 0 ? in_data : 0); | (in_data < 0 ? in_data : 0) * negetive_slope_value[slope_offset + l] + (in_data > 0 ? in_data : 0); | ||||
| } | } | ||||
| @@ -93,7 +93,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i | |||||
| int offset = m * channel_num; | int offset = m * channel_num; | ||||
| for (int k = c_s; k < channel_num; ++k) { | for (int k = c_s; k < channel_num; ++k) { | ||||
| int c4_offset = offset + k; | int c4_offset = offset + k; | ||||
| float in_data = input_ptr[c4_offset]; | |||||
| const float in_data = input_ptr[c4_offset]; | |||||
| if (in_data >= 0) { | if (in_data >= 0) { | ||||
| output_ptr[c4_offset] = in_data; | output_ptr[c4_offset] = in_data; | ||||
| } else { | } else { | ||||
| @@ -104,7 +104,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i | |||||
| } | } | ||||
| } | } | ||||
| void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id) { | |||||
| void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id) { | |||||
| for (int j = task_id; j < prelu_param_->tile_block_; j += prelu_param_->op_parameter_.thread_num_) { | for (int j = task_id; j < prelu_param_->tile_block_; j += prelu_param_->op_parameter_.thread_num_) { | ||||
| int cal_index; | int cal_index; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| @@ -22,9 +22,9 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id); | |||||
| void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id); | |||||
| void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id); | |||||
| void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -23,8 +23,8 @@ | |||||
| #include "nnacl/reduce_parameter.h" | #include "nnacl/reduce_parameter.h" | ||||
| #endif | #endif | ||||
| int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num) { | |||||
| int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -44,8 +44,8 @@ int ReduceMean(const int outer_size, const int inner_size, const int axis_size, | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num) { | |||||
| int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -81,8 +81,8 @@ int ReduceSum(const int outer_size, const int inner_size, const int axis_size, c | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num) { | |||||
| int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -102,8 +102,8 @@ int ReduceMax(const int outer_size, const int inner_size, const int axis_size, c | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num) { | |||||
| int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -123,8 +123,8 @@ int ReduceMin(const int outer_size, const int inner_size, const int axis_size, c | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||||
| const int tid, const int thread_num) { | |||||
| int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -144,8 +144,8 @@ int IntReduceMin(const int outer_size, const int inner_size, const int axis_size | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num) { | |||||
| int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -166,8 +166,8 @@ int ReduceProd(const int outer_size, const int inner_size, const int axis_size, | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||||
| const int tid, const int thread_num) { | |||||
| int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -190,8 +190,8 @@ int IntReduceProd(const int outer_size, const int inner_size, const int axis_siz | |||||
| } | } | ||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data, | |||||
| float *dst_data, const int tid, const int thread_num) { | |||||
| int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num) { | |||||
| if (src_data == NULL || dst_data == NULL) { | if (src_data == NULL || dst_data == NULL) { | ||||
| return NNACL_NULL_PTR; | return NNACL_NULL_PTR; | ||||
| } | } | ||||
| @@ -22,22 +22,22 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num); | |||||
| int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num); | |||||
| int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num); | |||||
| int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num); | |||||
| int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||||
| const int tid, const int thread_num); | |||||
| int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||||
| const int tid, const int thread_num); | |||||
| int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||||
| const int tid, const int thread_num); | |||||
| int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data, | |||||
| float *dst_data, const int tid, const int thread_num); | |||||
| int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num); | |||||
| int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num); | |||||
| int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num); | |||||
| int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num); | |||||
| int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||||
| int thread_num); | |||||
| int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num); | |||||
| int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||||
| int thread_num); | |||||
| int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||||
| int thread_num); | |||||
| #ifdef ENABLE_NNACL_INFER_SHAPE | #ifdef ENABLE_NNACL_INFER_SHAPE | ||||
| int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | ||||
| @@ -20,7 +20,8 @@ | |||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param) { | |||||
| int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, | |||||
| const ROIPoolingParameter *param) { | |||||
| int num_rois = param->output_n_; | int num_rois = param->output_n_; | ||||
| int units = UP_DIV(num_rois, param->thread_num_); | int units = UP_DIV(num_rois, param->thread_num_); | ||||
| int roi_st = tid * units; | int roi_st = tid * units; | ||||
| @@ -52,7 +53,7 @@ int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, in | |||||
| float bin_size_h = (float)roi_height / (float)pooled_height; | float bin_size_h = (float)roi_height / (float)pooled_height; | ||||
| float bin_size_w = (float)roi_width / (float)pooled_width; | float bin_size_w = (float)roi_width / (float)pooled_width; | ||||
| float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind; | |||||
| const float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind; | |||||
| for (int ph = 0; ph < pooled_height; ++ph) { | for (int ph = 0; ph < pooled_height; ++ph) { | ||||
| for (int pw = 0; pw < pooled_width; ++pw) { | for (int pw = 0; pw < pooled_width; ++pw) { | ||||
| @@ -40,7 +40,8 @@ typedef struct ROIPoolingParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param); | |||||
| int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, | |||||
| const ROIPoolingParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -66,7 +66,7 @@ void ScaleAxis(const float *in_data, float *out_data, const float *scale, const | |||||
| } | } | ||||
| void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | ||||
| ScaleParameter *scale_param) { | |||||
| const ScaleParameter *scale_param) { | |||||
| int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | ||||
| int outer_start = task_id * outer_step; | int outer_start = task_id * outer_step; | ||||
| int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | ||||
| @@ -137,7 +137,7 @@ void ScaleAxisRelu(const float *in_data, float *out_data, const float *scale, co | |||||
| } | } | ||||
| void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | ||||
| ScaleParameter *scale_param) { | |||||
| const ScaleParameter *scale_param) { | |||||
| int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | ||||
| int outer_start = task_id * outer_step; | int outer_start = task_id * outer_step; | ||||
| int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | ||||
| @@ -210,7 +210,7 @@ void ScaleAxisRelu6(const float *in_data, float *out_data, const float *scale, c | |||||
| } | } | ||||
| void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | ||||
| ScaleParameter *scale_param) { | |||||
| const ScaleParameter *scale_param) { | |||||
| int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | ||||
| int outer_start = task_id * outer_step; | int outer_start = task_id * outer_step; | ||||
| int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | ||||
| @@ -23,11 +23,11 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | ||||
| ScaleParameter *scale_param); | |||||
| const ScaleParameter *scale_param); | |||||
| void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | ||||
| ScaleParameter *scale_param); | |||||
| const ScaleParameter *scale_param); | |||||
| void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | ||||
| ScaleParameter *scale_param); | |||||
| const ScaleParameter *scale_param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -46,7 +46,7 @@ void PadSliceParameterTo4D(SliceParameter *param) { | |||||
| param->param_length_ = DIMENSION_4D; | param->param_length_ = DIMENSION_4D; | ||||
| } | } | ||||
| void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id) { | |||||
| void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id) { | |||||
| int32_t out_dim1 = param->size_[1]; | int32_t out_dim1 = param->size_[1]; | ||||
| int32_t out_dim2 = param->size_[2]; | int32_t out_dim2 = param->size_[2]; | ||||
| int32_t out_dim3 = param->size_[3]; | int32_t out_dim3 = param->size_[3]; | ||||
| @@ -78,7 +78,7 @@ void DoSlice(const float *input, float *output, SliceParameter *param, int threa | |||||
| } | } | ||||
| } | } | ||||
| void DoSliceNoParallel(const float *input, float *output, SliceParameter *param) { | |||||
| void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param) { | |||||
| size_t copy_size = param->size_[3] * sizeof(float); | size_t copy_size = param->size_[3] * sizeof(float); | ||||
| size_t in_stride2 = param->shape_[3]; | size_t in_stride2 = param->shape_[3]; | ||||
| size_t in_stride1 = param->shape_[2] * in_stride2; | size_t in_stride1 = param->shape_[2] * in_stride2; | ||||
| @@ -23,8 +23,8 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void PadSliceParameterTo4D(SliceParameter *param); | void PadSliceParameterTo4D(SliceParameter *param); | ||||
| void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id); | |||||
| void DoSliceNoParallel(const float *input, float *output, SliceParameter *param); | |||||
| void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id); | |||||
| void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -88,10 +88,10 @@ void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel) { | |||||
| } | } | ||||
| // output = exp(input) / reduce_sum(exp(input), axis) | // output = exp(input) / reduce_sum(exp(input), axis) | ||||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter) { | |||||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter) { | |||||
| int axis = parameter->axis_; | int axis = parameter->axis_; | ||||
| int n_dim = parameter->n_dim_; | int n_dim = parameter->n_dim_; | ||||
| int *input_shape = parameter->input_shape_; | |||||
| const int *input_shape = parameter->input_shape_; | |||||
| int inner_size = 1; | int inner_size = 1; | ||||
| int outter_size = 1; | int outter_size = 1; | ||||
| @@ -22,7 +22,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter); | |||||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter); | |||||
| void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel); | void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -214,7 +214,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, int32_t *filt | |||||
| return; | return; | ||||
| } | } | ||||
| void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||||
| void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||||
| int block_index) { | int block_index) { | ||||
| // input format : nhwc | // input format : nhwc | ||||
| int kernel_h = conv_param->kernel_h_; | int kernel_h = conv_param->kernel_h_; | ||||
| @@ -27,7 +27,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||||
| void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||||
| int block_index); | int block_index); | ||||
| void PackHWCToWHC(const float *src, float *dst, int height, int width, int channel); | void PackHWCToWHC(const float *src, float *dst, int height, int width, int channel); | ||||
| @@ -18,7 +18,8 @@ | |||||
| // fp32 conv winograd | // fp32 conv winograd | ||||
| void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num, | void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num, | ||||
| int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func) { | |||||
| int out_tile_index, int out_w_block_num, const ConvParameter *conv_param, | |||||
| InputTransFunc func) { | |||||
| int input_unit = conv_param->input_unit_; | int input_unit = conv_param->input_unit_; | ||||
| int output_unit = conv_param->output_unit_; | int output_unit = conv_param->output_unit_; | ||||
| int in_channel = conv_param->input_channel_; | int in_channel = conv_param->input_channel_; | ||||
| @@ -96,7 +97,8 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * | |||||
| } | } | ||||
| void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, | void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, | ||||
| int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func) { | |||||
| int out_tile_index, int output_unit_num, const ConvParameter *conv_param, | |||||
| OutputTransFunc func) { | |||||
| int output_unit = conv_param->output_unit_; | int output_unit = conv_param->output_unit_; | ||||
| int output_w = conv_param->output_w_; | int output_w = conv_param->output_w_; | ||||
| int output_h = conv_param->output_h_; | int output_h = conv_param->output_h_; | ||||
| @@ -33,10 +33,12 @@ extern "C" { | |||||
| #endif | #endif | ||||
| // for fp32 winograd input/output transform | // for fp32 winograd input/output transform | ||||
| void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num, | void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num, | ||||
| int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func); | |||||
| int out_tile_index, int out_w_block_num, const ConvParameter *conv_param, | |||||
| InputTransFunc func); | |||||
| void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, | void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, | ||||
| int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func); | |||||
| int out_tile_index, int output_unit_num, const ConvParameter *conv_param, | |||||
| OutputTransFunc func); | |||||
| // for int8 convolution 3x3 filter/input/output transform | // for int8 convolution 3x3 filter/input/output transform | ||||
| void Conv3x3Int8InputUnit(int16_t *tmp_data, int16_t *trans_input_data, size_t step, int input_zp); | void Conv3x3Int8InputUnit(int16_t *tmp_data, int16_t *trans_input_data, size_t step, int input_zp); | ||||
| @@ -46,8 +46,8 @@ int ConcatCPUKernel::ReSize() { return ConcatBaseCPUKernel::ReSize(); } | |||||
| int ConcatCPUKernel::DoConcat(int task_id) { | int ConcatCPUKernel::DoConcat(int task_id) { | ||||
| auto input_num = in_tensors_.size(); | auto input_num = in_tensors_.size(); | ||||
| std::vector<void *> inputs_addr(input_num, nullptr); | |||||
| std::vector<int *> inputs_output_shape(input_num + 1, nullptr); | |||||
| std::vector<const void *> inputs_addr(input_num, nullptr); | |||||
| std::vector<const int *> inputs_output_shape(input_num + 1, nullptr); | |||||
| std::vector<std::vector<int>> shapes; | std::vector<std::vector<int>> shapes; | ||||
| for (size_t i = 0; i < input_num; ++i) { | for (size_t i = 0; i < input_num; ++i) { | ||||
| @@ -59,8 +59,8 @@ int ConcatCPUKernel::DoConcat(int task_id) { | |||||
| inputs_output_shape[input_num] = output_shape.data(); | inputs_output_shape[input_num] = output_shape.data(); | ||||
| auto output_addr = out_tensors_.at(0)->MutableData(); | auto output_addr = out_tensors_.at(0)->MutableData(); | ||||
| Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(), | |||||
| output_shape.size(), output_addr, task_id, thread_count_); | |||||
| Concat(inputs_addr.data(), input_num, axis_, inputs_output_shape.data(), output_shape.size(), output_addr, task_id, | |||||
| thread_count_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||