From: @sunsuodong Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tong tags/v1.1.0
| @@ -33,7 +33,7 @@ void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis | |||
| } | |||
| } | |||
| void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMaxParameter *param) { | |||
| void ArgMinMaxTopk1(const void *input, void *output, const int *shape, const ArgMinMaxParameter *param) { | |||
| int pre_axis_count = 1; | |||
| int axis_count = 1; | |||
| int after_axis_count = 1; | |||
| @@ -48,7 +48,7 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax | |||
| } | |||
| } | |||
| void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->get_max_) { | |||
| switch (param->axis_) { | |||
| case 0: | |||
| @@ -82,7 +82,7 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->topk_ == 1) { | |||
| ArgMinMaxTopk1(input, output, in_shape, param); | |||
| return; | |||
| @@ -21,7 +21,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -17,8 +17,8 @@ | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/nnacl_utils.h" | |||
| void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||
| int *outStrides, int *multiple) { | |||
| void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple) { | |||
| int srcDimSize = inShape[dim]; | |||
| if (dim == ndim - 1) { | |||
| for (int i = 0; i < multiple[dim]; i++) { | |||
| @@ -35,8 +35,8 @@ void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int * | |||
| } | |||
| } | |||
| void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||
| int *outStrides, int *multiple) { | |||
| void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple) { | |||
| int srcDimSize = inShape[dim]; | |||
| if (dim == ndim - 1) { | |||
| for (int i = 0; i < multiple[dim]; i++) { | |||
| @@ -74,7 +74,8 @@ void CalcMultiplesAndStrides(ArithmeticParameter *param) { | |||
| ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_); | |||
| } | |||
| void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param) { | |||
| void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, | |||
| ArithmeticParameter *param) { | |||
| CalcMultiplesAndStrides(param); | |||
| TileOneDimension(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | |||
| param->multiples0_); | |||
| @@ -82,7 +83,7 @@ void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_d | |||
| param->multiples1_); | |||
| } | |||
| void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||
| void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||
| ArithmeticParameter *param) { | |||
| CalcMultiplesAndStrides(param); | |||
| TileOneDimensionUint8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | |||
| @@ -91,7 +92,7 @@ void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, ui | |||
| param->multiples1_); | |||
| } | |||
| void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| ArithmeticParameter *param) { | |||
| CalcMultiplesAndStrides(param); | |||
| TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_, | |||
| @@ -47,18 +47,19 @@ typedef struct ArithmeticParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||
| int *outStrides, int *multiple); | |||
| void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple); | |||
| void ComputeStrides(const int *shape, int *strides, const int ndim); | |||
| void CalcMultiplesAndStrides(ArithmeticParameter *param); | |||
| void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||
| int *outStrides, int *multiple); | |||
| void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param); | |||
| void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||
| void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple); | |||
| void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, | |||
| ArithmeticParameter *param); | |||
| void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||
| ArithmeticParameter *param); | |||
| void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| ArithmeticParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -16,6 +16,6 @@ | |||
| #include "nnacl/flatten.h" | |||
| #include <string.h> | |||
| void Flatten(const void *input, void *output, FlattenParameter *flatten_param) { | |||
| void Flatten(const void *input, void *output, const FlattenParameter *flatten_param) { | |||
| memcpy(output, input, flatten_param->size); | |||
| } | |||
| @@ -25,7 +25,7 @@ typedef struct FlattenParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Flatten(const void *input, void *output, FlattenParameter *flatten_param); | |||
| void Flatten(const void *input, void *output, const FlattenParameter *flatten_param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -43,7 +43,7 @@ int ArgCompareDescFp32(const void *a, const void *b) { | |||
| return 0; | |||
| } | |||
| void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| @@ -58,7 +58,7 @@ void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| @@ -73,7 +73,7 @@ void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| @@ -88,7 +88,7 @@ void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| for (int32_t i = 0; i < param->in_strides_[0]; ++i) { | |||
| for (int j = 0; j < in_shape[0]; ++j) { | |||
| size_t offset = param->in_strides_[0] * j + i; | |||
| @@ -103,7 +103,7 @@ void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| @@ -123,7 +123,7 @@ void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| @@ -143,7 +143,7 @@ void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| @@ -163,7 +163,7 @@ void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| size_t in_dim0_offset = i * param->in_strides_[0]; | |||
| @@ -183,7 +183,7 @@ void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| @@ -208,7 +208,7 @@ void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| @@ -233,7 +233,7 @@ void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| @@ -258,7 +258,7 @@ void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| for (int i = 0; i < in_shape[0]; ++i) { | |||
| @@ -283,7 +283,7 @@ void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| @@ -311,7 +311,7 @@ void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| @@ -339,7 +339,7 @@ void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| @@ -367,7 +367,7 @@ void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| int in_shape1 = in_shape[1]; | |||
| int in_shape2 = in_shape[2]; | |||
| int in_shape3 = in_shape[3]; | |||
| @@ -395,7 +395,7 @@ void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, | |||
| } | |||
| } | |||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim0OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -403,7 +403,7 @@ void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim0OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -411,7 +411,7 @@ void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim1OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -419,7 +419,7 @@ void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim1OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -427,7 +427,7 @@ void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim2OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -435,7 +435,7 @@ void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim2OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -443,7 +443,7 @@ void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMaxDim3OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -451,7 +451,7 @@ void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) { | |||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) { | |||
| if (param->out_value_) { | |||
| ArgMinDim3OutValue(input, output, in_shape, param); | |||
| } else { | |||
| @@ -459,7 +459,7 @@ void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMa | |||
| } | |||
| } | |||
| void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count) { | |||
| bool out_value = param->out_value_; | |||
| for (int i = 0; i < pre_axis_count; ++i) { | |||
| @@ -480,7 +480,7 @@ void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pr | |||
| } | |||
| } | |||
| void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count) { | |||
| bool out_value = param->out_value_; | |||
| for (int i = 0; i < pre_axis_count; ++i) { | |||
| @@ -21,18 +21,18 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count); | |||
| void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, | |||
| int after_axis_count); | |||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param); | |||
| void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -615,8 +615,8 @@ int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param) { | |||
| int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementMul(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| @@ -690,21 +690,21 @@ int ElementAddInt(const int *input0, const int *input1, int *output, const int e | |||
| return NNACL_OK; | |||
| } | |||
| int ElementAddInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size) { | |||
| int ElementAddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int element_size) { | |||
| for (int i = 0; i < element_size; i++) { | |||
| output[i] = input0[i] + input1[i]; | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param) { | |||
| int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementAdd(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, | |||
| int8_t *output, int element_size, ArithmeticParameter *param) { | |||
| TileDimensionsInt8(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementAddInt8(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| @@ -763,8 +763,8 @@ int ElementSubRelu6(const float *input0, const float *input1, float *output, con | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param) { | |||
| int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementSub(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| @@ -791,8 +791,8 @@ int ElementDivRelu6(const float *input0, const float *input1, float *output, con | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param) { | |||
| int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementDiv(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| @@ -811,7 +811,7 @@ int ElementFloorModInt(const int *input0, const int *input1, int *output, const | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementFloorMod(tile_input0, tile_input1, output, element_size); | |||
| @@ -831,7 +831,7 @@ int ElementFloorDivInt(const int *input0, const int *input1, int *output, const | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementFloorDiv(tile_input0, tile_input1, output, element_size); | |||
| @@ -862,13 +862,13 @@ int ElementSquaredDifference(const float *input0, const float *input1, float *ou | |||
| return ElementMul(output, output, output, element_size); | |||
| } | |||
| int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||
| float *output, int element_size, ArithmeticParameter *param) { | |||
| BroadcastSub(input0, input1, tile_input0, tile_input1, output, element_size, param); | |||
| return ElementMul(output, output, output, element_size); | |||
| } | |||
| int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementLogicalAnd(tile_input0, tile_input1, output, element_size); | |||
| @@ -894,7 +894,7 @@ int ElementLogicalOr(const float *input0, const float *input1, float *output, co | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementLogicalOr(tile_input0, tile_input1, output, element_size); | |||
| @@ -916,7 +916,7 @@ int ElementMaximum(const float *input0, const float *input1, float *output, cons | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementMaximum(tile_input0, tile_input1, output, element_size); | |||
| @@ -938,7 +938,7 @@ int ElementMinimum(const float *input0, const float *input1, float *output, cons | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementMinimum(tile_input0, tile_input1, output, element_size); | |||
| @@ -970,7 +970,7 @@ int ElementNotEqual(const float *input0, const float *input1, float *output, con | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementNotEqual(tile_input0, tile_input1, output, element_size); | |||
| @@ -1002,7 +1002,7 @@ int ElementEqual(const float *input0, const float *input1, float *output, const | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementEqual(tile_input0, tile_input1, output, element_size); | |||
| @@ -1026,8 +1026,8 @@ int ElementLess(const float *input0, const float *input1, float *output, const i | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param) { | |||
| int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementLess(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| @@ -1050,7 +1050,7 @@ int ElementLessEqual(const float *input0, const float *input1, float *output, co | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementLessEqual(tile_input0, tile_input1, output, element_size); | |||
| @@ -1074,7 +1074,7 @@ int ElementGreater(const float *input0, const float *input1, float *output, cons | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementGreater(tile_input0, tile_input1, output, element_size); | |||
| @@ -1098,8 +1098,8 @@ int ElementGreaterEqual(const float *input0, const float *input1, float *output, | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param) { | |||
| int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||
| float *output, int element_size, ArithmeticParameter *param) { | |||
| TileDimensions(input0, input1, tile_input0, tile_input1, param); | |||
| return ElementGreaterEqual(tile_input0, tile_input1, output, element_size); | |||
| } | |||
| @@ -1111,7 +1111,7 @@ int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int * | |||
| int *in_datatype, int *out_datatype, OpParameter *param) { | |||
| *out_format = in_format[0]; | |||
| *out_datatype = in_datatype[0]; | |||
| ArithmeticParameter *arithmetic_parameter = (ArithmeticParameter *)param; | |||
| const ArithmeticParameter *arithmetic_parameter = (const ArithmeticParameter *)param; | |||
| int ndim0 = dim_size[0]; | |||
| int ndim1 = dim_size[1]; | |||
| int *in_shape0 = in_shape[0]; | |||
| @@ -64,85 +64,85 @@ int ElementMulRelu6(const float *input0, const float *input1, float *output, con | |||
| int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementAdd(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, | |||
| int8_t *output, int element_size, ArithmeticParameter *param); | |||
| int ElementSub(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementDiv(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||
| float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLess(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, | |||
| ArithmeticParameter *param); | |||
| int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementGreater(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||
| float *output, int element_size, ArithmeticParameter *param); | |||
| #ifdef ENABLE_NNACL_INFER_SHAPE | |||
| int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | |||
| @@ -19,8 +19,8 @@ | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #include "nnacl/op_base.h" | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||
| void *output) { | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param, | |||
| int task_id, void *output) { | |||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | |||
| int completed_units = task_id * units_per_thread; | |||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | |||
| @@ -37,7 +37,7 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba | |||
| } | |||
| void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | |||
| const void *variance, BatchNormParameter *param, int task_id, void *output) { | |||
| const void *variance, const BatchNormParameter *param, int task_id, void *output) { | |||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | |||
| int completed_units = task_id * units_per_thread; | |||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | |||
| @@ -53,7 +53,7 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset | |||
| } | |||
| } | |||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, | |||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param, | |||
| float *save_mean, float *save_var) { | |||
| const float N = (float)param->unit_; | |||
| const float VN = N; | |||
| @@ -23,12 +23,12 @@ | |||
| extern "C" { | |||
| #endif | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||
| void *output); | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param, | |||
| int task_id, void *output); | |||
| void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | |||
| const void *variance, BatchNormParameter *param, int task_id, void *output); | |||
| const void *variance, const BatchNormParameter *param, int task_id, void *output); | |||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, | |||
| void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param, | |||
| float *save_mean, float *save_var); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -17,8 +17,8 @@ | |||
| #include "nnacl/fp32/concat_fp32.h" | |||
| #include <string.h> | |||
| void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||
| int task_id, int thread_num) { | |||
| void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, | |||
| void *output, int task_id, int thread_num) { | |||
| int before_axis_size = 1; | |||
| for (int i = 0; i < axis; ++i) { | |||
| before_axis_size *= inputs_output_shape[0][i]; | |||
| @@ -32,12 +32,12 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si | |||
| uint8_t *dst_base = (output); | |||
| size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis]; | |||
| for (int i = 0; i < input_num; ++i) { | |||
| uint8_t *src_base = (input[i]); | |||
| const uint8_t *src_base = (input[i]); | |||
| size_t input_stride = after_axis_size * inputs_output_shape[i][axis]; | |||
| int offset = UP_DIV(input_stride, thread_num); | |||
| int count = MSMIN(offset, input_stride - offset * task_id); | |||
| for (int j = 0; j < before_axis_size; j++) { | |||
| uint8_t *src = src_base + j * input_stride + task_id * offset; | |||
| const uint8_t *src = src_base + j * input_stride + task_id * offset; | |||
| uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size + task_id * offset; | |||
| memcpy(dst, src, count); | |||
| } | |||
| @@ -22,8 +22,8 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||
| int task_id, int thread_num); | |||
| void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, | |||
| void *output, int task_id, int thread_num); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/fp32/constant_of_shape_fp32.h" | |||
| int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) { | |||
| int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param) { | |||
| int size = param->unit_; | |||
| float data = param->value_; | |||
| int ind_st = MSMIN(tid * size, param->element_sz_); | |||
| @@ -27,7 +27,7 @@ int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) { | |||
| return NNACL_OK; | |||
| } | |||
| int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param) { | |||
| int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param) { | |||
| int size = param->unit_; | |||
| float data = param->value_; | |||
| int ind_st = MSMIN(tid * size, param->element_sz_); | |||
| @@ -33,8 +33,8 @@ typedef struct ConstantOfShapeParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param); | |||
| int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param); | |||
| int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param); | |||
| int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -22,7 +22,7 @@ | |||
| // fp32 conv common | |||
| void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | |||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param) { | |||
| float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) { | |||
| int out_channel = conv_param->output_channel_; | |||
| int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; | |||
| int output_count = conv_param->output_h_ * conv_param->output_w_; | |||
| @@ -61,8 +61,8 @@ void ConvFp32(const float *input_data, float *packed_input, const float *packed_ | |||
| // fp32 conv winograd | |||
| void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | |||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | |||
| OutputTransFunc out_func) { | |||
| TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param, | |||
| InputTransFunc in_func, OutputTransFunc out_func) { | |||
| int in_channel = conv_param->input_channel_; | |||
| int out_w_block = UP_DIV(conv_param->output_w_, conv_param->output_unit_); | |||
| int out_h_block = UP_DIV(conv_param->output_h_, conv_param->output_unit_); | |||
| @@ -35,12 +35,12 @@ extern "C" { | |||
| // fp32 convolution common (im2col+gemm) | |||
| void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data, | |||
| float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param); | |||
| float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param); | |||
| // fp32 convolution winograd | |||
| void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data, | |||
| TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func, | |||
| OutputTransFunc out_func); | |||
| TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param, | |||
| InputTransFunc in_func, OutputTransFunc out_func); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/crop_parameter.h" | |||
| void Pad4DOffset(CropParameter *crop_param, int64_t *offset) { | |||
| void Pad4DOffset(const CropParameter *crop_param, int64_t *offset) { | |||
| int axis = crop_param->axis_; | |||
| for (int i = DIMENSION_4D - 1; i >= 0; --i) { | |||
| int offset_index = i - axis; | |||
| @@ -30,8 +30,8 @@ void Pad4DOffset(CropParameter *crop_param, int64_t *offset) { | |||
| } | |||
| } | |||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param, | |||
| int thread_id) { | |||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, | |||
| const CropParameter *crop_param, int thread_id) { | |||
| int64_t offset_pad[DIMENSION_4D]; | |||
| Pad4DOffset(crop_param, offset_pad); | |||
| int out_shape1 = out_shape[1]; | |||
| @@ -66,7 +66,7 @@ void Crop4D(const float *input, float *output, const int *in_shape, const int *o | |||
| } | |||
| void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, | |||
| CropParameter *crop_param) { | |||
| const CropParameter *crop_param) { | |||
| int64_t offset_pad[DIMENSION_4D]; | |||
| Pad4DOffset(crop_param, offset_pad); | |||
| size_t in_dim2_stride = in_shape[3]; | |||
| @@ -23,10 +23,10 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param, | |||
| int thread_id); | |||
| void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, | |||
| const CropParameter *crop_param, int thread_id); | |||
| void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, | |||
| CropParameter *crop_param); | |||
| const CropParameter *crop_param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -34,7 +34,7 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in | |||
| } | |||
| void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *dst, int output_channel, | |||
| ConvParameter *conv_param) { | |||
| const ConvParameter *conv_param) { | |||
| /* arm64 row12x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */ | |||
| /* arm32 row4x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */ | |||
| size_t input_plane = conv_param->input_w_ * conv_param->input_h_; | |||
| @@ -30,7 +30,7 @@ extern "C" { | |||
| #endif | |||
| void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, int plane); | |||
| void DeConvPostFp32C8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel, | |||
| ConvParameter *conv_param); | |||
| const ConvParameter *conv_param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -16,8 +16,8 @@ | |||
| #include "nnacl/fp32/deconv_winograd_fp32.h" | |||
| int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param, | |||
| DeConvParam *deconv_param) { | |||
| int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param, | |||
| const DeConvParam *deconv_param) { | |||
| int tmp_kernel_plane = unit->w_size_ * unit->h_size_; | |||
| int size = conv_param->input_channel_ * conv_param->output_channel_ * tmp_kernel_plane; | |||
| float *current_unit_weight = (float *)malloc(size * sizeof(float)); | |||
| @@ -25,13 +25,14 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| for (int ic = 0; ic < conv_param->input_channel_; ic++) { | |||
| float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic; | |||
| const float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic; | |||
| float *dst_ic = current_unit_weight + tmp_kernel_plane * conv_param->output_channel_ * ic; | |||
| for (int uhi = 0; uhi < unit->h_size_; uhi++) { | |||
| for (int uwi = 0; uwi < unit->w_size_; uwi++) { | |||
| int src_h_offset = unit->h_start_ + uhi * conv_param->stride_h_; | |||
| int src_w_offset = unit->w_start_ + uwi * conv_param->stride_w_; | |||
| float *src_hw = src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_; | |||
| const float *src_hw = | |||
| src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_; | |||
| float *dst_hw = dst_ic + (uhi * unit->w_size_ + uwi) * conv_param->output_channel_; | |||
| memcpy(dst_hw, src_hw, conv_param->output_channel_ * sizeof(float)); | |||
| } | |||
| @@ -132,10 +133,10 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame | |||
| return NNACL_OK; | |||
| } | |||
| void DeConvWgInputPack(float *src_ptr, float *dst_ptr, int channel, int stride) { | |||
| void DeConvWgInputPack(const float *src_ptr, float *dst_ptr, int channel, int stride) { | |||
| int ic4div = channel / C4NUM; | |||
| int ic4mod = channel % C4NUM; | |||
| float *src = src_ptr; | |||
| const float *src = src_ptr; | |||
| float *dst = dst_ptr; | |||
| for (int ic = 0; ic < ic4div; ic++) { | |||
| @@ -340,9 +341,10 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s | |||
| return; | |||
| } | |||
| void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, const float *at_buf, | |||
| float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf, | |||
| int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||
| void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, const float *weight_buf, float *tmp_buf, | |||
| const float *at_buf, float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, | |||
| float *b_tmp_buf, int unit_size, int w_start, int h_start, const ConvParameter *conv_param, | |||
| const DeConvParam *deconv_param) { | |||
| int winograd_plane = unit_size * unit_size; | |||
| if (!transfered[unit_size]) { | |||
| WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT, | |||
| @@ -355,7 +357,7 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, | |||
| for (int index = 0; index < winograd_plane; index++) { | |||
| float *src = trans_a_buf + index * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | |||
| float *dst = tmp_buf + index * deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | |||
| float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_; | |||
| const float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_; | |||
| TiledC4MatmulFp32(dst, src, weight, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM, deconv_param->ic_div4_, | |||
| deconv_param->oc_div4_); | |||
| } | |||
| @@ -380,15 +382,16 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, | |||
| return; | |||
| } | |||
| void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start, | |||
| int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) { | |||
| void DeConvWgCalCommFp32(const float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, | |||
| int w_start, int h_size, int w_size, const ConvParameter *conv_param, | |||
| const DeConvParam *deconv_param) { | |||
| int count = deconv_param->oc_div4_ * w_size * h_size; | |||
| int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_; | |||
| int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_; | |||
| for (int hi = 0; hi < DECONV_WINOGRAD_DEFAULT_UNIT; hi++) { | |||
| for (int wi = 0; wi < DECONV_WINOGRAD_DEFAULT_UNIT; wi++) { | |||
| float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride; | |||
| const float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride; | |||
| TiledC4MatmulFp32(tmp_buf, src_in, weight, DECONV_WINOGRAD_DEFAULT_TILE * 4, deconv_param->ic_div4_, count); | |||
| for (int uhi = 0; uhi < h_size; uhi++) { | |||
| @@ -406,8 +409,8 @@ void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, f | |||
| return; | |||
| } | |||
| void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||
| ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) { | |||
| void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||
| const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) { | |||
| /* pack tile input */ | |||
| int tile_in_unit_stride = deconv_param->ic_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | |||
| #ifdef ENABLE_ARM | |||
| @@ -439,7 +442,7 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind | |||
| continue; | |||
| } | |||
| float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_; | |||
| const float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_; | |||
| DeConvWgInputPack(src, dst, conv_param->input_channel_, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM); | |||
| } | |||
| } | |||
| @@ -474,8 +477,8 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind | |||
| return; | |||
| } | |||
| void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param, | |||
| int calculate_count, int tile_index) { | |||
| void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param, | |||
| const DeConvParam *deconv_param, int calculate_count, int tile_index) { | |||
| /* merge */ | |||
| int src_unit_stride = deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE; | |||
| @@ -483,7 +486,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par | |||
| int dst_stride = conv_param->output_w_ * conv_param->output_h_ * C4NUM; | |||
| for (int index = 0; index < calculate_count; ++index) { | |||
| float *src_start = tile_out + index * C4NUM; | |||
| const float *src_start = tile_out + index * C4NUM; | |||
| int plane_index = tile_index * DECONV_WINOGRAD_DEFAULT_TILE + index; | |||
| int w_unit_index = plane_index % deconv_param->in_tile_w_count_; | |||
| @@ -499,7 +502,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par | |||
| for (int hi = merge_h_start; hi < merge_h_end; hi++) { | |||
| for (int wi = merge_w_start; wi < merge_w_end; wi++) { | |||
| float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride; | |||
| const float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride; | |||
| float *dst = dst_start + (hi * conv_param->output_w_ + wi) * C4NUM; | |||
| DeConvWgMerge(src, dst, src_stride, dst_stride, deconv_param->oc_div4_); | |||
| } | |||
| @@ -28,12 +28,12 @@ | |||
| extern "C" { | |||
| #endif | |||
| int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param, | |||
| DeConvParam *deconv_param); | |||
| void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||
| ConvParameter *conv_param, DeConvParam *deconv_param, int task_id); | |||
| void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param, | |||
| int calculate_count, int tile_index); | |||
| int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param, | |||
| const DeConvParam *deconv_param); | |||
| void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count, | |||
| const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id); | |||
| void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param, | |||
| const DeConvParam *deconv_param, int calculate_count, int tile_index); | |||
| void TiledC4MatmulFp32(float *dst, const float *src, const float *weight, size_t ic4, size_t cal_num, size_t oc4); | |||
| #ifdef __cplusplus | |||
| @@ -36,8 +36,8 @@ float IntersectionOverUnion(const BboxCorner *a, const BboxCorner *b) { | |||
| return inter / (area_a + area_b - inter); | |||
| } | |||
| int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors, | |||
| DetectionPostProcessParameter *param) { | |||
| int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors, | |||
| const DetectionPostProcessParameter *param) { | |||
| if (input_boxes == NULL || anchors == NULL || param == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -37,8 +37,8 @@ typedef struct { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors, | |||
| DetectionPostProcessParameter *param); | |||
| int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors, | |||
| const DetectionPostProcessParameter *param); | |||
| int NmsMultiClassesFastCore(const int num_boxes, const int num_classes_with_bg, const float *input_scores, | |||
| void (*)(const float *, int *, int, int), const DetectionPostProcessParameter *param, | |||
| @@ -18,11 +18,11 @@ | |||
| #include <math.h> | |||
| #include "nnacl/errorcode.h" | |||
| void Calculate_Data(const float *input_data, float *output_data, int num, EluParameter *parameter) { | |||
| void Calculate_Data(const float *input_data, float *output_data, int num, const EluParameter *parameter) { /* Computes the ELU activation for the single element at index `num` and stores it in output_data[num]. */ | |||
| output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num]; /* negative input -> alpha_ * (e^x - 1) via expm1; non-negative input is copied through unchanged */ | |||
| } | |||
| int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) { | |||
| int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id) { | |||
| for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) { | |||
| Calculate_Data(input_data, output_data, i, parameter); | |||
| } | |||
| @@ -28,7 +28,7 @@ typedef struct EluParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id); | |||
| int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -31,7 +31,8 @@ void l2_regulate(float *data, int size, float max_norm) { | |||
| return; | |||
| } | |||
| int CopyData(float *input_data, int *ids, float *output_data, int num, EmbeddingLookupParameter *parameter) { | |||
| int CopyData(float *input_data, const int *ids, float *output_data, int num, | |||
| const EmbeddingLookupParameter *parameter) { | |||
| if (ids[num] >= parameter->layer_num_ || ids[num] < 0) { | |||
| return NNACL_ERRCODE_INDEX_OUT_OF_RANGE; | |||
| } | |||
| @@ -46,7 +47,8 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding | |||
| return NNACL_OK; | |||
| } | |||
| int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) { | |||
| int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter, | |||
| int task_id) { | |||
| for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) { | |||
| int ret = CopyData(input_data, ids, output_data, i, parameter); | |||
| if (ret != NNACL_OK) { | |||
| @@ -31,7 +31,8 @@ typedef struct EmbeddingLookupParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id); | |||
| int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter, | |||
| int task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include <string.h> | |||
| #include "nnacl/errorcode.h" | |||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id) { | |||
| int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id) { | |||
| if (parameter->scale_ == 1) { | |||
| for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) { | |||
| output_data[i] = expf(input_data[i]); | |||
| @@ -33,7 +33,7 @@ typedef struct ExpParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id); | |||
| int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id); | |||
| void ExpFp32(const float *src, float *dst, int num); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -26,10 +26,10 @@ inline int Stride(const int *shape, int rank, int index) { | |||
| return stride; | |||
| } | |||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||
| int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||
| float *output) { | |||
| for (int m = 0; m < outer_size; ++m) { | |||
| float *inputm = input + inner_size * m * limit; | |||
| const float *inputm = input + inner_size * m * limit; | |||
| float *outputm = output + inner_size * m * indices_element_size; | |||
| for (int i = 0; i < indices_element_size; ++i) { | |||
| if (indices[i] < 0 || indices[i] > limit) { | |||
| @@ -22,7 +22,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||
| int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||
| float *output); | |||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | |||
| int indices_element_size, int32_t *output); | |||
| @@ -18,9 +18,8 @@ | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data, | |||
| const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id, | |||
| const int thread_num) { | |||
| int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data, | |||
| const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -23,9 +23,8 @@ | |||
| extern "C" { | |||
| #endif | |||
| int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data, | |||
| const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id, | |||
| const int thread_num); | |||
| int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data, | |||
| const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,9 +18,8 @@ | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data, | |||
| const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid, | |||
| const int thread_num) { | |||
| int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data, | |||
| bool affine, float epsilon, float *dst_data, int tid, int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -23,9 +23,8 @@ | |||
| extern "C" { | |||
| #endif | |||
| int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data, | |||
| const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid, | |||
| const int thread_num); | |||
| int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data, | |||
| bool affine, float epsilon, float *dst_data, int tid, int thread_num); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -17,15 +17,15 @@ | |||
| #include "nnacl/fp32/local_response_norm_fp32.h" | |||
| #include <math.h> | |||
| int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr, | |||
| LocalResponseNormParameter *param) { | |||
| int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr, | |||
| const LocalResponseNormParameter *param) { | |||
| int depth_radius = param->depth_radius_; | |||
| float bias = param->bias_; | |||
| float alpha = param->alpha_; | |||
| float beta = param->beta_; | |||
| for (int i = 0; i < out_size; i++) { | |||
| float *in_data = input_ptr + i * channel; | |||
| const float *in_data = input_ptr + i * channel; | |||
| float *out_data = output_ptr + i * channel; | |||
| for (int j = 0; j < channel; j++) { | |||
| @@ -30,8 +30,8 @@ typedef struct LocalResponseNormParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr, | |||
| LocalResponseNormParameter *param); | |||
| int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr, | |||
| const LocalResponseNormParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include "nnacl/fp32/activation_fp32.h" | |||
| #include "nnacl/fp32/arithmetic_fp32.h" | |||
| void InitGate(float *gate_buffer, const float *bias, LstmParameter *lstm_parm) { | |||
| void InitGate(float *gate_buffer, const float *bias, const LstmParameter *lstm_parm) { | |||
| int gate_offest = 0; | |||
| for (int l = 0; l < 4; l++) { | |||
| int batch_offest = gate_offest; | |||
| @@ -94,7 +94,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we | |||
| const float *input_cell_weight, const float *input_output_weight, const float *state_input_weight, | |||
| const float *state_forget_weight, const float *state_cell_weight, const float *state_output_weight, | |||
| const float *bias, float *hidden_state, float *cell_state, float *gate_buffer, | |||
| LstmParameter *lstm_parm) { | |||
| const LstmParameter *lstm_parm) { | |||
| InitGate(gate_buffer, bias, lstm_parm); | |||
| float *input_gate = gate_buffer; | |||
| @@ -139,7 +139,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we | |||
| } | |||
| void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias, | |||
| float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm) { | |||
| float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm) { | |||
| // forward | |||
| const float *input_input_weight = weight_i; | |||
| const float *input_forget_weight = weight_i + lstm_parm->input_size_ * lstm_parm->hidden_size_ * 2; | |||
| @@ -34,7 +34,7 @@ typedef struct LstmParameter { | |||
| extern "C" { | |||
| #endif | |||
| void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias, | |||
| float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm); | |||
| float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include "nnacl/common_func.h" | |||
| void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | |||
| const int *paddings, const int tid, const int thread_num) { | |||
| const int *paddings, int tid, int thread_num) { | |||
| int in[4], out[4]; | |||
| for (in[0] = 0; in[0] < input_shape[0]; in[0]++) { | |||
| out[0] = in[0] + paddings[0]; | |||
| @@ -28,7 +28,7 @@ | |||
| extern "C" { | |||
| #endif | |||
| void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape, | |||
| const int *paddings, const int tid, const int thread_num); | |||
| const int *paddings, int tid, int thread_num); | |||
| void MirrorPad(const float *input_data, float *output_data, const int *input_shape, const PadParameter *pad_param, | |||
| int begin, int end); | |||
| @@ -18,8 +18,8 @@ | |||
| #include <float.h> | |||
| #include "nnacl/errorcode.h" | |||
| int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||
| float maxf) { | |||
| int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||
| float minf, float maxf) { | |||
| int win_w = pooling_param->window_w_; | |||
| int win_h = pooling_param->window_h_; | |||
| int channel = pooling_param->input_channel_; | |||
| @@ -144,8 +144,8 @@ int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pool | |||
| return NNACL_OK; | |||
| } | |||
| void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||
| float maxf) { | |||
| void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||
| float minf, float maxf) { | |||
| int win_w = pooling_param->window_w_; | |||
| int win_h = pooling_param->window_h_; | |||
| int channel = pooling_param->input_channel_; | |||
| @@ -27,10 +27,10 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||
| float maxf); | |||
| void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf, | |||
| float maxf); | |||
| int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||
| float minf, float maxf); | |||
| void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id, | |||
| float minf, float maxf); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -18,7 +18,7 @@ | |||
| #include <arm_neon.h> | |||
| #endif | |||
| void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id) { | |||
| void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id) { | |||
| float *negetive_slope_value = prelu_param_->slope_; | |||
| int c4 = prelu_param_->channel_num_ / C4NUM; | |||
| int channel_num = prelu_param_->channel_num_; | |||
| @@ -81,7 +81,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i | |||
| int c4_offset = tile_offset + k * C4NUM; | |||
| int slope_offset = k * C4NUM; | |||
| for (int l = 0; l < C4NUM; ++l) { | |||
| float in_data = input_ptr[c4_offset + l]; | |||
| const float in_data = input_ptr[c4_offset + l]; | |||
| output_ptr[c4_offset + l] = | |||
| (in_data < 0 ? in_data : 0) * negetive_slope_value[slope_offset + l] + (in_data > 0 ? in_data : 0); | |||
| } | |||
| @@ -93,7 +93,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i | |||
| int offset = m * channel_num; | |||
| for (int k = c_s; k < channel_num; ++k) { | |||
| int c4_offset = offset + k; | |||
| float in_data = input_ptr[c4_offset]; | |||
| const float in_data = input_ptr[c4_offset]; | |||
| if (in_data >= 0) { | |||
| output_ptr[c4_offset] = in_data; | |||
| } else { | |||
| @@ -104,7 +104,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i | |||
| } | |||
| } | |||
| void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id) { | |||
| void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id) { | |||
| for (int j = task_id; j < prelu_param_->tile_block_; j += prelu_param_->op_parameter_.thread_num_) { | |||
| int cal_index; | |||
| #ifdef ENABLE_NEON | |||
| @@ -22,9 +22,9 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id); | |||
| void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id); | |||
| void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id); | |||
| void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -23,8 +23,8 @@ | |||
| #include "nnacl/reduce_parameter.h" | |||
| #endif | |||
| int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num) { | |||
| int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -44,8 +44,8 @@ int ReduceMean(const int outer_size, const int inner_size, const int axis_size, | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num) { | |||
| int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -81,8 +81,8 @@ int ReduceSum(const int outer_size, const int inner_size, const int axis_size, c | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num) { | |||
| int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -102,8 +102,8 @@ int ReduceMax(const int outer_size, const int inner_size, const int axis_size, c | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num) { | |||
| int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -123,8 +123,8 @@ int ReduceMin(const int outer_size, const int inner_size, const int axis_size, c | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||
| const int tid, const int thread_num) { | |||
| int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -144,8 +144,8 @@ int IntReduceMin(const int outer_size, const int inner_size, const int axis_size | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num) { | |||
| int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -166,8 +166,8 @@ int ReduceProd(const int outer_size, const int inner_size, const int axis_size, | |||
| return NNACL_OK; | |||
| } | |||
| int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||
| const int tid, const int thread_num) { | |||
| int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -190,8 +190,8 @@ int IntReduceProd(const int outer_size, const int inner_size, const int axis_siz | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data, | |||
| float *dst_data, const int tid, const int thread_num) { | |||
| int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -22,22 +22,22 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num); | |||
| int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num); | |||
| int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num); | |||
| int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num); | |||
| int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||
| const int tid, const int thread_num); | |||
| int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data, | |||
| const int tid, const int thread_num); | |||
| int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data, | |||
| const int tid, const int thread_num); | |||
| int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data, | |||
| float *dst_data, const int tid, const int thread_num); | |||
| int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num); | |||
| int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num); | |||
| int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num); | |||
| int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num); | |||
| int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||
| int thread_num); | |||
| int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num); | |||
| int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, | |||
| int thread_num); | |||
| int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, | |||
| int thread_num); | |||
| #ifdef ENABLE_NNACL_INFER_SHAPE | |||
| int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | |||
| @@ -20,7 +20,8 @@ | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param) { | |||
| int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, | |||
| const ROIPoolingParameter *param) { | |||
| int num_rois = param->output_n_; | |||
| int units = UP_DIV(num_rois, param->thread_num_); | |||
| int roi_st = tid * units; | |||
| @@ -52,7 +53,7 @@ int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, in | |||
| float bin_size_h = (float)roi_height / (float)pooled_height; | |||
| float bin_size_w = (float)roi_width / (float)pooled_width; | |||
| float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind; | |||
| const float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind; | |||
| for (int ph = 0; ph < pooled_height; ++ph) { | |||
| for (int pw = 0; pw < pooled_width; ++pw) { | |||
| @@ -40,7 +40,8 @@ typedef struct ROIPoolingParameter { | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param); | |||
| int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, | |||
| const ROIPoolingParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -66,7 +66,7 @@ void ScaleAxis(const float *in_data, float *out_data, const float *scale, const | |||
| } | |||
| void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | |||
| ScaleParameter *scale_param) { | |||
| const ScaleParameter *scale_param) { | |||
| int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | |||
| int outer_start = task_id * outer_step; | |||
| int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | |||
| @@ -137,7 +137,7 @@ void ScaleAxisRelu(const float *in_data, float *out_data, const float *scale, co | |||
| } | |||
| void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | |||
| ScaleParameter *scale_param) { | |||
| const ScaleParameter *scale_param) { | |||
| int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | |||
| int outer_start = task_id * outer_step; | |||
| int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | |||
| @@ -210,7 +210,7 @@ void ScaleAxisRelu6(const float *in_data, float *out_data, const float *scale, c | |||
| } | |||
| void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | |||
| ScaleParameter *scale_param) { | |||
| const ScaleParameter *scale_param) { | |||
| int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_); | |||
| int outer_start = task_id * outer_step; | |||
| int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_); | |||
| @@ -23,11 +23,11 @@ | |||
| extern "C" { | |||
| #endif | |||
| void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | |||
| ScaleParameter *scale_param); | |||
| const ScaleParameter *scale_param); | |||
| void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | |||
| ScaleParameter *scale_param); | |||
| const ScaleParameter *scale_param); | |||
| void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id, | |||
| ScaleParameter *scale_param); | |||
| const ScaleParameter *scale_param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -46,7 +46,7 @@ void PadSliceParameterTo4D(SliceParameter *param) { | |||
| param->param_length_ = DIMENSION_4D; | |||
| } | |||
| void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id) { | |||
| void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id) { | |||
| int32_t out_dim1 = param->size_[1]; | |||
| int32_t out_dim2 = param->size_[2]; | |||
| int32_t out_dim3 = param->size_[3]; | |||
| @@ -78,7 +78,7 @@ void DoSlice(const float *input, float *output, SliceParameter *param, int threa | |||
| } | |||
| } | |||
| void DoSliceNoParallel(const float *input, float *output, SliceParameter *param) { | |||
| void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param) { | |||
| size_t copy_size = param->size_[3] * sizeof(float); | |||
| size_t in_stride2 = param->shape_[3]; | |||
| size_t in_stride1 = param->shape_[2] * in_stride2; | |||
| @@ -23,8 +23,8 @@ | |||
| extern "C" { | |||
| #endif | |||
| void PadSliceParameterTo4D(SliceParameter *param); | |||
| void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id); | |||
| void DoSliceNoParallel(const float *input, float *output, SliceParameter *param); | |||
| void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id); | |||
| void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -88,10 +88,10 @@ void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel) { | |||
| } | |||
| // output = exp(input) / reduce_sum(exp(input), axis) | |||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter) { | |||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter) { | |||
| int axis = parameter->axis_; | |||
| int n_dim = parameter->n_dim_; | |||
| int *input_shape = parameter->input_shape_; | |||
| const int *input_shape = parameter->input_shape_; | |||
| int inner_size = 1; | |||
| int outter_size = 1; | |||
| @@ -22,7 +22,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter); | |||
| void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter); | |||
| void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -214,7 +214,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, int32_t *filt | |||
| return; | |||
| } | |||
| void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||
| void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||
| int block_index) { | |||
| // input format : nhwc | |||
| int kernel_h = conv_param->kernel_h_; | |||
| @@ -27,7 +27,7 @@ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||
| void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num, | |||
| int block_index); | |||
| void PackHWCToWHC(const float *src, float *dst, int height, int width, int channel); | |||
| @@ -18,7 +18,8 @@ | |||
| // fp32 conv winograd | |||
| void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num, | |||
| int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func) { | |||
| int out_tile_index, int out_w_block_num, const ConvParameter *conv_param, | |||
| InputTransFunc func) { | |||
| int input_unit = conv_param->input_unit_; | |||
| int output_unit = conv_param->output_unit_; | |||
| int in_channel = conv_param->input_channel_; | |||
| @@ -96,7 +97,8 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * | |||
| } | |||
| void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, | |||
| int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func) { | |||
| int out_tile_index, int output_unit_num, const ConvParameter *conv_param, | |||
| OutputTransFunc func) { | |||
| int output_unit = conv_param->output_unit_; | |||
| int output_w = conv_param->output_w_; | |||
| int output_h = conv_param->output_h_; | |||
| @@ -33,10 +33,12 @@ extern "C" { | |||
| #endif | |||
| // for fp32 winograd input/output transform | |||
| void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num, | |||
| int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func); | |||
| int out_tile_index, int out_w_block_num, const ConvParameter *conv_param, | |||
| InputTransFunc func); | |||
| void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, | |||
| int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func); | |||
| int out_tile_index, int output_unit_num, const ConvParameter *conv_param, | |||
| OutputTransFunc func); | |||
| // for int8 convolution 3x3 filter/input/output transform | |||
| void Conv3x3Int8InputUnit(int16_t *tmp_data, int16_t *trans_input_data, size_t step, int input_zp); | |||
| @@ -46,8 +46,8 @@ int ConcatCPUKernel::ReSize() { return ConcatBaseCPUKernel::ReSize(); } | |||
| int ConcatCPUKernel::DoConcat(int task_id) { | |||
| auto input_num = in_tensors_.size(); | |||
| std::vector<void *> inputs_addr(input_num, nullptr); | |||
| std::vector<int *> inputs_output_shape(input_num + 1, nullptr); | |||
| std::vector<const void *> inputs_addr(input_num, nullptr); | |||
| std::vector<const int *> inputs_output_shape(input_num + 1, nullptr); | |||
| std::vector<std::vector<int>> shapes; | |||
| for (size_t i = 0; i < input_num; ++i) { | |||
| @@ -59,8 +59,8 @@ int ConcatCPUKernel::DoConcat(int task_id) { | |||
| inputs_output_shape[input_num] = output_shape.data(); | |||
| auto output_addr = out_tensors_.at(0)->MutableData(); | |||
| Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(), | |||
| output_shape.size(), output_addr, task_id, thread_count_); | |||
| Concat(inputs_addr.data(), input_num, axis_, inputs_output_shape.data(), output_shape.size(), output_addr, task_id, | |||
| thread_count_); | |||
| return RET_OK; | |||
| } | |||