From: @ling_qiao_min Reviewed-by: Signed-off-by:tags/v1.2.0-rc1
| @@ -16,9 +16,7 @@ | |||
| #ifndef MINDSPORE_LITE_NNACL_ADDER_H_ | |||
| #define MINDSPORE_LITE_NNACL_ADDER_H_ | |||
| #include <math.h> | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/quantization/fixed_point.h" | |||
| typedef struct AdderParameter { | |||
| OpParameter op_parameter_; | |||
| @@ -13,9 +13,18 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/flatten.h" | |||
| #include <string.h> | |||
| void Flatten(const void *input, void *output, const FlattenParameter *flatten_param) { | |||
| memcpy(output, input, flatten_param->size); | |||
| #include "nnacl/arithmetic.h" | |||
| void CalcMultiplesAndStrides(ArithmeticParameter *param) { | |||
| NNACL_ASSERT(param->in_shape0_[i] != 0); | |||
| NNACL_ASSERT(param->in_shape1_[i] != 0); | |||
| for (size_t i = 0; i < param->ndim_; i++) { | |||
| param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i]; | |||
| param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i]; | |||
| } | |||
| // cal strides | |||
| ComputeStrides(param->in_shape0_, param->in_strides0_, param->ndim_); | |||
| ComputeStrides(param->in_shape1_, param->in_strides1_, param->ndim_); | |||
| ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_); | |||
| } | |||
| @@ -13,23 +13,41 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_FLATTEN_H_ | |||
| #define MINDSPORE_LITE_NNACL_FLATTEN_H_ | |||
| #ifndef MINDSPORE_LITE_NNACL_ARTITHMETIC_H_ | |||
| #define MINDSPORE_LITE_NNACL_ARTITHMETIC_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/common_func.h" | |||
| #include "nnacl/nnacl_utils.h" | |||
| typedef struct FlattenParameter { | |||
| // Primitive parameter | |||
| typedef struct ArithmeticParameter { | |||
| OpParameter op_parameter_; | |||
| // other parameter | |||
| int size; | |||
| } FlattenParameter; | |||
| bool broadcasting_; | |||
| size_t ndim_; | |||
| int activation_type_; | |||
| int in_shape0_[10]; | |||
| int in_elements_num0_; | |||
| int in_shape1_[10]; | |||
| int in_elements_num1_; | |||
| int out_shape_[10]; | |||
| int out_elements_num_; | |||
| int in_strides0_[10]; | |||
| int in_strides1_[10]; | |||
| int out_strides_[10]; | |||
| int multiples0_[10]; | |||
| int multiples1_[10]; | |||
| } ArithmeticParameter; | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Flatten(const void *input, void *output, const FlattenParameter *flatten_param); | |||
| void CalcMultiplesAndStrides(ArithmeticParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_NNACL_FLATTEN_H_ | |||
| #endif // MINDSPORE_LITE_NNACL_ARTITHMETIC_H_ | |||
| @@ -1,102 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/nnacl_utils.h" | |||
// Recursively tiles a float tensor, handling axis `dim` and everything after it.
//
// inData/outData point at the start of the current sub-tensor in the source and
// destination buffers. On the last axis (dim == ndim - 1) the inShape[dim]
// source elements are copied multiple[dim] times back to back. On any other
// axis, every source slice is forwarded to the next axis once per repeat, with
// the destination offset by outStrides[dim] * (slice + repeat * inShape[dim]).
void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape,
                      const int *inStrides, const int *outStrides, const int *multiple) {
  const int extent = inShape[dim];
  const int repeats = multiple[dim];

  if (dim != ndim - 1) {
    // Outer axis: walk the source slices and place each one `repeats` times.
    for (size_t slice = 0; slice < extent; ++slice) {
      for (size_t rep = 0; rep < repeats; ++rep) {
        const float *src = inData + inStrides[dim] * slice;
        float *dst = outData + outStrides[dim] * (slice + rep * extent);
        TileOneDimension(src, dst, dim + 1, ndim, inShape, inStrides, outStrides, multiple);
      }
    }
    return;
  }

  // Innermost axis: contiguous run, so plain block copies are enough.
  float *cursor = outData;
  for (int rep = 0; rep < repeats; ++rep, cursor += extent) {
    memcpy(cursor, inData, extent * sizeof(float));
  }
}
// Byte-element counterpart of the float tiler: recursively tiles a uint8 tensor
// starting at axis `dim`.
//
// On the last axis (dim == ndim - 1) the inShape[dim] source bytes are copied
// multiple[dim] times back to back. On any other axis, each source slice is
// handed to the next axis once per repeat, the destination being offset by
// outStrides[dim] * (slice + repeat * inShape[dim]).
void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape,
                           const int *inStrides, const int *outStrides, const int *multiple) {
  const int extent = inShape[dim];
  const int repeats = multiple[dim];

  if (dim != ndim - 1) {
    // Outer axis: recurse into every (slice, repeat) pair.
    for (size_t slice = 0; slice < extent; ++slice) {
      for (size_t rep = 0; rep < repeats; ++rep) {
        const uint8_t *src = inData + inStrides[dim] * slice;
        uint8_t *dst = outData + outStrides[dim] * (slice + rep * extent);
        TileOneDimensionUint8(src, dst, dim + 1, ndim, inShape, inStrides, outStrides, multiple);
      }
    }
    return;
  }

  // Innermost axis: the run is contiguous, copy it `repeats` times in a row.
  uint8_t *cursor = outData;
  for (int rep = 0; rep < repeats; ++rep, cursor += extent) {
    memcpy(cursor, inData, extent * sizeof(uint8_t));
  }
}
// Derives element strides from a shape, walking from the last axis to the
// first: the last axis gets stride 1 and every earlier axis gets the product of
// all extents after it. Writes exactly `ndim` entries; nothing is written when
// ndim <= 0.
void ComputeStrides(const int *shape, int *strides, const int ndim) {
  int running = 1;
  int axis = ndim;
  while (axis-- > 0) {
    strides[axis] = running;
    running *= shape[axis];
  }
}
| void CalcMultiplesAndStrides(ArithmeticParameter *param) { | |||
| NNACL_ASSERT(param->in_shape0_[i] != 0); | |||
| NNACL_ASSERT(param->in_shape1_[i] != 0); | |||
| for (size_t i = 0; i < param->ndim_; i++) { | |||
| param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i]; | |||
| param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i]; | |||
| } | |||
| // cal strides | |||
| ComputeStrides(param->in_shape0_, param->in_strides0_, param->ndim_); | |||
| ComputeStrides(param->in_shape1_, param->in_strides1_, param->ndim_); | |||
| ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_); | |||
| } | |||
| void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, | |||
| ArithmeticParameter *param) { | |||
| CalcMultiplesAndStrides(param); | |||
| TileOneDimension(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | |||
| param->multiples0_); | |||
| TileOneDimension(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_, param->out_strides_, | |||
| param->multiples1_); | |||
| } | |||
| void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||
| ArithmeticParameter *param) { | |||
| CalcMultiplesAndStrides(param); | |||
| TileOneDimensionUint8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | |||
| param->multiples0_); | |||
| TileOneDimensionUint8(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_, param->out_strides_, | |||
| param->multiples1_); | |||
| } | |||
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| ArithmeticParameter *param) { | |||
| CalcMultiplesAndStrides(param); | |||
| TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_, | |||
| param->in_strides0_, param->out_strides_, param->multiples0_); | |||
| TileOneDimensionUint8((uint8_t *)(data1), (uint8_t *)(tile_data1), 0, param->ndim_, param->in_shape1_, | |||
| param->in_strides1_, param->out_strides_, param->multiples1_); | |||
| } | |||
| @@ -1,68 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_ARITHMETIC_COMMON_H_ | |||
| #define MINDSPORE_LITE_NNACL_ARITHMETIC_COMMON_H_ | |||
| #ifdef ENABLE_NEON | |||
| #include <arm_neon.h> | |||
| #endif | |||
| #include <string.h> | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
// Parameter block shared by the two-input arithmetic helpers in this file.
// CalcMultiplesAndStrides reads the shapes and ndim_ and fills in the stride
// and multiples arrays; the TileDimensions* helpers then consume them.
| typedef struct ArithmeticParameter { | |||
| OpParameter op_parameter_; | |||
// NOTE(review): presumably set when the inputs need broadcasting; not read by the code visible here, confirm.
| bool broadcasting_; | |||
// Number of leading entries of the shape/stride/multiples arrays that are in use.
| size_t ndim_; | |||
// NOTE(review): looks like a fused-activation selector; not read by the code visible here, confirm.
| int activation_type_; | |||
// Per-axis extents of the first input.
| int in_shape0_[10]; | |||
// NOTE(review): presumably the total element count of the first input; TODO confirm.
| int in_elements_num0_; | |||
// Per-axis extents of the second input.
| int in_shape1_[10]; | |||
// NOTE(review): presumably the total element count of the second input; TODO confirm.
| int in_elements_num1_; | |||
// Per-axis extents of the output.
| int out_shape_[10]; | |||
// NOTE(review): presumably the total element count of the output; TODO confirm.
| int out_elements_num_; | |||
// Strides written by ComputeStrides from in_shape0_, in_shape1_ and out_shape_ respectively.
| int in_strides0_[10]; | |||
| int in_strides1_[10]; | |||
| int out_strides_[10]; | |||
// Per-axis repeat counts: out_shape_[i] / in_shape0_[i] and out_shape_[i] / in_shape1_[i].
| int multiples0_[10]; | |||
| int multiples1_[10]; | |||
| } ArithmeticParameter; | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple); | |||
| void ComputeStrides(const int *shape, int *strides, const int ndim); | |||
| void CalcMultiplesAndStrides(ArithmeticParameter *param); | |||
| void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple); | |||
| void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, | |||
| ArithmeticParameter *param); | |||
| void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, | |||
| ArithmeticParameter *param); | |||
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| ArithmeticParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_NNACL_ARITHMETIC_COMMON_H_ | |||
| @@ -1,22 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_ARTITHMETIC_PARAMETER_H_ | |||
| #define MINDSPORE_LITE_NNACL_ARTITHMETIC_PARAMETER_H_ | |||
| #include "nnacl/op_attribute.h" | |||
| #endif // MINDSPORE_LITE_NNACL_ARTITHMETIC_PARAMETER_H_ | |||
| @@ -15,7 +15,6 @@ | |||
| */ | |||
| #include "nnacl/batch_to_space.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block, | |||
| int data_size) { | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_BATCH_TO_SPACE_H_ | |||
| #define MINDSPORE_LITE_NNACL_BATCH_TO_SPACE_H_ | |||
| #include <string.h> | |||
| #include "nnacl/op_base.h" | |||
| #define BATCH_TO_SPACE_BLOCK_SHAPE_SIZE 2 | |||
| @@ -63,6 +63,14 @@ static inline int GetStride(int *strides, const int *shape, int length) { | |||
| return stride; | |||
| } | |||
// Derives element strides from a shape: the last axis gets stride 1 and every
// earlier axis gets the product of all extents after it. Writes exactly `ndim`
// entries of `strides`; nothing is written when ndim <= 0.
//
// Declared `static inline` like the other helpers in this header: a plain
// `inline` definition in a C header provides no external definition, so a call
// the compiler chooses not to inline could fail to link.
static inline void ComputeStrides(const int *shape, int *strides, const int ndim) {
  int stride = 1;
  for (int i = ndim - 1; i >= 0; i--) {
    strides[i] = stride;
    stride *= shape[i];
  }
}
| #ifdef ENABLE_ARM64 | |||
| void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size); | |||
| void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size); | |||
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/fp16/arithmetic_fp16.h" | |||
| #include <math.h> | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| #include "nnacl/nnacl_utils.h" | |||
| void TileOneDimensionFp16(float16_t *inData, float16_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, | |||
| @@ -20,7 +20,7 @@ | |||
| #include <arm_neon.h> | |||
| #endif | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "nnacl/errorcode.h" | |||
| #ifdef __cplusplus | |||
| @@ -107,7 +107,7 @@ int ElementMinimumFp16(float16_t *input0, float16_t *input1, float16_t *output, | |||
| int ElementNotEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); | |||
| int ElementEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); | |||
| int ElementLessFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); | |||
| int ElementLessEqual(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); | |||
| int ElementLessEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); | |||
| int ElementGreaterFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); | |||
| int ElementGreaterEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); | |||
| @@ -15,7 +15,7 @@ | |||
| */ | |||
| #include "nnacl/fp16/stack_fp16.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| size_t Fp16GetStackCopyNum(int axis, int *in_shape, size_t shape_size) { | |||
| size_t one_input_size = 1; | |||
| @@ -20,144 +20,97 @@ | |||
| #include <arm_neon.h> | |||
| #endif | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "nnacl/errorcode.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int ElementOptAdd(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptAddInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptAddRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptAddRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptSub(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptSubInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptSubRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptSubRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMul(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulReluInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptDiv(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptDivRelu(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptDivRelu6(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptDivInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementMul(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementMulRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementMulRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementAdd(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, | |||
| int8_t *output, int element_size, ArithmeticParameter *param); | |||
| int ElementSub(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementSubInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementDiv(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementLogicalAndInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementLogicalAndBool(const bool *input0, const bool *input1, bool *output, const int element_size); | |||
| int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementMaximumInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementMod(const float *input0, const float *input1, float *output, const int element_size); | |||
| int ElementModInt(const int *input0, const int *input1, int *output, const int element_size); | |||
| int ElementOptMod(const float *input0, const float *input1, float *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementOptModInt(const int *input0, const int *input1, int *output, const int element_size, | |||
| const ArithmeticParameter *param); | |||
| int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||
| float *output, int element_size, ArithmeticParameter *param); | |||
| int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLess(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementGreater(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, | |||
| int element_size, ArithmeticParameter *param); | |||
| int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size); | |||
| int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, | |||
| float *output, int element_size, ArithmeticParameter *param); | |||
| void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple); | |||
| void TileDimensionsFp32(const float *data0, const float *data1, float *tile_data0, float *tile_data1, | |||
| ArithmeticParameter *param); | |||
| /* Mul */ | |||
| int ElementMul(const float *in0, const float *in1, float *out, int size); | |||
| int ElementMulRelu(const float *in0, const float *in1, float *out, int size); | |||
| int ElementMulRelu6(const float *in0, const float *in1, float *out, int size); | |||
| int ElementMulInt(const int *in0, const int *in1, int *out, int size); | |||
| int ElementMulReluInt(const int *in0, const int *in1, int *out, int size); | |||
| int ElementMulRelu6Int(const int *in0, const int *in1, int *out, int size); | |||
| int ElementOptMul(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptMulRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptMulRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptMulInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptMulReluInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptMulRelu6Int(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | |||
| int BroadcastMul(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, | |||
| ArithmeticParameter *param); | |||
| /* Add */ | |||
| int ElementAdd(const float *in0, const float *in1, float *out, int size); | |||
| int ElementAddRelu(const float *in0, const float *in1, float *out, int size); | |||
| int ElementAddRelu6(const float *in0, const float *in1, float *out, int size); | |||
| int ElementAddInt(const int *in0, const int *in1, int *out, int size); | |||
| int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, | |||
| ArithmeticParameter *param); | |||
| /* Sub */ | |||
| int ElementSub(const float *in0, const float *in1, float *out, int size); | |||
| int ElementSubInt(const int *in0, const int *in1, int *out, int size); | |||
| int ElementSubRelu(const float *in0, const float *in1, float *out, int size); | |||
| int ElementSubRelu6(const float *in0, const float *in1, float *out, int size); | |||
| int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | |||
| /* Div */ | |||
| int ElementDiv(const float *in0, const float *in1, float *out, int size); | |||
| int ElementDivRelu(const float *in0, const float *in1, float *out, int size); | |||
| int ElementDivRelu6(const float *in0, const float *in1, float *out, int size); | |||
| int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | |||
| int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, | |||
| ArithmeticParameter *param); | |||
| /* logical and */ | |||
| int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size); | |||
| int ElementLogicalAndInt(const int *in0, const int *in1, int *out, int size); | |||
| int ElementLogicalAndBool(const bool *in0, const bool *in1, bool *out, int size); | |||
| /* logical or */ | |||
| int ElementLogicalOr(const float *in0, const float *in1, float *out, int size); | |||
| /* Element Squared Difference */ | |||
| int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size); | |||
| /* max min */ | |||
| int ElementMaximum(const float *in0, const float *in1, float *out, int size); | |||
| int ElementMinimum(const float *in0, const float *in1, float *out, int size); | |||
| int ElementMaximumInt(const int *in0, const int *in1, int *out, int size); | |||
| int BroadcastMaximum(const float *in0, const float *in1, float *tile_input0, float *tile_input1, float *out, int size, | |||
| ArithmeticParameter *param); | |||
| /* floor div */ | |||
| int ElementFloorDiv(const float *in0, const float *in1, float *out, int size); | |||
| int ElementFloorDivInt(const int *in0, const int *in1, int *out, int size); | |||
| /* floor mod */ | |||
| int ElementFloorMod(const float *in0, const float *in1, float *out, int size); | |||
| int ElementFloorModInt(const int *in0, const int *in1, int *out, int size); | |||
| /* mod */ | |||
| int ElementMod(const float *in0, const float *in1, float *out, int size); | |||
| int ElementModInt(const int *in0, const int *in1, int *out, int size); | |||
| int ElementOptMod(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | |||
| int ElementOptModInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | |||
| #ifdef ENABLE_NNACL_INFER_SHAPE | |||
| int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | |||
| @@ -14,7 +14,6 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp32/space_to_batch_fp32.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| void DoSpaceToBatch(const float *input, float *output, const int *in_shape, const int *out_shape, const int *in_stride, | |||
| const int *out_stride, const int *blocks, const int *paddings, int thread, int task_id) { | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_BATCH_H_ | |||
| #define MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_BATCH_H_ | |||
| #include <string.h> | |||
| #include "nnacl/op_base.h" | |||
| typedef struct SpaceToBatchParameter { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp32/space_to_depth_fp32.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| @@ -15,7 +15,7 @@ | |||
| */ | |||
| #include "nnacl/fp32/stack_fp32.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| size_t GetStackCopyNum(int axis, const int *in_shape, size_t shape_size) { | |||
| size_t one_input_size = 1; | |||
| @@ -312,3 +312,16 @@ void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, i | |||
| } | |||
| return; | |||
| } | |||
| int ElementAddInt8(const int8_t *in0, const int8_t *in1, int8_t *out, int size) { | |||
| for (int i = 0; i < size; i++) { | |||
| out[i] = in0[i] + in1[i]; | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| int BroadcastAddInt8(const int8_t *in0, const int8_t *in1, int8_t *tile_in0, int8_t *tile_in1, int8_t *out, int size, | |||
| ArithmeticParameter *param) { | |||
| TileDimensionsInt8(in0, in1, tile_in0, tile_in1, param); | |||
| return ElementAddInt8(tile_in0, tile_in1, out, size); | |||
| } | |||
| @@ -18,6 +18,9 @@ | |||
| #define MINDSPORE_LITE_NNACL_ADD_INT8_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "nnacl/int8/arithmetic_int8.h" | |||
| typedef struct AddQuantQrgs { | |||
| int32_t zp_; | |||
| @@ -48,6 +51,10 @@ void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int siz | |||
| void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, int size, AddQuantParameter *params, | |||
| AddQuantQrgs *ptr_args, AddQuantQrgs *ele_args); | |||
| int ElementAddInt8(const int8_t *in0, const int8_t *in1, int8_t *out, int size); | |||
| int BroadcastAddInt8(const int8_t *in0, const int8_t *in1, int8_t *tile_in0, int8_t *tile_in1, int8_t *out, int size, | |||
| ArithmeticParameter *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -20,6 +20,33 @@ | |||
| #endif | |||
| #include "nnacl/errorcode.h" | |||
/*
 * Recursively tiles (repeats) an int8 tensor along dimension `dim` and every dimension after it.
 *
 * inData     - start of the source sub-tensor for dimension `dim`
 * outData    - start of the destination sub-tensor for dimension `dim`
 * dim        - dimension being expanded by this call
 * ndim       - total number of dimensions
 * inShape    - source extent of each dimension
 * inStrides  - source stride of each dimension, in elements
 * outStrides - destination stride of each dimension, in elements
 * multiple   - repeat count of each dimension
 *
 * The innermost dimension is copied as one contiguous run, `multiple[dim]` times back to back.
 * Outer dimensions recurse once per (source index, repetition) pair.
 *
 * The call is a no-op when `dim` is outside [0, ndim) or when the extent or repeat count of the
 * current dimension is not positive. Without these checks `ndim - 1` wraps for ndim == 0 and a
 * negative int extent turns into a huge unsigned loop bound / memcpy length.
 */
void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int *inShape,
                          const int *inStrides, const int *outStrides, const int *multiple) {
  if (dim < 0 || (size_t)dim >= ndim) {
    return;
  }
  if (inShape[dim] <= 0 || multiple[dim] <= 0) {
    return;
  }
  /* Both values are known to be positive here, so the conversions below are lossless. */
  const size_t src_count = (size_t)inShape[dim];
  const size_t repeat = (size_t)multiple[dim];

  if ((size_t)dim + 1 == ndim) {
    /* Innermost dimension: the source run is contiguous, copy it `repeat` times. */
    const size_t run_bytes = src_count * sizeof(int8_t);
    for (size_t r = 0; r < repeat; r++) {
      memcpy(outData, inData, run_bytes);
      outData += src_count;
    }
    return;
  }

  /* Offsets are computed in size_t, matching the arithmetic of the original implementation. */
  const size_t in_stride = (size_t)inStrides[dim];
  const size_t out_stride = (size_t)outStrides[dim];
  for (size_t i = 0; i < src_count; i++) {
    for (size_t j = 0; j < repeat; j++) {
      TileOneDimensionInt8(inData + in_stride * i, outData + out_stride * (i + j * src_count), dim + 1, ndim, inShape,
                           inStrides, outStrides, multiple);
    }
  }
}
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| ArithmeticParameter *param) { | |||
| CalcMultiplesAndStrides(param); | |||
| TileOneDimensionInt8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, | |||
| param->multiples0_); | |||
| TileOneDimensionInt8(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_, param->out_strides_, | |||
| param->multiples1_); | |||
| } | |||
| #define ACCURACY_DATA 0.00000001 | |||
| int ElementNotEqualInt8(int8_t *input0, int8_t *input1, uint8_t *output, int element_size, | |||
| @@ -17,11 +17,17 @@ | |||
| #define MINDSPORE_LITE_NNACL_INT8_ARITHMETIC_INT8_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int *inShape, | |||
| const int *inStrides, const int *outStrides, const int *multiple); | |||
| void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, | |||
| ArithmeticParameter *param); | |||
| int ElementNotEqualInt8(int8_t *input0, int8_t *input1, uint8_t *output, int element_size, | |||
| ArithmeticQuantArg *quant_arg); | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/int8/space_to_batch_int8.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, const int *in_shape, | |||
| const int *out_shape) { | |||
| @@ -16,7 +16,7 @@ | |||
| #include "nnacl/reverse_sequence.h" | |||
| #include <string.h> | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| void ReverseSequence(float *input0, const void *input1, float *output, ReverseSequenceParameter *para) { | |||
| (void)memcpy(output, input0, para->total_data_size_); | |||
| @@ -21,7 +21,7 @@ | |||
| #include <set> | |||
| #include <cmath> | |||
| #include "src/ops/primitive_c.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| @@ -17,7 +17,7 @@ | |||
| #include "src/ops/add.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "src/ops/populate/arithmetic_populate.h" | |||
| namespace mindspore { | |||
| @@ -16,7 +16,7 @@ | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| @@ -16,7 +16,7 @@ | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| @@ -16,18 +16,17 @@ | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "nnacl/flatten.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| OpParameter *PopulateFlattenParameter(const mindspore::lite::PrimitiveC *primitive) { | |||
| FlattenParameter *flatten_param = reinterpret_cast<FlattenParameter *>(malloc(sizeof(FlattenParameter))); | |||
| OpParameter *flatten_param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | |||
| if (flatten_param == nullptr) { | |||
| MS_LOG(ERROR) << "malloc FlattenParameter failed."; | |||
| return nullptr; | |||
| } | |||
| memset(flatten_param, 0, sizeof(FlattenParameter)); | |||
| flatten_param->op_parameter_.type_ = primitive->Type(); | |||
| memset(flatten_param, 0, sizeof(OpParameter)); | |||
| flatten_param->type_ = primitive->Type(); | |||
| return reinterpret_cast<OpParameter *>(flatten_param); | |||
| } | |||
| @@ -15,7 +15,7 @@ | |||
| */ | |||
| #include "src/ops/mul.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "src/ops/populate/arithmetic_populate.h" | |||
| @@ -23,13 +23,13 @@ namespace mindspore { | |||
| namespace lite { | |||
| OpParameter *PopulateSqueezeParameter(const mindspore::lite::PrimitiveC *primitive) { | |||
| SqueezeParameter *squeeze_param = reinterpret_cast<SqueezeParameter *>(malloc(sizeof(SqueezeParameter))); | |||
| OpParameter *squeeze_param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | |||
| if (squeeze_param == nullptr) { | |||
| MS_LOG(ERROR) << "malloc SqueezeParameter failed."; | |||
| return nullptr; | |||
| } | |||
| memset(squeeze_param, 0, sizeof(SqueezeParameter)); | |||
| squeeze_param->op_parameter_.type_ = primitive->Type(); | |||
| memset(squeeze_param, 0, sizeof(OpParameter)); | |||
| squeeze_param->type_ = primitive->Type(); | |||
| return reinterpret_cast<OpParameter *>(squeeze_param); | |||
| } | |||
| Registry SqueezeParameterRegistry(schema::PrimitiveType_Squeeze, PopulateSqueezeParameter); | |||
| @@ -17,7 +17,7 @@ | |||
| #include "src/ops/sub.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "src/ops/populate/arithmetic_populate.h" | |||
| namespace mindspore { | |||
| @@ -16,7 +16,7 @@ | |||
| #include "src/runtime/kernel/arm/base/depth_to_space_base.h" | |||
| #include "nnacl/depth_to_space.h" | |||
| #include "src/runtime/kernel/arm/fp32/depth_to_space_fp32.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| @@ -39,7 +39,7 @@ ARITHMETIC_COMPARE_FUNC_INFO_FP16 arithmetic_cp_fun_table_fp16[] = { | |||
| {PrimitiveType_NotEqual, schema::ActivationType_NO_ACTIVATION, ElementNotEqualFp16, ElementOptNotEqualFp16}, | |||
| {PrimitiveType_Equal, schema::ActivationType_NO_ACTIVATION, ElementEqualFp16, ElementOptEqualFp16}, | |||
| {PrimitiveType_Less, schema::ActivationType_NO_ACTIVATION, ElementLessFp16, ElementOptLessFp16}, | |||
| {PrimitiveType_LessEqual, schema::ActivationType_NO_ACTIVATION, ElementLessEqual, ElementOptLessEqualFp16}, | |||
| {PrimitiveType_LessEqual, schema::ActivationType_NO_ACTIVATION, ElementLessEqualFp16, ElementOptLessEqualFp16}, | |||
| {PrimitiveType_Greater, schema::ActivationType_NO_ACTIVATION, ElementGreaterFp16, ElementOptGreaterFp16}, | |||
| {PrimitiveType_GreaterEqual, schema::ActivationType_NO_ACTIVATION, ElementGreaterEqualFp16, | |||
| ElementOptGreaterEqualFp16}}; | |||
| @@ -19,7 +19,7 @@ | |||
| #include <vector> | |||
| #include "include/errorcode.h" | |||
| #include "nnacl/fp32/arg_min_max_fp32.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| #include "src/lite_kernel.h" | |||
| namespace mindspore::kernel { | |||
| @@ -29,7 +29,6 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Eltwise; | |||
| namespace mindspore::kernel { | |||
| ArithmeticCPUKernel::~ArithmeticCPUKernel() { | |||
| FreeTmpPtr(); | |||
| return; | |||
| @@ -72,9 +71,10 @@ int ArithmeticCPUKernel::InitBroadCastCase() { | |||
| if (input0_ptr_ == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| TileOneDimension(reinterpret_cast<float *>(in_tensors_[0]->data_c()), reinterpret_cast<float *>(input0_ptr_), 0, | |||
| arithmeticParameter_->ndim_, arithmeticParameter_->in_shape0_, arithmeticParameter_->in_strides0_, | |||
| arithmeticParameter_->out_strides_, arithmeticParameter_->multiples0_); | |||
| TileOneDimensionFp32(reinterpret_cast<float *>(in_tensors_[0]->data_c()), reinterpret_cast<float *>(input0_ptr_), 0, | |||
| arithmeticParameter_->ndim_, arithmeticParameter_->in_shape0_, | |||
| arithmeticParameter_->in_strides0_, arithmeticParameter_->out_strides_, | |||
| arithmeticParameter_->multiples0_); | |||
| arithmeticParameter_->broadcasting_ = false; | |||
| input0_broadcast_ = true; | |||
| } | |||
| @@ -85,9 +85,10 @@ int ArithmeticCPUKernel::InitBroadCastCase() { | |||
| FreeTmpPtr(); | |||
| return RET_ERROR; | |||
| } | |||
| TileOneDimension(reinterpret_cast<float *>(in_tensors_[1]->data_c()), reinterpret_cast<float *>(input1_ptr_), 0, | |||
| arithmeticParameter_->ndim_, arithmeticParameter_->in_shape1_, arithmeticParameter_->in_strides1_, | |||
| arithmeticParameter_->out_strides_, arithmeticParameter_->multiples1_); | |||
| TileOneDimensionFp32(reinterpret_cast<float *>(in_tensors_[1]->data_c()), reinterpret_cast<float *>(input1_ptr_), 0, | |||
| arithmeticParameter_->ndim_, arithmeticParameter_->in_shape1_, | |||
| arithmeticParameter_->in_strides1_, arithmeticParameter_->out_strides_, | |||
| arithmeticParameter_->multiples1_); | |||
| arithmeticParameter_->broadcasting_ = false; | |||
| input1_broadcast_ = true; | |||
| } | |||
| @@ -18,7 +18,6 @@ | |||
| #include <vector> | |||
| #include "include/errorcode.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/depth_to_space.h" | |||
| #include "src/runtime/kernel/arm/base/depth_to_space_base.h" | |||
| @@ -17,7 +17,6 @@ | |||
| #include "src/runtime/kernel/arm/fp32/flatten_fp32.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "nnacl/flatten.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| @@ -34,19 +33,12 @@ int FlattenCPUKernel::Init() { | |||
| return ReSize(); | |||
| } | |||
| int FlattenCPUKernel::ReSize() { | |||
| auto output_shape = out_tensors_.at(0)->shape(); | |||
| flatten_param_->size = sizeof(float); | |||
| for (size_t i = 0; i < output_shape.size(); i++) { | |||
| flatten_param_->size *= output_shape.at(i); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int FlattenCPUKernel::ReSize() { return RET_OK; } | |||
| int FlattenCPUKernel::Run() { | |||
| auto input = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| auto output = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| Flatten(input, output, flatten_param_); | |||
| auto input = in_tensors_.at(0); | |||
| auto output = out_tensors_.at(0); | |||
| memcpy(output->data_c(), input->data_c(), output->Size()); | |||
| return RET_OK; | |||
| } | |||
| @@ -18,9 +18,7 @@ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "include/context.h" | |||
| #include "nnacl/flatten.h" | |||
| using mindspore::lite::InnerContext; | |||
| @@ -30,17 +28,12 @@ class FlattenCPUKernel : public LiteKernel { | |||
| FlattenCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| flatten_param_ = reinterpret_cast<FlattenParameter *>(parameter); | |||
| } | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~FlattenCPUKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| private: | |||
| FlattenParameter *flatten_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -19,7 +19,7 @@ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/fp32/space_to_batch_fp32.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| namespace mindspore::kernel { | |||
| class SpaceToBatchCPUKernel : public LiteKernel { | |||
| @@ -15,9 +15,6 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/add_int8.h" | |||
| #include <limits> | |||
| #include <algorithm> | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/kernel_registry.h" | |||
| @@ -17,9 +17,11 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ADD_INT8_H_ | |||
| #include <vector> | |||
| #include <limits> | |||
| #include <algorithm> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/int8/add_int8.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| namespace mindspore::kernel { | |||
| @@ -19,7 +19,7 @@ | |||
| #include <vector> | |||
| #include "nnacl/quantization/quantize.h" | |||
| #include "nnacl/int8/arg_min_max_int8.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/common_func.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/lite_kernel.h" | |||
| @@ -17,7 +17,7 @@ | |||
| #include "src/runtime/kernel/arm/int8/arithmetic_int8.h" | |||
| #include "src/runtime/kernel/arm/int8/add_int8.h" | |||
| #include "src/runtime/kernel/arm/int8/mul_int8.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| @@ -18,8 +18,9 @@ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/fp32/unique_fp32.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "nnacl/int8/add_int8.h" | |||
| #include "nnacl/int8/arithmetic_int8.h" | |||
| namespace mindspore::kernel { | |||
| class BiasAddInt8CPUKernel : public LiteKernel { | |||
| @@ -27,7 +27,6 @@ | |||
| #include "nnacl/int8/matmul_int8.h" | |||
| #include "src/runtime/kernel/arm/base/layout_transform.h" | |||
| #include "src/runtime/kernel/arm/base/convolution_base.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| namespace mindspore::kernel { | |||
| class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| @@ -17,7 +17,7 @@ | |||
| #include "src/runtime/kernel/arm/int8/div_int8.h" | |||
| #include <limits> | |||
| #include <algorithm> | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/int8/arithmetic_int8.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| @@ -114,9 +114,9 @@ int DivInt8CPUKernel::Run() { | |||
| tile1_data_ = nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()), | |||
| static_cast<uint8_t *>(in_tensors_.at(1)->MutableData()), | |||
| reinterpret_cast<uint8_t *>(tile0_data_), reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); | |||
| TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->MutableData()), | |||
| static_cast<int8_t *>(in_tensors_.at(1)->MutableData()), reinterpret_cast<int8_t *>(tile0_data_), | |||
| reinterpret_cast<int8_t *>(tile1_data_), &tile_para); | |||
| } | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, DivInt8Run, this, op_parameter_->thread_num_); | |||
| if (broadcast_) { | |||
| @@ -15,10 +15,6 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/mul_int8.h" | |||
| #include <limits> | |||
| #include <algorithm> | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/int8/mul_int8.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| @@ -17,9 +17,12 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MUL_INT8_H_ | |||
| #include <vector> | |||
| #include <limits> | |||
| #include <algorithm> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/mul_parameter.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/int8/mul_int8.h" | |||
| #include "nnacl/int8/arithmetic_int8.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| namespace mindspore::kernel { | |||
| @@ -15,11 +15,6 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/scale_int8.h" | |||
| #include <string.h> | |||
| #include <vector> | |||
| #include "nnacl/int8/scale_int8.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| @@ -66,9 +61,9 @@ int ScaleInt8CPUKernel::InitScaleOffset() { | |||
| return RET_ERROR; | |||
| } | |||
| malloced_scale_ = true; | |||
| TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(1)->data_c()), | |||
| reinterpret_cast<uint8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(1)->data_c()), | |||
| reinterpret_cast<int8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| } | |||
| } | |||
| @@ -93,9 +88,9 @@ int ScaleInt8CPUKernel::InitScaleOffset() { | |||
| return RET_ERROR; | |||
| } | |||
| malloced_offset_ = true; | |||
| TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(2)->data_c()), | |||
| reinterpret_cast<uint8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(2)->data_c()), | |||
| reinterpret_cast<int8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| } | |||
| } | |||
| } | |||
| @@ -305,9 +300,9 @@ int ScaleInt8CPUKernel::Run() { | |||
| MS_LOG(ERROR) << "malloc input1_data_ failed."; | |||
| return RET_ERROR; | |||
| } | |||
| TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(1)->data_c()), | |||
| reinterpret_cast<uint8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(1)->data_c()), | |||
| reinterpret_cast<int8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| } | |||
| // If there is a bias and it is passed in by a previous node, broadcasting has to be done online | |||
| @@ -319,9 +314,9 @@ int ScaleInt8CPUKernel::Run() { | |||
| input1_data_ = nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(2)->data_c()), | |||
| reinterpret_cast<uint8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(2)->data_c()), | |||
| reinterpret_cast<int8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, | |||
| tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); | |||
| } | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ScaleRunInt8, this, op_parameter_->thread_num_); | |||
| @@ -17,14 +17,15 @@ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SCALE_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SCALE_INT8_H_ | |||
| #include <string.h> | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/scale.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/int8/arithmetic_int8.h" | |||
| #include "nnacl/int8/scale_int8.h" | |||
| namespace mindspore::kernel { | |||
| class ScaleInt8CPUKernel : public LiteKernel { | |||
| public: | |||
| ScaleInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| @@ -15,10 +15,6 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/sub_int8.h" | |||
| #include <limits> | |||
| #include <algorithm> | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| @@ -29,7 +25,6 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Sub; | |||
| namespace mindspore::kernel { | |||
| int SubInt8CPUKernel::Init() { | |||
| lite::Tensor *input0 = in_tensors_.at(0); | |||
| lite::Tensor *input1 = in_tensors_.at(1); | |||
| @@ -142,9 +137,9 @@ int SubInt8CPUKernel::Run() { | |||
| context_->allocator->Free(tile0_data_); | |||
| return RET_ERROR; | |||
| } | |||
| TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()), | |||
| static_cast<uint8_t *>(in_tensors_.at(1)->MutableData()), | |||
| reinterpret_cast<uint8_t *>(tile0_data_), reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); | |||
| TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->data_c()), | |||
| static_cast<int8_t *>(in_tensors_.at(1)->data_c()), reinterpret_cast<int8_t *>(tile0_data_), | |||
| reinterpret_cast<int8_t *>(tile1_data_), &tile_para); | |||
| } | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SubInt8Run, this, op_parameter_->thread_num_); | |||
| if (broadcast_) { | |||
| @@ -17,6 +17,10 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SUB_INT8_H_ | |||
| #include <vector> | |||
| #include <limits> | |||
| #include <algorithm> | |||
| #include "nnacl/int8/arithmetic_int8.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/int8/sub_int8.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| @@ -18,7 +18,6 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ELTWISE_NPU_H_ | |||
| #include <vector> | |||
| #include "src/ops/eltwise.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| namespace mindspore::kernel { | |||
| @@ -19,7 +19,6 @@ | |||
| #include <vector> | |||
| #include "nnacl/resize_parameter.h" | |||
| #include "src/ops/resize.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| namespace mindspore::kernel { | |||
| @@ -30,7 +30,7 @@ | |||
| #include "src/ops/power_grad.h" | |||
| #include "nnacl/power_parameter.h" | |||
| #include "src/ops/bias_grad.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "nnacl/fp32_grad/optimizer.h" | |||
| #include "src/ops/apply_momentum.h" | |||
| #include "src/ops/sgd.h" | |||
| @@ -16,7 +16,7 @@ | |||
| #include "src/common/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/nnacl/batch_to_space.h" | |||
| #include "mindspore/lite/nnacl/arithmetic_common.h" | |||
| #include "mindspore/lite/nnacl/common_func.h" | |||
| namespace mindspore { | |||
| @@ -16,7 +16,7 @@ | |||
| #include "src/common/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/nnacl/depth_to_space.h" | |||
| #include "mindspore/lite/nnacl/arithmetic_common.h" | |||
| #include "mindspore/lite/nnacl/common_func.h" | |||
| namespace mindspore { | |||
| @@ -1,77 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "schema/inner/model_generated.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| using mindspore::lite::DeviceType; | |||
| namespace mindspore { | |||
| class TestBiasAddInt8 : public mindspore::CommonTest { | |||
| public: | |||
| TestBiasAddInt8() {} | |||
| }; | |||
| TEST_F(TestBiasAddInt8, BiasAdd) { | |||
| lite::Tensor in_tensor0(kNumberTypeInt8, {1, 2, 3, 2}); | |||
| lite::Tensor in_tensor1(kNumberTypeInt8, {2}); | |||
| lite::Tensor out_tensor(kNumberTypeInt8, {1, 2, 3, 2}); | |||
| int8_t input_data0[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; | |||
| int8_t input_data1[] = {1, 1}; | |||
| int8_t output_data[12] = {0}; | |||
| in_tensor0.set_data(input_data0); | |||
| in_tensor1.set_data(input_data1); | |||
| out_tensor.set_data(output_data); | |||
| std::vector<lite::Tensor *> inputs = {&in_tensor0, &in_tensor1}; | |||
| std::vector<lite::Tensor *> outputs = {&out_tensor}; | |||
| ArithmeticParameter parameter = {}; | |||
| int dims[] = {1, 2, 3, 4}; | |||
| parameter.ndim_ = 4; | |||
| for (int i = 0; i < 4; i++) { | |||
| parameter.in_shape0_[i] = dims[i]; | |||
| parameter.in_shape1_[i] = 1; | |||
| parameter.out_shape_[i] = dims[i]; | |||
| } | |||
| parameter.in_shape1_[3] = dims[3]; | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_BiasAdd}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| EXPECT_NE(creator, nullptr); | |||
| auto ctx = std::make_shared<lite::InnerContext>(); | |||
| ASSERT_EQ(lite::RET_OK, ctx->Init()); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc, nullptr); | |||
| EXPECT_NE(kernel, nullptr); | |||
| auto ret = kernel->Run(); | |||
| EXPECT_EQ(0, ret); | |||
| float expect[] = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}; | |||
| for (int i = 0; i < 12; ++i) { | |||
| EXPECT_EQ(output_data[i], expect[i]); | |||
| } | |||
| in_tensor0.set_data(nullptr); | |||
| in_tensor1.set_data(nullptr); | |||
| out_tensor.set_data(nullptr); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "ut/src/runtime/kernel/opencl/common.h" | |||
| #include "nnacl/arithmetic_common.h" | |||
| #include "nnacl/arithmetic.h" | |||
| namespace mindspore::lite::opencl::test { | |||