From eaaf92a5345bf0f46e402e3cc6c8738136468e1c Mon Sep 17 00:00:00 2001 From: zhaodezan Date: Tue, 13 Oct 2020 22:44:47 -0400 Subject: [PATCH] floormod and Add of arithmetic support int type --- mindspore/lite/nnacl/fp32/arithmetic.c | 46 +++++++++++++++++++ mindspore/lite/nnacl/fp32/arithmetic.h | 3 ++ .../src/runtime/kernel/arm/fp32/arithmetic.cc | 4 ++ .../src/runtime/kernel/arm/fp32/arithmetic.h | 2 + 4 files changed, 55 insertions(+) diff --git a/mindspore/lite/nnacl/fp32/arithmetic.c b/mindspore/lite/nnacl/fp32/arithmetic.c index 05dc3339ba..2b3fcff28d 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic.c +++ b/mindspore/lite/nnacl/fp32/arithmetic.c @@ -348,6 +348,38 @@ int ElementOptAdd(float *input0, float *input1, float *output, int element_size, return NNACL_OK; } +int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) { +#ifdef ENABLE_NEON + int32x4_t vin0_opt = vdupq_n_s32(input0[0]); + int32x4_t vin1_opt = vdupq_n_s32(input1[0]); +#endif + int index = 0; + if (param->in_elements_num0_ == 1) { +#ifdef ENABLE_NEON + for (; index <= element_size - 4; index += C4NUM) { + int32x4_t vin1 = vld1q_s32(input1 + index); + int32x4_t vout = vaddq_s32(vin0_opt, vin1); + vst1q_s32(output + index, vout); + } +#endif + for (; index < element_size; index++) { + output[index] = input0[0] + input1[index]; + } + } else { +#ifdef ENABLE_NEON + for (; index <= element_size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(input0 + index); + int32x4_t vout = vaddq_s32(vin0, vin1_opt); + vst1q_s32(output + index, vout); + } +#endif + for (; index < element_size; index++) { + output[index] = input0[index] + input1[0]; + } + } + return NNACL_OK; +} + int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { #ifdef ENABLE_NEON float32x4_t vin0_opt = vdupq_n_f32(input0[0]); @@ -739,6 +771,13 @@ int ElementFloorMod(float *input0, float *input1, float *output, int element_siz 
/*
 * Element-wise floor modulo on two int arrays of equal length:
 *   output[i] = input0[i] - floor(input0[i] / input1[i]) * input1[i]
 *
 * C's integer '/' and '%' truncate toward zero, so the raw remainder carries
 * the sign of the dividend. Floor modulo must carry the sign of the divisor;
 * a non-zero remainder whose sign differs from the divisor is therefore
 * shifted by one divisor (e.g. -7 mod 3 == 2, 7 mod -3 == -2).
 *
 * input0        dividends
 * input1        divisors
 * output        receives element_size results
 * element_size  number of elements to process
 *
 * Returns NNACL_OK on success. Returns NNACL_ERR as soon as a zero divisor is
 * met (integer division by zero is undefined behaviour in C); elements before
 * that index have already been written, later ones are left untouched.
 * NOTE: INT_MIN with a divisor of -1 still overflows and is not handled here.
 */
int ElementFloorModInt(int *input0, int *input1, int *output, int element_size) {
  for (int i = 0; i < element_size; i++) {
    const int divisor = input1[i];
    if (divisor == 0) {
      return NNACL_ERR;
    }
    int remainder = input0[i] % divisor;
    /* Truncated remainder -> floored remainder. */
    if (remainder != 0 && ((remainder < 0) != (divisor < 0))) {
      remainder += divisor;
    }
    output[i] = remainder;
  }
  return NNACL_OK;
}

/*
 * Element-wise floor division on two int arrays of equal length:
 *   output[i] = floor(input0[i] / input1[i])
 *
 * C's integer '/' truncates toward zero, which is one too large whenever the
 * exact quotient is negative and not an integer (e.g. -7 / 3 == -2 in C, but
 * floor(-7 / 3) == -3). The quotient is corrected in exactly that case.
 *
 * input0        dividends
 * input1        divisors
 * output        receives element_size results
 * element_size  number of elements to process
 *
 * Returns NNACL_OK on success. Returns NNACL_ERR as soon as a zero divisor is
 * met; elements before that index have already been written, later ones are
 * left untouched.
 * NOTE: INT_MIN with a divisor of -1 still overflows and is not handled here.
 */
int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size) {
  for (int i = 0; i < element_size; i++) {
    const int dividend = input0[i];
    const int divisor = input1[i];
    if (divisor == 0) {
      return NNACL_ERR;
    }
    int quotient = dividend / divisor;
    /* Inexact division with operands of opposite sign: round down, not toward zero. */
    if ((dividend % divisor != 0) && ((dividend < 0) != (divisor < 0))) {
      quotient -= 1;
    }
    output[i] = quotient;
  }
  return NNACL_OK;
}
/* Patch fragment, cut at both ends. It opens with the tail of the ElementFloorDiv declaration, then lists the floor-div/floor-mod declarations from the mindspore/lite/nnacl/fp32/arithmetic.h hunk, where the '+'-prefixed ElementFloorDivInt and ElementFloorModInt take int arrays and the unprefixed context declarations take float arrays. The hunks for mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc follow: in ArithmeticCPUKernel::ReSize() the default case gains an assignment of ElementOptAddInt to arithmetic_opt_run_int_ next to the existing assignment of ElementOptAdd to arithmetic_opt_run_. The fragment ends inside the CpuArithmeticFp32KernelCreator signature. */
float *output, int element_size); +int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size); int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, ArithmeticParameter *param); int ElementFloorMod(float *input0, float *input1, float *output, int element_size); +int ElementFloorModInt(int *input0, int *input1, int *output, int element_size); int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size, ArithmeticParameter *param); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc index b963f8747c..e6552994d7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc @@ -83,6 +83,7 @@ int ArithmeticCPUKernel::ReSize() { default: arithmeticParameter_->broadcasting_ = false; arithmetic_opt_run_ = ElementOptAdd; + arithmetic_opt_run_int_ = ElementOptAddInt; break; } break; @@ -299,6 +300,7 @@ kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector