|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149 |
- /**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "nnacl/fp32/arithmetic.h"
- #include <math.h>
-
- #define ACCURACY_DATA 0.00000001 /* 1e-8; not referenced in the visible part of this file. NOTE(review): confirm where it is used. */
-
- int ElementOptMul(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vmulq_f32(vin0_opt, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[0] * input1[index];
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vmulq_f32(vin0, vin1_opt);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] * input1[0];
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptMulRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- float32x4_t zeros = vdupq_n_f32(0.0f);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vmaxq_f32(vmulq_f32(vin0_opt, vin1), zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[0] * input1[index], 0);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vmaxq_f32(vmulq_f32(vin0, vin1_opt), zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[index] * input1[0], 0);
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptMulRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- float32x4_t zeros = vdupq_n_f32(0.0f);
- float32x4_t bounds = vdupq_n_f32(6.0f);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vmulq_f32(vin0_opt, vin1), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[0] * input1[index], 0), 6);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vmulq_f32(vin0, vin1_opt), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] * input1[0], 0), 6);
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptMulInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- int32x4_t vin0_opt = vdupq_n_s32(input0[0]);
- int32x4_t vin1_opt = vdupq_n_s32(input1[0]);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vmulq_s32(vin0_opt, vin1);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[0] * input1[index];
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vout = vmulq_s32(vin0, vin1_opt);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] * input1[0];
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptMulReluInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- int32x4_t vin0_opt = vdupq_n_s32(input0[0]);
- int32x4_t vin1_opt = vdupq_n_s32(input1[0]);
- int32x4_t zeros = vdupq_n_s32(0);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vmaxq_s32(vmulq_s32(vin0_opt, vin1), zeros);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[0] * input1[index], 0);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vout = vmaxq_s32(vmulq_s32(vin0, vin1_opt), zeros);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[index] * input1[0], 0);
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptMulRelu6Int(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- int32x4_t vin0_opt = vdupq_n_s32(input0[0]);
- int32x4_t vin1_opt = vdupq_n_s32(input1[0]);
- int32x4_t zeros = vdupq_n_s32(0);
- int32x4_t bounds = vdupq_n_s32(6);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vminq_s32(vmaxq_s32(vmulq_s32(vin0_opt, vin1), zeros), bounds);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[0] * input1[index], 0), 6);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vout = vminq_s32(vmaxq_s32(vmulq_s32(vin0, vin1_opt), zeros), bounds);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] * input1[0], 0), 6);
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptSub(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vsubq_f32(vin0_opt, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[0] - input1[index];
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vsubq_f32(vin0, vin1_opt);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] - input1[0];
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptSubRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- float32x4_t zeros = vdupq_n_f32(0.0f);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[0] - input1[index], 0);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[index] - input1[0], 0);
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptSubRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- float32x4_t zeros = vdupq_n_f32(0.0f);
- float32x4_t bounds = vdupq_n_f32(6.0f);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[0] - input1[index], 0), 6);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] - input1[0], 0), 6);
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptAdd(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vaddq_f32(vin0_opt, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[0] + input1[index];
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vaddq_f32(vin0, vin1_opt);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] + input1[0];
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- int32x4_t vin0_opt = vdupq_n_s32(input0[0]);
- int32x4_t vin1_opt = vdupq_n_s32(input1[0]);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vaddq_s32(vin0_opt, vin1);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[0] + input1[index];
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vout = vaddq_s32(vin0, vin1_opt);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] + input1[0];
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- float32x4_t zeros = vdupq_n_f32(0.0f);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[0] + input1[index], 0);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMAX(input0[index] + input1[0], 0);
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- #ifdef ENABLE_NEON
- float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
- float32x4_t vin1_opt = vdupq_n_f32(input1[0]);
- float32x4_t zeros = vdupq_n_f32(0.0f);
- float32x4_t bounds = vdupq_n_f32(6.0f);
- #endif
- int index = 0;
- if (param->in_elements_num0_ == 1) {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[0] + input1[index], 0), 6);
- }
- } else {
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] + input1[0], 0), 6);
- }
- }
-
- return NNACL_OK;
- }
-
- int ElementOptDiv(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- if (param->in_elements_num0_ == 1) {
- for (int index = 0; index < element_size; index++) {
- output[index] = input0[0] / input1[index];
- }
- } else {
- if (input1[0] == 0) {
- return NNACL_ERRCODE_DIVISOR_ZERO;
- }
- for (int index = 0; index < element_size; index++) {
- output[index] = input0[index] / input1[0];
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptDivRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- if (param->in_elements_num0_ == 1) {
- for (int index = 0; index < element_size; index++) {
- output[index] = input0[0] / input1[index];
- output[index] = output[index] > 0 ? output[index] : 0;
- }
- } else {
- for (int index = 0; index < element_size; index++) {
- output[index] = input0[index] / input1[0];
- output[index] = output[index] > 0 ? output[index] : 0;
- }
- }
- return NNACL_OK;
- }
-
- int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
- if (param->in_elements_num0_ == 1) {
- for (int index = 0; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[0] / input1[index], 0), 6);
- }
- } else {
- for (int index = 0; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] / input1[0], 0), 6);
- }
- }
- return NNACL_OK;
- }
-
- int ElementMul(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vmulq_f32(vin0, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] * input1[index];
- }
- return NNACL_OK;
- }
-
- int ElementMulRelu(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t zeros = vdupq_n_f32(0.0f);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vmulq_f32(vin0, vin1);
- vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- float res = input0[index] * input1[index];
- output[index] = res > 0 ? res : 0;
- }
- return NNACL_OK;
- }
-
- int ElementMulRelu6(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t zeros = vdupq_n_f32(0.0f);
- float32x4_t bounds = vdupq_n_f32(6.0f);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vmulq_f32(vin0, vin1), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] * input1[index], 0), 6);
- }
- return NNACL_OK;
- }
-
- int ElementMulInt(int *input0, int *input1, int *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vmulq_s32(vin0, vin1);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] * input1[index];
- }
- return NNACL_OK;
- }
-
- int ElementMulReluInt(int *input0, int *input1, int *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- int32x4_t zeros = vdupq_n_s32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vmulq_s32(vin0, vin1);
- vout = vbslq_s32(vcgtq_s32(vout, zeros), vout, zeros);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- float res = input0[index] * input1[index];
- output[index] = res > 0 ? res : 0;
- }
- return NNACL_OK;
- }
-
- int ElementMulRelu6Int(int *input0, int *input1, int *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- int32x4_t zeros = vdupq_n_s32(0);
- int32x4_t bounds = vdupq_n_s32(6);
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vminq_s32(vmaxq_s32(vmulq_s32(vin0, vin1), zeros), bounds);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] * input1[index], 0), 6);
- }
- return NNACL_OK;
- }
-
- int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
- ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementMul(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementAdd(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vaddq_f32(vin0, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] + input1[index];
- }
- return NNACL_OK;
- }
-
- int ElementAddRelu(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t zeros = vdupq_n_f32(0.0f);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vaddq_f32(vin0, vin1);
- vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- float res = input0[index] + input1[index];
- output[index] = res > 0 ? res : 0;
- }
- return NNACL_OK;
- }
-
- int ElementAddRelu6(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t zeros = vdupq_n_f32(0.0f);
- float32x4_t bounds = vdupq_n_f32(6.0f);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] + input1[index], 0), 6);
- }
- return NNACL_OK;
- }
-
- int ElementAddInt(int *input0, int *input1, int *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- int32x4_t vin0 = vld1q_s32(input0 + index);
- int32x4_t vin1 = vld1q_s32(input1 + index);
- int32x4_t vout = vaddq_s32(vin0, vin1);
- vst1q_s32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] + input1[index];
- }
- return NNACL_OK;
- }
-
- int ElementAddInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- output[i] = input0[i] + input1[i];
- }
- return NNACL_OK;
- }
-
- int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
- ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementAdd(tile_input0, tile_input1, output, element_size);
- }
-
- int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensionsInt8(input0, input1, tile_input0, tile_input1, param);
- return ElementAddInt8(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementSub(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vsubq_f32(vin0, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] - input1[index];
- }
- return NNACL_OK;
- }
-
- int ElementSubRelu(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t zeros = vdupq_n_f32(0.0f);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vsubq_f32(vin0, vin1);
- vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- float res = input0[index] - input1[index];
- output[index] = res > 0 ? res : 0;
- }
- return NNACL_OK;
- }
-
- int ElementSubRelu6(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t zeros = vdupq_n_f32(0.0f);
- float32x4_t bounds = vdupq_n_f32(6.0f);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1), zeros), bounds);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = MSMIN(MSMAX(input0[index] - input1[index], 0), 6);
- }
-
- return NNACL_OK;
- }
-
- int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
- ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementSub(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementDiv(float *input0, float *input1, float *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- output[i] = input0[i] / input1[i];
- }
- return NNACL_OK;
- }
-
- int ElementDivRelu(float *input0, float *input1, float *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- float res = input0[i] / input1[i];
- output[i] = res > 0 ? res : 0;
- }
- return NNACL_OK;
- }
-
- int ElementDivRelu6(float *input0, float *input1, float *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- output[i] = MSMIN(MSMAX(input0[i] / input1[i], 0), 6);
- }
- return NNACL_OK;
- }
-
- int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
- ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementDiv(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementFloorMod(float *input0, float *input1, float *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i];
- }
- return NNACL_OK;
- }
-
- int ElementFloorModInt(int *input0, int *input1, int *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- output[i] = input0[i] - (input0[i] / input1[i]) * input1[i];
- }
- return NNACL_OK;
- }
-
- int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementFloorMod(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementFloorDiv(float *input0, float *input1, float *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- output[i] = floorf(input0[i] / input1[i]);
- }
- return NNACL_OK;
- }
-
- int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size) {
- for (int i = 0; i < element_size; i++) {
- output[i] = input0[i] / input1[i];
- }
- return NNACL_OK;
- }
-
- int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementFloorDiv(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementLogicalAnd(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
- uint32x4_t zeros = vdupq_n_u32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input0 + index)), mask);
- uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input1 + index)), mask);
- float32x4_t vout = vbslq_f32(vceqq_u32(vandq_u32(vin0, vin1), zeros), vfalse, vtrue);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)((bool)(input0[index]) & (bool)(input1[index]));
- }
- return NNACL_OK;
- }
-
/*
 * Element-wise squared difference: output[i] = (input0[i] - input1[i])^2.
 * Done in two passes over output: subtract first, then multiply output by
 * itself in place. The status of the subtraction is not inspected; the
 * return value is that of the multiplication.
 */
int ElementSquaredDifference(float *input0, float *input1, float *output, int element_size) {
  (void)ElementSub(input0, input1, output, element_size);
  const int status = ElementMul(output, output, output, element_size);
  return status;
}
-
- int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- BroadcastSub(input0, input1, tile_input0, tile_input1, output, element_size, param);
- return ElementMul(output, output, output, element_size);
- }
-
- int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementLogicalAnd(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementLogicalOr(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
- uint32x4_t zeros = vdupq_n_u32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input0 + index)), mask);
- uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input1 + index)), mask);
- float32x4_t vout = vbslq_f32(vceqq_u32(vorrq_u32(vin0, vin1), zeros), vfalse, vtrue);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)((bool)(input0[index]) | (bool)(input1[index]));
- }
- return NNACL_OK;
- }
-
- int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementLogicalOr(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementMaximum(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vmaxq_f32(vin0, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] > input1[index] ? input0[index] : input1[index];
- }
- return NNACL_OK;
- }
-
- int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementMaximum(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementMinimum(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vminq_f32(vin0, vin1);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = input0[index] > input1[index] ? input1[index] : input0[index];
- }
- return NNACL_OK;
- }
-
- int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementMinimum(tile_input0, tile_input1, output, element_size);
- }
-
- float FloatNotEqualCheck(float in0, float in1) {
- float tmp = in0 - in1;
- if (tmp <= ACCURACY_DATA && tmp >= -ACCURACY_DATA) {
- return (float)false;
- }
- return (float)true;
- }
-
- int ElementNotEqual(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vbslq_f32(vceqq_f32(vin0, vin1), vfalse, vtrue);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)(input0[index] != input1[index]);
- }
- return NNACL_OK;
- }
-
- int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementNotEqual(tile_input0, tile_input1, output, element_size);
- }
-
- float FloatEqualCheck(float in0, float in1) {
- float tmp = in0 - in1;
- if (tmp <= ACCURACY_DATA && tmp >= -ACCURACY_DATA) {
- return (float)true;
- }
- return (float)false;
- }
-
- int ElementEqual(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vbslq_f32(vceqq_f32(vin0, vin1), vtrue, vfalse);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)(input0[index] == input1[index]);
- }
- return NNACL_OK;
- }
-
- int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementEqual(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementLess(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vbslq_f32(vcltq_f32(vin0, vin1), vtrue, vfalse);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)(input0[index] < input1[index]);
- }
- return NNACL_OK;
- }
-
- int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
- ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementLess(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementLessEqual(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vbslq_f32(vcleq_f32(vin0, vin1), vtrue, vfalse);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)(input0[index] <= input1[index]);
- }
- return NNACL_OK;
- }
-
- int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementLessEqual(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementGreater(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vbslq_f32(vcgtq_f32(vin0, vin1), vtrue, vfalse);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)(input0[index] > input1[index]);
- }
- return NNACL_OK;
- }
-
- int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementGreater(tile_input0, tile_input1, output, element_size);
- }
-
- int ElementGreaterEqual(float *input0, float *input1, float *output, int element_size) {
- int index = 0;
- #ifdef ENABLE_NEON
- float32x4_t vtrue = vdupq_n_f32(1);
- float32x4_t vfalse = vdupq_n_f32(0);
- for (; index <= element_size - 4; index += C4NUM) {
- float32x4_t vin0 = vld1q_f32(input0 + index);
- float32x4_t vin1 = vld1q_f32(input1 + index);
- float32x4_t vout = vbslq_f32(vcgeq_f32(vin0, vin1), vtrue, vfalse);
- vst1q_f32(output + index, vout);
- }
- #endif
- for (; index < element_size; index++) {
- output[index] = (float)(input0[index] >= input1[index]);
- }
- return NNACL_OK;
- }
-
- int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
- int element_size, ArithmeticParameter *param) {
- TileDimensions(input0, input1, tile_input0, tile_input1, param);
- return ElementGreaterEqual(tile_input0, tile_input1, output, element_size);
- }
-
- #undef ACCURACY_DATA
-
- #ifdef ENABLE_NNACL_INFER_SHAPE
- int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
- int *in_datatype, int *out_datatype, OpParameter *param) {
- *out_format = in_format[0];
- *out_datatype = in_datatype[0];
- ArithmeticParameter *arithmetic_parameter = (ArithmeticParameter *)param;
- int ndim0 = dim_size[0];
- int ndim1 = dim_size[1];
- int *in_shape0 = in_shape[0];
- int *in_shape1 = in_shape[1];
- if (ndim0 < ndim1) {
- arithmetic_parameter->ndim_ = ndim1;
- int fill_dim_num = ndim1 - ndim0;
- int j = 0;
- for (int i = 0; i < ndim1; ++i) {
- if (i < fill_dim_num) {
- arithmetic_parameter->in_shape0_[i] = 1;
- } else {
- arithmetic_parameter->in_shape0_[i] = in_shape0[j++];
- }
- arithmetic_parameter->in_shape1_[i] = in_shape1[i];
- }
- } else if (ndim0 > ndim1) {
- arithmetic_parameter->ndim_ = ndim0;
- int fill_dim_num = ndim0 - ndim1;
- int j = 0;
- for (int i = 0; i < ndim0; ++i) {
- if (i < fill_dim_num) {
- arithmetic_parameter->in_shape1_[i] = 1;
- } else {
- arithmetic_parameter->in_shape1_[i] = in_shape1[j++];
- }
- arithmetic_parameter->in_shape0_[i] = in_shape0[i];
- }
- } else {
- arithmetic_parameter->ndim_ = ndim0;
- for (int i = 0; i < ndim0; ++i) {
- arithmetic_parameter->in_shape0_[i] = in_shape0[i];
- arithmetic_parameter->in_shape1_[i] = in_shape1[i];
- }
- }
- int j = 0;
- for (size_t i = 0; i < arithmetic_parameter->ndim_; ++i) {
- if (arithmetic_parameter->in_shape0_[i] != arithmetic_parameter->in_shape1_[i]) {
- if (arithmetic_parameter->in_shape0_[i] == 1) {
- out_shape[j++] = arithmetic_parameter->in_shape1_[i];
- } else if (arithmetic_parameter->in_shape1_[i] == 1) {
- out_shape[j++] = arithmetic_parameter->in_shape0_[i];
- } else {
- return NNACL_PARAM_INVALID;
- }
- } else {
- out_shape[j++] = arithmetic_parameter->in_shape0_[i];
- }
- }
- return NNACL_OK;
- }
- #endif
|