From: @lzkcode Reviewed-by: Signed-off-by:pull/13579/MERGE
| @@ -0,0 +1,117 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp16/power_fp16.h" | |||
| #include "nnacl/errorcode.h" | |||
#if defined(ENABLE_NEON)
// Integer-exponent power for one NEON fp16 vector, via exponentiation by
// squaring. `exponent` points at a single float16_t assumed (by the caller's
// CheckInteger dispatch) to hold an integral value.
float16x8_t OptimizedPowerSimdFp16(float16x8_t x, const void *exponent) {
  const int raw_exp = (int)(*(float16_t *)exponent);
  float16x8_t acc = vmovq_n_f16(1.0f);
  float16x8_t base = x;
  for (int e = abs(raw_exp); e != 0; e /= 2) {
    if (e & 1) {
      acc *= base;
    }
    base *= base;
  }
  // Negative exponent: x^(-n) == 1 / x^n.
  return raw_exp >= 0 ? acc : 1 / acc;
}
#endif
| float16_t OptimizedPowerScalarFp16(float16_t x, const void *exponent) { | |||
| int tmp = *(float16_t *)exponent; | |||
| int exp = abs(tmp); | |||
| float16_t result = 1; | |||
| while (exp) { | |||
| if (exp % 2) { | |||
| result *= x; | |||
| } | |||
| x *= x; | |||
| exp = exp / 2; | |||
| } | |||
| return tmp >= 0 ? result : 1 / result; | |||
| } | |||
| void PowerBroadCastFp16(const float16_t *input, const float16_t *exponent, float16_t *output, int len, float scale, | |||
| float shift) { | |||
| PowerScalarFunFp16 PowerScalarFunFp16_ = NULL; | |||
| #if defined(ENABLE_NEON) | |||
| PowerSimdFunFp16 PowerSimdFunFp16_ = NULL; | |||
| #endif | |||
| if (CheckInteger(*exponent)) { | |||
| #if defined(ENABLE_NEON) | |||
| PowerSimdFunFp16_ = OptimizedPowerSimdFp16; | |||
| #endif | |||
| PowerScalarFunFp16_ = OptimizedPowerScalarFp16; | |||
| } else { | |||
| #if defined(ENABLE_NEON) | |||
| PowerSimdFunFp16_ = StdPowerSimdFp16; | |||
| #endif | |||
| PowerScalarFunFp16_ = StdPowerScalarFp16; | |||
| } | |||
| int i = 0; | |||
| #ifdef ENABLE_NEON | |||
| int len_c8 = UP_ROUND(len, C8NUM); | |||
| float16x8_t scale_8 = vmovq_n_f16(scale); | |||
| float16x8_t shift_8 = vmovq_n_f16(shift); | |||
| for (; i < len_c8; i += C8NUM) { | |||
| float16x8_t result = PowerSimdFunFp16_(scale_8 * vld1q_f16(input + i) + shift_8, exponent); | |||
| vst1q_f16(output + i, result); | |||
| } | |||
| #endif | |||
| for (; i < len; ++i) { | |||
| output[i] = PowerScalarFunFp16_(scale * input[i] + shift, exponent); | |||
| } | |||
| } | |||
| void PowerSingleFp16(const float16_t *input, const float16_t *exponent, float16_t *output, int len, float scale, | |||
| float shift) { | |||
| int i = 0; | |||
| PowerScalarFunFp16 PowerScalarFunFp16_ = NULL; | |||
| #ifdef ENABLE_NEON | |||
| int len_c8 = UP_ROUND(len, C8NUM); | |||
| float16x8_t scale_8 = vmovq_n_f16(scale); | |||
| float16x8_t shift_8 = vmovq_n_f16(shift); | |||
| for (; i < len_c8; i += C8NUM) { | |||
| float16x8_t tmp_8 = scale_8 * vld1q_f16(input + i) + shift_8; | |||
| for (int j = 0; j < 8; ++j) { | |||
| PowerScalarFunFp16_ = CheckInteger(exponent[i + j]) ? OptimizedPowerScalarFp16 : StdPowerScalarFp16; | |||
| output[i + j] = PowerScalarFunFp16_(tmp_8[j], exponent + i + j); | |||
| } | |||
| } | |||
| #endif | |||
| for (; i < len; ++i) { | |||
| PowerScalarFunFp16_ = CheckInteger(exponent[i]) ? OptimizedPowerScalarFp16 : StdPowerScalarFp16; | |||
| output[i] = PowerScalarFunFp16_(scale * input[i] + shift, exponent + i); | |||
| } | |||
| } | |||
| int PowerFp16(const float16_t *input, const float16_t *exponent, float16_t *output, int len, float scale, float shift, | |||
| bool broadcast) { | |||
| if (input == NULL || exponent == NULL || output == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| PowerFunFp16 PowerFunFp16_ = NULL; | |||
| PowerFunFp16_ = broadcast ? PowerBroadCastFp16 : PowerSingleFp16; | |||
| PowerFunFp16_(input, exponent, output, len, scale, shift); | |||
| return NNACL_OK; | |||
| } | |||
| @@ -0,0 +1,63 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP16_POWER_FP16_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP16_POWER_FP16_H_ | |||
| #include <math.h> | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/power_parameter.h" | |||
| #if defined(ENABLE_NEON) | |||
| typedef float16x8_t (*PowerSimdFunFp16)(float16x8_t x, const void *exponent); | |||
| #endif | |||
| typedef float16_t (*PowerScalarFunFp16)(float16_t x, const void *exponent); | |||
| typedef void (*PowerFunFp16)(const float16_t *, const float16_t *, float16_t *, int, float, float); | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| inline bool CheckInteger(float16_t f) { return floorf(f) == f; } | |||
| static inline float16_t StdPowerScalarFp16(float16_t x, const void *exponent) { | |||
| return powf(x, *(float16_t *)exponent); | |||
| } | |||
#if defined(ENABLE_NEON)
// Fallback vector power for non-integral exponents: applies powf lane by
// lane. Uses a loop (mirroring the fp32 StdPowerSimd) and hoists the
// loop-invariant exponent load out of the per-lane work.
static inline float16x8_t StdPowerSimdFp16(float16x8_t x, const void *exponent) {
  const float16_t exp_value = *(float16_t *)exponent;
  float16x8_t result;
  for (int i = 0; i < 8; ++i) {
    result[i] = powf(x[i], exp_value);
  }
  return result;
}
#endif
| int PowerFp16(const float16_t *input, const float16_t *exponent, float16_t *output, int len, float scale, float shift, | |||
| bool broadcast); | |||
| void PowerSingleFp16(const float16_t *input, const float16_t *exponent, float16_t *output, int len, float scale, | |||
| float shift); | |||
| void PowerBroadCastFp16(const float16_t *input, const float16_t *exponent, float16_t *output, int len, float scale, | |||
| float shift); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_NNACL_FP16_POWER_FP16_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -15,47 +15,98 @@ | |||
| */ | |||
| #include "nnacl/fp32/power_fp32.h" | |||
| #include "nnacl/errorcode.h" | |||
| bool CheckInteger(float f) { return floorf(f) == f; } | |||
#if defined(ENABLE_ARM) || defined(ENABLE_AVX) || defined(ENABLE_SSE)
// Integer-exponent power for a 4-lane float vector, via exponentiation by
// squaring. `exponent` points at a single float assumed integral.
MS_FLOAT32X4 OptimizedPowerSimd(MS_FLOAT32X4 x, const void *exponent) {
  const float exp_value = *(float *)exponent;
  MS_FLOAT32X4 acc = MS_MOVQ_F32(1.0f);
  MS_FLOAT32X4 base = x;
  for (int e = abs((int)exp_value); e != 0; e /= 2) {
    if (e & 1) {
      acc *= base;
    }
    base *= base;
  }
  // Negative exponent: x^(-n) == 1 / x^n.
  return exp_value >= 0 ? acc : 1 / acc;
}
#endif
| float OptimizedPowerImpl(float x, int exponent) { | |||
| int exp = abs(exponent); | |||
| float OptimizedPowerScalar(float x, const void *exponent) { | |||
| int exp = abs((int)(*(float *)exponent)); | |||
| float result = 1; | |||
| float iterator = x; | |||
| while (exp) { | |||
| if (exp % 2) { | |||
| result *= iterator; | |||
| result *= x; | |||
| } | |||
| iterator *= iterator; | |||
| x *= x; | |||
| exp = exp / 2; | |||
| } | |||
| return exponent >= 0 ? result : 1 / result; | |||
| return *(float *)exponent >= 0 ? result : 1 / result; | |||
| } | |||
| float StdPowerImpl(float x, float exponent) { return pow(x, exponent); } | |||
| void PowerBroadCast(const float *input, const float *exponent, float *output, int len, float scale, float shift) { | |||
| PowerScalarFun PowerScalarFun_ = NULL; | |||
| #if defined(ENABLE_ARM) || defined(ENABLE_AVX) || defined(ENABLE_SSE) | |||
| PowerSimdFun PowerSimdFun_ = NULL; | |||
| #endif | |||
| void Power(const float *input, const float *exponent, float *output, int len, float scale, float shift, | |||
| bool broadcast) { | |||
| if (input == NULL || exponent == NULL) { | |||
| return; | |||
| } | |||
| if (broadcast) { | |||
| if (CheckInteger(*exponent)) { | |||
| for (int i = 0; i < len; ++i) { | |||
| output[i] = OptimizedPowerImpl(scale * input[i] + shift, (int)(*exponent)); | |||
| } | |||
| } else { | |||
| for (int i = 0; i < len; ++i) { | |||
| output[i] = StdPowerImpl(scale * input[i] + shift, *exponent); | |||
| } | |||
| } | |||
| if (CheckInteger(*exponent)) { | |||
| #if defined(ENABLE_ARM) || defined(ENABLE_AVX) || defined(ENABLE_SSE) | |||
| PowerSimdFun_ = OptimizedPowerSimd; | |||
| #endif | |||
| PowerScalarFun_ = OptimizedPowerScalar; | |||
| } else { | |||
| for (int i = 0; i < len; ++i) { | |||
| if (CheckInteger(*exponent)) { | |||
| output[i] = OptimizedPowerImpl(scale * input[i] + shift, (int)exponent[i]); | |||
| } else { | |||
| output[i] = StdPowerImpl(scale * input[i] + shift, exponent[i]); | |||
| } | |||
| #if defined(ENABLE_ARM) || defined(ENABLE_AVX) || defined(ENABLE_SSE) | |||
| PowerSimdFun_ = StdPowerSimd; | |||
| #endif | |||
| PowerScalarFun_ = StdPowerScalar; | |||
| } | |||
| int i = 0; | |||
| #if defined(ENABLE_AVX) || defined(ENABLE_SSE) || defined(ENABLE_ARM) | |||
| int len_c4 = UP_ROUND(len, C4NUM); | |||
| MS_FLOAT32X4 scale_4 = MS_MOVQ_F32(scale); | |||
| MS_FLOAT32X4 shift_4 = MS_MOVQ_F32(shift); | |||
| for (; i < len_c4; i += C4NUM) { | |||
| MS_FLOAT32X4 result = PowerSimdFun_(scale_4 * MS_LDQ_F32(input + i) + shift_4, exponent); | |||
| MS_STQ_F32(output + i, result); | |||
| } | |||
| #endif | |||
| for (; i < len; ++i) { | |||
| output[i] = PowerScalarFun_(scale * input[i] + shift, exponent); | |||
| } | |||
| } | |||
| void PowerSingle(const float *input, const float *exponent, float *output, int len, float scale, float shift) { | |||
| int i = 0; | |||
| PowerScalarFun PowerScalarFun_ = NULL; | |||
| #if defined(ENABLE_AVX) || defined(ENABLE_SSE) || defined(ENABLE_ARM) | |||
| int len_c4 = UP_ROUND(len, C4NUM); | |||
| MS_FLOAT32X4 scale_4 = MS_MOVQ_F32(scale); | |||
| MS_FLOAT32X4 shift_4 = MS_MOVQ_F32(shift); | |||
| for (; i < len_c4; i += C4NUM) { | |||
| MS_FLOAT32X4 tmp_4 = scale_4 * MS_LDQ_F32(input + i) + shift_4; | |||
| for (int j = 0; j < 4; ++j) { | |||
| PowerScalarFun_ = CheckInteger(exponent[i + j]) ? OptimizedPowerScalar : StdPowerScalar; | |||
| output[i + j] = PowerScalarFun_(tmp_4[j], exponent + i + j); | |||
| } | |||
| } | |||
| #endif | |||
| for (; i < len; ++i) { | |||
| PowerScalarFun_ = CheckInteger(exponent[i]) ? OptimizedPowerScalar : StdPowerScalar; | |||
| output[i] = PowerScalarFun_(scale * input[i] + shift, exponent + i); | |||
| } | |||
| } | |||
| int Power(const float *input, const float *exponent, float *output, int len, float scale, float shift, bool broadcast) { | |||
| if (input == NULL || exponent == NULL || output == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| PowerFun PowerFun_ = NULL; | |||
| PowerFun_ = broadcast ? PowerBroadCast : PowerSingle; | |||
| PowerFun_(input, exponent, output, len, scale, shift); | |||
| return NNACL_OK; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -21,10 +21,31 @@ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/power_parameter.h" | |||
| #if defined(ENABLE_ARM) || defined(ENABLE_AVX) || defined(ENABLE_SSE) | |||
| typedef MS_FLOAT32X4 (*PowerSimdFun)(MS_FLOAT32X4 x, const void *exponent); | |||
| #endif | |||
| typedef void (*PowerFun)(const float *, const float *, float *, int, float, float); | |||
| typedef float (*PowerScalarFun)(float x, const void *exponent); | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void Power(const float *input, const float *exponent, float *output, int len, float scale, float shift, bool broadcast); | |||
| inline bool CheckInteger(float f) { return floorf(f) == f; } | |||
| static inline float StdPowerScalar(float x, const void *exponent) { return powf(x, *(float *)exponent); } | |||
#if defined(ENABLE_ARM) || defined(ENABLE_AVX) || defined(ENABLE_SSE)
// Fallback vector power for non-integral exponents: applies powf to each of
// the four lanes with the single shared exponent value pointed to by
// `exponent`.
static inline MS_FLOAT32X4 StdPowerSimd(MS_FLOAT32X4 x, const void *exponent) {
  MS_FLOAT32X4 result;
  for (int i = 0; i < 4; ++i) {
    result[i] = powf(x[i], *(float *)exponent);
  }
  return result;
}
#endif
| int Power(const float *input, const float *exponent, float *output, int len, float scale, float shift, bool broadcast); | |||
| void PowerSingle(const float *input, const float *exponent, float *output, int len, float scale, float shift); | |||
| void PowerBroadCast(const float *input, const float *exponent, float *output, int len, float scale, float shift); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -0,0 +1,129 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp16/power_fp16.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_NULL_PTR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_PowFusion; | |||
| namespace mindspore::kernel { | |||
// Caches the exponent tensor and, when it holds constant data, fetches (and
// if necessary converts) the exponent values once up front so Run() can skip
// the fetch.
int PowerFp16CPUKernel::Init() {
  MS_ASSERT(in_tensors_.size() == 2);
  exp_tensor_ = in_tensors_[1];
  MS_ASSERT(exp_tensor_ != nullptr);
  if (exp_tensor_->IsConst()) {
    // Constant exponent: resolve it at init time.
    auto ret = GetExpData();
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "GetExpData is error in Init()!";
      return ret;
    }
  }
  return RET_OK;
}
// No shape-dependent state is cached, so resizing is a no-op.
int PowerFp16CPUKernel::ReSize() { return RET_OK; }
| int PowerFp16CPUKernel::GetExpData() { | |||
| exp_data_type_ = exp_tensor_->data_type(); | |||
| if (exp_data_type_ == kNumberTypeFloat || exp_data_type_ == kNumberTypeFloat32) { | |||
| exp_data_ = reinterpret_cast<float16_t *>(malloc(exp_tensor_->ElementsNum() * sizeof(float16_t))); | |||
| if (exp_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "exp_data_ is nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| auto exp = reinterpret_cast<float *>(exp_tensor_->MutableData()); | |||
| if (exp == nullptr) { | |||
| MS_LOG(ERROR) << "exp is nullptr!"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| for (int i = 0; i < exp_tensor_->ElementsNum(); ++i) { | |||
| exp_data_[i] = (float16_t)(exp[i]); | |||
| } | |||
| } else { | |||
| exp_data_ = reinterpret_cast<float16_t *>(exp_tensor_->MutableData()); | |||
| if (exp_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "exp_data_ is nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int PowerImplFp16(void *cdata, int task_id) { | |||
| auto kernel = reinterpret_cast<PowerFp16CPUKernel *>(cdata); | |||
| auto ret = kernel->RunImpl(task_id); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "PowerFp16Impl error: " << ret; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int PowerFp16CPUKernel::Run() { | |||
| if (exp_data_ == nullptr) { | |||
| auto ret = GetExpData(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "GetExpData is error in run!"; | |||
| return ret; | |||
| } | |||
| } | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImplFp16, this, thread_count_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int PowerFp16CPUKernel::RunImpl(int task_id) { | |||
| auto x_addr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->MutableData()); | |||
| MS_ASSERT(x_addr); | |||
| auto output_addr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData()); | |||
| MS_ASSERT(output_addr); | |||
| auto size = in_tensors_.at(0)->ElementsNum(); | |||
| int stride = UP_DIV(size, thread_count_); | |||
| int len = MSMIN(stride, size - stride * task_id); | |||
| if (len <= 0) { | |||
| return RET_OK; | |||
| } | |||
| bool broadcast = true; | |||
| broadcast = in_tensors_[0]->shape() == in_tensors_[1]->shape() ? false : true; | |||
| float16_t *cur_exp = nullptr; | |||
| if (broadcast) { | |||
| cur_exp = exp_data_; | |||
| } else { | |||
| cur_exp = exp_data_ + stride * task_id; | |||
| } | |||
| PowerFp16(x_addr + stride * task_id, cur_exp, output_addr + stride * task_id, len, scale_, shift_, broadcast); | |||
| return RET_OK; | |||
| } | |||
| PowerFp16CPUKernel::~PowerFp16CPUKernel() { | |||
| if ((exp_data_type_ == kNumberTypeFloat || exp_data_type_ == kNumberTypeFloat32) && exp_data_ != nullptr) { | |||
| free(exp_data_); | |||
| exp_data_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_PowFusion, LiteKernelCreator<PowerFp16CPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,52 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_POWER_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_POWER_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "include/context.h" | |||
| #include "mindspore/lite/nnacl/fp16/power_fp16.h" | |||
| namespace mindspore::kernel { | |||
| class PowerFp16CPUKernel : public LiteKernel { | |||
| public: | |||
| PowerFp16CPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||
| : LiteKernel(param, inputs, outputs, ctx), | |||
| thread_count_(ctx->thread_num_), | |||
| scale_(reinterpret_cast<PowerParameter *>(op_parameter_)->scale_), | |||
| shift_(reinterpret_cast<PowerParameter *>(op_parameter_)->shift_) {} | |||
| ~PowerFp16CPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| private: | |||
| int GetExpData(); | |||
| int thread_count_; | |||
| float scale_; | |||
| float shift_; | |||
| float16_t *exp_data_ = nullptr; | |||
| lite::Tensor *exp_tensor_ = nullptr; | |||
| TypeId exp_data_type_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_POWER_H_ | |||