@@ -0,0 +1,33 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_NNACL_EXP_PARAMETER_H_
#define MINDSPORE_NNACL_EXP_PARAMETER_H_
#include "nnacl/op_base.h"
typedef struct ExpParameter {
  // Primitive parameter
  OpParameter op_parameter_;
  float base_;
  float scale_;
  float shift_;
  // other parameter
  float in_scale_;
  float out_scale_;
  int element_num_;
} ExpParameter;
#endif  // MINDSPORE_NNACL_EXP_PARAMETER_H_
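Editorial note: the split between the primitive attributes (base_, scale_, shift_) and the derived fields (in_scale_, out_scale_) follows from base^(scale * x + shift) = base^shift * e^(x * scale * ln(base)). The sketch below mirrors how ExpCPUKernel::Init() and ExpFP32Coder::Prepare() in this patch fill the derived fields; the helper name InitExpParameter is illustrative only and is not part of the patch.

/* Editorial sketch (not part of the patch): expected initialization of the derived fields. */
#include <math.h>
#include "nnacl/exp_parameter.h"

static void InitExpParameter(ExpParameter *param) {
  /* base_ == -1 is the convention for the natural base e */
  float log_base = (param->base_ == -1) ? 1 : logf(param->base_);
  param->in_scale_ = param->scale_ * log_base; /* scale * ln(base) */
  if (param->shift_ == 0) {
    param->out_scale_ = 1; /* base^0 == 1 */
  } else if (log_base == 1) {
    param->out_scale_ = expf(param->shift_); /* e^shift */
  } else {
    param->out_scale_ = powf(param->base_, param->shift_); /* base^shift */
  }
}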
@@ -13,12 +13,22 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "nnacl/fp16/exp_fp16.h"
#include <math.h>
#include <string.h>
#include "nnacl/errorcode.h"
#if defined(ENABLE_NEON)
static inline void simd_exp_fp16(float16x8_t input, float16_t *dst) {
  static float16x8_t maxv = {88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f};
  static float16x8_t minv = {-88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f};
  input = vmaxq_f16(minv, vminq_f16(input, maxv));
  float32x4_t input_low = vcvt_f32_f16(vget_low_f16(input));
  float32x4_t input_high = vcvt_f32_f16(vget_high_f16(input));
  vst1q_f16(dst, vcombine_f16(vcvt_f16_f32(VexpFp32(input_low)), vcvt_f16_f32(VexpFp32(input_high))));
}
#endif
void ExpFp16(const float16_t *src, float16_t *dst, int num) {
  int i = 0;
#ifdef ENABLE_NEON
@@ -31,3 +41,42 @@ void ExpFp16(const float16_t *src, float16_t *dst, int num) {
    single_exp_fp16(src[i], dst + i);
  }
}
int ExpFusionFp16(const float16_t *src, float16_t *dst, const ExpParameter *param, int task_id) {
  if (param->op_parameter_.thread_num_ == 0) {
    return NNACL_PARAM_INVALID;
  }
  int stride = UP_DIV(param->element_num_, param->op_parameter_.thread_num_);
  int start = stride * task_id;
  int end = MSMIN(param->element_num_, start + stride);
  int num = end - start;
  if (param->scale_ == 1) {
    ExpFp16(src + start, dst + start, num);
  } else {
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT16X8 scale = MS_MOVQ_F16(param->in_scale_);
    int count = (num / C8NUM) * C8NUM;
    for (; i < count; i += C8NUM) {
      simd_exp_fp16(MS_MULQ_F16(MS_LDQ_F16(src + start + i), scale), dst + start + i);
    }
#endif
    for (; i < num; ++i) {
      single_exp_fp16(src[start + i] * param->in_scale_, dst + start + i);
    }
  }
  if (param->out_scale_ != 1) {
    // dst already holds the exp result; this pass only rescales it.
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT16X8 scale = MS_MOVQ_F16(param->out_scale_);
    int count = (num / C8NUM) * C8NUM;
    for (; i < count; i += C8NUM) {
      MS_STQ_F16(dst + start + i, MS_MULQ_F16(MS_LDQ_F16(dst + start + i), scale));
    }
#endif
    for (; i < num; ++i) {
      dst[start + i] *= param->out_scale_;
    }
  }
  return NNACL_OK;
}
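Editorial note: the fp16 path clamps to [-88, 88], widens each half of the float16x8 vector to float32, evaluates the polynomial exp in fp32 (VexpFp32), and narrows back, since a pure float16 evaluation would lose both range and precision. The sketch below shows one way to drive ExpFusionFp16 serially, e.g. from a unit test, instead of through the ParallelLaunch thread pool; the function name RunExpFusionFp16Serial is illustrative only.

/* Editorial sketch (not part of the patch): serial driver for ExpFusionFp16. */
#include "nnacl/fp16/exp_fp16.h"

void RunExpFusionFp16Serial(const float16_t *src, float16_t *dst, ExpParameter *param) {
  for (int task_id = 0; task_id < param->op_parameter_.thread_num_; ++task_id) {
    /* each task handles one contiguous stride of element_num_ */
    ExpFusionFp16(src, dst, param, task_id);
  }
}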
@@ -13,46 +13,19 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_NNACL_FP16_EXP_H_
#define MINDSPORE_NNACL_FP16_EXP_H_
#ifndef MINDSPORE_NNACL_FP16_EXP_FP16_H_
#define MINDSPORE_NNACL_FP16_EXP_FP16_H_
#include "nnacl/op_base.h"
#include "nnacl/exp_parameter.h"
#include "nnacl/fp32/exp_fp32.h"
#include "nnacl/intrinsics/ms_simd_instructions_fp16.h"
#ifdef __cplusplus
extern "C" {
#endif
void ExpFp16(const float16_t *src, float16_t *dst, int num);
#if defined(ENABLE_NEON)
static inline float32x4_t exp_fp32(float32x4_t input) {
  static float32x4_t param[] = {{0.693147f, 0.693147f, 0.693147f, 0.693147f},
                                {1.0f / 120, 1.0f / 120, 1.0f / 120, 1.0f / 120},
                                {1.0f / 24, 1.0f / 24, 1.0f / 24, 1.0f / 24},
                                {1.0f / 6, 1.0f / 6, 1.0f / 6, 1.0f / 6},
                                {0.5f, 0.5f, 0.5f, 0.5f},
                                {1.0f, 1.0f, 1.0f, 1.0f}};
  int32x4_t integer = vcvtq_s32_f32(input / param[0]);
  float32x4_t decimal = input - vcvtq_f32_s32(integer) * param[0];
  int32x4_t int_exp = vshlq_s32((integer + vmovq_n_s32(127)), vmovq_n_s32(23));
  float32x4_t decimal_exp =
    param[5] +
    decimal * (param[5] + decimal * (param[4] + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
  decimal_exp = decimal_exp * vld1q_f32((float *)(&int_exp));
  return decimal_exp;
}
static inline void simd_exp_fp16(float16x8_t input, float16_t *dst) {
  static float16x8_t maxv = {88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f};
  static float16x8_t minv = {-88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f};
  input = vmaxq_f16(minv, vminq_f16(input, maxv));
  float32x4_t input_low = vcvt_f32_f16(vget_low_f16(input));
  float32x4_t input_high = vcvt_f32_f16(vget_high_f16(input));
  vst1q_f16(dst, vcombine_f16(vcvt_f16_f32(exp_fp32(input_low)), vcvt_f16_f32(exp_fp32(input_high))));
}
#endif
int ExpFusionFp16(const float16_t *src, float16_t *dst, const ExpParameter *param, int task_id);
static inline void single_exp_fp16(float16_t src, float16_t *dst) {
  static float param[] = {0.693147f, 1.0f / 120, 1.0f / 24, 1.0f / 6, 1.0f / 2, 1.0f};
@@ -64,8 +37,9 @@ static inline void single_exp_fp16(float16_t src, float16_t *dst) {
    1.0f + decimal * (1.0f + decimal * (0.5f + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
  *dst = (float16_t)(*((float *)&int_exp) * decimal_exp);
}
#ifdef __cplusplus
}
#endif
#endif  // MINDSPORE_NNACL_FP16_EXP_H_
#endif  // MINDSPORE_NNACL_FP16_EXP_FP16_H_
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -19,27 +19,6 @@
#include <string.h>
#include "nnacl/errorcode.h"
int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id) {
  if (parameter->thread_num_ == 0) {
    return NNACL_PARAM_INVALID;
  }
  if (parameter->scale_ == 1) {
    for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
      output_data[i] = expf(input_data[i]);
    }
  } else {
    for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
      output_data[i] = expf(input_data[i] * parameter->in_scale_);
    }
  }
  if (parameter->out_scale_ != 1) {
    for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
      output_data[i] = output_data[i] * parameter->out_scale_;
    }
  }
  return NNACL_OK;
}
void ExpFp32(const float *src, float *dst, int num) {
  int i = 0;
#ifdef ENABLE_ARM64
@@ -52,3 +31,42 @@ void ExpFp32(const float *src, float *dst, int num) {
    single_exp(src[i], dst + i);
  }
}
int ExpFusionFp32(const float *src, float *dst, const ExpParameter *param, int task_id) {
  if (param->op_parameter_.thread_num_ == 0) {
    return NNACL_PARAM_INVALID;
  }
  int stride = UP_DIV(param->element_num_, param->op_parameter_.thread_num_);
  int start = stride * task_id;
  int end = MSMIN(param->element_num_, start + stride);
  int num = end - start;
  if (param->scale_ == 1) {
    ExpFp32(src + start, dst + start, num);
  } else {
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT32X4 scale = MS_MOVQ_F32(param->in_scale_);
    int count = (num / C4NUM) * C4NUM;
    for (; i < count; i += C4NUM) {
      simd_exp(MS_MULQ_F32(MS_LDQ_F32(src + start + i), scale), dst + start + i);
    }
#endif
    for (; i < num; ++i) {
      single_exp(src[start + i] * param->in_scale_, dst + start + i);
    }
  }
  if (param->out_scale_ != 1) {
    // dst already holds the exp result; this pass only rescales it.
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT32X4 scale = MS_MOVQ_F32(param->out_scale_);
    int count = (num / C4NUM) * C4NUM;
    for (; i < count; i += C4NUM) {
      MS_STQ_F32(dst + start + i, MS_MULQ_F32(MS_LDQ_F32(dst + start + i), scale));
    }
#endif
    for (; i < num; ++i) {
      dst[start + i] *= param->out_scale_;
    }
  }
  return NNACL_OK;
}
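Editorial note: unlike the removed Exp(), which interleaved elements across threads (i = task_id; i += thread_num_), ExpFusionFp32 and ExpFusionFp16 give each task one contiguous block, which keeps the SIMD inner loops dense. A worked example of the partitioning arithmetic:

/* Editorial example (not part of the patch):
 *   element_num_ = 10, thread_num_ = 4
 *   stride = UP_DIV(10, 4) = 3
 *   task 0 -> [0, 3)   task 1 -> [3, 6)   task 2 -> [6, 9)   task 3 -> [9, 10)
 * The MSMIN(element_num_, start + stride) clamp handles the short final block. */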
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,48 +18,40 @@
#define MINDSPORE_NNACL_FP32_EXP_H_
#include "nnacl/op_base.h"
#include "nnacl/exp_parameter.h"
#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
#include "nnacl/intrinsics/ms_simd_instructions.h"
#endif
typedef struct ExpParameter {
  // Primitive parameter
  OpParameter op_parameter_;
  float base_;
  float scale_;
  float shift_;
  // other parameter
  int thread_num_;
  float in_scale_;
  float out_scale_;
  int element_num_;
} ExpParameter;
#ifdef __cplusplus
extern "C" {
#endif
int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id);
void ExpFp32(const float *src, float *dst, int num);
int ExpFusionFp32(const float *src, float *dst, const ExpParameter *param, int task_id);
#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
static inline void simd_exp(MS_FLOAT32X4 input, float *dst) {
  static MS_FLOAT32X4 maxv = {88.0f, 88.0f, 88.0f, 88.0f};
  static MS_FLOAT32X4 minv = {-88.0f, -88.0f, -88.0f, -88.0f};
static inline MS_FLOAT32X4 VexpFp32(MS_FLOAT32X4 input) {
  static MS_FLOAT32X4 param[] = {{0.693147f, 0.693147f, 0.693147f, 0.693147f},
                                 {1.0f / 120, 1.0f / 120, 1.0f / 120, 1.0f / 120},
                                 {1.0f / 24, 1.0f / 24, 1.0f / 24, 1.0f / 24},
                                 {1.0f / 6, 1.0f / 6, 1.0f / 6, 1.0f / 6},
                                 {0.5f, 0.5f, 0.5f, 0.5f},
                                 {1.0f, 1.0f, 1.0f, 1.0f}};
  input = MS_MAXQ_F32(minv, MS_MINQ_F32(input, maxv));
  MS_INT32X4 integer = MS_CVTQPS_EPI32(MS_DIVQ_F32(input, param[0]));
  MS_FLOAT32X4 decimal = MS_SUBQ_F32(input, MS_MULQ_F32(MS_CVTQEPI32_PS(integer), param[0]));
  MS_INT32X4 int_exp = MS_SLLIQ_EPI32(MS_ADDQ_EPI32(integer, MS_MOVQ_EPI32(127)), 23);
  MS_FLOAT32X4 tmp = MS_MULQ_F32(decimal, (MS_ADDQ_F32(param[2], MS_MULQ_F32(decimal, param[1]))));
  tmp = MS_MULQ_F32(decimal, MS_ADDQ_F32(param[4], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[3], tmp))));
  MS_FLOAT32X4 decimal_exp = MS_ADDQ_F32(param[5], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[5], tmp)));
  MS_STQ_F32(dst, MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp)));
  return MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp));
}
static inline void simd_exp(MS_FLOAT32X4 input, float *dst) {
  static MS_FLOAT32X4 maxv = {88.0f, 88.0f, 88.0f, 88.0f};
  static MS_FLOAT32X4 minv = {-88.0f, -88.0f, -88.0f, -88.0f};
  input = MS_MAXQ_F32(minv, MS_MINQ_F32(input, maxv));
  MS_STQ_F32(dst, VexpFp32(input));
}
#endif
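Editorial note: VexpFp32, single_exp and single_exp_fp16 all implement the same range-reduction scheme: clamp x to [-88, 88], split x = n * ln2 + r with integer n and |r| < ln2, build 2^n by sliding n + 127 into the IEEE-754 exponent field, and approximate e^r with the fifth-order Taylor polynomial whose coefficients sit in the param[] table. A scalar reference sketch, for illustration only (not part of the patch):

/* Editorial sketch (not part of the patch): scalar reference for the
 * range-reduction exp used by VexpFp32 / single_exp / single_exp_fp16. */
#include <stdint.h>
#include <string.h>

static float RefExp(float x) {
  const float ln2 = 0.693147f;
  if (x > 88.0f) x = 88.0f;   /* clamp keeps the result finite and n + 127 a valid exponent */
  if (x < -88.0f) x = -88.0f;
  int n = (int)(x / ln2);     /* integer part: x = n * ln2 + r */
  float r = x - (float)n * ln2;
  uint32_t bits = (uint32_t)(n + 127) << 23; /* 2^n built from the IEEE-754 exponent field */
  float pow2n;
  memcpy(&pow2n, &bits, sizeof(pow2n));
  /* 5th-order Taylor polynomial for e^r, same coefficients as the param[] table */
  float er = 1.0f + r * (1.0f + r * (0.5f + r * (1.0f / 6 + r * (1.0f / 24 + r * (1.0f / 120)))));
  return pow2n * er;          /* e^x = 2^n * e^r */
}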
@@ -24,7 +24,6 @@ using mindspore::schema::PrimitiveType_ExpFusion;
namespace mindspore::lite::micro::nnacl {
int ExpFP32Coder::Prepare(CoderContext *context) {
  exp_parameter_ = reinterpret_cast<ExpParameter *>(parameter_);
  exp_parameter_->thread_num_ = exp_parameter_->op_parameter_.thread_num_;
  float log_ = (exp_parameter_->base_ == -1) ? 1 : logf(exp_parameter_->base_);
  exp_parameter_->in_scale_ = exp_parameter_->scale_ * log_;
  if (exp_parameter_->shift_ == 0) {
@@ -129,8 +129,8 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const SpliceParameter
void NNaclFp32Serializer::CodeStruct(const std::string &name, const ExpParameter &exp_parameter) {
  CodeBaseStruct("ExpParameter", name, exp_parameter.op_parameter_, exp_parameter.base_, exp_parameter.scale_,
                 exp_parameter.shift_, exp_parameter.thread_num_, exp_parameter.in_scale_, exp_parameter.out_scale_,
                 exp_parameter.element_num_);
                 exp_parameter.shift_, exp_parameter.op_parameter_.thread_num_, exp_parameter.in_scale_,
                 exp_parameter.out_scale_, exp_parameter.element_num_);
}
void NNaclFp32Serializer::CodeStruct(const std::string &name, const StridedSliceParameter &strided_slice_parameter) {
@@ -0,0 +1,34 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp16/exp_fp16.h"
#include "nnacl/fp16/exp_fp16.h"
#include "include/errorcode.h"
#include "src/kernel_registry.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_ExpFusion;
namespace mindspore::kernel {
int ExpFp16CPUKernel::DoExcute(int task_id) {
  ExpFusionFp16(reinterpret_cast<float16_t *>(input_addr_), reinterpret_cast<float16_t *>(output_addr_), param_,
                task_id);
  return RET_OK;
}
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_ExpFusion, LiteKernelCreator<ExpFp16CPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,34 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_
#include <vector>
#include "src/runtime/kernel/arm/fp32/exp_fp32.h"
namespace mindspore::kernel {
class ExpFp16CPUKernel : public ExpCPUKernel {
 public:
  explicit ExpFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : ExpCPUKernel(parameter, inputs, outputs, ctx) {}
  ~ExpFp16CPUKernel() override{};
  int DoExcute(int task_id) override;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp32/exp_fp32.h"
#include <cmath>
#include "include/errorcode.h"
@@ -26,34 +25,31 @@ using mindspore::schema::PrimitiveType_ExpFusion;
namespace mindspore::kernel {
int ExpCPUKernel::Init() {
  exp_parameter_ = reinterpret_cast<ExpParameter *>(op_parameter_);
  exp_parameter_->thread_num_ = thread_count_;
  float log_base = (param_->base_ == -1) ? 1 : logf(param_->base_);
  param_->in_scale_ = param_->scale_ * log_base;
  if (param_->shift_ == 0) {
    param_->out_scale_ = 1;
  } else {
    if (log_base == 1) {
      param_->out_scale_ = expf(param_->shift_);
    } else {
      param_->out_scale_ = powf(param_->base_, param_->shift_);
    }
  }
  param_->op_parameter_.thread_num_ = ms_context_->thread_num_;
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}
int ExpCPUKernel::ReSize() {
  exp_parameter_->thread_num_ = thread_count_;
  float log_ = (exp_parameter_->base_ == -1) ? 1 : logf(exp_parameter_->base_);
  exp_parameter_->in_scale_ = exp_parameter_->scale_ * log_;
  if (exp_parameter_->shift_ == 0) {
    exp_parameter_->out_scale_ = 1;
  } else {
    if (log_ == 1) {
      exp_parameter_->out_scale_ = expf(exp_parameter_->shift_);
    } else {
      exp_parameter_->out_scale_ = powf(exp_parameter_->base_, exp_parameter_->shift_);
    }
  }
  param_->element_num_ = in_tensors_.front()->ElementsNum();
  return RET_OK;
}
int ExpCPUKernel::DoExcute(int task_id) {
  Exp(input_addr_, output_addr_, exp_parameter_, task_id);
  ExpFusionFp32(reinterpret_cast<float *>(input_addr_), reinterpret_cast<float *>(output_addr_), param_, task_id);
  return RET_OK;
}
@@ -68,11 +64,10 @@ int ExpRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int ExpCPUKernel::Run() {
  input_addr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
  exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();
  input_addr_ = in_tensors_.front()->MutableData();
  output_addr_ = out_tensors_.front()->MutableData();
  auto ret = ParallelLaunch(this->ms_context_, ExpRun, this, exp_parameter_->thread_num_);
  auto ret = ParallelLaunch(this->ms_context_, ExpRun, this, ms_context_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
    return RET_ERROR;
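Editorial note: the body of the ExpRun callback is not shown in the hunk above; only its signature appears in the hunk header. A typical ParallelLaunch trampoline for this kernel would look roughly like the sketch below, which is an assumption and not part of the patch (the lhs_scale/rhs_scale arguments are unused here).

// Editorial sketch (not part of the patch): assumed shape of the ExpRun trampoline.
int ExpRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
  auto exp_kernel = reinterpret_cast<ExpCPUKernel *>(cdata);
  auto ret = exp_kernel->DoExcute(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ExpRun error task_id[" << task_id << "] error_code[" << ret << "]";
  }
  return ret;
}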
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXP_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXP_H_
@@ -26,22 +25,20 @@ class ExpCPUKernel : public InnerKernel {
 public:
  explicit ExpCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {}
      : InnerKernel(parameter, inputs, outputs, ctx) {
    param_ = reinterpret_cast<ExpParameter *>(parameter);
  }
  ~ExpCPUKernel() override{};
  int Init() override;
  int ReSize() override;
  int Run() override;
  int DoExcute(int task_id);
  virtual int DoExcute(int task_id);
 protected:
  const lite::InnerContext *ctx_ = nullptr;
  int thread_count_ = 1;
  ExpParameter *exp_parameter_ = nullptr;
 private:
  float *input_addr_ = nullptr;
  float *output_addr_ = nullptr;
  ExpParameter *param_ = nullptr;
  void *input_addr_ = nullptr;
  void *output_addr_ = nullptr;
};
}  // namespace mindspore::kernel