@@ -146,3 +146,24 @@ int SwishFp16(const float16_t *src, float16_t *dst, int ele_num) {
   }
   return NNACL_OK;
 }
+
+int HardTanhFp16(const float16_t *src, int length, float16_t *dst, float min_val, float max_val) {
+  if (max_val <= min_val) {
+    return NNACL_ERR;
+  }
+  int i = 0;
+  if (min_val == FLT_MIN) {
+    for (i = 0; i < length; ++i) {
+      dst[i] = src[i] > max_val ? max_val : src[i];
+    }
+  } else if (max_val == FLT_MAX) {
+    for (i = 0; i < length; ++i) {
+      dst[i] = src[i] < min_val ? min_val : src[i];
+    }
+  } else {
+    for (i = 0; i < length; ++i) {
+      dst[i] = src[i] < min_val ? min_val : (src[i] > max_val ? max_val : src[i]);
+    }
+  }
+  return NNACL_OK;
+}
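Below is a minimal usage sketch for the new `HardTanhFp16` (not part of the patch). It assumes an aarch64 build where `float16_t` is visible (e.g. an ENABLE_NEON build) and the nnacl headers are on the include path. As the implementation above shows, `FLT_MIN` / `FLT_MAX` act as "unbounded" sentinels that skip the corresponding bound check.

```cpp
// Hedged sketch: clamp a small buffer to [-1.0f, 1.0f] with the new kernel.
#include <cstdio>
#include "nnacl/errorcode.h"
#include "nnacl/fp16/activation_fp16.h"

int main() {
  float16_t src[5] = {-2.5f, -0.5f, 0.0f, 0.75f, 3.0f};
  float16_t dst[5] = {0};
  // min_val/max_val normally come from ActivationParameter; here they are literals.
  if (HardTanhFp16(src, 5, dst, -1.0f, 1.0f) != NNACL_OK) {
    return 1;
  }
  for (int i = 0; i < 5; ++i) {
    printf("%f\n", (float)dst[i]);  // expected: -1, -0.5, 0, 0.75, 1
  }
  return 0;
}
```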
@@ -23,12 +23,6 @@
 #include "nnacl/op_base.h"
 #include "mindspore/lite/nnacl/int8/fixed_point.h"
 
-typedef struct ActivationParameter {
-  OpParameter op_parameter_;
-  int type_;
-  float alpha_;
-} ActivationParameter;
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -39,6 +33,7 @@ int SigmoidFp16(const float16_t *src, float16_t *dst, int ele_num);
 int TanhFp16(const float16_t *src, float16_t *dst, int ele_num);
 int HSwishFp16(const float16_t *src, float16_t *dst, int ele_num);
 int SwishFp16(const float16_t *src, float16_t *dst, int ele_num);
+int HardTanhFp16(const float16_t *src, int length, float16_t *dst, float min_val, float max_val);
 #ifdef __cplusplus
 }
 #endif
@@ -39,3 +39,25 @@ int ReduceMeanFp16(const int outer_size, const int inner_size, const int axis_si
   }
   return NNACL_OK;
 }
+
+int ReduceMaxFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
+                  int tid, int thread_num) {
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int i, j, k;
+  for (j = tid; j < outer_size; j += thread_num) {
+    const float16_t *outer_src = src_data + j * axis_size * inner_size;
+    float16_t *outer_dst = dst_data + j * inner_size;
+    for (k = 0; k < inner_size; k++) {
+      const float16_t *inner_src = outer_src + k;
+      float16_t *inner_dst = outer_dst + k;
+      float tmp = -FLT_MAX;
+      for (i = 0; i < axis_size; i++) {
+        tmp = tmp > inner_src[i * inner_size] ? tmp : inner_src[i * inner_size];
+      }
+      *inner_dst = tmp;
+    }
+  }
+  return NNACL_OK;
+}
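A small self-checkable sketch of how `ReduceMaxFp16` walks the data (not part of the patch; same assumptions about `float16_t` and include paths as above). Each output element at `(outer, inner)` is the max over `axis_size` inputs strided by `inner_size`, and `tid`/`thread_num` split the work over the outer dimension.

```cpp
// Hedged sketch: reduce a 2x3x2 tensor over its middle axis on a single thread.
#include <cstdio>
#include "nnacl/errorcode.h"
#include "nnacl/fp16/reduce_fp16.h"

int main() {
  // Row-major 2x3x2 data: outer_size = 2, axis_size = 3, inner_size = 2.
  float16_t src[12] = {1, 2,  5, 0,  3, 9,   // outer index 0
                       7, 1,  2, 8,  4, 6};  // outer index 1
  float16_t dst[4] = {0};                    // 2x2 result
  // Argument order: outer_size, inner_size, axis_size, src, dst, tid, thread_num.
  if (ReduceMaxFp16(2, 2, 3, src, dst, 0, 1) != NNACL_OK) {
    return 1;
  }
  for (int i = 0; i < 4; ++i) {
    printf("%f\n", (float)dst[i]);  // expected: 5, 9, 7, 8
  }
  return 0;
}
```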
@@ -27,7 +27,8 @@ extern "C" {
 #endif
 int ReduceMeanFp16(const int outer_size, const int inner_size, const int axis_size, const float16_t *src_data,
                    float16_t *dst_data, const int tid, const int thread_num);
+int ReduceMaxFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
+                  int tid, int thread_num);
 #ifdef __cplusplus
 }
 #endif
@@ -37,7 +37,7 @@ int ActivationFp16CPUKernel::Init() {
   if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 &&
       type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID &&
       type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH &&
-      type_ != schema::ActivationType_SWISH) {
+      type_ != schema::ActivationType_SWISH && type_ != schema::ActivationType_HARD_TANH) {
     MS_LOG(ERROR) << "Activation fp16 not support type: " << type_;
     return RET_ERROR;
   }
@@ -67,6 +67,9 @@ int ActivationFp16CPUKernel::DoActivation(int task_id) {
     error_code = HSwishFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count);
   } else if (type_ == schema::ActivationType_SWISH) {
     error_code = SwishFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count);
+  } else if (type_ == schema::ActivationType_HARD_TANH) {
+    error_code =
+      HardTanhFp16(fp16_input_ + stride * task_id, count, fp16_output_ + stride * task_id, min_val_, max_val_);
   } else {
     MS_LOG(ERROR) << "Activation fp16 not support type: " << type_;
     return RET_ERROR;
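For context on the `stride * task_id` arithmetic above: each task processes one contiguous chunk of the tensor. The exact `stride`/`count` computation lives outside this hunk, so the sketch below assumes the usual ceil-division split; `RunHardTanhChunk` and its parameters are illustrative names, not part of the kernel.

```cpp
// Hedged sketch of the per-task chunking assumed by DoActivation.
#include <algorithm>
#include "nnacl/errorcode.h"
#include "nnacl/fp16/activation_fp16.h"

int RunHardTanhChunk(const float16_t *input, float16_t *output, int length, int task_id, int thread_num,
                     float min_val, float max_val) {
  int stride = (length + thread_num - 1) / thread_num;      // ceil(length / thread_num), assumed
  int count = std::min(stride, length - stride * task_id);  // last chunk may be shorter
  if (count <= 0) {
    return NNACL_OK;  // this task has no work
  }
  return HardTanhFp16(input + stride * task_id, count, output + stride * task_id, min_val, max_val);
}
```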
@@ -19,6 +19,7 @@
 
 #include <vector>
 #include "src/lite_kernel.h"
+#include "nnacl/fp32/activation_fp32.h"
 #include "nnacl/fp16/activation_fp16.h"
 
 namespace mindspore::kernel {
@@ -30,6 +31,8 @@ class ActivationFp16CPUKernel : public LiteKernel {
       : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
     type_ = (reinterpret_cast<ActivationParameter *>(param))->type_;
     alpha_ = (float16_t)((reinterpret_cast<ActivationParameter *>(param))->alpha_);
+    min_val_ = (reinterpret_cast<ActivationParameter *>(param))->min_val_;
+    max_val_ = (reinterpret_cast<ActivationParameter *>(param))->max_val_;
   }
 
   ~ActivationFp16CPUKernel() override = default;
@@ -42,6 +45,8 @@ class ActivationFp16CPUKernel : public LiteKernel {
   int thread_count_;
   int type_;
   float16_t alpha_;
+  float min_val_;
+  float max_val_;
   float16_t *fp16_input_ = nullptr;
   float16_t *fp16_output_ = nullptr;
 };
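Since the local `ActivationParameter` typedef was dropped from the fp16 NNACL header, these members are read from the shared definition pulled in via `nnacl/fp32/activation_fp32.h`. The exact layout is not shown in this diff; the sketch below only lists the fields the kernel code above actually touches.

```cpp
// Assumed shape of the shared ActivationParameter (fields beyond these may exist).
#include "nnacl/op_base.h"

typedef struct ActivationParameter {
  OpParameter op_parameter_;
  int type_;
  float alpha_;
  float min_val_;  // lower bound consumed by HARD_TANH
  float max_val_;  // upper bound consumed by HARD_TANH
} ActivationParameter;
```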
@@ -44,12 +44,17 @@ int ReduceFp16CPUKernel::Init() {
   if (ret != RET_OK) {
     return ret;
   }
-  if (mode_ != static_cast<int>(ReduceMode_ReduceMean)) {
-    MS_LOG(ERROR) << "Reduce fp16 only support ReduceMode_ReduceMean";
-    return RET_ERROR;
+  switch (mode_) {
+    case static_cast<int>(ReduceMode_ReduceMean):
+      reducer_ = ReduceMeanFp16;
+      break;
+    case static_cast<int>(ReduceMode_ReduceMax):
+      reducer_ = ReduceMaxFp16;
+      break;
+    default:
+      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
+      return RET_ERROR;
   }
-  reducer_ = ReduceMeanFp16;
 
   if (!InferShapeDone()) {
     return RET_OK;
   }
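The switch above works because `reducer_` is a function pointer that both NNACL reducers fit; since top-level `const` on parameters does not change a function's type, `ReduceMeanFp16` (declared with `const int` parameters) and `ReduceMaxFp16` are assignable to the same pointer type. A hedged sketch of that dispatch follows; only `reducer_` comes from the kernel, the other names are illustrative.

```cpp
// Hedged sketch: the function-pointer dispatch the switch above sets up.
#include "nnacl/fp16/reduce_fp16.h"

typedef int (*ReducerFp16)(int outer_size, int inner_size, int axis_size, const float16_t *src_data,
                           float16_t *dst_data, int tid, int thread_num);

struct ReduceDispatchSketch {
  ReducerFp16 reducer_ = nullptr;  // bound once in Init(): ReduceMeanFp16 or ReduceMaxFp16

  int Run(int outer, int inner, int axis, const float16_t *src, float16_t *dst, int tid, int threads) const {
    // Every per-thread call goes through the same pointer, so adding a reduce mode
    // only needs another case in Init() plus a function matching this signature.
    return reducer_(outer, inner, axis, src, dst, tid, threads);
  }
};
```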