| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -530,6 +530,13 @@ typedef struct QuantMulArg { | |||
| int right_shift_; | |||
| } QuantMulArg; | |||
// Cost description of one operator, consumed by the dynamic-thread cost model.
| typedef struct DtCostContext { | |||
// Number of work items; the cost model multiplies it by the per-item cost to obtain the total cost.
| int64_t total_num_; | |||
// Load term of the per-item cost; the cost model scales it by its load weight.
| float bytes_loaded_; | |||
// Store term of the per-item cost; the cost model scales it by its store weight.
| float bytes_stored_; | |||
// Compute term of the per-item cost; the cost model scales it by its compute-cycle weight.
| float compute_cost_; | |||
| } DtCostContext; | |||
| typedef enum ActType { ActType_No, ActType_Relu, ActType_Sigmod, ActType_Relu6, ActType_Prelu } ActType; | |||
| typedef enum PadMode { Pad_pad, Pad_same, Pad_valid } PadMode; | |||
| typedef enum RoundingMode { Rounding_No, Rounding_Away_from_zero, Rounding_Up } RoundingMode; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2021-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -215,6 +215,32 @@ int GetMaxFrequency(int core_id) { | |||
| return max_freq; | |||
| } | |||
| float CoreAffinity::GetServerFrequency() { | |||
| float max_freq = -1.0f; | |||
| #ifdef SERVER_INFERENCE | |||
| // The CPU cores in the server of the numa architecture are the same. | |||
| // The main frequency of the first core is obtained. | |||
| FILE *fp = popen("cat /proc/cpuinfo|grep cpu\\ MHz | sed -e 's/.*:[^0-9]//'", "r"); | |||
| if (fp == nullptr) { | |||
| THREAD_ERROR("get system cpuinfo frequency failed"); | |||
| return max_freq; | |||
| } | |||
| while (feof(fp) == 0) { | |||
| float freq = 0; | |||
| int tmp = fscanf(fp, "%f", &freq); | |||
| if (tmp != 1) { | |||
| break; | |||
| } | |||
| if (max_freq < freq) { | |||
| max_freq = freq; | |||
| } | |||
| } | |||
| (void)fclose(fp); | |||
| #endif | |||
| return max_freq; // MHz | |||
| } | |||
| #ifdef _WIN32 | |||
| void SetWindowsAffinity(HANDLE thread, DWORD_PTR mask) { | |||
| THREAD_INFO("Bind thread[%ld] to core[%lld].", GetThreadId(thread), mask); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2021-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -58,6 +58,7 @@ class CoreAffinity { | |||
| int BindProcess(BindMode bind_mode); | |||
| std::vector<int> GetCoreId(size_t thread_num, BindMode bind_mode); | |||
| void SetCoreId(const std::vector<int> &core_list); | |||
| static float GetServerFrequency(); | |||
| private: | |||
| #ifdef _WIN32 | |||
| @@ -324,6 +324,11 @@ int ThreadPool::InitAffinityInfo() { | |||
| return THREAD_ERROR; | |||
| } | |||
| #endif | |||
| #ifdef SERVER_INFERENCE | |||
| server_cpu_frequence = CoreAffinity::GetServerFrequency() / 1000.0f; // 1GHz = 1000MHz | |||
| #endif | |||
| return THREAD_OK; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2021-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -149,6 +149,7 @@ class MS_CORE_API ThreadPool { | |||
| void ActiveWorkers() const; | |||
| void SetWorkerIdMap(); | |||
| const std::unordered_map<std::thread::id, size_t> &GetWorkerIdMap() const { return worker_ids_; } | |||
| float GetServerCpuFrequence() const { return server_cpu_frequence; } | |||
| protected: | |||
| ThreadPool() = default; | |||
| @@ -174,6 +175,7 @@ class MS_CORE_API ThreadPool { | |||
| bool occupied_actor_thread_{true}; | |||
| int max_spin_count_{kDefaultSpinCount}; | |||
| int min_spin_count_{kMinSpinCount}; | |||
| float server_cpu_frequence = -1.0f; // Unit : GHz | |||
| }; | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -399,6 +399,67 @@ void InnerContext::ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_s | |||
| } | |||
| } | |||
| #ifdef SERVER_INFERENCE | |||
| float DtCostModel::load_cost_ = 1.0 / 64 * 11; // 64: L2 cache size, 11 : L2 cache latency on Haswell | |||
| float DtCostModel::store_cost_ = 1.0 / 64 * 11; // 64: L2 cache size, 11 : L2 cache latency on Haswell | |||
| float DtCostModel::compute_cycles_ = 1.0f; | |||
| int DtCostModel::startup_cycles_ = 100000; | |||
| int DtCostModel::per_thread_cycles_ = 100000; | |||
| int DtCostModel::task_size_ = 40000; | |||
| int DtCostModel::get_optimal_thread_num(const DtCostContext *dt_cost_context, const int thread_num) { | |||
| const int64_t max_oversharding_factor = 4; | |||
| int64_t block_size = | |||
| MSVALID(max_oversharding_factor * thread_num, thread_block_size(dt_cost_context), dt_cost_context->total_num_); | |||
| int64_t block_count = UP_DIV(dt_cost_context->total_num_, block_size); | |||
| int64_t max_block_size = MSMIN(dt_cost_context->total_num_, 2 * block_size); | |||
| double max_efficiency = static_cast<double>(block_count) / (UP_DIV(block_count, thread_num) * thread_num); | |||
| for (int64_t prev_block_count = block_count; max_efficiency < 1.0 && prev_block_count > 1;) { | |||
| int64_t cur_block_size = UP_DIV(dt_cost_context->total_num_, prev_block_count - 1); | |||
| if (cur_block_size > max_block_size) { | |||
| break; | |||
| } | |||
| const int64_t cur_block_count = UP_DIV(dt_cost_context->total_num_, cur_block_size); | |||
| MS_ASSERT(cur_block_count < prev_block_count); | |||
| prev_block_count = cur_block_count; | |||
| const double cur_efficiency = | |||
| static_cast<double>(cur_block_count) / (UP_DIV(cur_block_count, thread_num) * thread_num); | |||
| if (cur_efficiency + 0.01 >= max_efficiency) { // update threshold : 0.01 | |||
| block_size = cur_block_size; | |||
| block_count = cur_block_count; | |||
| if (max_efficiency < cur_efficiency) { | |||
| max_efficiency = cur_efficiency; | |||
| } | |||
| } | |||
| } | |||
| return block_count; | |||
| } | |||
| int UpdateThreadNum(const Context *context, const DtCostContext *dt_cost_context, int task_num) { | |||
| if (task_num <= 1) { | |||
| return task_num; | |||
| } | |||
| ThreadPool *pool = static_cast<const lite::InnerContext *>(context)->thread_pool(); | |||
| if (pool == nullptr) { | |||
| MS_LOG(ERROR) << "thread pool is nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| if (dt_cost_context != nullptr) { | |||
| if (DtCostModel::thread_num(dt_cost_context) == 1) { | |||
| return 1; | |||
| } | |||
| int opt_thread = static_cast<int>(DtCostModel::parallel_degree(dt_cost_context)); | |||
| task_num = MSVALID(1, opt_thread, task_num); | |||
| } | |||
| return task_num; | |||
| } | |||
| #endif | |||
| int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num) { | |||
| ThreadPool *pool = static_cast<const lite::InnerContext *>(context)->thread_pool(); | |||
| if (pool == nullptr) { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -120,6 +120,44 @@ struct InnerContext : public Context { | |||
| std::unordered_map<void *, std::set<void *>> link_info_{}; | |||
| }; | |||
| #ifdef SERVER_INFERENCE | |||
| struct DtCostModel { | |||
| static float unit_cost(const DtCostContext *dt_cost_context) { | |||
| return load_cost_ * dt_cost_context->bytes_loaded_ + store_cost_ * dt_cost_context->bytes_stored_ + | |||
| dt_cost_context->compute_cost_ * compute_cycles_; | |||
| } | |||
| static float total_cost(const DtCostContext *dt_cost_context) { | |||
| return dt_cost_context->total_num_ * unit_cost(dt_cost_context); | |||
| } | |||
| // thread_num assesses parallel thread num. Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task | |||
| // granularity needs to be increased to mitigate parallelization overheads. | |||
| static float parallel_degree(const DtCostContext *dt_cost_context) { | |||
| return total_cost(dt_cost_context) / task_size_; | |||
| } | |||
| static int thread_num(const DtCostContext *dt_cost_context) { | |||
| return MSMAX(1, static_cast<int>((total_cost(dt_cost_context) - startup_cycles_) / per_thread_cycles_ + 0.9)); | |||
| } | |||
| static int64_t thread_block_size(const DtCostContext *dt_cost_context) { | |||
| return static_cast<int64_t>(task_size_ / unit_cost(dt_cost_context)); | |||
| } | |||
| static int get_optimal_thread_num(const DtCostContext *dt_cost_context, const int thread_num); | |||
| static float load_cost_; // 64: L2 cache size, 11 : L2 cache latency on Haswell | |||
| static float store_cost_; // 64: L2 cache size, 11 : L2 cache latency on Haswell | |||
| static float compute_cycles_; | |||
| static int startup_cycles_; | |||
| static int per_thread_cycles_; | |||
| static int task_size_; | |||
| }; | |||
| int UpdateThreadNum(const Context *context, const DtCostContext *dt_cost_context, int task_num); | |||
| #endif | |||
| int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num); | |||
| } // namespace mindspore::lite | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2021-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -190,6 +190,7 @@ class InnerKernel : public Kernel { | |||
| size_t workspace_size_ = 0; | |||
| void *workspace_ = nullptr; | |||
| const lite::Context *ms_context_ = nullptr; | |||
| std::unique_ptr<DtCostContext> dt_cost_context_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -33,6 +33,30 @@ using mindspore::schema::ActivationType_SWISH; | |||
| using mindspore::schema::PrimitiveType_Activation; | |||
| namespace mindspore::kernel { | |||
| namespace { | |||
| const std::map<int, float> dt_activation_cost_map_ = { | |||
| {schema::ActivationType_RELU, 1.806f}, | |||
| {schema::ActivationType_RELU6, 1.806f}, | |||
| {schema::ActivationType_LEAKY_RELU, 1.806f}, | |||
| // {schema::ActivationType_SIGMOID, 10.0f}, {schema::ActivationType_TANH, 10.0f}, | |||
| // {schema::ActivationType_SWISH, 1.0f}, {schema::ActivationType_HSWISH, 1.0f}, | |||
| // {schema::ActivationType_HSIGMOID, 1.0f}, {schema::ActivationType_HARD_TANH, 1.0f}, | |||
| // {schema::ActivationType_GELU, 1.0f}, {schema::ActivationType_SOFTPLUS, 1.0f}, {schema::ActivationType_ELU, 1.0f}, | |||
| }; | |||
| } // namespace | |||
| #ifdef SERVER_INFERENCE | |||
| int ActivationCPUKernel::SetDtCostContext() { | |||
| if (dt_activation_cost_map_.count(type_) > 0) { | |||
| dt_cost_context_ = std::make_unique<DtCostContext>(); | |||
| dt_cost_context_->bytes_loaded_ = 1; | |||
| dt_cost_context_->bytes_stored_ = 1; | |||
| dt_cost_context_->compute_cost_ = dt_activation_cost_map_.at(type_); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| int ActivationCPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), 1); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 1); | |||
| @@ -55,6 +79,11 @@ int ActivationCPUKernel::Prepare() { | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| #ifdef SERVER_INFERENCE | |||
| if (SetDtCostContext() != RET_OK) { | |||
| return RET_ERROR; | |||
| } | |||
| #endif | |||
| return RET_OK; | |||
| } | |||
| @@ -163,6 +192,12 @@ int ActivationRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { | |||
| } | |||
| int ActivationCPUKernel::Run() { | |||
| #ifdef SERVER_INFERENCE | |||
| if (dt_cost_context_ != nullptr) { | |||
| dt_cost_context_->total_num_ = in_tensors_.at(0)->ElementsNum(); | |||
| thread_count_ = UpdateThreadNum(this->ms_context_, dt_cost_context_.get(), thread_count_); | |||
| } | |||
| #endif | |||
| int error_code = ParallelLaunch(this->ms_context_, ActivationRun, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -18,6 +18,8 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ACTIVATION_H_ | |||
| #include <vector> | |||
| #include <map> | |||
| #include <memory> | |||
| #include "src/inner_kernel.h" | |||
| #include "nnacl/fp32/activation_fp32.h" | |||
| @@ -34,6 +36,7 @@ class ActivationCPUKernel : public InnerKernel { | |||
| } | |||
| ~ActivationCPUKernel() override = default; | |||
| int SetDtCostContext(); | |||
| int Prepare() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -24,10 +24,61 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Eltwise; | |||
| namespace mindspore::kernel { | |||
| namespace { | |||
| const std::map<std::pair<int, int>, float> dt_arithmetic_cost_map_ = { | |||
| // {{PrimitiveType_MulFusion, schema::ActivationType_RELU}, 1.0f}, | |||
| // {{PrimitiveType_MulFusion, schema::ActivationType_RELU6}, 1.0f}, | |||
| // {{PrimitiveType_MulFusion, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| {{PrimitiveType_AddFusion, schema::ActivationType_RELU}, 1.806f}, | |||
| {{PrimitiveType_AddFusion, schema::ActivationType_RELU6}, 1.806f}, | |||
| {{PrimitiveType_AddFusion, schema::ActivationType_NO_ACTIVATION}, 1.275f}, | |||
| {{PrimitiveType_SubFusion, schema::ActivationType_RELU}, 1.806f}, | |||
| {{PrimitiveType_SubFusion, schema::ActivationType_RELU6}, 1.806f}, | |||
| {{PrimitiveType_SubFusion, schema::ActivationType_NO_ACTIVATION}, 1.275f}, | |||
| // {{PrimitiveType_DivFusion, schema::ActivationType_RELU}, 1.0f}, | |||
| // {{PrimitiveType_DivFusion, schema::ActivationType_RELU6}, 1.0f}, | |||
| // {{PrimitiveType_DivFusion, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_RealDiv, schema::ActivationType_RELU}, 1.0f}, | |||
| // {{PrimitiveType_RealDiv, schema::ActivationType_RELU6}, 1.0f}, | |||
| // {{PrimitiveType_RealDiv, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_LogicalAnd, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_LogicalOr, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_Maximum, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_Minimum, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_FloorMod, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_FloorDiv, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_Mod, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| // {{PrimitiveType_SquaredDifference, schema::ActivationType_NO_ACTIVATION}, 1.0f}, | |||
| }; | |||
| } // namespace | |||
| #ifdef SERVER_INFERENCE | |||
| int ArithmeticCPUKernel::SetDtCostContext() { | |||
| std::pair<int, int> fusion_type = std::make_pair(param_->op_parameter_.type_, param_->activation_type_); | |||
| if (dt_arithmetic_cost_map_.count(fusion_type) > 0) { | |||
| dt_cost_context_ = std::make_unique<DtCostContext>(); | |||
| dt_cost_context_->bytes_loaded_ = 1; | |||
| dt_cost_context_->bytes_stored_ = 1; | |||
| dt_cost_context_->compute_cost_ = dt_arithmetic_cost_map_.at(fusion_type); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| int ArithmeticCPUKernel::Prepare() { | |||
| CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); | |||
| CHECK_LESS_RETURN(out_tensors_.size(), 1); | |||
| auto primitive_type = param_->op_parameter_.type_; | |||
| #ifdef SERVER_INFERENCE | |||
| if (SetDtCostContext() != RET_OK) { | |||
| return RET_ERROR; | |||
| } | |||
| #endif | |||
| if (primitive_type == schema::PrimitiveType_Eltwise) { | |||
| switch (param_->eltwise_mode_) { | |||
| case schema::EltwiseMode_PROD: | |||
| @@ -437,9 +488,17 @@ int ArithmeticCPUKernel::Run() { | |||
| } | |||
| output_ptr_ = out_tensors_[0]->data(); | |||
| CHECK_NULL_RETURN(output_ptr_); | |||
| batch_a_ptr_ = static_cast<uint8_t *>(input0_ptr_); | |||
| batch_b_ptr_ = static_cast<uint8_t *>(input1_ptr_); | |||
| batch_c_ptr_ = static_cast<uint8_t *>(output_ptr_); | |||
| #ifdef SERVER_INFERENCE | |||
| if (dt_cost_context_ != nullptr) { | |||
| dt_cost_context_->total_num_ = in_tensors_.at(0)->ElementsNum(); | |||
| op_parameter_->thread_num_ = UpdateThreadNum(this->ms_context_, dt_cost_context_.get(), op_parameter_->thread_num_); | |||
| } | |||
| #endif | |||
| auto ret = ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "arithmetic failed"; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -17,6 +17,9 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_FP32_H_ | |||
| #include <vector> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <utility> | |||
| #include "src/inner_kernel.h" | |||
| #include "nnacl/fp32/arithmetic_fp32.h" | |||
| @@ -114,6 +117,7 @@ class ArithmeticCPUKernel : public InnerKernel { | |||
| int BiasCalc(int task_id); | |||
| void FreeConstTileBuff(); | |||
| bool IsBiasCalc() const; | |||
| int SetDtCostContext(); | |||
| ArithmeticRun arithmetic_run_ = nullptr; | |||
| ArithmeticOptRun arithmetic_opt_run_ = nullptr; | |||
| ArithmeticIntRun arithmetic_run_int_ = nullptr; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -27,8 +27,37 @@ struct TYPE_FUNC_INFO { | |||
| int primitive_type_ = 0; | |||
| ArithmeticSelfFunc func_ = nullptr; | |||
| }; | |||
| const std::map<int, float> dt_arithmetic_self_cost_map_ = { | |||
| // {schema::PrimitiveType_Abs, 0.5f}, | |||
| // {schema::PrimitiveType_Cos, 1.0f}, | |||
| // {schema::PrimitiveType_Log, 1.0f}, | |||
| // {schema::PrimitiveType_Square, 10.0f}, | |||
| {schema::PrimitiveType_Sqrt, 1.806f}, | |||
| // {schema::PrimitiveType_Rsqrt, 1.0f}, | |||
| // {schema::PrimitiveType_Sin, 1.0f}, | |||
| // {schema::PrimitiveType_LogicalNot, 1.0f}, | |||
| // {schema::PrimitiveType_Floor, 1.0f}, | |||
| // {schema::PrimitiveType_Ceil, 1.0f}, | |||
| // {schema::PrimitiveType_Round, 1.0f}, | |||
| // {schema::PrimitiveType_Neg, 1.0f}, | |||
| // {schema::PrimitiveType_Reciprocal, 1.0f}, | |||
| // {schema::PrimitiveType_Erf, 1.0f}, | |||
| }; | |||
| } // namespace | |||
| #ifdef SERVER_INFERENCE | |||
| int ArithmeticSelfCPUKernel::SetDtCostContext() { | |||
| if (dt_arithmetic_self_cost_map_.count(type_) > 0) { | |||
| dt_cost_context_ = std::make_unique<DtCostContext>(); | |||
| dt_cost_context_->bytes_loaded_ = 1; | |||
| dt_cost_context_->bytes_stored_ = 1; | |||
| dt_cost_context_->compute_cost_ = dt_arithmetic_self_cost_map_.at(type_); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| ArithmeticSelfFunc ArithmeticSelfCPUKernel::GetArithmeticSelfFun(int primitive_type) const { | |||
| TYPE_FUNC_INFO type_func_table[] = {{mindspore::schema::PrimitiveType_Abs, ElementAbs}, | |||
| {mindspore::schema::PrimitiveType_Cos, ElementCos}, | |||
| @@ -62,6 +91,11 @@ ArithmeticSelfBoolFunc ArithmeticSelfCPUKernel::GetArithmeticSelfBoolFun(int pri | |||
| int ArithmeticSelfCPUKernel::Prepare() { | |||
| CHECK_NOT_EQUAL_RETURN(in_tensors_.size(), 1); | |||
| CHECK_NOT_EQUAL_RETURN(out_tensors_.size(), 1); | |||
| #ifdef SERVER_INFERENCE | |||
| if (SetDtCostContext() != RET_OK) { | |||
| return RET_ERROR; | |||
| } | |||
| #endif | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| @@ -117,6 +151,12 @@ int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale | |||
| } | |||
| int ArithmeticSelfCPUKernel::Run() { | |||
| #ifdef SERVER_INFERENCE | |||
| if (dt_cost_context_ != nullptr) { | |||
| dt_cost_context_->total_num_ = in_tensors_.at(0)->ElementsNum(); | |||
| op_parameter_->thread_num_ = UpdateThreadNum(this->ms_context_, dt_cost_context_.get(), op_parameter_->thread_num_); | |||
| } | |||
| #endif | |||
| auto ret = ParallelLaunch(this->ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -17,6 +17,8 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_ | |||
| #include <vector> | |||
| #include <map> | |||
| #include <memory> | |||
| #include "src/inner_kernel.h" | |||
| using mindspore::schema::PrimitiveType_Abs; | |||
| @@ -47,6 +49,7 @@ class ArithmeticSelfCPUKernel : public InnerKernel { | |||
| } | |||
| ~ArithmeticSelfCPUKernel() override = default; | |||
| int SetDtCostContext(); | |||
| int Prepare() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| @@ -50,8 +50,8 @@ class DynamicQuantCPUKernel : public InnerKernel { | |||
| float real_min_array_[8]; | |||
| float real_max_array_[8]; | |||
| float real_min_; | |||
| float real_max_; | |||
// Seed of the running minimum: the largest finite float, so any finite value compared against it is not larger.
float real_min_ = FLT_MAX;
// Seed of the running maximum: the lowest finite float. FLT_MIN would be wrong here - it is the smallest
// POSITIVE normalized float, so a maximum seeded with it could never drop to zero or below.
float real_max_ = -FLT_MAX;
| int32_t src_dtype_{0}; | |||
| int32_t dst_dtype_{0}; | |||
| bool symmetric_ = false; | |||