Browse Source

dynamic thread cut, version 2

feature/build-system-rewrite
greatpanc 4 years ago
parent
commit
c90ead5d0f
15 changed files with 300 additions and 15 deletions
  1. +8
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h
  2. +27
    -1
      mindspore/core/mindrt/src/thread/core_affinity.cc
  3. +2
    -1
      mindspore/core/mindrt/src/thread/core_affinity.h
  4. +5
    -0
      mindspore/core/mindrt/src/thread/threadpool.cc
  5. +3
    -1
      mindspore/core/mindrt/src/thread/threadpool.h
  6. +62
    -1
      mindspore/lite/src/inner_context.cc
  7. +39
    -1
      mindspore/lite/src/inner_context.h
  8. +2
    -1
      mindspore/lite/src/inner_kernel.h
  9. +36
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
  10. +4
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.h
  11. +60
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
  12. +5
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h
  13. +41
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
  14. +4
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.h
  15. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/int8/dynamic_quant.h

+ 8
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -530,6 +530,13 @@ typedef struct QuantMulArg {
int right_shift_;
} QuantMulArg;

// Cost description of one elementwise task, consumed by the dynamic
// thread-cut cost model (DtCostModel) to decide how many threads are
// worth launching for the task.
typedef struct DtCostContext {
int64_t total_num_;  // total number of elements to process
float bytes_loaded_;  // bytes read per element (see DtCostModel::unit_cost)
float bytes_stored_;  // bytes written per element
float compute_cost_;  // relative compute cycles per element
} DtCostContext;

typedef enum ActType { ActType_No, ActType_Relu, ActType_Sigmod, ActType_Relu6, ActType_Prelu } ActType;
typedef enum PadMode { Pad_pad, Pad_same, Pad_valid } PadMode;
typedef enum RoundingMode { Rounding_No, Rounding_Away_from_zero, Rounding_Up } RoundingMode;


+ 27
- 1
mindspore/core/mindrt/src/thread/core_affinity.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -215,6 +215,32 @@ int GetMaxFrequency(int core_id) {
return max_freq;
}

// Read the CPU core frequency reported by /proc/cpuinfo, in MHz.
// The server (numa) build assumes all cores run at the same frequency, so
// the maximum over all "cpu MHz" entries is taken as the machine frequency.
// Returns -1.0f when the frequency cannot be determined, or on builds
// without SERVER_INFERENCE.
float CoreAffinity::GetServerFrequency() {
  float max_freq = -1.0f;
#ifdef SERVER_INFERENCE
  // The CPU cores in the server of the numa architecture are the same;
  // scan every "cpu MHz" entry and keep the maximum.
  FILE *fp = popen("cat /proc/cpuinfo|grep cpu\\ MHz | sed -e 's/.*:[^0-9]//'", "r");
  if (fp == nullptr) {
    THREAD_ERROR("get system cpuinfo frequency failed");
    return max_freq;
  }

  while (feof(fp) == 0) {
    float freq = 0;
    int tmp = fscanf(fp, "%f", &freq);
    if (tmp != 1) {
      break;
    }
    if (max_freq < freq) {
      max_freq = freq;
    }
  }
  // A popen() stream must be closed with pclose(), not fclose(): pclose()
  // waits for and reaps the child shell; fclose() on a popen stream is
  // undefined behavior per POSIX and leaks a zombie process.
  (void)pclose(fp);
#endif
  return max_freq;  // MHz
}

#ifdef _WIN32
void SetWindowsAffinity(HANDLE thread, DWORD_PTR mask) {
THREAD_INFO("Bind thread[%ld] to core[%lld].", GetThreadId(thread), mask);


+ 2
- 1
mindspore/core/mindrt/src/thread/core_affinity.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -58,6 +58,7 @@ class CoreAffinity {
int BindProcess(BindMode bind_mode);
std::vector<int> GetCoreId(size_t thread_num, BindMode bind_mode);
void SetCoreId(const std::vector<int> &core_list);
static float GetServerFrequency();

private:
#ifdef _WIN32


+ 5
- 0
mindspore/core/mindrt/src/thread/threadpool.cc View File

@@ -324,6 +324,11 @@ int ThreadPool::InitAffinityInfo() {
return THREAD_ERROR;
}
#endif

#ifdef SERVER_INFERENCE
server_cpu_frequence = CoreAffinity::GetServerFrequency() / 1000.0f; // 1GHz = 1000MHz
#endif

return THREAD_OK;
}



+ 3
- 1
mindspore/core/mindrt/src/thread/threadpool.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -149,6 +149,7 @@ class MS_CORE_API ThreadPool {
void ActiveWorkers() const;
void SetWorkerIdMap();
const std::unordered_map<std::thread::id, size_t> &GetWorkerIdMap() const { return worker_ids_; }
float GetServerCpuFrequence() const { return server_cpu_frequence; }

protected:
ThreadPool() = default;
@@ -174,6 +175,7 @@ class MS_CORE_API ThreadPool {
bool occupied_actor_thread_{true};
int max_spin_count_{kDefaultSpinCount};
int min_spin_count_{kMinSpinCount};
float server_cpu_frequence = -1.0f; // Unit : GHz
};
} // namespace mindspore
#endif // MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_H_

+ 62
- 1
mindspore/lite/src/inner_context.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -399,6 +399,67 @@ void InnerContext::ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_s
}
}

#ifdef SERVER_INFERENCE
float DtCostModel::load_cost_ = 1.0 / 64 * 11; // 64: L2 cache size, 11 : L2 cache latency on Haswell
float DtCostModel::store_cost_ = 1.0 / 64 * 11; // 64: L2 cache size, 11 : L2 cache latency on Haswell
float DtCostModel::compute_cycles_ = 1.0f;

int DtCostModel::startup_cycles_ = 100000;
int DtCostModel::per_thread_cycles_ = 100000;
int DtCostModel::task_size_ = 40000;

// Choose a block count (parallel degree) for total_num_ units of work,
// in the style of the Eigen/TensorFlow thread-pool cost model: start from a
// block size bounded by an oversharding factor, then try progressively
// larger blocks while scheduling efficiency (how evenly blocks fill
// thread_num workers) does not noticeably degrade. Returns the final
// block count.
int DtCostModel::get_optimal_thread_num(const DtCostContext *dt_cost_context, const int thread_num) {
// Cap on how finely the task may be sharded relative to the thread count.
const int64_t max_oversharding_factor = 4;

// Initial block size — assumes MSVALID(a, v, b) clamps v into [a, b]
// (TODO confirm macro semantics): between the oversharding bound and the
// whole task.
int64_t block_size =
MSVALID(max_oversharding_factor * thread_num, thread_block_size(dt_cost_context), dt_cost_context->total_num_);
int64_t block_count = UP_DIV(dt_cost_context->total_num_, block_size);

// Never let a block grow beyond twice the initial size (or the whole task).
int64_t max_block_size = MSMIN(dt_cost_context->total_num_, 2 * block_size);
// Efficiency of a partition: fraction of worker slots doing useful work;
// 1.0 means block_count divides evenly among thread_num workers.
double max_efficiency = static_cast<double>(block_count) / (UP_DIV(block_count, thread_num) * thread_num);
for (int64_t prev_block_count = block_count; max_efficiency < 1.0 && prev_block_count > 1;) {
// Candidate partition with one block fewer, i.e. slightly larger blocks.
int64_t cur_block_size = UP_DIV(dt_cost_context->total_num_, prev_block_count - 1);
if (cur_block_size > max_block_size) {
break;
}
const int64_t cur_block_count = UP_DIV(dt_cost_context->total_num_, cur_block_size);
MS_ASSERT(cur_block_count < prev_block_count);
prev_block_count = cur_block_count;
const double cur_efficiency =
static_cast<double>(cur_block_count) / (UP_DIV(cur_block_count, thread_num) * thread_num);
// Prefer the coarser partition unless it is noticeably (>0.01) worse.
if (cur_efficiency + 0.01 >= max_efficiency) { // update threshold : 0.01
block_size = cur_block_size;
block_count = cur_block_count;
if (max_efficiency < cur_efficiency) {
max_efficiency = cur_efficiency;
}
}
}

return block_count;
}

// Clamp the launch thread count for an elementwise task using the dynamic
// thread-cut cost model: small tasks get fewer threads so dispatch overhead
// does not dominate the useful work. Returns the (possibly reduced) task_num.
int UpdateThreadNum(const Context *context, const DtCostContext *dt_cost_context, int task_num) {
// A single-unit task cannot benefit from parallel dispatch.
if (task_num <= 1) {
return task_num;
}
// The pool itself is only validated here; it is not used below.
ThreadPool *pool = static_cast<const lite::InnerContext *>(context)->thread_pool();
if (pool == nullptr) {
MS_LOG(ERROR) << "thread pool is nullptr";
// NOTE(review): RET_NULL_PTR is an error code, yet callers assign this
// return value directly to a thread count — confirm callers check for it
// before passing it to ParallelLaunch.
return RET_NULL_PTR;
}

if (dt_cost_context != nullptr) {
// If the model says one thread suffices, skip parallel launch entirely.
if (DtCostModel::thread_num(dt_cost_context) == 1) {
return 1;
}
int opt_thread = static_cast<int>(DtCostModel::parallel_degree(dt_cost_context));
// Clamp the model's suggestion into [1, task_num] — assumes
// MSVALID(min, v, max) semantics (TODO confirm macro definition).
task_num = MSVALID(1, opt_thread, task_num);
}
return task_num;
}
#endif

int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num) {
ThreadPool *pool = static_cast<const lite::InnerContext *>(context)->thread_pool();
if (pool == nullptr) {


+ 39
- 1
mindspore/lite/src/inner_context.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -120,6 +120,44 @@ struct InnerContext : public Context {
std::unordered_map<void *, std::set<void *>> link_info_{};
};

#ifdef SERVER_INFERENCE
// Static cost model for the dynamic thread cut: estimates the cycle cost of
// an elementwise task (described by DtCostContext) and derives how many
// threads are worth launching. All members are static; the tuning constants
// are defined in inner_context.cc.
struct DtCostModel {
// Estimated cycles to process one element: memory traffic plus compute.
static float unit_cost(const DtCostContext *dt_cost_context) {
return load_cost_ * dt_cost_context->bytes_loaded_ + store_cost_ * dt_cost_context->bytes_stored_ +
dt_cost_context->compute_cost_ * compute_cycles_;
}

// Estimated cycles for the whole task.
static float total_cost(const DtCostContext *dt_cost_context) {
return dt_cost_context->total_num_ * unit_cost(dt_cost_context);
}

// How many ideally-sized tasks (task_size_ cycles each) the total work
// amounts to. A value of 1.0 means one ideal parallel task; values < 1.0
// mean the task is too small and granularity should be increased to
// mitigate parallelization overheads.
static float parallel_degree(const DtCostContext *dt_cost_context) {
return total_cost(dt_cost_context) / task_size_;
}

// Thread count estimate after amortizing startup and per-thread overhead;
// never less than 1.
static int thread_num(const DtCostContext *dt_cost_context) {
return MSMAX(1, static_cast<int>((total_cost(dt_cost_context) - startup_cycles_) / per_thread_cycles_ + 0.9));
}

// Number of elements that fit in one ideally-sized block.
static int64_t thread_block_size(const DtCostContext *dt_cost_context) {
return static_cast<int64_t>(task_size_ / unit_cost(dt_cost_context));
}
static int get_optimal_thread_num(const DtCostContext *dt_cost_context, const int thread_num);

static float load_cost_;  // 64: L2 cache size, 11 : L2 cache latency on Haswell
static float store_cost_;  // 64: L2 cache size, 11 : L2 cache latency on Haswell
static float compute_cycles_;  // scale factor applied to compute_cost_

static int startup_cycles_;  // fixed cost of entering a parallel region
static int per_thread_cycles_;  // incremental cost of each extra thread
static int task_size_;  // cycles in one ideally-sized parallel task
};

// Clamps task_num via the cost model; defined in inner_context.cc.
int UpdateThreadNum(const Context *context, const DtCostContext *dt_cost_context, int task_num);
#endif

int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num);
} // namespace mindspore::lite



+ 2
- 1
mindspore/lite/src/inner_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -190,6 +190,7 @@ class InnerKernel : public Kernel {
size_t workspace_size_ = 0;
void *workspace_ = nullptr;
const lite::Context *ms_context_ = nullptr;
std::unique_ptr<DtCostContext> dt_cost_context_ = nullptr;
};
} // namespace mindspore::kernel



+ 36
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -33,6 +33,30 @@ using mindspore::schema::ActivationType_SWISH;
using mindspore::schema::PrimitiveType_Activation;

namespace mindspore::kernel {
namespace {
// Per-element compute cost (relative cycles) of each activation type, used
// by SetDtCostContext for the dynamic thread cut. Commented-out entries are
// not yet calibrated; activations absent from the map keep the configured
// thread count.
const std::map<int, float> dt_activation_cost_map_ = {
{schema::ActivationType_RELU, 1.806f},
{schema::ActivationType_RELU6, 1.806f},
{schema::ActivationType_LEAKY_RELU, 1.806f},
// {schema::ActivationType_SIGMOID, 10.0f}, {schema::ActivationType_TANH, 10.0f},
// {schema::ActivationType_SWISH, 1.0f}, {schema::ActivationType_HSWISH, 1.0f},
// {schema::ActivationType_HSIGMOID, 1.0f}, {schema::ActivationType_HARD_TANH, 1.0f},
// {schema::ActivationType_GELU, 1.0f}, {schema::ActivationType_SOFTPLUS, 1.0f}, {schema::ActivationType_ELU, 1.0f},
};
} // namespace

#ifdef SERVER_INFERENCE
// Populate dt_cost_context_ with the per-element cost of this activation
// type so Run() can shrink the thread count for small workloads. Activation
// types without a calibrated cost leave dt_cost_context_ null (no dynamic
// thread cut). Always returns RET_OK.
int ActivationCPUKernel::SetDtCostContext() {
  // Single lookup via find() instead of count() followed by at() (two lookups).
  auto it = dt_activation_cost_map_.find(type_);
  if (it != dt_activation_cost_map_.end()) {
    dt_cost_context_ = std::make_unique<DtCostContext>();
    dt_cost_context_->bytes_loaded_ = 1;
    dt_cost_context_->bytes_stored_ = 1;
    dt_cost_context_->compute_cost_ = it->second;
  }
  return RET_OK;
}
#endif

int ActivationCPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), 1);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
@@ -55,6 +79,11 @@ int ActivationCPUKernel::Prepare() {
return RET_ERROR;
}
}
#ifdef SERVER_INFERENCE
if (SetDtCostContext() != RET_OK) {
return RET_ERROR;
}
#endif
return RET_OK;
}

@@ -163,6 +192,12 @@ int ActivationRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}

int ActivationCPUKernel::Run() {
#ifdef SERVER_INFERENCE
if (dt_cost_context_ != nullptr) {
dt_cost_context_->total_num_ = in_tensors_.at(0)->ElementsNum();
thread_count_ = UpdateThreadNum(this->ms_context_, dt_cost_context_.get(), thread_count_);
}
#endif
int error_code = ParallelLaunch(this->ms_context_, ActivationRun, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";


+ 4
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ACTIVATION_H_

#include <vector>
#include <map>
#include <memory>
#include "src/inner_kernel.h"
#include "nnacl/fp32/activation_fp32.h"

@@ -34,6 +36,7 @@ class ActivationCPUKernel : public InnerKernel {
}
~ActivationCPUKernel() override = default;

int SetDtCostContext();
int Prepare() override;
int ReSize() override;
int Run() override;


+ 60
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,10 +24,61 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Eltwise;

namespace mindspore::kernel {
namespace {
// Per-element compute cost (relative cycles) keyed by
// (primitive type, fused activation), used by SetDtCostContext for the
// dynamic thread cut. Commented-out entries are not yet calibrated;
// combinations absent from the map keep the configured thread count.
const std::map<std::pair<int, int>, float> dt_arithmetic_cost_map_ = {
// {{PrimitiveType_MulFusion, schema::ActivationType_RELU}, 1.0f},
// {{PrimitiveType_MulFusion, schema::ActivationType_RELU6}, 1.0f},
// {{PrimitiveType_MulFusion, schema::ActivationType_NO_ACTIVATION}, 1.0f},

{{PrimitiveType_AddFusion, schema::ActivationType_RELU}, 1.806f},
{{PrimitiveType_AddFusion, schema::ActivationType_RELU6}, 1.806f},
{{PrimitiveType_AddFusion, schema::ActivationType_NO_ACTIVATION}, 1.275f},

{{PrimitiveType_SubFusion, schema::ActivationType_RELU}, 1.806f},
{{PrimitiveType_SubFusion, schema::ActivationType_RELU6}, 1.806f},
{{PrimitiveType_SubFusion, schema::ActivationType_NO_ACTIVATION}, 1.275f},

// {{PrimitiveType_DivFusion, schema::ActivationType_RELU}, 1.0f},
// {{PrimitiveType_DivFusion, schema::ActivationType_RELU6}, 1.0f},
// {{PrimitiveType_DivFusion, schema::ActivationType_NO_ACTIVATION}, 1.0f},

// {{PrimitiveType_RealDiv, schema::ActivationType_RELU}, 1.0f},
// {{PrimitiveType_RealDiv, schema::ActivationType_RELU6}, 1.0f},
// {{PrimitiveType_RealDiv, schema::ActivationType_NO_ACTIVATION}, 1.0f},

// {{PrimitiveType_LogicalAnd, schema::ActivationType_NO_ACTIVATION}, 1.0f},
// {{PrimitiveType_LogicalOr, schema::ActivationType_NO_ACTIVATION}, 1.0f},
// {{PrimitiveType_Maximum, schema::ActivationType_NO_ACTIVATION}, 1.0f},
// {{PrimitiveType_Minimum, schema::ActivationType_NO_ACTIVATION}, 1.0f},
// {{PrimitiveType_FloorMod, schema::ActivationType_NO_ACTIVATION}, 1.0f},
// {{PrimitiveType_FloorDiv, schema::ActivationType_NO_ACTIVATION}, 1.0f},
// {{PrimitiveType_Mod, schema::ActivationType_NO_ACTIVATION}, 1.0f},
// {{PrimitiveType_SquaredDifference, schema::ActivationType_NO_ACTIVATION}, 1.0f},
};
} // namespace

#ifdef SERVER_INFERENCE
// Register the per-element cost of this (operator, fused activation) pair so
// Run() can shrink the thread count for small tensors. Combinations without
// a calibrated cost leave dt_cost_context_ null (no dynamic thread cut).
// Always returns RET_OK.
int ArithmeticCPUKernel::SetDtCostContext() {
  const std::pair<int, int> fusion_type = std::make_pair(param_->op_parameter_.type_, param_->activation_type_);
  // Single lookup via find() instead of count() followed by at() (two lookups).
  auto it = dt_arithmetic_cost_map_.find(fusion_type);
  if (it != dt_arithmetic_cost_map_.end()) {
    dt_cost_context_ = std::make_unique<DtCostContext>();
    dt_cost_context_->bytes_loaded_ = 1;
    dt_cost_context_->bytes_stored_ = 1;
    dt_cost_context_->compute_cost_ = it->second;
  }
  return RET_OK;
}
#endif

int ArithmeticCPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
auto primitive_type = param_->op_parameter_.type_;
#ifdef SERVER_INFERENCE
if (SetDtCostContext() != RET_OK) {
return RET_ERROR;
}
#endif
if (primitive_type == schema::PrimitiveType_Eltwise) {
switch (param_->eltwise_mode_) {
case schema::EltwiseMode_PROD:
@@ -437,9 +488,17 @@ int ArithmeticCPUKernel::Run() {
}
output_ptr_ = out_tensors_[0]->data();
CHECK_NULL_RETURN(output_ptr_);

batch_a_ptr_ = static_cast<uint8_t *>(input0_ptr_);
batch_b_ptr_ = static_cast<uint8_t *>(input1_ptr_);
batch_c_ptr_ = static_cast<uint8_t *>(output_ptr_);

#ifdef SERVER_INFERENCE
if (dt_cost_context_ != nullptr) {
dt_cost_context_->total_num_ = in_tensors_.at(0)->ElementsNum();
op_parameter_->thread_num_ = UpdateThreadNum(this->ms_context_, dt_cost_context_.get(), op_parameter_->thread_num_);
}
#endif
auto ret = ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "arithmetic failed";


+ 5
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,9 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_FP32_H_

#include <vector>
#include <map>
#include <memory>
#include <utility>
#include "src/inner_kernel.h"
#include "nnacl/fp32/arithmetic_fp32.h"

@@ -114,6 +117,7 @@ class ArithmeticCPUKernel : public InnerKernel {
int BiasCalc(int task_id);
void FreeConstTileBuff();
bool IsBiasCalc() const;
int SetDtCostContext();
ArithmeticRun arithmetic_run_ = nullptr;
ArithmeticOptRun arithmetic_opt_run_ = nullptr;
ArithmeticIntRun arithmetic_run_int_ = nullptr;


+ 41
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -27,8 +27,37 @@ struct TYPE_FUNC_INFO {
int primitive_type_ = 0;
ArithmeticSelfFunc func_ = nullptr;
};

// Per-element compute cost (relative cycles) of each unary arithmetic op,
// used by SetDtCostContext for the dynamic thread cut. Commented-out entries
// are not yet calibrated; ops absent from the map keep the configured
// thread count.
const std::map<int, float> dt_arithmetic_self_cost_map_ = {
// {schema::PrimitiveType_Abs, 0.5f},
// {schema::PrimitiveType_Cos, 1.0f},
// {schema::PrimitiveType_Log, 1.0f},
// {schema::PrimitiveType_Square, 10.0f},
{schema::PrimitiveType_Sqrt, 1.806f},
// {schema::PrimitiveType_Rsqrt, 1.0f},
// {schema::PrimitiveType_Sin, 1.0f},
// {schema::PrimitiveType_LogicalNot, 1.0f},
// {schema::PrimitiveType_Floor, 1.0f},
// {schema::PrimitiveType_Ceil, 1.0f},
// {schema::PrimitiveType_Round, 1.0f},
// {schema::PrimitiveType_Neg, 1.0f},
// {schema::PrimitiveType_Reciprocal, 1.0f},
// {schema::PrimitiveType_Erf, 1.0f},
};
} // namespace

#ifdef SERVER_INFERENCE
// Register the per-element cost of this unary op so Run() can shrink the
// thread count for small tensors. Ops without a calibrated cost leave
// dt_cost_context_ null (no dynamic thread cut). Always returns RET_OK.
int ArithmeticSelfCPUKernel::SetDtCostContext() {
  // Single lookup via find() instead of count() followed by at() (two lookups).
  auto it = dt_arithmetic_self_cost_map_.find(type_);
  if (it != dt_arithmetic_self_cost_map_.end()) {
    dt_cost_context_ = std::make_unique<DtCostContext>();
    dt_cost_context_->bytes_loaded_ = 1;
    dt_cost_context_->bytes_stored_ = 1;
    dt_cost_context_->compute_cost_ = it->second;
  }
  return RET_OK;
}
#endif

ArithmeticSelfFunc ArithmeticSelfCPUKernel::GetArithmeticSelfFun(int primitive_type) const {
TYPE_FUNC_INFO type_func_table[] = {{mindspore::schema::PrimitiveType_Abs, ElementAbs},
{mindspore::schema::PrimitiveType_Cos, ElementCos},
@@ -62,6 +91,11 @@ ArithmeticSelfBoolFunc ArithmeticSelfCPUKernel::GetArithmeticSelfBoolFun(int pri
int ArithmeticSelfCPUKernel::Prepare() {
CHECK_NOT_EQUAL_RETURN(in_tensors_.size(), 1);
CHECK_NOT_EQUAL_RETURN(out_tensors_.size(), 1);
#ifdef SERVER_INFERENCE
if (SetDtCostContext() != RET_OK) {
return RET_ERROR;
}
#endif
if (!InferShapeDone()) {
return RET_OK;
}
@@ -117,6 +151,12 @@ int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale
}

int ArithmeticSelfCPUKernel::Run() {
#ifdef SERVER_INFERENCE
if (dt_cost_context_ != nullptr) {
dt_cost_context_->total_num_ = in_tensors_.at(0)->ElementsNum();
op_parameter_->thread_num_ = UpdateThreadNum(this->ms_context_, dt_cost_context_.get(), op_parameter_->thread_num_);
}
#endif
auto ret = ParallelLaunch(this->ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";


+ 4
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_

#include <vector>
#include <map>
#include <memory>
#include "src/inner_kernel.h"

using mindspore::schema::PrimitiveType_Abs;
@@ -47,6 +49,7 @@ class ArithmeticSelfCPUKernel : public InnerKernel {
}
~ArithmeticSelfCPUKernel() override = default;

int SetDtCostContext();
int Prepare() override;
int ReSize() override;
int Run() override;


+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/int8/dynamic_quant.h View File

@@ -50,8 +50,8 @@ class DynamicQuantCPUKernel : public InnerKernel {

float real_min_array_[8];
float real_max_array_[8];
float real_min_;
float real_max_;
// NOTE(review): FLT_MIN is the smallest positive normalized float, not the
// most negative value. Initializing a running maximum with FLT_MIN is wrong
// for all-negative inputs; the conventional initializers are
// real_min_ = FLT_MAX and real_max_ = -FLT_MAX
// (std::numeric_limits<float>::lowest()) — confirm against the kernel's
// min/max update loop.
float real_min_ = FLT_MAX;
float real_max_ = FLT_MIN;
int32_t src_dtype_{0};
int32_t dst_dtype_{0};
bool symmetric_ = false;


Loading…
Cancel
Save