Browse Source

Modify the thread count used by some CPU kernels

tags/v1.1.0
kswang 5 years ago
parent
commit
853b3a3f30
5 changed files with 30 additions and 10 deletions
  1. +4
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc
  2. +7
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc
  3. +6
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc
  4. +8
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h
  5. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc

+ 4
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc View File

@@ -24,7 +24,11 @@
namespace mindspore {
namespace kernel {
constexpr size_t kAdamDeltaInputSize = 9;
// Compile-time constant chosen by the ENABLE_D macro: 23 when ENABLE_D is
// defined, 8 otherwise.
// NOTE(review): presumably the number of worker threads this kernel uses; the
// code that consumes it is outside this hunk — confirm against the full file.
#ifdef ENABLE_D
constexpr size_t kUsedThreadNum = 23;
#else
constexpr size_t kUsedThreadNum = 8;
#endif
namespace {
struct ComputeParam {
float *delta_{nullptr};


+ 7
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc View File

@@ -22,6 +22,11 @@
namespace mindspore {
namespace kernel {
namespace {
// Upper bound on the number of threads used by this kernel: 23 when ENABLE_D
// is defined, 8 otherwise. LaunchKernel clamps its computed thread_num to this
// value and also uses it as the size of its std::thread array.
#ifdef ENABLE_D
constexpr size_t kUsedThreadNum = 23;
#else
constexpr size_t kUsedThreadNum = 8;
#endif
template <typename T>
void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t indices_lens,
size_t outer_dim_size, T offset, size_t first_dim_size) {
@@ -92,10 +97,9 @@ void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto indices_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
const size_t kMaxThreadNum = 16;
size_t thread_num = indices_lens_ / 10000 + 1;
thread_num = thread_num > kMaxThreadNum ? kMaxThreadNum : thread_num;
std::thread threads[kMaxThreadNum];
thread_num = thread_num > kUsedThreadNum ? kUsedThreadNum : thread_num;
std::thread threads[kUsedThreadNum];
size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num;
size_t i;
size_t task_offset = 0;


+ 6
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc View File

@@ -22,6 +22,11 @@
namespace mindspore {
namespace kernel {
namespace {
// Number of chunks the update work is split into: 23 when ENABLE_D is
// defined, 8 otherwise. LaunchKernel divides num_units_ by this value
// (rounding up) to get the per-task range size.
#ifdef ENABLE_D
constexpr size_t kUsedThreadNum = 23;
#else
constexpr size_t kUsedThreadNum = 8;
#endif
template <typename T>
void Compute(const ComputeParams<T> *params, const size_t start, const size_t end) {
MS_EXCEPTION_IF_NULL(params);
@@ -115,10 +120,9 @@ void ScatterNdUpdateCPUKernel::LaunchKernel(const std::vector<AddressPtr> &input
params.indices_unit_rank_ = indices_unit_rank_;
params.out_strides_ = &out_strides_;

const size_t thread_num = 24;
std::vector<Task> tasks;
size_t start = 0;
size_t once_compute_size = (num_units_ + thread_num - 1) / thread_num;
size_t once_compute_size = (num_units_ + kUsedThreadNum - 1) / kUsedThreadNum;
while (start < num_units_) {
size_t end = (start + once_compute_size) > num_units_ ? num_units_ : (start + once_compute_size);
auto task = [&params, start, end]() -> int {


+ 8
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h View File

@@ -27,6 +27,11 @@

namespace mindspore {
namespace kernel {
// Thread count shared by the sparse optimizer helpers in this header: 23 when
// ENABLE_D is defined, 8 otherwise. BucketReduceSparseGradient starts from
// this value (lowering it when there are fewer indices), and
// MultiThreadCompute uses it to reserve its thread vector and to size each
// thread's slice of the work.
// NOTE(review): other .cc files in this change define a constant with the
// same name; if any of them includes this header and declares it in the same
// namespace, that would be a redefinition — confirm the include graph.
#ifdef ENABLE_D
constexpr size_t kUsedThreadNum = 23;
#else
constexpr size_t kUsedThreadNum = 8;
#endif
template <typename T>
struct SparseGradient {
float *value_{nullptr};
@@ -95,7 +100,7 @@ class SparseOptimizerCPUKernel : public CPUKernel {
static void BucketReduceSparseGradient(const ReduceSparseGradientParam<T> &param) {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(param.input_grad_);
size_t thread_num = 23;
size_t thread_num = kUsedThreadNum;
if (param.input_grad_->indices_size_ < thread_num) {
thread_num = param.input_grad_->indices_size_;
}
@@ -120,11 +125,10 @@ class SparseOptimizerCPUKernel : public CPUKernel {
template <typename T>
void MultiThreadCompute(const MultiThreadComputeFunc<T> &func, MultiThreadComputeParams<T> *params,
size_t total_compute_size) const {
const size_t kThreadNum = 24;
std::vector<std::thread> threads;
threads.reserve(kThreadNum);
threads.reserve(kUsedThreadNum);
size_t start = 0;
size_t once_compute_size = (total_compute_size + kThreadNum - 1) / kThreadNum;
size_t once_compute_size = (total_compute_size + kUsedThreadNum - 1) / kUsedThreadNum;
while (start < total_compute_size) {
size_t end = (start + once_compute_size) > total_compute_size ? total_compute_size : (start + once_compute_size);
threads.emplace_back(std::thread(func, params, start, end));


+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc View File

@@ -20,7 +20,11 @@
namespace mindspore {
namespace kernel {
const size_t kUseBucketUniqueSize = 100000;
const size_t kUniqueThreadNum = 23;
// Compile-time constant chosen by the ENABLE_D macro: 23 when ENABLE_D is
// defined, 8 otherwise.
// NOTE(review): presumably the thread count for the bucketed unique path; the
// code that consumes it is outside this hunk — confirm against the full file.
#ifdef ENABLE_D
constexpr size_t kUniqueThreadNum = 23;
#else
constexpr size_t kUniqueThreadNum = 8;
#endif
void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) {
node_ = kernel_node;
CheckParam(kernel_node);


Loading…
Cancel
Save