Modify the thread count used by some CPU kernels

5 years ago · 853b3a3f30
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc
@@ -24,7 +24,11 @@
 namespace mindspore {
 namespace kernel {
 constexpr size_t kAdamDeltaInputSize = 9;
 // Build-dependent thread-count constant: 23 when ENABLE_D is defined, 8 otherwise.
 // NOTE(review): its use is not visible in this hunk; presumably it bounds the
 // number of threads this kernel spawns -- confirm against the rest of the file.
 #ifdef ENABLE_D
 constexpr size_t kUsedThreadNum = 23;
 #else
 constexpr size_t kUsedThreadNum = 8;
 #endif
 namespace {
 struct ComputeParam {
  float *delta_{nullptr};
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc
@@ -22,6 +22,11 @@
 namespace mindspore {
 namespace kernel {
 namespace {
 // Upper bound on the threads used by EmbeddingLookUpCPUKernel::LaunchKernel:
 // 23 when ENABLE_D is defined, 8 otherwise. LaunchKernel clamps thread_num to
 // this value and uses it as the size of its std::thread array.
 #ifdef ENABLE_D
 constexpr size_t kUsedThreadNum = 23;
 #else
 constexpr size_t kUsedThreadNum = 8;
 #endif
 template <typename T>
 void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t indices_lens,
                     size_t outer_dim_size, T offset, size_t first_dim_size) {
@@ -92,10 +97,9 @@ void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  auto indices_addr = reinterpret_cast<T *>(inputs[1]->addr);
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  const size_t kMaxThreadNum = 16;
  size_t thread_num = indices_lens_ / 10000 + 1;
  thread_num = thread_num > kMaxThreadNum ? kMaxThreadNum : thread_num;
  std::thread threads[kMaxThreadNum];
  thread_num = thread_num > kUsedThreadNum ? kUsedThreadNum : thread_num;
  std::thread threads[kUsedThreadNum];
  size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num;
  size_t i;
  size_t task_offset = 0;
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc
@@ -22,6 +22,11 @@
 namespace mindspore {
 namespace kernel {
 namespace {
 // Number of slices the work is divided into by ScatterNdUpdateCPUKernel::LaunchKernel:
 // 23 when ENABLE_D is defined, 8 otherwise. LaunchKernel derives once_compute_size
 // as ceil(num_units_ / kUsedThreadNum).
 #ifdef ENABLE_D
 constexpr size_t kUsedThreadNum = 23;
 #else
 constexpr size_t kUsedThreadNum = 8;
 #endif
 template <typename T>
 void Compute(const ComputeParams<T> *params, const size_t start, const size_t end) {
  MS_EXCEPTION_IF_NULL(params);
@@ -115,10 +120,9 @@ void ScatterNdUpdateCPUKernel::LaunchKernel(const std::vector<AddressPtr> &input
  params.indices_unit_rank_ = indices_unit_rank_;
  params.out_strides_ = &out_strides_;

  const size_t thread_num = 24;
  std::vector<Task> tasks;
  size_t start = 0;
  size_t once_compute_size = (num_units_ + thread_num - 1) / thread_num;
  size_t once_compute_size = (num_units_ + kUsedThreadNum - 1) / kUsedThreadNum;
  while (start < num_units_) {
    size_t end = (start + once_compute_size) > num_units_ ? num_units_ : (start + once_compute_size);
    auto task = [&params, start, end]() -> int {
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h
@@ -27,6 +27,11 @@

 namespace mindspore {
 namespace kernel {
 // Thread-count constant shared by the sparse optimizer kernels: 23 when ENABLE_D
 // is defined, 8 otherwise. BucketReduceSparseGradient uses it as the initial
 // thread_num (lowered when there are fewer indices), and MultiThreadCompute uses
 // it both to reserve the thread vector and to size each thread's slice.
 // NOTE(review): this lives at namespace scope in a header; confirm that no
 // translation unit including this header declares another kUsedThreadNum that
 // would clash with it.
 #ifdef ENABLE_D
 constexpr size_t kUsedThreadNum = 23;
 #else
 constexpr size_t kUsedThreadNum = 8;
 #endif
 template <typename T>
 struct SparseGradient {
  float *value_{nullptr};
@@ -95,7 +100,7 @@ class SparseOptimizerCPUKernel : public CPUKernel {
  static void BucketReduceSparseGradient(const ReduceSparseGradientParam<T> &param) {
    MS_LOG(DEBUG) << "Start";
    MS_EXCEPTION_IF_NULL(param.input_grad_);
    size_t thread_num = 23;
    size_t thread_num = kUsedThreadNum;
    if (param.input_grad_->indices_size_ < thread_num) {
      thread_num = param.input_grad_->indices_size_;
    }
@@ -120,11 +125,10 @@ class SparseOptimizerCPUKernel : public CPUKernel {
  template <typename T>
  void MultiThreadCompute(const MultiThreadComputeFunc<T> &func, MultiThreadComputeParams<T> *params,
                          size_t total_compute_size) const {
    const size_t kThreadNum = 24;
    std::vector<std::thread> threads;
    threads.reserve(kThreadNum);
    threads.reserve(kUsedThreadNum);
    size_t start = 0;
    size_t once_compute_size = (total_compute_size + kThreadNum - 1) / kThreadNum;
    size_t once_compute_size = (total_compute_size + kUsedThreadNum - 1) / kUsedThreadNum;
    while (start < total_compute_size) {
      size_t end = (start + once_compute_size) > total_compute_size ? total_compute_size : (start + once_compute_size);
      threads.emplace_back(std::thread(func, params, start, end));
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc
@@ -20,7 +20,11 @@
 namespace mindspore {
 namespace kernel {
 const size_t kUseBucketUniqueSize = 100000;
 const size_t kUniqueThreadNum = 23;
 // Build-dependent thread-count constant for the Unique kernel: 23 when ENABLE_D
 // is defined, 8 otherwise.
 // NOTE(review): its use is not visible in this hunk -- confirm how the kernel
 // consumes it.
 #ifdef ENABLE_D
 constexpr size_t kUniqueThreadNum = 23;
 #else
 constexpr size_t kUniqueThreadNum = 8;
 #endif
 void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  node_ = kernel_node;
  CheckParam(kernel_node);