|
|
|
@@ -27,6 +27,11 @@ |
|
|
|
|
|
|
|
namespace mindspore { |
|
|
|
namespace kernel { |
|
|
|
// Thread count shared by this file's multi-threaded helpers:
//   - BucketReduceSparseGradient starts from this value and lowers it when
//     the input gradient has fewer indices than threads.
//   - MultiThreadCompute reserves this many thread slots and divides the
//     total work by it (rounding up) to size each thread's chunk.
// Builds that define ENABLE_D use 23; every other build uses 8.
#ifdef ENABLE_D
constexpr size_t kUsedThreadNum = 23;
#else
constexpr size_t kUsedThreadNum = 8;
#endif
|
|
|
template <typename T> |
|
|
|
struct SparseGradient { |
|
|
|
float *value_{nullptr}; |
|
|
|
@@ -95,7 +100,7 @@ class SparseOptimizerCPUKernel : public CPUKernel { |
|
|
|
static void BucketReduceSparseGradient(const ReduceSparseGradientParam<T> ¶m) { |
|
|
|
MS_LOG(DEBUG) << "Start"; |
|
|
|
MS_EXCEPTION_IF_NULL(param.input_grad_); |
|
|
|
size_t thread_num = 23; |
|
|
|
size_t thread_num = kUsedThreadNum; |
|
|
|
if (param.input_grad_->indices_size_ < thread_num) { |
|
|
|
thread_num = param.input_grad_->indices_size_; |
|
|
|
} |
|
|
|
@@ -120,11 +125,10 @@ class SparseOptimizerCPUKernel : public CPUKernel { |
|
|
|
template <typename T> |
|
|
|
void MultiThreadCompute(const MultiThreadComputeFunc<T> &func, MultiThreadComputeParams<T> *params, |
|
|
|
size_t total_compute_size) const { |
|
|
|
const size_t kThreadNum = 24; |
|
|
|
std::vector<std::thread> threads; |
|
|
|
threads.reserve(kThreadNum); |
|
|
|
threads.reserve(kUsedThreadNum); |
|
|
|
size_t start = 0; |
|
|
|
size_t once_compute_size = (total_compute_size + kThreadNum - 1) / kThreadNum; |
|
|
|
size_t once_compute_size = (total_compute_size + kUsedThreadNum - 1) / kUsedThreadNum; |
|
|
|
while (start < total_compute_size) { |
|
|
|
size_t end = (start + once_compute_size) > total_compute_size ? total_compute_size : (start + once_compute_size); |
|
|
|
threads.emplace_back(std::thread(func, params, start, end)); |
|
|
|
|