| @@ -876,12 +876,13 @@ bool IsWeightBoundary(const AnfNodePtr &node) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, size_t thread_num, | |||||
| void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, | |||||
| size_t total_compute_size) { | size_t total_compute_size) { | ||||
| const size_t kThreadNum = 24; | |||||
| std::vector<std::thread> threads; | std::vector<std::thread> threads; | ||||
| threads.reserve(thread_num); | |||||
| threads.reserve(kThreadNum); | |||||
| size_t start = 0; | size_t start = 0; | ||||
| size_t once_compute_size = (total_compute_size + thread_num - 1) / thread_num; | |||||
| size_t once_compute_size = (total_compute_size + kThreadNum - 1) / kThreadNum; | |||||
| while (start < total_compute_size) { | while (start < total_compute_size) { | ||||
| size_t end = (start + once_compute_size) > total_compute_size ? total_compute_size : (start + once_compute_size); | size_t end = (start + once_compute_size) > total_compute_size ? total_compute_size : (start + once_compute_size); | ||||
| threads.emplace_back(std::thread(func, params, start, end)); | threads.emplace_back(std::thread(func, params, start, end)); | ||||
| @@ -128,7 +128,7 @@ void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> | |||||
| bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json); | bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json); | ||||
| void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list); | void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list); | ||||
| bool IsWeightBoundary(const AnfNodePtr &node); | bool IsWeightBoundary(const AnfNodePtr &node); | ||||
| void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, size_t thread_num, | |||||
| void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, | |||||
| size_t total_compute_size); | size_t total_compute_size); | ||||
| } // namespace kernel | } // namespace kernel | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -155,15 +155,14 @@ bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp | |||||
| input_params.v_ = v; | input_params.v_ = v; | ||||
| input_params.beta1_ = beta1; | input_params.beta1_ = beta1; | ||||
| input_params.beta2_ = beta2; | input_params.beta2_ = beta2; | ||||
| const size_t kThreadNum = 16; | |||||
| MultiThreadCompute(ComputeMomentum, &input_params, kThreadNum, total_dim_size); | |||||
| MultiThreadCompute(ComputeMomentum, &input_params, total_dim_size); | |||||
| input_params.m_t_ = m_t; | input_params.m_t_ = m_t; | ||||
| input_params.use_nesterov_ = use_nesterov_; | input_params.use_nesterov_ = use_nesterov_; | ||||
| input_params.sparse_grad_ = unique_sparse_grad; | input_params.sparse_grad_ = unique_sparse_grad; | ||||
| input_params.var_first_dim_size_ = var_first_dim_size_; | input_params.var_first_dim_size_ = var_first_dim_size_; | ||||
| input_params.var_outer_dim_size_ = var_outer_dim_size_; | input_params.var_outer_dim_size_ = var_outer_dim_size_; | ||||
| MultiThreadCompute(ComputeAdam, &input_params, kThreadNum, unique_sparse_grad.indices_size_); | |||||
| MultiThreadCompute(ComputeAdam, &input_params, unique_sparse_grad.indices_size_); | |||||
| if (use_nesterov_) { | if (use_nesterov_) { | ||||
| input_params.m_ = input_params.m_t_; | input_params.m_ = input_params.m_t_; | ||||
| @@ -171,7 +170,7 @@ bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp | |||||
| input_params.var_ = var; | input_params.var_ = var; | ||||
| input_params.lr_ = lr; | input_params.lr_ = lr; | ||||
| input_params.epsilon_ = epsilon; | input_params.epsilon_ = epsilon; | ||||
| MultiThreadCompute(ComputeWeight, &input_params, kThreadNum, total_dim_size); | |||||
| MultiThreadCompute(ComputeWeight, &input_params, total_dim_size); | |||||
| return true; | return true; | ||||
| } | } | ||||
| } // namespace kernel | } // namespace kernel | ||||
| @@ -145,8 +145,7 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp | |||||
| input_params.sparse_grad_ = unique_sparse_grad; | input_params.sparse_grad_ = unique_sparse_grad; | ||||
| input_params.var_first_dim_size_ = var_first_dim_size_; | input_params.var_first_dim_size_ = var_first_dim_size_; | ||||
| input_params.var_outer_dim_size_ = var_outer_dim_size_; | input_params.var_outer_dim_size_ = var_outer_dim_size_; | ||||
| const size_t kThreadNum = 16; | |||||
| MultiThreadCompute(ComputeFtrl, &input_params, kThreadNum, unique_sparse_grad.indices_size_); | |||||
| MultiThreadCompute(ComputeFtrl, &input_params, unique_sparse_grad.indices_size_); | |||||
| return true; | return true; | ||||
| } | } | ||||
| } // namespace kernel | } // namespace kernel | ||||
| @@ -139,8 +139,7 @@ bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> | |||||
| input_params.sparse_grad_ = unique_sparse_grad; | input_params.sparse_grad_ = unique_sparse_grad; | ||||
| input_params.var_first_dim_size_ = var_first_dim_size_; | input_params.var_first_dim_size_ = var_first_dim_size_; | ||||
| input_params.var_outer_dim_size_ = var_outer_dim_size_; | input_params.var_outer_dim_size_ = var_outer_dim_size_; | ||||
| const size_t kThreadNum = 16; | |||||
| MultiThreadCompute(ComputeLazyAdam, &input_params, kThreadNum, unique_sparse_grad.indices_size_); | |||||
| MultiThreadCompute(ComputeLazyAdam, &input_params, unique_sparse_grad.indices_size_); | |||||
| return true; | return true; | ||||
| } | } | ||||
| } // namespace kernel | } // namespace kernel | ||||
| @@ -132,8 +132,7 @@ bool SparseApplyProximalAdagradCPUKernel::Launch(const std::vector<kernel::Addre | |||||
| input_params.sparse_grad_ = unique_sparse_grad; | input_params.sparse_grad_ = unique_sparse_grad; | ||||
| input_params.var_first_dim_size_ = var_first_dim_size_; | input_params.var_first_dim_size_ = var_first_dim_size_; | ||||
| input_params.var_outer_dim_size_ = var_outer_dim_size_; | input_params.var_outer_dim_size_ = var_outer_dim_size_; | ||||
| const size_t kThreadNum = 16; | |||||
| MultiThreadCompute(ComputeProximalAdagrad, &input_params, kThreadNum, unique_sparse_grad.indices_size_); | |||||
| MultiThreadCompute(ComputeProximalAdagrad, &input_params, unique_sparse_grad.indices_size_); | |||||
| return true; | return true; | ||||
| } | } | ||||
| } // namespace kernel | } // namespace kernel | ||||