You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

parameter_server.h 30 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_PARAMETER_SERVER_H_
  17. #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_PARAMETER_SERVER_H_
  18. #include <unistd.h>
  19. #include <unordered_map>
  20. #include <string>
  21. #include <iostream>
  22. #include <memory>
  23. #include <vector>
  24. #include <mutex>
  25. #include <condition_variable>
  26. #include <thread>
  27. #include <cmath>
  28. #include <random>
  29. #include <utility>
  30. #include <list>
  31. #include <map>
  32. #include "ir/func_graph.h"
  33. #include "backend/session/session_basic.h"
  34. #include "backend/session/anf_runtime_algorithm.h"
  35. #include "backend/session/session_factory.h"
  36. #include "frontend/parallel/ps/common.h"
  37. #include "frontend/parallel/ps/optimizer_info.h"
  38. #include "frontend/parallel/ps/optimizer_info_builder.h"
  39. #include "frontend/parallel/ps/util.h"
  40. #include "runtime/device/cpu/kernel_select_cpu.h"
  41. #include "utils/ms_context.h"
  42. #include "backend/kernel_compiler/kernel.h"
  43. #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
  44. #include "backend/kernel_compiler/cpu/ps/pserver_kernel.h"
  45. #include "backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h"
  46. #include "backend/kernel_compiler/cpu/ps/sparse_apply_lazy_adam_ps_kernel.h"
  47. #include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"
  48. #include "backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h"
  49. #include "backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h"
  50. namespace mindspore {
  51. namespace parallel {
  52. namespace ps {
  53. using mindspore::kernel::ps::PServerKernel;
  54. using AnfAlgo = session::AnfRuntimeAlgorithm;
  55. template <typename T>
  56. class ParameterServer {
  57. public:
  58. static ParameterServer &GetInstance() {
  59. static ParameterServer instance;
  60. return instance;
  61. }
  62. void Run(const FuncGraphPtr &func_graph);
  63. private:
  64. ParameterServer()
  65. : pserver_num_(0),
  66. worker_num_(0),
  67. rank_id_(0),
  68. grad_accum_count_(0),
  69. ps_(new ::ps::KVServer<T>(0)),
  70. handler_(nullptr),
  71. func_graph_(nullptr),
  72. sess_(nullptr),
  73. running_(true),
  74. thread_(nullptr) {}
  75. ~ParameterServer() = default;
  76. ParameterServer(const ParameterServer &) = delete;
  77. ParameterServer &operator=(const ParameterServer &) = delete;
  78. class ServerHandler {
  79. public:
  80. explicit ServerHandler(ParameterServer *ps) : ps_(ps) {}
  81. void Init();
  82. void operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVServer<T> *server);
  83. private:
  84. void HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  85. void HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  86. void HandleInitWeights(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  87. void HandleInitWeightToOptimId(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  88. ::ps::KVPairs<T> *res);
  89. void HandleInitInputsShape(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  90. void HandleInitEmbeddings(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  91. void HandleCheckReadyForPush(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  92. void HandleCheckReadyForPull(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  93. void HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  94. void HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  95. ParameterServer *ps_;
  96. typedef void (ServerHandler::*RequestHandler)(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  97. ::ps::KVPairs<T> *res);
  98. std::unordered_map<int, RequestHandler> handlers_;
  99. std::unordered_map<Key, bool> init_weights_;
  100. std::unordered_map<Key, bool> init_weight_to_optim_;
  101. std::unordered_map<Key, bool> init_optim_info_;
  102. };
  103. bool Init(const FuncGraphPtr &func_graph);
  104. void InitOptimInfoBuilders();
  105. void InitWeightKeyToOptims(const Key &key, const int &optim_id);
  106. void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths);
  107. void InitWeight(const Key &key, const WeightPtr &weight);
  108. void InitGrad(const Key &key, const GradPtr &grad);
  109. void InitEmbeddingTable(const Key &key,
  110. const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes);
  111. bool HasWeight(const Key &key);
  112. void Finalize();
  113. void UpdateWeights();
  114. void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths);
  115. WeightPtr weight(const Key &key);
  116. void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs<T> *res);
  117. int SumOfShapes(const std::vector<int> &shapes) const;
  118. bool ReadyForUpdateWeights();
  119. bool ReadyForPush(const Key &key);
  120. bool ReadyForPull(const Key &key);
  121. void ResetGradAccumCount();
  122. const CNodePtr GetCNode(const std::string &name) const;
  123. std::mutex &mutex();
  124. void GetEmbeddingTableParamPtr();
  125. void SyncEmbeddingTables();
  126. size_t pserver_num_;
  127. size_t worker_num_;
  128. size_t rank_id_;
  129. size_t grad_accum_count_;
  130. std::unique_ptr<::ps::KVServer<T>> ps_;
  131. std::unique_ptr<ServerHandler> handler_;
  132. FuncGraphPtr func_graph_;
  133. std::shared_ptr<session::SessionBasic> sess_;
  134. bool running_;
  135. std::unordered_map<Key, std::shared_ptr<PServerKernel>> optimizers_;
  136. std::unordered_map<Key, InputsShapePtr> optim_inputs_shape_;
  137. std::unordered_map<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
  138. std::unordered_map<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
  139. std::unordered_map<Key, std::string> weight_key_to_optims_;
  140. std::unordered_map<Key, std::string> weight_key_to_optim_op_;
  141. std::unordered_map<Key, WeightPtr> weights_;
  142. std::unordered_map<Key, bool> is_embedding_;
  143. std::unordered_map<Key, WeightPtr> grads_;
  144. std::unordered_map<Key, size_t> grads_accum_counter_;
  145. std::unordered_map<Key, std::shared_ptr<PServerKernel>> embedding_lookup_ops_;
  146. std::unordered_map<Key, uint64_t> tokens_;
  147. std::mutex mutex_;
  148. std::condition_variable apply_grads_cv_;
  149. std::unique_ptr<std::thread> thread_;
  150. std::map<Key, ParameterPtr> embedding_tables_;
  151. friend class ServerHandler;
  152. };
  153. class FuncGraph;
  154. template <typename T>
  155. void ParameterServer<T>::ServerHandler::operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  156. ::ps::KVServer<T> *server) {
  157. ::ps::KVPairs<T> res;
  158. if (handlers_.count(req_meta.cmd) > 0) {
  159. auto &handler_ptr = handlers_[req_meta.cmd];
  160. (this->*handler_ptr)(req_meta, req_data, &res);
  161. } else if (req_meta.push) {
  162. HandlePushReq(req_meta, req_data, &res);
  163. } else {
  164. HandlePullReq(req_meta, req_data, &res);
  165. }
  166. server->Response(req_meta, res);
  167. }
  168. template <typename T>
  169. void ParameterServer<T>::ServerHandler::Init() {
  170. handlers_[kInitWeightsCmd] = &ServerHandler::HandleInitWeights;
  171. handlers_[kInitWeightToOptimIdCmd] = &ServerHandler::HandleInitWeightToOptimId;
  172. handlers_[kInitOptimInputsShapeCmd] = &ServerHandler::HandleInitInputsShape;
  173. handlers_[kInitEmbeddingsCmd] = &ServerHandler::HandleInitEmbeddings;
  174. handlers_[kCheckReadyForPushCmd] = &ServerHandler::HandleCheckReadyForPush;
  175. handlers_[kCheckReadyForPullCmd] = &ServerHandler::HandleCheckReadyForPull;
  176. handlers_[kEmbeddingLookupCmd] = &ServerHandler::HandleEmbeddingLookup;
  177. handlers_[kFinalizeCmd] = &ServerHandler::HandleFinalize;
  178. }
  179. template <typename T>
  180. void ParameterServer<T>::ServerHandler::HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  181. ::ps::KVPairs<T> *res) {
  182. ps_->AccumGrad(req_data.keys, req_data.vals, req_data.lens);
  183. }
  184. template <typename T>
  185. void ParameterServer<T>::ServerHandler::HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  186. ::ps::KVPairs<T> *res) {
  187. res->keys = req_data.keys;
  188. ::ps::Key key = req_data.keys[0];
  189. res->vals = *(ps_->weight(key));
  190. }
  191. template <typename T>
  192. void ParameterServer<T>::ServerHandler::HandleInitWeights(const ::ps::KVMeta &req_meta,
  193. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  194. std::unique_lock<std::mutex> lock(ps_->mutex());
  195. size_t key_num = req_data.keys.size();
  196. T *data_ptr = req_data.vals.data();
  197. size_t pos = 0;
  198. for (size_t i = 0; i < key_num; i++) {
  199. Key key = req_data.keys[i];
  200. size_t data_len = req_data.lens.size() != key_num ? req_data.vals.size() / key_num : req_data.lens[i];
  201. if (!ps_->HasWeight(key)) {
  202. WeightPtr weight_ptr = std::make_shared<::ps::SArray<T>>();
  203. weight_ptr->CopyFrom(data_ptr + pos, data_len);
  204. ps_->InitWeight(key, weight_ptr);
  205. GradPtr grad_ptr = std::make_shared<::ps::SArray<T>>(data_len, 0);
  206. ps_->InitGrad(key, grad_ptr);
  207. }
  208. pos += data_len;
  209. }
  210. }
  211. template <typename T>
  212. void ParameterServer<T>::ServerHandler::HandleInitWeightToOptimId(const ::ps::KVMeta &req_meta,
  213. const ::ps::KVPairs<T> &req_data,
  214. ::ps::KVPairs<T> *res) {
  215. std::unique_lock<std::mutex> lock(ps_->mutex());
  216. size_t key_num = req_data.keys.size();
  217. for (size_t i = 0; i < key_num; i++) {
  218. Key key = req_data.keys[i];
  219. T val = req_data.vals[i];
  220. if (init_weight_to_optim_[key]) {
  221. continue;
  222. } else {
  223. init_weight_to_optim_[key] = true;
  224. }
  225. ps_->InitWeightKeyToOptims(key, val);
  226. }
  227. }
  228. template <typename T>
  229. void ParameterServer<T>::ServerHandler::HandleInitInputsShape(const ::ps::KVMeta &req_meta,
  230. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  231. std::unique_lock<std::mutex> lock(ps_->mutex());
  232. const Key &key = req_data.keys[0];
  233. if (init_optim_info_[key]) {
  234. return;
  235. } else {
  236. init_optim_info_[key] = true;
  237. }
  238. ps_->InitOptimInputsShape(req_data.keys, req_data.vals, req_data.lens);
  239. }
  240. template <typename T>
  241. void ParameterServer<T>::ServerHandler::HandleInitEmbeddings(const ::ps::KVMeta &req_meta,
  242. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  243. std::unique_lock<std::mutex> lock(ps_->mutex());
  244. const Key &key = req_data.keys[0];
  245. MS_LOG(INFO) << "Initializing embedding table for key:" << key;
  246. std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
  247. std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
  248. std::shared_ptr<std::vector<size_t>> input_shape = std::make_shared<std::vector<size_t>>();
  249. std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>();
  250. std::shared_ptr<std::vector<size_t>> output_shape = std::make_shared<std::vector<size_t>>();
  251. shapes->push_back(input_shape);
  252. shapes->push_back(indices_shape);
  253. shapes->push_back(output_shape);
  254. const Lengths &lens = req_data.lens;
  255. size_t index = 0;
  256. for (int i = 0; i < lens[0]; i++) {
  257. input_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  258. }
  259. for (int j = 0; j < lens[1]; j++) {
  260. indices_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  261. }
  262. for (int k = 0; k < lens[2]; k++) {
  263. output_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  264. }
  265. ps_->InitEmbeddingTable(key, shapes);
  266. }
  267. template <typename T>
  268. void ParameterServer<T>::ServerHandler::HandleCheckReadyForPush(const ::ps::KVMeta &req_meta,
  269. const ::ps::KVPairs<T> &req_data,
  270. ::ps::KVPairs<T> *res) {
  271. const Key &key = req_data.keys[0];
  272. bool ready = ps_->ReadyForPush(key);
  273. res->keys.push_back(key);
  274. res->vals.push_back(ready);
  275. }
  276. template <typename T>
  277. void ParameterServer<T>::ServerHandler::HandleCheckReadyForPull(const ::ps::KVMeta &req_meta,
  278. const ::ps::KVPairs<T> &req_data,
  279. ::ps::KVPairs<T> *res) {
  280. const Key &key = req_data.keys[0];
  281. bool ready = ps_->ReadyForPull(key);
  282. res->keys.push_back(key);
  283. res->vals.push_back(ready);
  284. }
  285. template <typename T>
  286. void ParameterServer<T>::ServerHandler::HandleEmbeddingLookup(const ::ps::KVMeta &req_meta,
  287. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  288. const Key &key = req_data.keys[0];
  289. for (size_t i = 1; i < req_data.keys.size(); i++) {
  290. res->keys.push_back(req_data.keys[i]);
  291. }
  292. ps_->DoEmbeddingLookup(key, req_data.keys.segment(1, req_data.keys.size()), res);
  293. }
  294. template <typename T>
  295. void ParameterServer<T>::ServerHandler::HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  296. ::ps::KVPairs<T> *res) {
  297. ps_->Finalize();
  298. }
  299. template <typename T>
  300. bool ParameterServer<T>::Init(const FuncGraphPtr &func_graph) {
  301. pserver_num_ = ::ps::NumServers();
  302. worker_num_ = ::ps::NumWorkers();
  303. func_graph_ = func_graph;
  304. rank_id_ = ::ps::MyRank();
  305. handler_.reset(new ServerHandler(this));
  306. handler_->Init();
  307. InitOptimInfoBuilders();
  308. ps_->set_request_handle(*handler_);
  309. thread_.reset(new std::thread(&ParameterServer::UpdateWeights, this));
  310. GetEmbeddingTableParamPtr();
  311. return true;
  312. }
  313. template <typename T>
  314. void ParameterServer<T>::InitOptimInfoBuilders() {
  315. std::shared_ptr<OptimizerInfoBuilder> momentum_info_builder = std::make_shared<MomentumOptimInfoBuilder>();
  316. std::shared_ptr<OptimizerInfoBuilder> sparse_adam_info_builder = std::make_shared<SparseAdamOptimInfoBuilder>();
  317. std::shared_ptr<OptimizerInfoBuilder> sparse_ftrl_info_builder = std::make_shared<SparseFtrlOptimInfoBuilder>();
  318. optim_info_builders_[kApplyMomentum] = momentum_info_builder;
  319. optim_info_builders_[kSparseAdam] = sparse_adam_info_builder;
  320. optim_info_builders_[kSparseFtrl] = sparse_ftrl_info_builder;
  321. }
  322. template <typename T>
  323. void ParameterServer<T>::InitWeightKeyToOptims(const Key &key, const int &optim_id) {
  324. if (weight_key_to_optims_.count(key) > 0 || Util::optimizer_name(optim_id) == "") {
  325. return;
  326. }
  327. weight_key_to_optims_[key] = Util::optimizer_name(optim_id);
  328. weight_key_to_optim_op_[key] = Util::optimizer_node_name(optim_id);
  329. MS_LOG(INFO) << "Initializing optimizer id for key:" << key << ", optimizer name:" << weight_key_to_optims_[key]
  330. << ", optimizer op name:" << weight_key_to_optim_op_[key];
  331. }
  332. template <typename T>
  333. void ParameterServer<T>::InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths) {
  334. InputsShapePtr inputs_shape = std::make_shared<InputsShape>();
  335. int val_idx = 0;
  336. const Key &key = keys[0];
  337. MS_LOG(INFO) << "Initializing optimizer inputs shape for key:" << key;
  338. if (optim_inputs_shape_.count(key) == 0) {
  339. optim_inputs_shape_[key] = inputs_shape;
  340. }
  341. for (size_t i = 0; i < keys.size(); i++) {
  342. auto shape = std::make_shared<std::vector<size_t>>();
  343. inputs_shape->push_back(shape);
  344. int len = lengths[i];
  345. for (int j = 0; j < len; j++) {
  346. shape->push_back(values[val_idx++]);
  347. }
  348. }
  349. if (weight_key_to_optims_.count(key) > 0) {
  350. const std::string &optim_name = weight_key_to_optims_[key];
  351. const std::string &optim_op_name = weight_key_to_optim_op_[key];
  352. if (optimizers_.count(key) == 0 && optim_inputs_shape_.count(key) > 0) {
  353. const CNodePtr cnode = GetCNode(optim_op_name);
  354. MS_EXCEPTION_IF_NULL(cnode);
  355. if (optim_name == kSparseAdam) {
  356. std::shared_ptr<PServerKernel> optimizer =
  357. std::make_shared<kernel::ps::SparseApplyAdamPSKernel>(rank_id_, pserver_num_, worker_num_);
  358. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  359. optimizers_[key] = optimizer;
  360. } else if (optim_name == kSparseLazyAdam) {
  361. std::shared_ptr<PServerKernel> optimizer =
  362. std::make_shared<kernel::ps::SparseApplyLazyAdamPSKernel>(rank_id_, pserver_num_, worker_num_);
  363. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  364. optimizers_[key] = optimizer;
  365. } else if (optim_name == kApplyMomentum) {
  366. std::shared_ptr<PServerKernel> optimizer =
  367. std::make_shared<kernel::ps::ApplyMomentumPSKernel>(rank_id_, pserver_num_, worker_num_);
  368. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  369. optimizers_[key] = optimizer;
  370. } else if (optim_name == kSparseFtrl) {
  371. std::shared_ptr<PServerKernel> optimizer =
  372. std::make_shared<kernel::ps::SparseApplyFtrlPSKernel>(rank_id_, pserver_num_, worker_num_);
  373. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  374. optimizers_[key] = optimizer;
  375. }
  376. }
  377. }
  378. }
  379. template <typename T>
  380. const CNodePtr ParameterServer<T>::GetCNode(const std::string &name) const {
  381. std::list<CNodePtr> cnodes = func_graph_->GetOrderedCnodes();
  382. for (CNodePtr cnode : cnodes) {
  383. std::string fullname = cnode->fullname_with_scope();
  384. if (fullname.find(name) != std::string::npos && fullname.find("Push") != std::string::npos) {
  385. return cnode;
  386. }
  387. }
  388. return nullptr;
  389. }
  390. template <typename T>
  391. void ParameterServer<T>::InitWeight(const Key &key, const WeightPtr &weight) {
  392. if ((weights_.count(key) == 0) || (is_embedding_[key] && weights_.count(key) != 0)) {
  393. MS_LOG(INFO) << "Initializing weight for key " << key << ", server rank " << rank_id_;
  394. weights_[key] = weight;
  395. tokens_[key] = 0;
  396. is_embedding_[key] = false;
  397. }
  398. }
  399. template <typename T>
  400. void ParameterServer<T>::InitGrad(const Key &key, const GradPtr &grad) {
  401. if (grads_.count(key) == 0) {
  402. grads_[key] = grad;
  403. grads_accum_counter_[key] = 0;
  404. }
  405. }
  406. template <typename T>
  407. void ParameterServer<T>::InitEmbeddingTable(
  408. const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
  409. if (weights_.count(key) == 0) {
  410. std::shared_ptr<PServerKernel> lookup =
  411. std::make_shared<kernel::ps::EmbeddingLookUpPSKernel>(rank_id_, pserver_num_, worker_num_);
  412. lookup->InitKernel(shapes);
  413. embedding_lookup_ops_[key] = lookup;
  414. // Init embedding weight
  415. const std::vector<size_t> &input_shapes = lookup->input_sizes();
  416. size_t total_dims = 1;
  417. for (auto shape : input_shapes) {
  418. total_dims *= shape;
  419. }
  420. WeightPtr embedding = std::make_shared<Weight>(total_dims, 0);
  421. T *embedding_data = embedding->data();
  422. std::default_random_engine engine;
  423. std::normal_distribution<float> random(0, 0.01);
  424. for (size_t i = 0; i < total_dims; i++) {
  425. embedding_data[i] = random(engine);
  426. }
  427. weights_[key] = embedding;
  428. tokens_[key] = 0;
  429. is_embedding_[key] = true;
  430. grads_accum_counter_[key] = 0;
  431. }
  432. }
  433. template <typename T>
  434. bool ParameterServer<T>::HasWeight(const Key &key) {
  435. return (weights_.count(key) > 0 && !is_embedding_.count(key));
  436. }
  437. template <typename T>
  438. void ParameterServer<T>::Finalize() {
  439. running_ = false;
  440. apply_grads_cv_.notify_one();
  441. SyncEmbeddingTables();
  442. }
  443. template <typename T>
  444. void ParameterServer<T>::UpdateWeights() {
  445. while (true) {
  446. std::unique_lock<std::mutex> lock(mutex_);
  447. apply_grads_cv_.wait(lock, [this] { return this->ReadyForUpdateWeights() || !running_; });
  448. if (!running_) {
  449. break;
  450. }
  451. for (auto iter = weights_.begin(); iter != weights_.end(); iter++) {
  452. Key key = iter->first;
  453. WeightPtr weight_ptr = iter->second;
  454. std::shared_ptr<PServerKernel> optimizer = nullptr;
  455. if (weight_key_to_optims_.count(key) > 0) {
  456. optimizer = optimizers_[key];
  457. }
  458. MS_EXCEPTION_IF_NULL(optimizer);
  459. std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
  460. if (optim_info == nullptr) {
  461. continue;
  462. }
  463. const std::vector<kernel::AddressPtr> &inputs = optim_info->inputs();
  464. const std::vector<kernel::AddressPtr> &workspaces = optim_info->workspaces();
  465. const std::vector<kernel::AddressPtr> &outputs = optim_info->outputs();
  466. optim_info->ComputeMean(worker_num_);
  467. optimizer->Execute(inputs, workspaces, outputs);
  468. optim_info->Reset();
  469. if (!is_embedding_[key]) {
  470. tokens_[key] = worker_num_;
  471. }
  472. }
  473. ResetGradAccumCount();
  474. }
  475. }
  476. template <typename T>
  477. void ParameterServer<T>::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) {
  478. std::unique_lock<std::mutex> lock(mutex_);
  479. const Key &key = keys[0];
  480. std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
  481. // Create or update the optimizer info
  482. if (optim_info == nullptr) {
  483. const std::shared_ptr<OptimizerInfoBuilder> &builder = optim_info_builders_[weight_key_to_optims_[key]];
  484. std::shared_ptr<kernel::ps::PServerKernel> pserver_kernel = optimizers_[key];
  485. if (pserver_kernel == nullptr) {
  486. MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key];
  487. }
  488. MS_EXCEPTION_IF_NULL(pserver_kernel);
  489. OptimizerInfo *optim =
  490. builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_);
  491. optim_info.reset(optim);
  492. optim_infos_[key] = optim_info;
  493. } else {
  494. optim_info->Update(values, lengths);
  495. optim_info->Accumulate(values, lengths);
  496. }
  497. grads_accum_counter_[key] += 1;
  498. if (grads_accum_counter_[key] == worker_num_) {
  499. grad_accum_count_++;
  500. }
  501. if (ReadyForUpdateWeights()) {
  502. apply_grads_cv_.notify_one();
  503. }
  504. }
  505. template <typename T>
  506. WeightPtr ParameterServer<T>::weight(const Key &key) {
  507. std::unique_lock<std::mutex> lock(mutex_);
  508. if (weights_.count(key) == 0) {
  509. MS_LOG(EXCEPTION) << "Invalid weight key " << key;
  510. }
  511. WeightPtr weight_ptr = weights_[key];
  512. WeightPtr copy_weight_ptr = std::make_shared<::ps::SArray<T>>(weight_ptr->size(), 0);
  513. copy_weight_ptr->CopyFrom(weight_ptr->data(), weight_ptr->size());
  514. tokens_[key] -= 1;
  515. return copy_weight_ptr;
  516. }
  517. template <typename T>
  518. void ParameterServer<T>::DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs<T> *res) {
  519. std::unique_lock<std::mutex> lock(mutex_);
  520. if (weights_.count(key) == 0) {
  521. MS_LOG(ERROR) << "Invalid embedding table key " << key;
  522. return;
  523. }
  524. if (embedding_lookup_ops_.count(key) == 0) {
  525. MS_LOG(ERROR) << "Invalid embedding lookup op key " << key;
  526. return;
  527. }
  528. WeightPtr table_ptr = weights_[key];
  529. std::shared_ptr<PServerKernel> table_lookup_op = embedding_lookup_ops_[key];
  530. // Update shapes of lookup operator
  531. std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
  532. std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
  533. std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>();
  534. indices_shape->emplace_back(lookup_ids.size());
  535. shapes->push_back(indices_shape);
  536. table_lookup_op->ReInit(shapes);
  537. const std::vector<size_t> output_shapes = table_lookup_op->output_sizes();
  538. std::vector<kernel::AddressPtr> inputs;
  539. AddressPtr embedding_table = std::make_shared<kernel::Address>();
  540. AddressPtr indices = std::make_shared<kernel::Address>();
  541. inputs.push_back(embedding_table);
  542. inputs.push_back(indices);
  543. embedding_table->addr = table_ptr->data();
  544. embedding_table->size = table_ptr->size() * sizeof(T);
  545. std::unique_ptr<int[]> tmp_ids(new int[lookup_ids.size()]);
  546. for (size_t i = 0; i < lookup_ids.size(); i++) {
  547. tmp_ids[i] = static_cast<int>(lookup_ids[i]);
  548. }
  549. indices->addr = tmp_ids.get();
  550. indices->size = lookup_ids.size() * sizeof(int);
  551. std::vector<kernel::AddressPtr> workspaces;
  552. std::vector<kernel::AddressPtr> outputs;
  553. AddressPtr output = std::make_shared<kernel::Address>();
  554. std::shared_ptr<Values> addr = std::make_shared<Values>(output_shapes[0] / sizeof(T), 0);
  555. output->addr = addr->data();
  556. output->size = output_shapes[0];
  557. outputs.push_back(output);
  558. table_lookup_op->Execute(inputs, workspaces, outputs);
  559. res->vals = *addr;
  560. res->lens.push_back(res->vals.size());
  561. }
  562. template <typename T>
  563. int ParameterServer<T>::SumOfShapes(const std::vector<int> &shapes) const {
  564. int sum = 1;
  565. for (auto shape : shapes) {
  566. sum *= shape;
  567. }
  568. return sum;
  569. }
  570. template <typename T>
  571. inline bool ParameterServer<T>::ReadyForUpdateWeights() {
  572. return grads_accum_counter_.size() > 0 && grad_accum_count_ == grads_accum_counter_.size();
  573. }
  574. template <typename T>
  575. inline bool ParameterServer<T>::ReadyForPush(const Key &key) {
  576. std::unique_lock<std::mutex> lock(mutex_);
  577. if (weights_.empty()) {
  578. MS_LOG(EXCEPTION) << "The weights in server is empty. Many reasons could cause this: 1.The Worker didn't send "
  579. "kInitWeightsCmd command. 2.The Server failed to initialize weights.";
  580. }
  581. return grad_accum_count_ < weights_.size() && tokens_[key] <= 0;
  582. }
  583. template <typename T>
  584. inline bool ParameterServer<T>::ReadyForPull(const Key &key) {
  585. std::unique_lock<std::mutex> lock(mutex_);
  586. if (tokens_.count(key) == 0 || weights_[key] == 0) {
  587. MS_LOG(EXCEPTION) << "Invalid weight key " << key;
  588. }
  589. return tokens_[key] > 0;
  590. }
  591. template <typename T>
  592. inline void ParameterServer<T>::ResetGradAccumCount() {
  593. grad_accum_count_ = 0;
  594. for (auto iter = grads_accum_counter_.begin(); iter != grads_accum_counter_.end(); iter++) {
  595. grads_accum_counter_[iter->first] = 0;
  596. }
  597. }
  598. template <typename T>
  599. inline std::mutex &ParameterServer<T>::mutex() {
  600. return mutex_;
  601. }
  602. template <typename T>
  603. void ParameterServer<T>::GetEmbeddingTableParamPtr() {
  604. MS_EXCEPTION_IF_NULL(func_graph_);
  605. auto cnodes = func_graph_->GetOrderedCnodes();
  606. Key count = 0;
  607. for (auto cnode : cnodes) {
  608. std::string cnode_name = AnfAlgo::GetCNodeName(cnode);
  609. if (cnode_name == kEmbeddingLookupOpName) {
  610. auto embedding_table = AnfAlgo::GetInputNode(cnode, 0);
  611. MS_EXCEPTION_IF_NULL(embedding_table);
  612. MS_LOG(INFO) << "Embedding table name is " << embedding_table->fullname_with_scope() << ", key is " << count;
  613. embedding_tables_.insert(std::make_pair(count, embedding_table->cast<ParameterPtr>()));
  614. count++;
  615. }
  616. }
  617. }
  618. template <typename T>
  619. void ParameterServer<T>::SyncEmbeddingTables() {
  620. for (auto embedding_table : embedding_tables_) {
  621. Key key = embedding_table.first;
  622. if (embedding_lookup_ops_.count(key) == 0) {
  623. MS_LOG(EXCEPTION) << "Can't find look up PS kernel for key " << key;
  624. }
  625. auto lookup = embedding_lookup_ops_[key];
  626. const std::vector<size_t> &input_shapes = lookup->input_sizes();
  627. std::vector<int> new_tensor_shape(input_shapes.begin(), input_shapes.end());
  628. tensor::TensorPtr new_tensor = std::make_shared<tensor::Tensor>(kNumberTypeFloat32, new_tensor_shape);
  629. float *new_tensor_data_ptr = reinterpret_cast<float *>(new_tensor->data_c());
  630. size_t new_tensor_size = static_cast<size_t>(new_tensor->data().nbytes());
  631. size_t embedding_table_size = weights_[key]->size() * sizeof(float);
  632. if (new_tensor_size != embedding_table_size) {
  633. MS_LOG(EXCEPTION) << "Shape of embedding table can't match. New tensor size:" << new_tensor_size
  634. << ", embedding_table size:" << embedding_table_size;
  635. }
  636. int ret = memcpy_s(new_tensor_data_ptr, new_tensor_size, weights_[key]->data(), embedding_table_size);
  637. if (ret != 0) {
  638. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  639. }
  640. auto paramter_tensor_ptr = embedding_table.second->default_param();
  641. MS_EXCEPTION_IF_NULL(paramter_tensor_ptr);
  642. paramter_tensor_ptr->cast<tensor::TensorPtr>()->AssignValue(*new_tensor);
  643. }
  644. }
  645. template <typename T>
  646. void ParameterServer<T>::Run(const FuncGraphPtr &func_graph) {
  647. ::ps::Start(0);
  648. if (!::ps::IsServer()) {
  649. std::cout << "This is not ther Server" << std::endl;
  650. return;
  651. }
  652. Init(func_graph);
  653. Util::SetRankId(rank_id_);
  654. thread_->join();
  655. ::ps::Finalize(0, true);
  656. }
  657. } // namespace ps
  658. } // namespace parallel
  659. } // namespace mindspore
  660. #endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_PARAMETER_SERVER_H_