You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

parameter_server.h 26 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_PARAMETER_SERVER_H_
  17. #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_PARAMETER_SERVER_H_
  18. #include <unistd.h>
  19. #include <unordered_map>
  20. #include <string>
  21. #include <iostream>
  22. #include <memory>
  23. #include <vector>
  24. #include <mutex>
  25. #include <condition_variable>
  26. #include <thread>
  27. #include <cmath>
  28. #include <random>
  29. #include <list>
  30. #include "ir/func_graph.h"
  31. #include "backend/session/session_basic.h"
  32. #include "backend/session/anf_runtime_algorithm.h"
  33. #include "backend/session/session_factory.h"
  34. #include "frontend/parallel/ps/common.h"
  35. #include "frontend/parallel/ps/optimizer_info.h"
  36. #include "frontend/parallel/ps/optimizer_info_builder.h"
  37. #include "frontend/parallel/ps/util.h"
  38. #include "runtime/device/cpu/kernel_select_cpu.h"
  39. #include "utils/ms_context.h"
  40. #include "backend/kernel_compiler/kernel.h"
  41. #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
  42. #include "backend/kernel_compiler/cpu/ps/pserver_kernel.h"
  43. #include "backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h"
  44. #include "backend/kernel_compiler/cpu/ps/sparse_apply_lazy_adam_ps_kernel.h"
  45. #include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"
  46. #include "backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h"
  47. #include "backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h"
  48. namespace mindspore {
  49. namespace parallel {
  50. namespace ps {
  51. using mindspore::kernel::ps::PServerKernel;
  52. template <typename T>
  53. class ParameterServer {
  54. public:
  55. static ParameterServer &GetInstance() {
  56. static ParameterServer instance;
  57. return instance;
  58. }
  59. void Run(const FuncGraphPtr &func_graph);
  60. private:
  61. ParameterServer()
  62. : pserver_num_(0),
  63. worker_num_(0),
  64. rank_id_(0),
  65. grad_accum_count_(0),
  66. ps_(new ::ps::KVServer<T>(0)),
  67. handler_(nullptr),
  68. func_graph_(nullptr),
  69. sess_(nullptr),
  70. running_(true),
  71. thread_(nullptr) {}
  72. ~ParameterServer() = default;
  73. ParameterServer(const ParameterServer &) = delete;
  74. ParameterServer &operator=(const ParameterServer &) = delete;
  75. class ServerHandler {
  76. public:
  77. explicit ServerHandler(ParameterServer *ps) : ps_(ps) {}
  78. void Init();
  79. void operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVServer<T> *server);
  80. private:
  81. void HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  82. void HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  83. void HandleInitWeights(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  84. void HandleInitWeightToOptimId(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  85. ::ps::KVPairs<T> *res);
  86. void HandleInitInputsShape(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  87. void HandleInitEmbeddings(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  88. void HandleCheckReadyForPush(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  89. void HandleCheckReadyForPull(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  90. void HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  91. void HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  92. ParameterServer *ps_;
  93. typedef void (ServerHandler::*RequestHandler)(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  94. ::ps::KVPairs<T> *res);
  95. std::unordered_map<int, RequestHandler> handlers_;
  96. std::unordered_map<Key, bool> init_weights_;
  97. std::unordered_map<Key, bool> init_weight_to_optim_;
  98. std::unordered_map<Key, bool> init_optim_info_;
  99. };
  100. bool Init(const FuncGraphPtr &func_graph);
  101. void InitOptimInfoBuilders();
  102. void InitWeightKeyToOptims(const Key &key, const int &optim_id);
  103. void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths);
  104. void InitWeight(const Key &key, const WeightPtr &weight);
  105. void InitGrad(const Key &key, const GradPtr &grad);
  106. void InitEmbeddingTable(const Key &key,
  107. const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes);
  108. void Finalize();
  109. void UpdateWeights();
  110. void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths);
  111. WeightPtr weight(const Key &key);
  112. void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs<T> *res);
  113. int SumOfShapes(const std::vector<int> &shapes) const;
  114. bool ReadyForUpdateWeights();
  115. bool ReadyForPush(const Key &key);
  116. bool ReadyForPull(const Key &key);
  117. void ResetGradAccumCount();
  118. const CNodePtr GetCNode(const std::string &name) const;
  119. std::mutex &mutex();
  120. size_t pserver_num_;
  121. size_t worker_num_;
  122. size_t rank_id_;
  123. size_t grad_accum_count_;
  124. std::unique_ptr<::ps::KVServer<T>> ps_;
  125. std::unique_ptr<ServerHandler> handler_;
  126. FuncGraphPtr func_graph_;
  127. std::shared_ptr<session::SessionBasic> sess_;
  128. bool running_;
  129. std::unordered_map<Key, std::shared_ptr<PServerKernel>> optimizers_;
  130. std::unordered_map<Key, InputsShapePtr> optim_inputs_shape_;
  131. std::unordered_map<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
  132. std::unordered_map<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
  133. std::unordered_map<Key, std::string> weight_key_to_optims_;
  134. std::unordered_map<Key, std::string> weight_key_to_optim_op_;
  135. std::unordered_map<Key, WeightPtr> weights_;
  136. std::unordered_map<Key, bool> is_embedding_;
  137. std::unordered_map<Key, WeightPtr> grads_;
  138. std::unordered_map<Key, size_t> grads_accum_counter_;
  139. std::unordered_map<Key, std::shared_ptr<PServerKernel>> embedding_lookup_ops_;
  140. std::unordered_map<Key, uint64_t> tokens_;
  141. std::mutex mutex_;
  142. std::condition_variable apply_grads_cv_;
  143. std::unique_ptr<std::thread> thread_;
  144. friend class ServerHandler;
  145. };
  146. class FuncGraph;
  147. template <typename T>
  148. void ParameterServer<T>::ServerHandler::operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  149. ::ps::KVServer<T> *server) {
  150. ::ps::KVPairs<T> res;
  151. if (handlers_.count(req_meta.cmd) > 0) {
  152. auto &handler_ptr = handlers_[req_meta.cmd];
  153. (this->*handler_ptr)(req_meta, req_data, &res);
  154. } else if (req_meta.push) {
  155. HandlePushReq(req_meta, req_data, &res);
  156. } else {
  157. HandlePullReq(req_meta, req_data, &res);
  158. }
  159. server->Response(req_meta, res);
  160. }
  161. template <typename T>
  162. void ParameterServer<T>::ServerHandler::Init() {
  163. handlers_[kInitWeightsCmd] = &ServerHandler::HandleInitWeights;
  164. handlers_[kInitWeightToOptimIdCmd] = &ServerHandler::HandleInitWeightToOptimId;
  165. handlers_[kInitOptimInputsShapeCmd] = &ServerHandler::HandleInitInputsShape;
  166. handlers_[kInitEmbeddingsCmd] = &ServerHandler::HandleInitEmbeddings;
  167. handlers_[kCheckReadyForPushCmd] = &ServerHandler::HandleCheckReadyForPush;
  168. handlers_[kCheckReadyForPullCmd] = &ServerHandler::HandleCheckReadyForPull;
  169. handlers_[kEmbeddingLookupCmd] = &ServerHandler::HandleEmbeddingLookup;
  170. handlers_[kFinalizeCmd] = &ServerHandler::HandleFinalize;
  171. }
  172. template <typename T>
  173. void ParameterServer<T>::ServerHandler::HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  174. ::ps::KVPairs<T> *res) {
  175. ps_->AccumGrad(req_data.keys, req_data.vals, req_data.lens);
  176. }
  177. template <typename T>
  178. void ParameterServer<T>::ServerHandler::HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  179. ::ps::KVPairs<T> *res) {
  180. res->keys = req_data.keys;
  181. ::ps::Key key = req_data.keys[0];
  182. res->vals = *(ps_->weight(key));
  183. }
  184. template <typename T>
  185. void ParameterServer<T>::ServerHandler::HandleInitWeights(const ::ps::KVMeta &req_meta,
  186. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  187. std::unique_lock<std::mutex> lock(ps_->mutex());
  188. size_t key_num = req_data.keys.size();
  189. T *data_ptr = req_data.vals.data();
  190. size_t pos = 0;
  191. for (size_t i = 0; i < key_num; i++) {
  192. Key key = req_data.keys[i];
  193. size_t data_len = req_data.lens.size() != key_num ? req_data.vals.size() / key_num : req_data.lens[i];
  194. WeightPtr weight_ptr = std::make_shared<::ps::SArray<T>>();
  195. weight_ptr->CopyFrom(data_ptr + pos, data_len);
  196. ps_->InitWeight(key, weight_ptr);
  197. GradPtr grad_ptr = std::make_shared<::ps::SArray<T>>(data_len, 0);
  198. ps_->InitGrad(key, grad_ptr);
  199. pos += data_len;
  200. }
  201. }
  202. template <typename T>
  203. void ParameterServer<T>::ServerHandler::HandleInitWeightToOptimId(const ::ps::KVMeta &req_meta,
  204. const ::ps::KVPairs<T> &req_data,
  205. ::ps::KVPairs<T> *res) {
  206. std::unique_lock<std::mutex> lock(ps_->mutex());
  207. size_t key_num = req_data.keys.size();
  208. for (size_t i = 0; i < key_num; i++) {
  209. Key key = req_data.keys[i];
  210. T val = req_data.vals[i];
  211. if (init_weight_to_optim_[key]) {
  212. continue;
  213. } else {
  214. init_weight_to_optim_[key] = true;
  215. }
  216. ps_->InitWeightKeyToOptims(key, val);
  217. }
  218. }
  219. template <typename T>
  220. void ParameterServer<T>::ServerHandler::HandleInitInputsShape(const ::ps::KVMeta &req_meta,
  221. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  222. std::unique_lock<std::mutex> lock(ps_->mutex());
  223. const Key &key = req_data.keys[0];
  224. if (init_optim_info_[key]) {
  225. return;
  226. } else {
  227. init_optim_info_[key] = true;
  228. }
  229. ps_->InitOptimInputsShape(req_data.keys, req_data.vals, req_data.lens);
  230. }
  231. template <typename T>
  232. void ParameterServer<T>::ServerHandler::HandleInitEmbeddings(const ::ps::KVMeta &req_meta,
  233. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  234. std::unique_lock<std::mutex> lock(ps_->mutex());
  235. const Key &key = req_data.keys[0];
  236. std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
  237. std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
  238. std::shared_ptr<std::vector<size_t>> input_shape = std::make_shared<std::vector<size_t>>();
  239. std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>();
  240. std::shared_ptr<std::vector<size_t>> output_shape = std::make_shared<std::vector<size_t>>();
  241. shapes->push_back(input_shape);
  242. shapes->push_back(indices_shape);
  243. shapes->push_back(output_shape);
  244. const Lengths &lens = req_data.lens;
  245. size_t index = 0;
  246. for (int i = 0; i < lens[0]; i++) {
  247. input_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  248. }
  249. for (int j = 0; j < lens[1]; j++) {
  250. indices_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  251. }
  252. for (int k = 0; k < lens[2]; k++) {
  253. output_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  254. }
  255. ps_->InitEmbeddingTable(key, shapes);
  256. }
  257. template <typename T>
  258. void ParameterServer<T>::ServerHandler::HandleCheckReadyForPush(const ::ps::KVMeta &req_meta,
  259. const ::ps::KVPairs<T> &req_data,
  260. ::ps::KVPairs<T> *res) {
  261. const Key &key = req_data.keys[0];
  262. bool ready = ps_->ReadyForPush(key);
  263. res->keys.push_back(key);
  264. res->vals.push_back(ready);
  265. }
  266. template <typename T>
  267. void ParameterServer<T>::ServerHandler::HandleCheckReadyForPull(const ::ps::KVMeta &req_meta,
  268. const ::ps::KVPairs<T> &req_data,
  269. ::ps::KVPairs<T> *res) {
  270. const Key &key = req_data.keys[0];
  271. bool ready = ps_->ReadyForPull(key);
  272. res->keys.push_back(key);
  273. res->vals.push_back(ready);
  274. }
  275. template <typename T>
  276. void ParameterServer<T>::ServerHandler::HandleEmbeddingLookup(const ::ps::KVMeta &req_meta,
  277. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  278. const Key &key = req_data.keys[0];
  279. for (size_t i = 0; i < req_data.keys.size(); i++) {
  280. res->keys.push_back(req_data.keys[i]);
  281. }
  282. ps_->DoEmbeddingLookup(key, req_data.keys.segment(1, req_data.keys.size()), res);
  283. }
  284. template <typename T>
  285. void ParameterServer<T>::ServerHandler::HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  286. ::ps::KVPairs<T> *res) {
  287. ps_->Finalize();
  288. }
  289. template <typename T>
  290. bool ParameterServer<T>::Init(const FuncGraphPtr &func_graph) {
  291. pserver_num_ = ::ps::NumServers();
  292. worker_num_ = ::ps::NumWorkers();
  293. func_graph_ = func_graph;
  294. rank_id_ = ::ps::MyRank();
  295. handler_.reset(new ServerHandler(this));
  296. handler_->Init();
  297. InitOptimInfoBuilders();
  298. ps_->set_request_handle(*handler_);
  299. thread_.reset(new std::thread(&ParameterServer::UpdateWeights, this));
  300. return true;
  301. }
  302. template <typename T>
  303. void ParameterServer<T>::InitOptimInfoBuilders() {
  304. std::shared_ptr<OptimizerInfoBuilder> momentum_info_builder = std::make_shared<MomentumOptimInfoBuilder>();
  305. std::shared_ptr<OptimizerInfoBuilder> sparse_adam_info_builder = std::make_shared<SparseAdamOptimInfoBuilder>();
  306. std::shared_ptr<OptimizerInfoBuilder> sparse_ftrl_info_builder = std::make_shared<SparseFtrlOptimInfoBuilder>();
  307. optim_info_builders_[kApplyMomentum] = momentum_info_builder;
  308. optim_info_builders_[kSparseAdam] = sparse_adam_info_builder;
  309. optim_info_builders_[kSparseFtrl] = sparse_ftrl_info_builder;
  310. }
  311. template <typename T>
  312. void ParameterServer<T>::InitWeightKeyToOptims(const Key &key, const int &optim_id) {
  313. if (weight_key_to_optims_.count(key) > 0 || Util::optimizer_name(optim_id) == "") {
  314. return;
  315. }
  316. weight_key_to_optims_[key] = Util::optimizer_name(optim_id);
  317. weight_key_to_optim_op_[key] = Util::optimizer_node_name(optim_id);
  318. }
  319. template <typename T>
  320. void ParameterServer<T>::InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths) {
  321. InputsShapePtr inputs_shape = std::make_shared<InputsShape>();
  322. int val_idx = 0;
  323. const Key &key = keys[0];
  324. if (optim_inputs_shape_.count(key) == 0) {
  325. optim_inputs_shape_[key] = inputs_shape;
  326. }
  327. for (size_t i = 0; i < keys.size(); i++) {
  328. auto shape = std::make_shared<std::vector<size_t>>();
  329. inputs_shape->push_back(shape);
  330. int len = lengths[i];
  331. for (int j = 0; j < len; j++) {
  332. shape->push_back(values[val_idx++]);
  333. }
  334. }
  335. if (weight_key_to_optims_.count(key) > 0) {
  336. const std::string &optim_name = weight_key_to_optims_[key];
  337. const std::string &optim_op_name = weight_key_to_optim_op_[key];
  338. if (optimizers_.count(key) == 0 && optim_inputs_shape_.count(key) > 0) {
  339. const CNodePtr cnode = GetCNode(optim_op_name);
  340. MS_EXCEPTION_IF_NULL(cnode);
  341. if (optim_name == kSparseAdam) {
  342. std::shared_ptr<PServerKernel> optimizer =
  343. std::make_shared<kernel::ps::SparseApplyAdamPSKernel>(rank_id_, pserver_num_);
  344. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  345. optimizers_[key] = optimizer;
  346. } else if (optim_name == kSparseLazyAdam) {
  347. std::shared_ptr<PServerKernel> optimizer =
  348. std::make_shared<kernel::ps::SparseApplyLazyAdamPSKernel>(rank_id_, pserver_num_);
  349. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  350. optimizers_[key] = optimizer;
  351. } else if (optim_name == kApplyMomentum) {
  352. std::shared_ptr<PServerKernel> optimizer =
  353. std::make_shared<kernel::ps::ApplyMomentumPSKernel>(rank_id_, pserver_num_);
  354. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  355. optimizers_[key] = optimizer;
  356. } else if (optim_name == kSparseFtrl) {
  357. std::shared_ptr<PServerKernel> optimizer =
  358. std::make_shared<kernel::ps::SparseApplyFtrlPSKernel>(rank_id_, pserver_num_);
  359. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  360. optimizers_[key] = optimizer;
  361. }
  362. }
  363. }
  364. }
  365. template <typename T>
  366. const CNodePtr ParameterServer<T>::GetCNode(const std::string &name) const {
  367. std::list<CNodePtr> cnodes = func_graph_->GetOrderedCnodes();
  368. for (CNodePtr cnode : cnodes) {
  369. std::string fullname = cnode->fullname_with_scope();
  370. if (fullname.find(name) != std::string::npos && fullname.find("Push") != std::string::npos) {
  371. return cnode;
  372. }
  373. }
  374. return nullptr;
  375. }
  376. template <typename T>
  377. void ParameterServer<T>::InitWeight(const Key &key, const WeightPtr &weight) {
  378. MS_LOG(INFO) << "Initializing weight for key " << key;
  379. if ((weights_.count(key) == 0) || (is_embedding_[key] && weights_.count(key) != 0)) {
  380. weights_[key] = weight;
  381. tokens_[key] = 0;
  382. is_embedding_[key] = false;
  383. }
  384. }
  385. template <typename T>
  386. void ParameterServer<T>::InitGrad(const Key &key, const GradPtr &grad) {
  387. if (grads_.count(key) == 0) {
  388. grads_[key] = grad;
  389. grads_accum_counter_[key] = 0;
  390. }
  391. }
  392. template <typename T>
  393. void ParameterServer<T>::InitEmbeddingTable(
  394. const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
  395. MS_LOG(INFO) << "Initializing embedding table for key " << key;
  396. std::shared_ptr<PServerKernel> lookup = std::make_shared<kernel::ps::EmbeddingLookUpPSKernel>(rank_id_, pserver_num_);
  397. lookup->InitKernel(shapes);
  398. embedding_lookup_ops_[key] = lookup;
  399. // Init embedding weight
  400. const std::vector<size_t> &input_shapes = lookup->input_sizes();
  401. size_t total_dims = 1;
  402. for (auto shape : input_shapes) {
  403. total_dims *= shape;
  404. }
  405. WeightPtr embedding = std::make_shared<Weight>(total_dims, 0);
  406. T *embedding_data = embedding->data();
  407. std::default_random_engine engine;
  408. std::normal_distribution<float> random(0, 0.01);
  409. for (size_t i = 0; i < total_dims; i++) {
  410. embedding_data[i] = random(engine);
  411. }
  412. weights_[key] = embedding;
  413. tokens_[key] = 0;
  414. is_embedding_[key] = true;
  415. grads_accum_counter_[key] = 0;
  416. }
  417. template <typename T>
  418. void ParameterServer<T>::Finalize() {
  419. running_ = false;
  420. apply_grads_cv_.notify_one();
  421. }
  422. template <typename T>
  423. void ParameterServer<T>::UpdateWeights() {
  424. while (true) {
  425. std::unique_lock<std::mutex> lock(mutex_);
  426. apply_grads_cv_.wait(lock, [this] { return this->ReadyForUpdateWeights() || !running_; });
  427. if (!running_) {
  428. break;
  429. }
  430. for (auto iter = weights_.begin(); iter != weights_.end(); iter++) {
  431. Key key = iter->first;
  432. WeightPtr weight_ptr = iter->second;
  433. std::shared_ptr<PServerKernel> optimizer = nullptr;
  434. if (weight_key_to_optims_.count(key) > 0) {
  435. optimizer = optimizers_[key];
  436. }
  437. MS_EXCEPTION_IF_NULL(optimizer);
  438. std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
  439. if (optim_info == nullptr) {
  440. continue;
  441. }
  442. const std::vector<kernel::AddressPtr> &inputs = optim_info->inputs();
  443. const std::vector<kernel::AddressPtr> &workspaces = optim_info->workspaces();
  444. const std::vector<kernel::AddressPtr> &outputs = optim_info->outputs();
  445. optim_info->ComputeMean(worker_num_);
  446. optimizer->Execute(inputs, workspaces, outputs);
  447. optim_info->Reset();
  448. if (!is_embedding_[key]) {
  449. tokens_[key] = worker_num_;
  450. }
  451. }
  452. ResetGradAccumCount();
  453. }
  454. }
  455. template <typename T>
  456. void ParameterServer<T>::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) {
  457. std::unique_lock<std::mutex> lock(mutex_);
  458. const Key &key = keys[0];
  459. std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
  460. // Create or update the optimizer info
  461. if (optim_info == nullptr) {
  462. const std::shared_ptr<OptimizerInfoBuilder> &builder = optim_info_builders_[weight_key_to_optims_[key]];
  463. std::shared_ptr<kernel::ps::PServerKernel> pserver_kernel = optimizers_[key];
  464. if (pserver_kernel == nullptr) {
  465. MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key];
  466. }
  467. MS_EXCEPTION_IF_NULL(pserver_kernel);
  468. OptimizerInfo *optim =
  469. builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_);
  470. optim_info.reset(optim);
  471. optim_infos_[key] = optim_info;
  472. } else {
  473. optim_info->Update(values, lengths);
  474. optim_info->Accumulate(values, lengths);
  475. }
  476. grads_accum_counter_[key] += 1;
  477. if (grads_accum_counter_[key] == worker_num_) {
  478. grad_accum_count_++;
  479. }
  480. if (ReadyForUpdateWeights()) {
  481. apply_grads_cv_.notify_one();
  482. }
  483. }
  484. template <typename T>
  485. WeightPtr ParameterServer<T>::weight(const Key &key) {
  486. std::unique_lock<std::mutex> lock(mutex_);
  487. if (weights_.count(key) == 0) {
  488. MS_LOG(EXCEPTION) << "Invalid weight key " << key;
  489. }
  490. WeightPtr weight_ptr = weights_[key];
  491. WeightPtr copy_weight_ptr = std::make_shared<::ps::SArray<T>>(weight_ptr->size(), 0);
  492. copy_weight_ptr->CopyFrom(weight_ptr->data(), weight_ptr->size());
  493. tokens_[key] -= 1;
  494. return copy_weight_ptr;
  495. }
  496. template <typename T>
  497. void ParameterServer<T>::DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs<T> *res) {
  498. std::unique_lock<std::mutex> lock(mutex_);
  499. if (weights_.count(key) == 0) {
  500. MS_LOG(ERROR) << "Invalid embedding table key " << key;
  501. return;
  502. }
  503. if (embedding_lookup_ops_.count(key) == 0) {
  504. MS_LOG(ERROR) << "Invalid embedding lookup op key " << key;
  505. return;
  506. }
  507. WeightPtr table_ptr = weights_[key];
  508. std::shared_ptr<PServerKernel> table_lookup_op = embedding_lookup_ops_[key];
  509. // Update shapes of lookup operator
  510. std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
  511. std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
  512. std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>();
  513. indices_shape->emplace_back(lookup_ids.size());
  514. shapes->push_back(indices_shape);
  515. table_lookup_op->ReInit(shapes);
  516. const std::vector<size_t> output_shapes = table_lookup_op->output_sizes();
  517. std::vector<kernel::AddressPtr> inputs;
  518. AddressPtr embedding_table = std::make_shared<kernel::Address>();
  519. AddressPtr indices = std::make_shared<kernel::Address>();
  520. inputs.push_back(embedding_table);
  521. inputs.push_back(indices);
  522. embedding_table->addr = table_ptr->data();
  523. embedding_table->size = table_ptr->size() * sizeof(T);
  524. std::unique_ptr<int[]> tmp_ids(new int[lookup_ids.size()]);
  525. for (size_t i = 0; i < lookup_ids.size(); i++) {
  526. tmp_ids[i] = static_cast<int>(lookup_ids[i]);
  527. }
  528. indices->addr = tmp_ids.get();
  529. indices->size = lookup_ids.size() * sizeof(int);
  530. std::vector<kernel::AddressPtr> workspaces;
  531. std::vector<kernel::AddressPtr> outputs;
  532. AddressPtr output = std::make_shared<kernel::Address>();
  533. std::shared_ptr<Values> addr = std::make_shared<Values>(output_shapes[0] / sizeof(T), 0);
  534. output->addr = addr->data();
  535. output->size = output_shapes[0];
  536. outputs.push_back(output);
  537. table_lookup_op->Execute(inputs, workspaces, outputs);
  538. res->vals = *addr;
  539. res->lens.push_back(res->vals.size());
  540. }
  541. template <typename T>
  542. int ParameterServer<T>::SumOfShapes(const std::vector<int> &shapes) const {
  543. int sum = 1;
  544. for (auto shape : shapes) {
  545. sum *= shape;
  546. }
  547. return sum;
  548. }
  549. template <typename T>
  550. inline bool ParameterServer<T>::ReadyForUpdateWeights() {
  551. return grads_accum_counter_.size() > 0 && grad_accum_count_ == grads_accum_counter_.size();
  552. }
  553. template <typename T>
  554. inline bool ParameterServer<T>::ReadyForPush(const Key &key) {
  555. std::unique_lock<std::mutex> lock(mutex_);
  556. if (weights_.empty()) {
  557. MS_LOG(EXCEPTION) << "The weights in server is empty. Many reasons could cause this: 1.The Worker didn't send "
  558. "kInitWeightsCmd command. 2.The Server failed to initialize weights.";
  559. }
  560. return grad_accum_count_ < weights_.size() && tokens_[key] <= 0;
  561. }
  562. template <typename T>
  563. inline bool ParameterServer<T>::ReadyForPull(const Key &key) {
  564. std::unique_lock<std::mutex> lock(mutex_);
  565. if (tokens_.count(key) == 0 || weights_[key] == 0) {
  566. MS_LOG(EXCEPTION) << "Invalid weight key " << key;
  567. }
  568. return tokens_[key] > 0;
  569. }
  570. template <typename T>
  571. inline void ParameterServer<T>::ResetGradAccumCount() {
  572. grad_accum_count_ = 0;
  573. for (auto iter = grads_accum_counter_.begin(); iter != grads_accum_counter_.end(); iter++) {
  574. grads_accum_counter_[iter->first] = 0;
  575. }
  576. }
  577. template <typename T>
  578. inline std::mutex &ParameterServer<T>::mutex() {
  579. return mutex_;
  580. }
  581. template <typename T>
  582. void ParameterServer<T>::Run(const FuncGraphPtr &func_graph) {
  583. ::ps::Start(0);
  584. if (!::ps::IsServer()) {
  585. std::cout << "This is not ther Server" << std::endl;
  586. return;
  587. }
  588. Init(func_graph);
  589. thread_->join();
  590. ::ps::Finalize(0, true);
  591. exit(1);
  592. }
  593. } // namespace ps
  594. } // namespace parallel
  595. } // namespace mindspore
  596. #endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_PARAMETER_SERVER_H_