You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

parameter_server.h 32 kB

5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_
  17. #define MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_
  18. #include <unistd.h>
  19. #include <unordered_map>
  20. #include <string>
  21. #include <iostream>
  22. #include <memory>
  23. #include <vector>
  24. #include <mutex>
  25. #include <condition_variable>
  26. #include <thread>
  27. #include <cmath>
  28. #include <random>
  29. #include <utility>
  30. #include <list>
  31. #include <map>
  32. #include <functional>
  33. #include "ir/func_graph.h"
  34. #include "backend/session/session_basic.h"
  35. #include "backend/session/anf_runtime_algorithm.h"
  36. #include "backend/session/session_factory.h"
  37. #include "ps/common.h"
  38. #include "ps/optimizer_info.h"
  39. #include "ps/optimizer_info_builder.h"
  40. #include "ps/util.h"
  41. #include "ps/ps_context.h"
  42. #include "runtime/device/cpu/kernel_select_cpu.h"
  43. #include "utils/ms_context.h"
  44. #include "backend/kernel_compiler/kernel.h"
  45. #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
  46. #include "backend/kernel_compiler/cpu/ps/pserver_kernel.h"
  47. #include "backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h"
  48. #include "backend/kernel_compiler/cpu/ps/sparse_apply_lazy_adam_ps_kernel.h"
  49. #include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"
  50. #include "backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h"
  51. #include "backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h"
  52. namespace mindspore {
  53. namespace ps {
  54. using mindspore::kernel::ps::PServerKernel;
  55. using AnfAlgo = session::AnfRuntimeAlgorithm;
  56. template <typename T>
  57. class ParameterServer {
  58. public:
  59. static ParameterServer &GetInstance() {
  60. static ParameterServer instance;
  61. return instance;
  62. }
  63. void Run(const FuncGraphPtr &func_graph);
  64. private:
  65. ParameterServer()
  66. : pserver_num_(0),
  67. worker_num_(0),
  68. rank_id_(0),
  69. grad_accum_count_(0),
  70. ps_(new ::ps::KVServer<T>(0)),
  71. handler_(nullptr),
  72. func_graph_(nullptr),
  73. sess_(nullptr),
  74. running_(true),
  75. thread_(nullptr) {}
  76. ~ParameterServer() = default;
  77. ParameterServer(const ParameterServer &) = delete;
  78. ParameterServer &operator=(const ParameterServer &) = delete;
  79. class ServerHandler {
  80. public:
  81. explicit ServerHandler(ParameterServer *ps) : ps_(ps) {}
  82. ~ServerHandler() = default;
  83. void Init();
  84. void operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVServer<T> *server);
  85. private:
  86. void HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  87. void HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  88. void HandleInitWeights(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  89. void HandleInitWeightToOptimId(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  90. ::ps::KVPairs<T> *res);
  91. void HandleInitInputsShape(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  92. void HandleInitEmbeddings(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  93. void HandleCheckReadyForPush(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  94. void HandleCheckReadyForPull(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  95. void HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  96. void HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
  97. ParameterServer *ps_;
  98. typedef void (ServerHandler::*RequestHandler)(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  99. ::ps::KVPairs<T> *res);
  100. std::unordered_map<int, RequestHandler> handlers_;
  101. std::unordered_map<Key, bool> init_weights_;
  102. std::unordered_map<Key, bool> init_weight_to_optim_;
  103. std::unordered_map<Key, bool> init_optim_info_;
  104. };
  105. bool Init(const FuncGraphPtr &func_graph);
  106. void InitOptimInfoBuilders();
  107. void InitWeightKeyToOptims(const Key &key, const int &optim_id);
  108. void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths);
  109. void InitWeight(const Key &key, const WeightPtr &weight);
  110. void InitGrad(const Key &key, const GradPtr &grad);
  111. void InitEmbeddingTable(const Key &key,
  112. const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes);
  113. bool HasWeight(const Key &key);
  114. void Finalize();
  115. void UpdateWeights();
  116. void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths);
  117. WeightPtr weight(const Key &key);
  118. void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs<T> *res);
  119. bool ReadyForUpdateWeights();
  120. bool ReadyForPush(const Key &key);
  121. bool ReadyForPull(const Key &key);
  122. void ResetGradAccumCount();
  123. const CNodePtr GetCNode(const std::string &name) const;
  124. std::mutex &mutex();
  125. void GetEmbeddingTableParamPtr();
  126. void SyncEmbeddingTables();
  127. size_t pserver_num_;
  128. size_t worker_num_;
  129. size_t rank_id_;
  130. size_t grad_accum_count_;
  131. std::unique_ptr<::ps::KVServer<T>> ps_;
  132. std::unique_ptr<ServerHandler> handler_;
  133. FuncGraphPtr func_graph_;
  134. std::shared_ptr<session::SessionBasic> sess_;
  135. bool running_;
  136. std::unordered_map<Key, std::shared_ptr<PServerKernel>> optimizers_;
  137. std::unordered_map<Key, InputsShapePtr> optim_inputs_shape_;
  138. std::unordered_map<Key, InputsShapePtr> original_optim_inputs_shape_;
  139. std::unordered_map<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
  140. std::unordered_map<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
  141. std::unordered_map<Key, std::string> weight_key_to_optims_;
  142. std::unordered_map<Key, std::string> weight_key_to_optim_op_;
  143. std::unordered_map<Key, WeightPtr> weights_;
  144. std::unordered_map<Key, bool> is_embedding_;
  145. std::unordered_map<Key, WeightPtr> grads_;
  146. std::unordered_map<Key, size_t> grads_accum_counter_;
  147. std::unordered_map<Key, std::shared_ptr<PServerKernel>> embedding_lookup_ops_;
  148. std::unordered_map<Key, uint64_t> tokens_;
  149. std::mutex mutex_;
  150. std::condition_variable apply_grads_cv_;
  151. std::unique_ptr<std::thread> thread_;
  152. std::map<Key, ParameterPtr> embedding_tables_;
  153. friend class ServerHandler;
  154. };
  155. class FuncGraph;
  156. template <typename T>
  157. void ParameterServer<T>::ServerHandler::operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  158. ::ps::KVServer<T> *server) {
  159. MS_EXCEPTION_IF_NULL(server);
  160. ::ps::KVPairs<T> res;
  161. if (handlers_.count(req_meta.cmd) > 0) {
  162. auto &handler_ptr = handlers_[req_meta.cmd];
  163. (this->*handler_ptr)(req_meta, req_data, &res);
  164. } else if (req_meta.push) {
  165. HandlePushReq(req_meta, req_data, &res);
  166. } else {
  167. HandlePullReq(req_meta, req_data, &res);
  168. }
  169. server->Response(req_meta, res);
  170. }
  171. template <typename T>
  172. void ParameterServer<T>::ServerHandler::Init() {
  173. handlers_[kInitWeightsCmd] = &ServerHandler::HandleInitWeights;
  174. handlers_[kInitWeightToOptimIdCmd] = &ServerHandler::HandleInitWeightToOptimId;
  175. handlers_[kInitOptimInputsShapeCmd] = &ServerHandler::HandleInitInputsShape;
  176. handlers_[kInitEmbeddingsCmd] = &ServerHandler::HandleInitEmbeddings;
  177. handlers_[kCheckReadyForPushCmd] = &ServerHandler::HandleCheckReadyForPush;
  178. handlers_[kCheckReadyForPullCmd] = &ServerHandler::HandleCheckReadyForPull;
  179. handlers_[kEmbeddingLookupCmd] = &ServerHandler::HandleEmbeddingLookup;
  180. handlers_[kFinalizeCmd] = &ServerHandler::HandleFinalize;
  181. }
  182. template <typename T>
  183. void ParameterServer<T>::ServerHandler::HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  184. ::ps::KVPairs<T> *res) {
  185. MS_EXCEPTION_IF_NULL(res);
  186. ps_->AccumGrad(req_data.keys, req_data.vals, req_data.lens);
  187. }
  188. template <typename T>
  189. void ParameterServer<T>::ServerHandler::HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  190. ::ps::KVPairs<T> *res) {
  191. MS_EXCEPTION_IF_NULL(res);
  192. res->keys = req_data.keys;
  193. ::ps::Key key = req_data.keys[0];
  194. res->vals = *(ps_->weight(key));
  195. }
  196. template <typename T>
  197. void ParameterServer<T>::ServerHandler::HandleInitWeights(const ::ps::KVMeta &req_meta,
  198. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  199. std::unique_lock<std::mutex> lock(ps_->mutex());
  200. MS_EXCEPTION_IF_NULL(res);
  201. size_t key_num = req_data.keys.size();
  202. T *data_ptr = req_data.vals.data();
  203. size_t pos = 0;
  204. for (size_t i = 0; i < key_num; i++) {
  205. Key key = req_data.keys[i];
  206. size_t data_len = req_data.lens.size() != key_num ? req_data.vals.size() / key_num : req_data.lens[i];
  207. if (!ps_->HasWeight(key)) {
  208. WeightPtr weight_ptr = std::make_shared<::ps::SArray<T>>();
  209. MS_EXCEPTION_IF_NULL(weight_ptr);
  210. weight_ptr->CopyFrom(data_ptr + pos, data_len);
  211. ps_->InitWeight(key, weight_ptr);
  212. GradPtr grad_ptr = std::make_shared<::ps::SArray<T>>(data_len, 0);
  213. MS_EXCEPTION_IF_NULL(grad_ptr);
  214. ps_->InitGrad(key, grad_ptr);
  215. }
  216. pos += data_len;
  217. }
  218. }
  219. template <typename T>
  220. void ParameterServer<T>::ServerHandler::HandleInitWeightToOptimId(const ::ps::KVMeta &req_meta,
  221. const ::ps::KVPairs<T> &req_data,
  222. ::ps::KVPairs<T> *res) {
  223. std::unique_lock<std::mutex> lock(ps_->mutex());
  224. MS_EXCEPTION_IF_NULL(res);
  225. size_t key_num = req_data.keys.size();
  226. for (size_t i = 0; i < key_num; i++) {
  227. Key key = req_data.keys[i];
  228. T val = req_data.vals[i];
  229. if (init_weight_to_optim_[key]) {
  230. continue;
  231. } else {
  232. init_weight_to_optim_[key] = true;
  233. }
  234. ps_->InitWeightKeyToOptims(key, val);
  235. }
  236. }
  237. template <typename T>
  238. void ParameterServer<T>::ServerHandler::HandleInitInputsShape(const ::ps::KVMeta &req_meta,
  239. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  240. std::unique_lock<std::mutex> lock(ps_->mutex());
  241. MS_EXCEPTION_IF_NULL(res);
  242. const Key &key = req_data.keys[0];
  243. if (init_optim_info_[key]) {
  244. return;
  245. } else {
  246. init_optim_info_[key] = true;
  247. }
  248. ps_->InitOptimInputsShape(req_data.keys, req_data.vals, req_data.lens);
  249. }
  250. template <typename T>
  251. void ParameterServer<T>::ServerHandler::HandleInitEmbeddings(const ::ps::KVMeta &req_meta,
  252. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  253. std::unique_lock<std::mutex> lock(ps_->mutex());
  254. MS_EXCEPTION_IF_NULL(res);
  255. const Key &key = req_data.keys[0];
  256. MS_LOG(INFO) << "Initializing embedding table for key:" << key;
  257. std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
  258. std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
  259. MS_EXCEPTION_IF_NULL(shapes);
  260. std::shared_ptr<std::vector<size_t>> input_shape = std::make_shared<std::vector<size_t>>();
  261. MS_EXCEPTION_IF_NULL(input_shape);
  262. std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>();
  263. MS_EXCEPTION_IF_NULL(indices_shape);
  264. std::shared_ptr<std::vector<size_t>> output_shape = std::make_shared<std::vector<size_t>>();
  265. MS_EXCEPTION_IF_NULL(output_shape);
  266. shapes->push_back(input_shape);
  267. shapes->push_back(indices_shape);
  268. shapes->push_back(output_shape);
  269. const Lengths &lens = req_data.lens;
  270. size_t index = 0;
  271. for (int i = 0; i < lens[0]; i++) {
  272. input_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  273. }
  274. for (int j = 0; j < lens[1]; j++) {
  275. indices_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  276. }
  277. for (int k = 0; k < lens[2]; k++) {
  278. output_shape->push_back(static_cast<size_t>(req_data.vals[index++]));
  279. }
  280. ps_->InitEmbeddingTable(key, shapes);
  281. }
  282. template <typename T>
  283. void ParameterServer<T>::ServerHandler::HandleCheckReadyForPush(const ::ps::KVMeta &req_meta,
  284. const ::ps::KVPairs<T> &req_data,
  285. ::ps::KVPairs<T> *res) {
  286. MS_EXCEPTION_IF_NULL(res);
  287. const Key &key = req_data.keys[0];
  288. bool ready = ps_->ReadyForPush(key);
  289. res->keys.push_back(key);
  290. res->vals.push_back(ready);
  291. }
  292. template <typename T>
  293. void ParameterServer<T>::ServerHandler::HandleCheckReadyForPull(const ::ps::KVMeta &req_meta,
  294. const ::ps::KVPairs<T> &req_data,
  295. ::ps::KVPairs<T> *res) {
  296. MS_EXCEPTION_IF_NULL(res);
  297. const Key &key = req_data.keys[0];
  298. bool ready = ps_->ReadyForPull(key);
  299. res->keys.push_back(key);
  300. res->vals.push_back(ready);
  301. }
  302. template <typename T>
  303. void ParameterServer<T>::ServerHandler::HandleEmbeddingLookup(const ::ps::KVMeta &req_meta,
  304. const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  305. MS_EXCEPTION_IF_NULL(res);
  306. const Key &key = req_data.keys[0];
  307. for (size_t i = 1; i < req_data.keys.size(); i++) {
  308. res->keys.push_back(req_data.keys[i]);
  309. }
  310. ps_->DoEmbeddingLookup(key, req_data.keys.segment(1, req_data.keys.size()), res);
  311. }
  312. template <typename T>
  313. void ParameterServer<T>::ServerHandler::HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
  314. ::ps::KVPairs<T> *res) {
  315. MS_EXCEPTION_IF_NULL(res);
  316. ps_->Finalize();
  317. }
  318. template <typename T>
  319. bool ParameterServer<T>::Init(const FuncGraphPtr &func_graph) {
  320. pserver_num_ = ::ps::NumServers();
  321. worker_num_ = ::ps::NumWorkers();
  322. func_graph_ = func_graph;
  323. rank_id_ = ::ps::MyRank();
  324. handler_.reset(new ServerHandler(this));
  325. handler_->Init();
  326. InitOptimInfoBuilders();
  327. ps_->set_request_handle(*handler_);
  328. thread_.reset(new std::thread(&ParameterServer::UpdateWeights, this));
  329. GetEmbeddingTableParamPtr();
  330. return true;
  331. }
  332. template <typename T>
  333. void ParameterServer<T>::InitOptimInfoBuilders() {
  334. std::shared_ptr<OptimizerInfoBuilder> momentum_info_builder = std::make_shared<MomentumOptimInfoBuilder>(worker_num_);
  335. std::shared_ptr<OptimizerInfoBuilder> sparse_adam_info_builder =
  336. std::make_shared<SparseAdamOptimInfoBuilder>(worker_num_);
  337. std::shared_ptr<OptimizerInfoBuilder> sparse_ftrl_info_builder =
  338. std::make_shared<SparseFtrlOptimInfoBuilder>(worker_num_);
  339. optim_info_builders_[kApplyMomentum] = momentum_info_builder;
  340. optim_info_builders_[kSparseAdam] = sparse_adam_info_builder;
  341. optim_info_builders_[kSparseFtrl] = sparse_ftrl_info_builder;
  342. }
  343. template <typename T>
  344. void ParameterServer<T>::InitWeightKeyToOptims(const Key &key, const int &optim_id) {
  345. if (weight_key_to_optims_.count(key) > 0 || Util::optimizer_name(optim_id) == "") {
  346. return;
  347. }
  348. weight_key_to_optims_[key] = Util::optimizer_name(optim_id);
  349. weight_key_to_optim_op_[key] = Util::optimizer_node_name(optim_id);
  350. MS_LOG(INFO) << "Initializing optimizer id for key:" << key << ", optimizer name:" << weight_key_to_optims_[key]
  351. << ", optimizer op name:" << weight_key_to_optim_op_[key];
  352. }
  353. template <typename T>
  354. void ParameterServer<T>::InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths) {
  355. InputsShapePtr inputs_shape = std::make_shared<InputsShape>();
  356. MS_EXCEPTION_IF_NULL(inputs_shape);
  357. InputsShapePtr original_inputs_shape = std::make_shared<InputsShape>();
  358. MS_EXCEPTION_IF_NULL(original_inputs_shape);
  359. int val_idx = 0;
  360. const Key &key = keys[0];
  361. MS_LOG(INFO) << "Initializing optimizer inputs shape for key:" << key;
  362. if (optim_inputs_shape_.count(key) == 0) {
  363. original_optim_inputs_shape_[key] = original_inputs_shape;
  364. optim_inputs_shape_[key] = inputs_shape;
  365. }
  366. for (size_t i = 0; i < keys.size(); i++) {
  367. auto shape = std::make_shared<std::vector<size_t>>();
  368. MS_EXCEPTION_IF_NULL(shape);
  369. auto original_shape = std::make_shared<std::vector<size_t>>();
  370. MS_EXCEPTION_IF_NULL(original_shape);
  371. inputs_shape->push_back(shape);
  372. original_inputs_shape->push_back(original_shape);
  373. for (int j = 0; j < lengths[i]; j++) {
  374. shape->push_back(values[val_idx]);
  375. original_shape->push_back(values[val_idx++]);
  376. }
  377. }
  378. if (weight_key_to_optims_.count(key) > 0) {
  379. const std::string &optim_name = weight_key_to_optims_[key];
  380. const std::string &optim_op_name = weight_key_to_optim_op_[key];
  381. if (optimizers_.count(key) == 0 && optim_inputs_shape_.count(key) > 0) {
  382. const CNodePtr cnode = GetCNode(optim_op_name);
  383. MS_EXCEPTION_IF_NULL(cnode);
  384. if (optim_name == kSparseAdam) {
  385. std::shared_ptr<PServerKernel> optimizer =
  386. std::make_shared<kernel::ps::SparseApplyAdamPSKernel>(rank_id_, pserver_num_, worker_num_);
  387. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  388. optimizers_[key] = optimizer;
  389. } else if (optim_name == kSparseLazyAdam) {
  390. std::shared_ptr<PServerKernel> optimizer =
  391. std::make_shared<kernel::ps::SparseApplyLazyAdamPSKernel>(rank_id_, pserver_num_, worker_num_);
  392. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  393. optimizers_[key] = optimizer;
  394. } else if (optim_name == kApplyMomentum) {
  395. std::shared_ptr<PServerKernel> optimizer =
  396. std::make_shared<kernel::ps::ApplyMomentumPSKernel>(rank_id_, pserver_num_, worker_num_);
  397. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  398. optimizers_[key] = optimizer;
  399. } else if (optim_name == kSparseFtrl) {
  400. std::shared_ptr<PServerKernel> optimizer =
  401. std::make_shared<kernel::ps::SparseApplyFtrlPSKernel>(rank_id_, pserver_num_, worker_num_);
  402. optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
  403. optimizers_[key] = optimizer;
  404. }
  405. }
  406. }
  407. }
  408. template <typename T>
  409. const CNodePtr ParameterServer<T>::GetCNode(const std::string &name) const {
  410. std::list<CNodePtr> cnodes = func_graph_->GetOrderedCnodes();
  411. for (CNodePtr cnode : cnodes) {
  412. MS_EXCEPTION_IF_NULL(cnode);
  413. std::string fullname = cnode->fullname_with_scope();
  414. if (fullname.find(name) != std::string::npos && fullname.find("Push") != std::string::npos) {
  415. return cnode;
  416. }
  417. }
  418. return nullptr;
  419. }
  420. template <typename T>
  421. void ParameterServer<T>::InitWeight(const Key &key, const WeightPtr &weight) {
  422. MS_EXCEPTION_IF_NULL(weight);
  423. if ((weights_.count(key) == 0) || (is_embedding_[key] && weights_.count(key) != 0)) {
  424. MS_LOG(INFO) << "Initializing weight for key " << key << ", server rank " << rank_id_;
  425. weights_[key] = weight;
  426. tokens_[key] = 0;
  427. is_embedding_[key] = false;
  428. }
  429. }
  430. template <typename T>
  431. void ParameterServer<T>::InitGrad(const Key &key, const GradPtr &grad) {
  432. MS_EXCEPTION_IF_NULL(grad);
  433. if (grads_.count(key) == 0) {
  434. grads_[key] = grad;
  435. grads_accum_counter_[key] = 0;
  436. }
  437. }
  438. template <typename T>
  439. void ParameterServer<T>::InitEmbeddingTable(
  440. const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
  441. MS_EXCEPTION_IF_NULL(shapes);
  442. if (weights_.count(key) == 0) {
  443. std::shared_ptr<PServerKernel> lookup =
  444. std::make_shared<kernel::ps::EmbeddingLookUpPSKernel>(rank_id_, pserver_num_, worker_num_);
  445. lookup->InitKernel(shapes);
  446. embedding_lookup_ops_[key] = lookup;
  447. // Init embedding weight
  448. const std::vector<size_t> &input_shapes = lookup->input_sizes();
  449. size_t total_dims =
  450. std::accumulate(input_shapes.begin(), input_shapes.end(), IntToSize(1), std::multiplies<size_t>());
  451. WeightPtr embedding = std::make_shared<Weight>(total_dims, 0);
  452. MS_EXCEPTION_IF_NULL(embedding);
  453. T *embedding_data = embedding->data();
  454. std::default_random_engine engine;
  455. std::normal_distribution<float> random(0, 0.01);
  456. for (size_t i = 0; i < total_dims; i++) {
  457. embedding_data[i] = random(engine);
  458. }
  459. weights_[key] = embedding;
  460. tokens_[key] = 0;
  461. is_embedding_[key] = true;
  462. grads_accum_counter_[key] = 0;
  463. }
  464. }
  465. template <typename T>
  466. bool ParameterServer<T>::HasWeight(const Key &key) {
  467. return (weights_.count(key) > 0 && !is_embedding_.count(key));
  468. }
  469. template <typename T>
  470. void ParameterServer<T>::Finalize() {
  471. running_ = false;
  472. apply_grads_cv_.notify_one();
  473. SyncEmbeddingTables();
  474. }
  475. template <typename T>
  476. void ParameterServer<T>::UpdateWeights() {
  477. while (true) {
  478. std::unique_lock<std::mutex> lock(mutex_);
  479. apply_grads_cv_.wait(lock, [this] { return this->ReadyForUpdateWeights() || !running_; });
  480. if (!running_) {
  481. break;
  482. }
  483. for (auto iter = weights_.begin(); iter != weights_.end(); iter++) {
  484. Key key = iter->first;
  485. WeightPtr weight_ptr = iter->second;
  486. std::shared_ptr<PServerKernel> optimizer = nullptr;
  487. if (weight_key_to_optims_.count(key) > 0) {
  488. optimizer = optimizers_[key];
  489. }
  490. MS_EXCEPTION_IF_NULL(optimizer);
  491. std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
  492. if (optim_info != nullptr) {
  493. const std::vector<kernel::AddressPtr> &inputs = optim_info->inputs();
  494. const std::vector<kernel::AddressPtr> &workspaces = optim_info->workspaces();
  495. const std::vector<kernel::AddressPtr> &outputs = optim_info->outputs();
  496. std::vector<std::vector<size_t>> shapes = {};
  497. std::vector<size_t> indices_shape = {};
  498. indices_shape.emplace_back(optim_info->indice_size());
  499. shapes.push_back(indices_shape);
  500. if (original_optim_inputs_shape_.count(key) != 0) {
  501. for (auto input_shapes : *(original_optim_inputs_shape_[key])) {
  502. shapes.push_back(*input_shapes);
  503. }
  504. }
  505. optimizer->ReInit(shapes);
  506. optim_info->ComputeMean(shapes, worker_num_, pserver_num_, rank_id_);
  507. optimizer->Execute(inputs, workspaces, outputs);
  508. optim_info->Reset();
  509. }
  510. if (!is_embedding_[key]) {
  511. tokens_[key] = worker_num_;
  512. }
  513. }
  514. ResetGradAccumCount();
  515. }
  516. }
  517. template <typename T>
  518. void ParameterServer<T>::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) {
  519. std::unique_lock<std::mutex> lock(mutex_);
  520. const Key &key = keys[0];
  521. bool no_sparse_grad = values.size() == 1 && values[0] == -100;
  522. if (!no_sparse_grad) {
  523. std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
  524. // Create or update the optimizer info
  525. if (optim_info == nullptr) {
  526. const std::shared_ptr<OptimizerInfoBuilder> &builder = optim_info_builders_[weight_key_to_optims_[key]];
  527. std::shared_ptr<kernel::ps::PServerKernel> pserver_kernel = optimizers_[key];
  528. if (pserver_kernel == nullptr) {
  529. MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key];
  530. }
  531. MS_EXCEPTION_IF_NULL(pserver_kernel);
  532. OptimizerInfo *optim =
  533. builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_);
  534. optim_info.reset(optim);
  535. optim_infos_[key] = optim_info;
  536. } else {
  537. optim_info->Update(values, lengths);
  538. optim_info->Accumulate(values, lengths);
  539. }
  540. }
  541. grads_accum_counter_[key] += 1;
  542. if (grads_accum_counter_[key] == worker_num_) {
  543. grad_accum_count_++;
  544. }
  545. if (ReadyForUpdateWeights()) {
  546. apply_grads_cv_.notify_one();
  547. }
  548. }
  549. template <typename T>
  550. WeightPtr ParameterServer<T>::weight(const Key &key) {
  551. std::unique_lock<std::mutex> lock(mutex_);
  552. if (weights_.count(key) == 0) {
  553. MS_LOG(EXCEPTION) << "Invalid weight key " << key;
  554. }
  555. WeightPtr weight_ptr = weights_[key];
  556. MS_EXCEPTION_IF_NULL(weight_ptr);
  557. WeightPtr copy_weight_ptr = std::make_shared<::ps::SArray<T>>(weight_ptr->size(), 0);
  558. MS_EXCEPTION_IF_NULL(copy_weight_ptr);
  559. copy_weight_ptr->CopyFrom(weight_ptr->data(), weight_ptr->size());
  560. tokens_[key] -= 1;
  561. return copy_weight_ptr;
  562. }
  563. template <typename T>
  564. void ParameterServer<T>::DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs<T> *res) {
  565. std::unique_lock<std::mutex> lock(mutex_);
  566. MS_EXCEPTION_IF_NULL(res);
  567. if (weights_.count(key) == 0) {
  568. MS_LOG(ERROR) << "Invalid embedding table key " << key;
  569. return;
  570. }
  571. if (embedding_lookup_ops_.count(key) == 0) {
  572. MS_LOG(ERROR) << "Invalid embedding lookup op key " << key;
  573. return;
  574. }
  575. WeightPtr table_ptr = weights_[key];
  576. MS_EXCEPTION_IF_NULL(table_ptr);
  577. std::shared_ptr<PServerKernel> table_lookup_op = embedding_lookup_ops_[key];
  578. MS_EXCEPTION_IF_NULL(table_lookup_op);
  579. // Update shapes of lookup operator
  580. std::vector<std::vector<size_t>> shapes = {};
  581. std::vector<size_t> indices_shape = {};
  582. indices_shape.emplace_back(lookup_ids.size());
  583. shapes.push_back(indices_shape);
  584. table_lookup_op->ReInit(shapes);
  585. const std::vector<size_t> output_shapes = table_lookup_op->output_sizes();
  586. std::vector<kernel::AddressPtr> inputs;
  587. AddressPtr embedding_table = std::make_shared<kernel::Address>();
  588. MS_EXCEPTION_IF_NULL(embedding_table);
  589. AddressPtr indices = std::make_shared<kernel::Address>();
  590. MS_EXCEPTION_IF_NULL(indices);
  591. inputs.push_back(embedding_table);
  592. inputs.push_back(indices);
  593. embedding_table->addr = table_ptr->data();
  594. embedding_table->size = table_ptr->size() * sizeof(T);
  595. std::unique_ptr<int[]> tmp_ids(new int[lookup_ids.size()]);
  596. MS_EXCEPTION_IF_NULL(tmp_ids);
  597. for (size_t i = 0; i < lookup_ids.size(); i++) {
  598. tmp_ids[i] = static_cast<int>(lookup_ids[i]);
  599. }
  600. indices->addr = tmp_ids.get();
  601. indices->size = lookup_ids.size() * sizeof(int);
  602. std::vector<kernel::AddressPtr> workspaces;
  603. std::vector<kernel::AddressPtr> outputs;
  604. AddressPtr output = std::make_shared<kernel::Address>();
  605. MS_EXCEPTION_IF_NULL(output);
  606. std::shared_ptr<Values> addr = std::make_shared<Values>(output_shapes[0] / sizeof(T), 0);
  607. MS_EXCEPTION_IF_NULL(addr);
  608. output->addr = addr->data();
  609. output->size = output_shapes[0];
  610. outputs.push_back(output);
  611. table_lookup_op->Execute(inputs, workspaces, outputs);
  612. res->vals = *addr;
  613. res->lens.push_back(res->vals.size());
  614. }
  615. template <typename T>
  616. inline bool ParameterServer<T>::ReadyForUpdateWeights() {
  617. return grads_accum_counter_.size() > 0 && grad_accum_count_ == grads_accum_counter_.size();
  618. }
  619. template <typename T>
  620. inline bool ParameterServer<T>::ReadyForPush(const Key &key) {
  621. std::unique_lock<std::mutex> lock(mutex_);
  622. if (weights_.empty()) {
  623. MS_LOG(EXCEPTION) << "The weights in server is empty. Many reasons could cause this: 1.The Worker didn't send "
  624. "kInitWeightsCmd command. 2.The Server failed to initialize weights.";
  625. }
  626. return grad_accum_count_ < weights_.size() && tokens_[key] <= 0;
  627. }
  628. template <typename T>
  629. inline bool ParameterServer<T>::ReadyForPull(const Key &key) {
  630. std::unique_lock<std::mutex> lock(mutex_);
  631. if (tokens_.count(key) == 0 || weights_[key] == 0) {
  632. MS_LOG(EXCEPTION) << "Invalid weight key " << key;
  633. }
  634. return tokens_[key] > 0;
  635. }
  636. template <typename T>
  637. inline void ParameterServer<T>::ResetGradAccumCount() {
  638. grad_accum_count_ = 0;
  639. for (auto iter = grads_accum_counter_.begin(); iter != grads_accum_counter_.end(); iter++) {
  640. grads_accum_counter_[iter->first] = 0;
  641. }
  642. }
  643. template <typename T>
  644. inline std::mutex &ParameterServer<T>::mutex() {
  645. return mutex_;
  646. }
  647. template <typename T>
  648. void ParameterServer<T>::GetEmbeddingTableParamPtr() {
  649. MS_EXCEPTION_IF_NULL(func_graph_);
  650. auto cnodes = func_graph_->GetOrderedCnodes();
  651. Key count = 0;
  652. for (auto cnode : cnodes) {
  653. MS_EXCEPTION_IF_NULL(cnode);
  654. std::string cnode_name = AnfAlgo::GetCNodeName(cnode);
  655. if (cnode_name == kEmbeddingLookupOpName) {
  656. auto embedding_table = AnfAlgo::GetInputNode(cnode, 0);
  657. MS_EXCEPTION_IF_NULL(embedding_table);
  658. MS_LOG(INFO) << "Embedding table name is " << embedding_table->fullname_with_scope() << ", key is " << count;
  659. embedding_tables_.insert(std::make_pair(count, embedding_table->cast<ParameterPtr>()));
  660. count++;
  661. }
  662. }
  663. }
  664. template <typename T>
  665. void ParameterServer<T>::SyncEmbeddingTables() {
  666. for (auto embedding_table : embedding_tables_) {
  667. Key key = embedding_table.first;
  668. if (embedding_lookup_ops_.count(key) == 0) {
  669. MS_LOG(WARNING) << "Can't find look up PS kernel for key " << key;
  670. continue;
  671. }
  672. auto lookup = embedding_lookup_ops_[key];
  673. const std::vector<size_t> &input_shapes = lookup->input_sizes();
  674. std::vector<int> new_tensor_shape(input_shapes.begin(), input_shapes.end());
  675. tensor::TensorPtr new_tensor = std::make_shared<tensor::Tensor>(kNumberTypeFloat32, new_tensor_shape);
  676. MS_EXCEPTION_IF_NULL(new_tensor);
  677. float *new_tensor_data_ptr = reinterpret_cast<float *>(new_tensor->data_c());
  678. size_t new_tensor_size = static_cast<size_t>(new_tensor->data().nbytes());
  679. size_t embedding_table_size = weights_[key]->size() * sizeof(float);
  680. if (new_tensor_size != embedding_table_size) {
  681. MS_LOG(EXCEPTION) << "Shape of embedding table can't match. New tensor size:" << new_tensor_size
  682. << ", embedding_table size:" << embedding_table_size;
  683. }
  684. MS_EXCEPTION_IF_NULL(new_tensor_data_ptr);
  685. MS_EXCEPTION_IF_NULL(weights_[key]->data());
  686. int ret = memcpy_s(new_tensor_data_ptr, new_tensor_size, weights_[key]->data(), embedding_table_size);
  687. if (ret != 0) {
  688. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  689. return;
  690. }
  691. auto paramter_tensor_ptr = embedding_table.second->default_param();
  692. MS_EXCEPTION_IF_NULL(paramter_tensor_ptr);
  693. paramter_tensor_ptr->cast<tensor::TensorPtr>()->AssignValue(*new_tensor);
  694. }
  695. }
  696. template <typename T>
  697. void ParameterServer<T>::Run(const FuncGraphPtr &func_graph) {
  698. MS_EXCEPTION_IF_NULL(func_graph);
  699. MS_LOG(INFO) << "PServer starts connecting to scheduler and workers...";
  700. ::ps::Start(0);
  701. MS_LOG(INFO) << "PServer connected successfully.";
  702. if (!::ps::IsServer()) {
  703. std::cout << "This is not ther Server" << std::endl;
  704. return;
  705. }
  706. Init(func_graph);
  707. PSContext::instance()->SetPSRankId(rank_id_);
  708. thread_->join();
  709. MS_LOG(INFO) << "PServer finished updating models, starts finalizing...";
  710. ::ps::Finalize(0, true);
  711. MS_LOG(INFO) << "PServer finalized successfully.";
  712. }
  713. } // namespace ps
  714. } // namespace mindspore
  715. #endif // MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_