optimizer_info.cc

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "ps/optimizer_info.h"
#include <map>
#include <memory>
#include <string>
#include <functional>
#include <numeric>  // std::accumulate, used by UpdateOptimInputValue
#include "ps/util.h"

namespace mindspore {
namespace ps {
void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); }

const std::vector<AddressPtr> &OptimizerInfo::inputs() { return inputs_; }

const std::vector<AddressPtr> &OptimizerInfo::workspaces() { return workspaces_; }

const std::vector<AddressPtr> &OptimizerInfo::outputs() { return outputs_; }

bool OptimizerInfo::IsSparse() const { return false; }

const size_t OptimizerInfo::indice_size() const { return 0; }

size_t OptimizerInfo::grad_index() { return 0; }

size_t OptimizerInfo::indices_index() { return 0; }
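
// Copies one hyper-parameter (e.g. "lr") from the flattened worker payload into the
// corresponding server-side input buffer. `lens` holds the element count of each field of the
// payload in kOptimToPSSendIdx order, so the source offset is the sum of the lengths that
// precede the requested field. A purely hypothetical layout for illustration: if some
// optimizer's payload were {lr, momentum, grad} with lens = {1, 1, 1024}, updating "momentum"
// (ps_send_index == 1) would copy lens[1] == 1 float starting at element offset lens[0] == 1.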
template <typename T>
void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
                                          const Lengths &lens) {
  if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " is not supported.";
  }
  const OptimOriginIdx &origin_input_map = kOptimToOriginIdx.at(optim_type);
  const OptimPSSendIdx &ps_send_index_map = kOptimToPSSendIdx.at(optim_type);
  if (ps_send_index_map.count(input_name) == 0 || origin_input_map.count(input_name) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer " << optim_type << " has no input for " << input_name;
  }
  size_t origin_index = origin_input_map.at(input_name);
  size_t ps_send_index = ps_send_index_map.at(input_name);
  if (ps_send_index >= lens.size() || origin_index >= inputs_.size()) {
    MS_LOG(EXCEPTION) << "Index is out of bound for optimizer " << optim_type << ", origin_index:" << origin_index
                      << ", ps_send_index:" << ps_send_index;
  }
  EXC_IF_VEC_IDX_OOB(lens, ps_send_index);
  size_t size = lens[ps_send_index] * sizeof(T);
  size_t offset = std::accumulate(lens.begin(), lens.begin() + ps_send_index, 0, std::plus<int>());
  AddressPtr optim_input = inputs_[origin_index];
  MS_EXCEPTION_IF_NULL(optim_input);
  void *dst_data = optim_input->addr;
  T *src_data = reinterpret_cast<T *>(data) + offset;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  int ret = memcpy_s(dst_data, optim_input->size, src_data, size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
    return;
  }
}
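
// Dense accumulation: element-wise adds the incoming gradient segment of `values`
// (located via grad_index() and `lengths`) into the persistent gradient buffer.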
void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  size_t size = gradient()->size / sizeof(float);
  size_t grad_index = this->grad_index();
  size_t grad_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += lengths[i];
  }
  // values is const and only read here, so take a const pointer into it.
  const float *grad_data = values.data() + grad_offset;
  CHECK_EQ(size, static_cast<size_t>(lengths[grad_index]));
  for (size_t i = 0; i < size; i++) {
    accum_grad_data[i] += grad_data[i];
  }
}
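
// Dense mean: divides the accumulated gradient by the number of contributing workers `n`.
// The shape, server-count, and rank arguments are unused in the dense case.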
void DenseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &, size_t n, size_t, size_t) {
  if (n > 1) {
    float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
    size_t size = gradient()->size / sizeof(float);
    for (size_t i = 0; i < size; i++) {
      accum_grad_data[i] /= n;
    }
  }
}
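
// Clears the dense accumulation buffer so the next iteration starts from zero.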
void DenseOptimInfo::Reset() {
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  int ret = memset_s(gradient()->addr, gradient()->size, 0x00, gradient()->size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memset_s error, errno(" << ret << ")";
    return;
  }
}
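
// Sparse accumulation: unlike the dense case, incoming gradient rows and their row indices are
// appended after the data already in the buffers (the append cursors are grads_offset_ and
// indices_offset_). For example, if two workers each push 3 rows, the buffers then hold 6 rows
// and 6 indices; duplicate indices are only merged later, in ComputeMean.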
void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  // Append grad data to the end
  float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  MS_EXCEPTION_IF_NULL(accum_grad_data);
  size_t grad_index = this->grad_index();
  size_t grad_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += lengths[i];
  }
  const float *incr_grad_data = values.data() + grad_offset;
  MS_EXCEPTION_IF_NULL(incr_grad_data);
  size_t incr_grad_size = lengths[grad_index] * sizeof(float);
  size_t dst_size = incr_grad_size;
  size_t src_size = incr_grad_size;
  void *dst_data = accum_grad_data + grads_offset_;
  const void *src_data = incr_grad_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  auto ret = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
    return;
  }
  grads_offset_ += lengths[grad_index];
  gradient()->size += incr_grad_size;
  // Append indice data to the end
  int *accum_indices_data = reinterpret_cast<int *>(indices()->addr);
  MS_EXCEPTION_IF_NULL(accum_indices_data);
  size_t indices_index = this->indices_index();
  size_t indice_offset = 0;
  for (size_t i = 0; i < indices_index; i++) {
    indice_offset += lengths[i];
  }
  const int *incr_indice_data = reinterpret_cast<const int *>(values.data()) + indice_offset;
  MS_EXCEPTION_IF_NULL(incr_indice_data);
  size_t incr_indice_size = lengths[indices_index];
  size_t incr_indice_data_size = incr_indice_size * sizeof(int);
  dst_size = incr_indice_data_size;
  src_size = incr_indice_data_size;
  dst_data = accum_indices_data + indices_offset_;
  src_data = incr_indice_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  auto ret2 = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret2 != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret2 << ")";
    return;
  }
  indices_offset_ += lengths[indices_index];
  indices()->size += incr_indice_data_size;
}
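
// Sparse mean: shifts the accumulated global row indices into this server's local shard range,
// merges rows with duplicate indices via Util::ReduceSparseGradient, then divides by the worker
// count `n`. shapes[1] is taken as the original input's shape; its first dimension is the
// global row count used to compute the shard offset.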
void SparseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
                                  size_t rank_id) {
  MS_EXCEPTION_IF_NULL(gradient());
  MS_EXCEPTION_IF_NULL(indices());
  size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
  int segment_size = gradient()->size / indices()->size;
  std::vector<float> new_grad(indices_size * segment_size);
  std::vector<int> new_indices(indices_size);
  mindspore::kernel::SparseGradient<int> unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size});
  if (shapes.size() < 2 || shapes[1].empty()) {
    MS_LOG(EXCEPTION) << "No input shape found";
  }
  const auto &input_shapes = shapes[1];
  int first_dim_size = input_shapes.front();
  int outer_dim_size = segment_size;
  if (first_dim_size == 0 || outer_dim_size == 0) {
    MS_LOG(ERROR) << "Invalid first dim size " << first_dim_size << " or outer dim size " << outer_dim_size;
  }
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  MS_EXCEPTION_IF_NULL(indices()->addr);
  float *grad_data = reinterpret_cast<float *>(gradient()->addr);
  int *indices_data = reinterpret_cast<int *>(indices()->addr);
  size_t original_row_count = input_shapes.front();
  if (original_row_count > 0) {
    size_t offset = 0;
    std::map<int, int> rank_dims = Util::AllRankLocalShard(original_row_count, rank_id, server_num);
    for (size_t i = 0; i < rank_id; i++) {
      if (rank_dims.count(i) == 0) {
        MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
      }
      offset += rank_dims[i];
    }
    // Re-base global row indices to this server's local shard.
    for (size_t i = 0; i < indices_size; i++) {
      indices_data[i] -= static_cast<int>(offset);
    }
  }
  Util::ReduceSparseGradient(grad_data, indices_data, indices_size, segment_size, first_dim_size, outer_dim_size,
                             &unique_sparse_grad);
  size_t reduced_grad_size = unique_sparse_grad.indices_size_ * segment_size * sizeof(float);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.value_);
  auto ret = memcpy_s(gradient()->addr, gradient()->size, unique_sparse_grad.value_, reduced_grad_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
    return;
  }
  size_t reduced_indice_size = unique_sparse_grad.indices_size_ * sizeof(int);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.indices_);
  ret = memcpy_s(indices()->addr, indices()->size, unique_sparse_grad.indices_, reduced_indice_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
    return;
  }
  gradient()->size = reduced_grad_size;
  indices()->size = reduced_indice_size;
  for (size_t i = 0; i < unique_sparse_grad.indices_size_ * segment_size; i++) {
    grad_data[i] = grad_data[i] / n;
  }
}
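
// Resets the logical sizes and append cursors; the buffer capacity itself is kept.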
void SparseOptimInfo::Reset() {
  gradient()->size = 0;
  indices()->size = 0;
  grads_offset_ = 0;
  indices_offset_ = 0;
}
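
// The push order below must stay in sync with the kMomentumOriginIdx mapping declared in the
// header, since gradient() and UpdateOptimInputValue index inputs_ through that table.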
MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate,
                                     const AddressPtr &learning_rate, const AddressPtr &gradient,
                                     const AddressPtr &momentum) {
  inputs_.push_back(weight);
  inputs_.push_back(accumulate);
  inputs_.push_back(learning_rate);
  inputs_.push_back(gradient);
  inputs_.push_back(momentum);
}

void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
  // UpdateOptimInputValue takes a void *, so cast away constness; the data is only read.
  UpdateOptimInputValue<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
}

const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }
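
// Momentum is a dense optimizer: there is no separate indices input, so indices() falls back
// to the gradient input and is not expected to be used on the sparse code path.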
const AddressPtr &MomentumOptimInfo::gradient() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

const AddressPtr &MomentumOptimInfo::indices() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

size_t MomentumOptimInfo::grad_index() {
  size_t ps_grad_index = kMomentumPSSendIdx.at("grad");
  return ps_grad_index;
}
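
// Sparse Adam inputs, in kSparseAdamOriginIdx order. grads_offset_ and indices_offset_ start
// at the element counts already present in the buffers, so SparseOptimInfo::Accumulate appends
// newly pushed worker data after them.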
SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v,
                                         const AddressPtr &beta1_power, const AddressPtr &beta2_power,
                                         const AddressPtr &learning_rate, const AddressPtr &beta1,
                                         const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
                                         const AddressPtr &indices) {
  inputs_.push_back(weight);
  inputs_.push_back(m);
  inputs_.push_back(v);
  inputs_.push_back(beta1_power);
  inputs_.push_back(beta2_power);
  inputs_.push_back(learning_rate);
  inputs_.push_back(beta1);
  inputs_.push_back(beta2);
  inputs_.push_back(epsilon);
  inputs_.push_back(grad);
  inputs_.push_back(indices);
  grads_offset_ = grad->size / sizeof(float);
  indices_offset_ = indices->size / sizeof(int);
}

void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
  // Cast away constness once for all hyper-parameter updates; the payload is only read.
  float *data = const_cast<float *>(values.data());
  UpdateOptimInputValue<float>(kSparseAdam, "beta1_power", data, lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2_power", data, lens);
  UpdateOptimInputValue<float>(kSparseAdam, "lr", data, lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta1", data, lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2", data, lens);
  UpdateOptimInputValue<float>(kSparseAdam, "eps", data, lens);
}

const AddressPtr &SparseAdamOptimInfo::gradient() {
  size_t origin_grad_index = kSparseAdamOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

const AddressPtr &SparseAdamOptimInfo::indices() {
  size_t origin_indices_index = kSparseAdamOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  return inputs_[origin_indices_index];
}

bool SparseAdamOptimInfo::IsSparse() const { return true; }

size_t SparseAdamOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseAdamPSSendIdx.at("grad");
  return ps_grad_index;
}

size_t SparseAdamOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseAdamPSSendIdx.at("indices");
  return ps_indices_index;
}
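
// Sparse FTRL inputs, in kSparseFtrlOriginIdx order; the append cursors are primed the same
// way as for sparse Adam so accumulation appends after any pre-existing data.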
SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
                                         const AddressPtr &grad, const AddressPtr &indices) {
  inputs_.push_back(weight);
  inputs_.push_back(accum);
  inputs_.push_back(linear);
  inputs_.push_back(grad);
  inputs_.push_back(indices);
  grads_offset_ = grad->size / sizeof(float);
  indices_offset_ = indices->size / sizeof(int);
}

const AddressPtr &SparseFtrlOptimInfo::gradient() {
  size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

const AddressPtr &SparseFtrlOptimInfo::indices() {
  size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  return inputs_[origin_indices_index];
}

bool SparseFtrlOptimInfo::IsSparse() const { return true; }

size_t SparseFtrlOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseFtrlPSSendIdx.at("grad");
  return ps_grad_index;
}

size_t SparseFtrlOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseFtrlPSSendIdx.at("indices");
  return ps_indices_index;
}
}  // namespace ps
}  // namespace mindspore