You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

optimizer_info.cc 14 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
#include "ps/optimizer_info.h"
#include <functional>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "ps/util.h"
  22. namespace mindspore {
  23. namespace ps {
// Appends one workspace buffer for the optimizer kernel launch.
void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); }

// Accessors for the kernel launch buffers owned by this optimizer info.
const std::vector<AddressPtr> &OptimizerInfo::inputs() { return inputs_; }
const std::vector<AddressPtr> &OptimizerInfo::workspaces() { return workspaces_; }
const std::vector<AddressPtr> &OptimizerInfo::outputs() { return outputs_; }

// Base-class defaults: a dense optimizer has no sparse indices, so the
// index-related queries all report zero and IsSparse() is false.
bool OptimizerInfo::IsSparse() const { return false; }
const size_t OptimizerInfo::indice_size() const { return 0; }
size_t OptimizerInfo::grad_index() { return 0; }
size_t OptimizerInfo::indices_index() { return 0; }
  32. template <typename T>
  33. void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
  34. const Lengths &lens) {
  35. if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
  36. MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " in not supported.";
  37. }
  38. const OptimOriginIdx &origin_input_map = kOptimToOriginIdx.at(optim_type);
  39. const OptimPSSendIdx &ps_send_index_map = kOptimToPSSendIdx.at(optim_type);
  40. if (ps_send_index_map.count(input_name) == 0 || origin_input_map.count(input_name) == 0) {
  41. MS_LOG(EXCEPTION) << "Optimizer " << optim_type << " has no input for " << input_name;
  42. }
  43. size_t origin_index = origin_input_map.at(input_name);
  44. size_t ps_send_index = ps_send_index_map.at(input_name);
  45. if (ps_send_index > lens.size() || origin_index > inputs_.size()) {
  46. MS_LOG(EXCEPTION) << "Index is out of bound for optimizer " << optim_type << ", origin_index:" << origin_index
  47. << ", ps_send_index:" << ps_send_index;
  48. }
  49. EXC_IF_VEC_IDX_OOB(lens, ps_send_index);
  50. size_t size = lens[ps_send_index] * sizeof(T);
  51. size_t offset = std::accumulate(lens.begin(), lens.begin() + ps_send_index, 0, std::plus<int>());
  52. AddressPtr optim_input = inputs_[origin_index];
  53. MS_EXCEPTION_IF_NULL(optim_input);
  54. void *dst_data = optim_input->addr;
  55. T *src_data = reinterpret_cast<T *>(data) + offset;
  56. MS_EXCEPTION_IF_NULL(dst_data);
  57. MS_EXCEPTION_IF_NULL(src_data);
  58. int64_t ret = memcpy_s(optim_input->addr, optim_input->size, src_data, size);
  59. if (ret != 0) {
  60. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  61. return;
  62. }
  63. return;
  64. }
  65. void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  66. MS_EXCEPTION_IF_NULL(gradient()->addr);
  67. float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  68. size_t size = gradient()->size / sizeof(float);
  69. size_t grad_index = this->grad_index();
  70. size_t grad_offset = 0;
  71. for (size_t i = 0; i < grad_index; i++) {
  72. grad_offset += lengths[i];
  73. }
  74. float *grad_data = values.data() + grad_offset;
  75. CHECK_EQ(size, static_cast<size_t>(lengths[grad_index]));
  76. for (size_t i = 0; i < size; i++) {
  77. accum_grad_data[i] += grad_data[i];
  78. }
  79. }
  80. void DenseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &, size_t n, size_t, size_t) {
  81. if (n > 1) {
  82. float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  83. size_t size = gradient()->size / sizeof(float);
  84. for (size_t i = 0; i < size; i++) {
  85. accum_grad_data[i] /= n;
  86. }
  87. }
  88. }
  89. void DenseOptimInfo::Reset() {
  90. MS_EXCEPTION_IF_NULL(gradient()->addr);
  91. int64_t ret = memset_s(gradient()->addr, gradient()->size, 0x00, gradient()->size);
  92. if (ret != 0) {
  93. MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
  94. return;
  95. }
  96. }
// Appends one worker's sparse gradient (value rows + row indices) to the end of the
// accumulation buffers and grows the logical sizes; ComputeMean() later deduplicates
// and averages the concatenated result.
void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  // Append grad data to the end
  float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  MS_EXCEPTION_IF_NULL(accum_grad_data);
  size_t grad_index = this->grad_index();
  size_t grad_offset = 0;
  // Skip the fields packed before the gradient in 'values'.
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += lengths[i];
  }
  float *incr_grad_data = values.data() + grad_offset;
  MS_EXCEPTION_IF_NULL(incr_grad_data);
  size_t incr_grad_size = lengths[grad_index] * sizeof(float);
  size_t dst_size = incr_grad_size;
  size_t src_size = incr_grad_size;
  // NOTE(review): dst_size here is the incoming chunk size, not the remaining capacity of
  // the accumulation buffer, so memcpy_s cannot catch an overflow past grads_offset_ —
  // this relies on the buffer being pre-sized for the worst-case total. TODO confirm.
  void *dst_data = accum_grad_data + grads_offset_;
  void *src_data = incr_grad_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  int64_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }
  grads_offset_ += lengths[grad_index];
  gradient()->size += incr_grad_size;
  // Append indice data to the end
  int *accum_indices_data = reinterpret_cast<int *>(indices()->addr);
  MS_EXCEPTION_IF_NULL(accum_indices_data);
  size_t indices_index = this->indices_index();
  size_t indice_offset = 0;
  // Skip the fields packed before the indices in 'values'.
  for (size_t i = 0; i < indices_index; i++) {
    indice_offset += lengths[i];
  }
  // Indices travel in the same float-typed buffer; reinterpret as int.
  int *incr_indice_data = reinterpret_cast<int *>(values.data()) + indice_offset;
  MS_EXCEPTION_IF_NULL(incr_indice_data);
  size_t incr_indice_size = lengths[indices_index];
  size_t incr_indice_data_size = incr_indice_size * sizeof(int);
  dst_size = incr_indice_data_size;
  src_size = incr_indice_data_size;
  dst_data = accum_indices_data + indices_offset_;
  src_data = incr_indice_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  auto ret2 = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret2 != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
    return;
  }
  indices_offset_ += lengths[indices_index];
  indices()->size += incr_indice_data_size;
}
// Reduces the concatenated sparse gradients: deduplicates rows that share an index,
// rebases indices onto this server's local shard when sharded_, and divides the
// reduced values by the worker count 'n' to produce a mean.
void SparseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
                                  size_t rank_id) {
  MS_EXCEPTION_IF_NULL(gradient());
  MS_EXCEPTION_IF_NULL(indices());
  size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
  // Gradient elements per index row: both sizes are bytes, and the 4-byte float / 4-byte
  // int element sizes cancel out — presumably intentional; TODO confirm.
  size_t segment_size = gradient()->size / indices()->size;
  // Scratch output for the reduction; sized for the worst case (no duplicates).
  std::vector<float> new_grad(indices_size * segment_size);
  std::vector<int> new_indices(indices_size);
  mindspore::kernel::SparseGradient<int> unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size});
  // shapes[1] is expected to hold the embedding/input shape — TODO confirm against caller.
  if (shapes.size() < 2 || shapes[1].empty()) {
    MS_LOG(EXCEPTION) << "No input shape found";
  }
  auto input_shapes = shapes[1];
  if (input_shapes.size() == 0) {
    MS_LOG(EXCEPTION) << "Invalid input shapes";
  }
  size_t first_dim_size = input_shapes.front();
  size_t outer_dim_size = segment_size;
  // NOTE(review): this only logs ERROR and then continues into the reduction with
  // zero-sized dims — verify ReduceSparseGradient tolerates that.
  if (first_dim_size == 0 || outer_dim_size == 0) {
    MS_LOG(ERROR) << "Invalid first dim size";
  }
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  MS_EXCEPTION_IF_NULL(indices()->addr);
  float *grad_data = reinterpret_cast<float *>(gradient()->addr);
  int *indices_data = reinterpret_cast<int *>(indices()->addr);
  if (sharded_) {
    // Convert global row indices into indices local to this server's shard by
    // subtracting the row counts owned by all lower-ranked servers.
    size_t original_row_count = input_shapes.front();
    if (original_row_count > 0) {
      size_t offset = 0;
      std::map<int64_t, int64_t> rank_dims = Util::AllRankLocalShard(original_row_count, rank_id, server_num);
      for (size_t i = 0; i < rank_id; i++) {
        if (rank_dims.count(i) == 0) {
          MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
        }
        offset += rank_dims[i];
      }
      for (size_t i = 0; i < indices_size; i++) {
        indices_data[i] -= offset;
      }
    }
  }
  Util::ReduceSparseGradient(grad_data, indices_data, indices_size, segment_size, first_dim_size, outer_dim_size,
                             &unique_sparse_grad);
  // Copy the deduplicated result back over the accumulation buffers.
  int64_t reduced_grad_size = unique_sparse_grad.indices_size_ * segment_size * sizeof(float);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.value_);
  int64_t ret = memcpy_s(gradient()->addr, gradient()->size, unique_sparse_grad.value_, reduced_grad_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }
  int64_t reduced_indice_size = unique_sparse_grad.indices_size_ * sizeof(int);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.indices_);
  ret = memcpy_s(indices()->addr, indices()->size, unique_sparse_grad.indices_, reduced_indice_size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }
  // Shrink the logical sizes to the deduplicated extent.
  gradient()->size = reduced_grad_size;
  indices()->size = reduced_indice_size;
  // Average the reduced gradient values over the worker count.
  for (size_t i = 0; i < unique_sparse_grad.indices_size_ * segment_size; i++) {
    grad_data[i] = grad_data[i] / n;
  }
}
  211. void SparseOptimInfo::Reset() {
  212. gradient()->size = 0;
  213. indices()->size = 0;
  214. grads_offset_ = 0;
  215. indices_offset_ = 0;
  216. }
  217. MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate,
  218. const AddressPtr &learning_rate, const AddressPtr &gradient,
  219. const AddressPtr &momentum) {
  220. inputs_.push_back(weight);
  221. inputs_.push_back(accumulate);
  222. inputs_.push_back(learning_rate);
  223. inputs_.push_back(gradient);
  224. inputs_.push_back(momentum);
  225. }
// Refreshes the learning rate from the packed values the worker pushed this step.
void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
  UpdateOptimInputValue<float>(kApplyMomentum, "lr", values.data(), lens);
}
// Number of indices accumulated so far (element count, not bytes).
const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }
// Gradient input buffer of the momentum kernel.
const AddressPtr &MomentumOptimInfo::gradient() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

// Momentum is dense and has no indices input; this returns the gradient buffer
// instead — presumably so callers always get a valid address. NOTE(review):
// confirm no caller treats the result as real indices.
const AddressPtr &MomentumOptimInfo::indices() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

// Position of the gradient field inside the packed buffer the worker sends.
size_t MomentumOptimInfo::grad_index() {
  size_t ps_grad_index = kMomentumPSSendIdx.at("grad");
  return ps_grad_index;
}
  244. SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v,
  245. const AddressPtr &beta1_power, const AddressPtr &beta2_power,
  246. const AddressPtr &learning_rate, const AddressPtr &beta1,
  247. const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
  248. const AddressPtr &indices, bool sharded) {
  249. inputs_.push_back(weight);
  250. inputs_.push_back(m);
  251. inputs_.push_back(v);
  252. inputs_.push_back(beta1_power);
  253. inputs_.push_back(beta2_power);
  254. inputs_.push_back(learning_rate);
  255. inputs_.push_back(beta1);
  256. inputs_.push_back(beta2);
  257. inputs_.push_back(epsilon);
  258. inputs_.push_back(grad);
  259. inputs_.push_back(indices);
  260. grads_offset_ = grad->size / sizeof(float);
  261. indices_offset_ = indices->size / sizeof(int);
  262. sharded_ = sharded;
  263. }
// Refreshes the scalar hyper-parameters of sparse Adam from the worker's packed buffer.
void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
  UpdateOptimInputValue<float>(kSparseAdam, "beta1_power", values.data(), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2_power", values.data(), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "lr", values.data(), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta1", values.data(), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2", values.data(), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "eps", values.data(), lens);
}
// Gradient input buffer of the sparse Adam kernel.
const AddressPtr &SparseAdamOptimInfo::gradient() {
  size_t origin_grad_index = kSparseAdamOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

// Indices input buffer of the sparse Adam kernel.
const AddressPtr &SparseAdamOptimInfo::indices() {
  size_t origin_indices_index = kSparseAdamOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  return inputs_[origin_indices_index];
}

// Sparse Adam consumes row indices, so it is a sparse optimizer.
bool SparseAdamOptimInfo::IsSparse() const { return true; }

// Position of the gradient field in the packed buffer the worker sends.
size_t SparseAdamOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseAdamPSSendIdx.at("grad");
  return ps_grad_index;
}

// Position of the indices field in the packed buffer the worker sends.
size_t SparseAdamOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseAdamPSSendIdx.at("indices");
  return ps_indices_index;
}
  291. SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
  292. const AddressPtr &grad, const AddressPtr &indices, bool sharded) {
  293. inputs_.push_back(weight);
  294. inputs_.push_back(accum);
  295. inputs_.push_back(linear);
  296. inputs_.push_back(grad);
  297. inputs_.push_back(indices);
  298. grads_offset_ = grad->size / sizeof(float);
  299. indices_offset_ = indices->size / sizeof(int);
  300. sharded_ = sharded;
  301. }
// Gradient input buffer of the sparse FTRL kernel.
const AddressPtr &SparseFtrlOptimInfo::gradient() {
  size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  return inputs_[origin_grad_index];
}

// Indices input buffer of the sparse FTRL kernel.
const AddressPtr &SparseFtrlOptimInfo::indices() {
  size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  return inputs_[origin_indices_index];
}

// Sparse FTRL consumes row indices, so it is a sparse optimizer.
bool SparseFtrlOptimInfo::IsSparse() const { return true; }

// Position of the gradient field in the packed buffer the worker sends.
size_t SparseFtrlOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseFtrlPSSendIdx.at("grad");
  return ps_grad_index;
}

// Position of the indices field in the packed buffer the worker sends.
size_t SparseFtrlOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseFtrlPSSendIdx.at("indices");
  return ps_indices_index;
}
  321. } // namespace ps
  322. } // namespace mindspore