You cannot select more than 25 topics. A topic must start with a Chinese character, a letter or a number, may include dashes ('-'), and can be up to 35 characters long.

optimizer_info.cc 8.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "frontend/parallel/ps/optimizer_info.h"
  17. #include <memory>
  18. namespace mindspore {
  19. namespace parallel {
  20. namespace ps {
  21. void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); }
  22. const std::vector<AddressPtr> &OptimizerInfo::inputs() { return inputs_; }
  23. const std::vector<AddressPtr> &OptimizerInfo::workspaces() { return workspaces_; }
  24. const std::vector<AddressPtr> &OptimizerInfo::outputs() { return outputs_; }
  25. bool OptimizerInfo::IsSparse() const { return false; }
  26. size_t OptimizerInfo::grad_index() { return 0; }
  27. size_t OptimizerInfo::indices_index() { return 0; }
  28. void OptimizerInfo::UpdateWeight(const WeightPtr &weight) {
  29. AddressPtr weight_addr = std::make_shared<kernel::Address>();
  30. weight_addr->addr = weight->data();
  31. weight_addr->size = weight->size();
  32. inputs_[0] = weight_addr;
  33. }
  34. void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  35. float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  36. size_t size = gradient()->size / sizeof(float);
  37. size_t grad_index = this->grad_index();
  38. size_t grad_offset = 0;
  39. for (size_t i = 0; i < grad_index; i++) {
  40. grad_offset += lengths[i];
  41. }
  42. float *grad_data = values.data() + grad_offset;
  43. CHECK_EQ(size, static_cast<size_t>(lengths[grad_index]));
  44. for (size_t i = 0; i < size; i++) {
  45. accum_grad_data[i] += grad_data[i];
  46. }
  47. }
  48. void DenseOptimInfo::ComputeMean(size_t n) {
  49. if (n > 1) {
  50. float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  51. size_t size = gradient()->size / sizeof(float);
  52. for (size_t i = 0; i < size; i++) {
  53. accum_grad_data[i] /= n;
  54. }
  55. }
  56. }
  57. void DenseOptimInfo::Reset() { memset_s(gradient()->addr, gradient()->size, 0x00, gradient()->size); }
  58. void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  59. // Append grad data to the end
  60. float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  61. size_t grad_index = this->grad_index();
  62. size_t grad_offset = 0;
  63. for (size_t i = 0; i < grad_index; i++) {
  64. grad_offset += lengths[i];
  65. }
  66. float *incr_grad_data = values.data() + grad_offset;
  67. size_t incr_grad_size = lengths[grad_index] * sizeof(float);
  68. auto ret = memcpy_s(accum_grad_data + grads_offset_, incr_grad_size, incr_grad_data, incr_grad_size);
  69. if (ret != 0) {
  70. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  71. }
  72. grads_offset_ += lengths[grad_index];
  73. gradient()->size += incr_grad_size;
  74. // Append indice data to the end
  75. int *accum_indices_data = reinterpret_cast<int *>(indices()->addr);
  76. size_t indices_index = this->indices_index();
  77. size_t indice_offset = 0;
  78. for (size_t i = 0; i < indices_index; i++) {
  79. indice_offset += lengths[i];
  80. }
  81. int *incr_indice_data = reinterpret_cast<int *>(values.data() + indice_offset);
  82. size_t incr_indice_size = lengths[indices_index] * sizeof(float);
  83. auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_size, incr_indice_data, incr_indice_size);
  84. if (ret2 != 0) {
  85. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
  86. }
  87. indices_offset_ += lengths[indices_index];
  88. indices()->size += incr_indice_size;
  89. }
  90. void SparseOptimInfo::Reset() {
  91. auto &gradient = this->gradient();
  92. gradient->size = 0;
  93. auto &indices = this->indices();
  94. indices->size = 0;
  95. grads_offset_ = 0;
  96. indices_offset_ = 0;
  97. }
  98. MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate,
  99. const AddressPtr &learning_rate, const AddressPtr &gradient,
  100. const AddressPtr &momentum) {
  101. inputs_.push_back(weight);
  102. inputs_.push_back(accumulate);
  103. inputs_.push_back(learning_rate);
  104. inputs_.push_back(gradient);
  105. inputs_.push_back(momentum);
  106. }
  107. void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
  108. size_t lr_offset = 0;
  109. float *lr = values.data() + lr_offset;
  110. auto ret = memcpy_s(inputs_[2]->addr, sizeof(float), lr, sizeof(float));
  111. if (ret != 0) {
  112. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  113. }
  114. }
  115. const AddressPtr &MomentumOptimInfo::gradient() { return inputs_[3]; }
  116. const AddressPtr &MomentumOptimInfo::indices() { return inputs_[3]; }
  117. size_t MomentumOptimInfo::grad_index() { return 1; }
  118. SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v,
  119. const AddressPtr &beta1_power, const AddressPtr &beta2_power,
  120. const AddressPtr &learning_rate, const AddressPtr &beta1,
  121. const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
  122. const AddressPtr &indices) {
  123. inputs_.push_back(weight);
  124. inputs_.push_back(m);
  125. inputs_.push_back(v);
  126. inputs_.push_back(beta1_power);
  127. inputs_.push_back(beta2_power);
  128. inputs_.push_back(learning_rate);
  129. inputs_.push_back(beta1);
  130. inputs_.push_back(beta2);
  131. inputs_.push_back(epsilon);
  132. inputs_.push_back(grad);
  133. inputs_.push_back(indices);
  134. grads_offset_ = grad->size / sizeof(float);
  135. indices_offset_ = indices->size / sizeof(int);
  136. }
  137. void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
  138. float *data_ptr = values.data();
  139. int offset = 0;
  140. AddressPtr &beta1_power = inputs_[3];
  141. int size = lens[0];
  142. int bytes = sizeof(float);
  143. auto ret = memcpy_s(beta1_power->addr, size * bytes, data_ptr + offset, size * bytes);
  144. if (ret != 0) {
  145. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  146. }
  147. offset += size;
  148. AddressPtr &beta2_power = inputs_[4];
  149. size = lens[1];
  150. ret = memcpy_s(beta2_power->addr, size * bytes, data_ptr + offset, size * bytes);
  151. if (ret != 0) {
  152. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  153. }
  154. offset += size;
  155. AddressPtr &lr = inputs_[5];
  156. size = lens[2];
  157. ret = memcpy_s(lr->addr, size * bytes, data_ptr + offset, size * bytes);
  158. if (ret != 0) {
  159. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  160. }
  161. offset += size;
  162. AddressPtr &beta1 = inputs_[6];
  163. size = lens[3];
  164. ret = memcpy_s(beta1->addr, size * bytes, data_ptr + offset, size * bytes);
  165. if (ret != 0) {
  166. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  167. }
  168. offset += size;
  169. AddressPtr &beta2 = inputs_[7];
  170. size = lens[4];
  171. ret = memcpy_s(beta2->addr, size * bytes, data_ptr + offset, size * bytes);
  172. if (ret != 0) {
  173. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  174. }
  175. offset += size;
  176. AddressPtr &epsilon = inputs_[8];
  177. size = lens[5];
  178. ret = memcpy_s(epsilon->addr, size * bytes, data_ptr + offset, size * bytes);
  179. if (ret != 0) {
  180. MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  181. }
  182. }
  183. const AddressPtr &SparseAdamOptimInfo::gradient() { return inputs_[9]; }
  184. const AddressPtr &SparseAdamOptimInfo::indices() { return inputs_[10]; }
  185. bool SparseAdamOptimInfo::IsSparse() const { return true; }
  186. size_t SparseAdamOptimInfo::grad_index() { return 6; }
  187. size_t SparseAdamOptimInfo::indices_index() { return 7; }
  188. SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
  189. const AddressPtr &grad, const AddressPtr &indices) {
  190. inputs_.push_back(weight);
  191. inputs_.push_back(accum);
  192. inputs_.push_back(linear);
  193. inputs_.push_back(grad);
  194. inputs_.push_back(indices);
  195. grads_offset_ = grad->size / sizeof(float);
  196. indices_offset_ = indices->size / sizeof(int);
  197. }
  198. const AddressPtr &SparseFtrlOptimInfo::gradient() { return inputs_[3]; }
  199. const AddressPtr &SparseFtrlOptimInfo::indices() { return inputs_[4]; }
  200. bool SparseFtrlOptimInfo::IsSparse() const { return true; }
  201. size_t SparseFtrlOptimInfo::grad_index() { return 0; }
  202. size_t SparseFtrlOptimInfo::indices_index() { return 1; }
  203. } // namespace ps
  204. } // namespace parallel
  205. } // namespace mindspore