You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

optimizer_info.h 4.6 kB

5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_
  17. #define MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_
  18. #include <vector>
  19. #include <string>
  20. #include "backend/kernel_compiler/kernel.h"
  21. #include "ps/common.h"
  22. namespace mindspore {
  23. namespace ps {
  24. using mindspore::kernel::AddressPtr;
// Base class holding the kernel input/workspace/output address lists for one
// optimizer instance on the parameter server. Concrete subclasses implement
// the dense/sparse accumulation, averaging and reset strategies.
class OptimizerInfo {
 public:
  OptimizerInfo() = default;
  virtual ~OptimizerInfo() = default;

  // Refreshes optimizer inputs from pushed values; no-op in the base class.
  virtual void Update(const Values &values, const Lengths &lengths) {}
  // Accumulates pushed gradient values into the stored buffers.
  virtual void Accumulate(const Values &values, const Lengths &lengths) = 0;
  // Averages accumulated gradients over the contributing workers; no-op here.
  virtual void ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
                           size_t rank_id) {}
  // Clears accumulation state between iterations; no-op in the base class.
  virtual void Reset() {}
  // Appends an extra workspace buffer to workspaces_.
  void AddWorkspace(const AddressPtr &workspace);

  // Accessors for the gradient / indices kernel inputs.
  virtual const AddressPtr &gradient() = 0;
  virtual const AddressPtr &indices() = 0;
  // Size of the indices buffer (meaningful for sparse optimizers).
  // NOTE(review): the top-level const on the return type is meaningless for a
  // scalar; kept as-is so the declaration matches the out-of-line definition.
  virtual const size_t indice_size() const;
  const std::vector<AddressPtr> &inputs();
  const std::vector<AddressPtr> &workspaces();
  const std::vector<AddressPtr> &outputs();
  virtual bool IsSparse() const;
  // Positions of the gradient / indices entries within inputs_.
  virtual size_t grad_index();
  virtual size_t indices_index();

 protected:
  // Copies `data` into the input named `input_name` of the optimizer kernel
  // identified by `optim_type`, using `lens` to locate the value segment.
  template <typename T>
  void UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
                             const Lengths &lens);

  std::vector<AddressPtr> inputs_;      // kernel input addresses
  std::vector<AddressPtr> workspaces_;  // kernel workspace addresses
  std::vector<AddressPtr> outputs_;     // kernel output addresses
};
// Optimizer info for dense gradients: pushed values are accumulated
// element-wise into fixed-size buffers.
class DenseOptimInfo : public OptimizerInfo {
 public:
  DenseOptimInfo() = default;
  ~DenseOptimInfo() override = default;

  void Accumulate(const Values &values, const Lengths &lens) override;
  void ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
                   size_t rank_id) override;
  void Reset() override;
};
// Optimizer info for sparse gradients: gradients and their row indices are
// appended into growing buffers, tracked by the offsets below.
class SparseOptimInfo : public OptimizerInfo {
 public:
  SparseOptimInfo() = default;
  ~SparseOptimInfo() override = default;

  void Accumulate(const Values &values, const Lengths &lens) override;
  void ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
                   size_t rank_id) override;
  void Reset() override;
  const size_t indice_size() const override;

 protected:
  size_t grads_offset_{0};    // write offset into the accumulated gradient buffer
  size_t indices_offset_{0};  // write offset into the accumulated indices buffer
};
  74. class MomentumOptimInfo : public DenseOptimInfo {
  75. public:
  76. MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, const AddressPtr &learning_rate,
  77. const AddressPtr &gradient, const AddressPtr &momentum);
  78. ~MomentumOptimInfo() override = default;
  79. void Update(const Values &values, const Lengths &lens) override;
  80. const AddressPtr &gradient();
  81. const AddressPtr &indices();
  82. size_t grad_index() override;
  83. };
  84. class SparseAdamOptimInfo : public SparseOptimInfo {
  85. public:
  86. SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, const AddressPtr &beta1_power,
  87. const AddressPtr &beta2_power, const AddressPtr &learning_rate, const AddressPtr &beta1,
  88. const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
  89. const AddressPtr &indices);
  90. ~SparseAdamOptimInfo() override = default;
  91. void Update(const Values &values, const Lengths &lens) override;
  92. const AddressPtr &gradient();
  93. const AddressPtr &indices();
  94. bool IsSparse() const override;
  95. size_t grad_index() override;
  96. size_t indices_index() override;
  97. };
  98. class SparseFtrlOptimInfo : public SparseOptimInfo {
  99. public:
  100. SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
  101. const AddressPtr &grad, const AddressPtr &indices);
  102. ~SparseFtrlOptimInfo() override = default;
  103. const AddressPtr &gradient();
  104. const AddressPtr &indices();
  105. bool IsSparse() const override;
  106. size_t grad_index() override;
  107. size_t indices_index() override;
  108. };
  109. } // namespace ps
  110. } // namespace mindspore
  111. #endif // MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_