You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

costmodel.h 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_
  17. #define MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_
  18. #include <algorithm>
  19. #include <memory>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "parallel/strategy.h"
  24. #include "parallel/tensor_layout/tensor_info.h"
  25. namespace mindspore {
  26. namespace parallel {
  27. struct Decision;
  28. using OperatorName = std::string;
  29. using Attr = std::pair<std::string, ValuePtr>;
  30. using Param = std::pair<std::pair<std::string, ValuePtr>, int32_t>;
  31. using OperatorParams = std::vector<Param>;
  32. using OperatorAttrs = std::vector<Attr>;
  33. // OutPutInfo.fist: true if the operator's output is a tuple
  34. // OutPutInfo.second: elements number of the tuple output. Only meaningful if OutPutInfo.fist is true.
  35. using OutPutInfo = std::pair<bool, uint32_t>;
  36. using OutPutInfoVector = std::vector<OutPutInfo>;
  37. using OperatorArgs = std::pair<OperatorAttrs, OperatorParams>;
  38. using Operator = std::pair<OperatorName, OperatorArgs>;
  39. using OperatorVector = std::vector<Operator>;
  40. using RedistributionOpListPtr = std::shared_ptr<std::pair<OperatorVector, OutPutInfoVector>>;
  41. struct Cost {
  42. Cost();
  43. Cost(double computation, double commuication, const std::shared_ptr<Decision> &decision_ = nullptr)
  44. : computation_cost_(computation), communication_cost_(commuication), decision_ptr_(std::move(decision_)) {
  45. memory_with_reuse_ = 0.0;
  46. communication_without_parameter_ = 0.0;
  47. communication_with_partial_para_ = 0.0;
  48. communication_redis_forward_ = 0.0;
  49. communication_redis_backward_ = 0.0;
  50. }
  51. // 'memory_with_reuse_' calculates the peak memory usage in a training phase
  52. double memory_with_reuse_;
  53. // 'computation_cost_' models the training time of an iteration in a training phase
  54. double computation_cost_;
  55. // 'communication_cost_' includes communications from operators (forward and backward) and edges
  56. double communication_cost_;
  57. // communication_without_parameter_ = communication_cost_ - (backward communication from operators)
  58. double communication_without_parameter_;
  59. // communication_with_partial_para_ =
  60. // communication_without_parameter_ + COST_MODEL_GAMMA * (communication_cost_ - communication_without_parameter_ )
  61. double communication_with_partial_para_;
  62. double communication_redis_forward_;
  63. double communication_redis_backward_;
  64. std::shared_ptr<Decision> decision_ptr_;
  65. };
  66. using CostPtr = std::shared_ptr<Cost>;
  67. using CostPtrList = std::vector<std::shared_ptr<Cost>>;
  68. class StrategyWithCost {
  69. public:
  70. StrategyWithCost(StrategyPtr strategy, std::vector<TensorInfo> inputs_, std::vector<TensorInfo> outputs_)
  71. : strategy_ptr(std::move(strategy)), inputs_ptr(std::move(inputs_)), outputs_ptr(std::move(outputs_)) {}
  72. StrategyWithCost(const StrategyWithCost &swc) = delete;
  73. StrategyWithCost(StrategyWithCost &&swc)
  74. : strategy_ptr(swc.strategy_ptr),
  75. inputs_ptr(swc.inputs_ptr),
  76. outputs_ptr(swc.outputs_ptr),
  77. cost_list(swc.cost_list) {}
  78. ~StrategyWithCost() = default;
  79. StrategyPtr strategy_ptr;
  80. std::vector<TensorInfo> inputs_ptr;
  81. std::vector<TensorInfo> outputs_ptr;
  82. CostPtrList cost_list;
  83. };
  // Tag stored in Decision::type_; each concrete decision struct sets its own value in its constructor.
  enum DecisionType {
    OP_ELIMINATION = 0,        // set by OpEliminationDecision
    EDGE_ELIMINATION = 1,      // set by EdgeEliminationDecision
    MERGE_ELIMINATION = 2,     // set by MergeEliminationDecision
    CONTRACT_ELIMINATION = 3,  // set by ContractEliminationDecision
    TRIANGLE_ELIMINATION = 4,  // set by TriangleEliminationDecision
    STAR_ELIMINATION = 5,      // set by StarEliminationDecision
    FINAL_TYPE = 6,            // set by FinalDecision
    FINAL_SINGLE = 7           // set by FinalSingleDecision
  };
  94. struct Decision : public Base {
  95. ~Decision() override = default;
  96. DecisionType type_;
  97. };
  98. // 'OpEliminationDecision' is for the Operator Elimination in DP algorithm: u --> v --> w ==> u --> w.
  99. // This data structure records the strategy 'op_strategy_' for v, the edge cost 'left_cost_' for 'u --> v', the
  100. // operator cost 'middle_cost_' for v, and the edge cost 'right_cost_' for 'v --> w'
  101. struct OpEliminationDecision : public Decision {
  102. OpEliminationDecision(StrategyPtr op_stra, CostPtr l_cost, CostPtr m_cost, CostPtr r_cost)
  103. : op_strategy_(std::move(op_stra)),
  104. left_cost_(std::move(l_cost)),
  105. middle_cost_(std::move(m_cost)),
  106. right_cost_(std::move(r_cost)) {
  107. type_ = DecisionType::OP_ELIMINATION;
  108. }
  109. StrategyPtr op_strategy_;
  110. CostPtr left_cost_;
  111. CostPtr middle_cost_;
  112. CostPtr right_cost_;
  113. MS_DECLARE_PARENT(OpEliminationDecision, Decision);
  114. };
  115. /* 'EdgeEliminationDecision' is for the Edge Elimination in DP algorithm:
  116. ____
  117. / \
  118. u v ==> u --> v, which replace the multi-edges by a single edge.
  119. \____/
  120. This data structure records the cost list for all edges 'edges_cost_list_'
  121. */
  122. struct EdgeEliminationDecision : public Decision {
  123. explicit EdgeEliminationDecision(CostPtrList cost_list) : edges_cost_list_(std::move(cost_list)) {
  124. type_ = DecisionType::EDGE_ELIMINATION;
  125. }
  126. CostPtrList edges_cost_list_;
  127. MS_DECLARE_PARENT(EdgeEliminationDecision, Decision);
  128. };
  129. // 'MergeEliminationDecision' is for the Merge Elimination in DP algorithm:
  130. // w
  131. // |
  132. // | ==> u --> v
  133. // u --> v In the original graph, v has two alive incoming edges, w has one alive outgoing edge,
  134. // and w has zero alive incoming edges. After the Merge Elimination, the result graph contains only 'u -- >v'.
  135. // This data structure records the strategy 'merged_op_strategy_' for operator 'w',
  136. // the cost 'merged_op_cost_' for operator 'w', and the edge cost 'edge_cost_' for 'w --> v'.
  137. struct MergeEliminationDecision : public Decision {
  138. MergeEliminationDecision(StrategyPtr op_stra, CostPtr op_cost, CostPtr edge_c, StrategyPtr tar_op_stra,
  139. CostPtr target_op_c)
  140. : merged_op_strategy_(std::move(op_stra)),
  141. merged_op_cost_(std::move(op_cost)),
  142. edge_cost_(std::move(edge_c)),
  143. target_op_strategy_(std::move(tar_op_stra)),
  144. target_op_cost_(std::move(target_op_c)) {
  145. type_ = DecisionType::MERGE_ELIMINATION;
  146. }
  147. StrategyPtr merged_op_strategy_;
  148. CostPtr merged_op_cost_;
  149. CostPtr edge_cost_;
  150. StrategyPtr target_op_strategy_;
  151. CostPtr target_op_cost_;
  152. MS_DECLARE_PARENT(MergeEliminationDecision, Decision);
  153. };
  154. // 'ContractEliminationDecision' is for the Contract Elimination in DP algorithm:
  155. // u --> v
  156. // |
  157. // | ==> u --> w
  158. // w In the original graph, u has two alive outgoing edges, v has one alive incoming edge,
  159. // and v has zero outgoing edge. After the Contract Elimination, the result graph contains only 'u --> w'.
  160. // This data structure records the strategy 'contracted_op_strategy_' for operator 'v', the cost for
  161. // operator 'contracted_op_cost_', and the edge cost for 'edge_cost_'.
  162. struct ContractEliminationDecision : public Decision {
  163. ContractEliminationDecision(StrategyPtr contra_stra, CostPtr contra_op_cost, CostPtr edge_cost,
  164. StrategyPtr target_stra, CostPtr tar_cost)
  165. : contracted_op_strategy_(std::move(contra_stra)),
  166. contracted_op_cost_(std::move(contra_op_cost)),
  167. edge_cost_(std::move(edge_cost)),
  168. target_op_strategy_(std::move(target_stra)),
  169. target_cost_(std::move(tar_cost)) {
  170. type_ = DecisionType::CONTRACT_ELIMINATION;
  171. }
  172. StrategyPtr contracted_op_strategy_;
  173. CostPtr contracted_op_cost_;
  174. CostPtr edge_cost_;
  175. StrategyPtr target_op_strategy_;
  176. CostPtr target_cost_;
  177. MS_DECLARE_PARENT(ContractEliminationDecision, Decision);
  178. };
  179. /* 'TriangleEliminationDecision' is for the Triangle Elimination in DP algorithm:
  180. *
  181. * u
  182. * / \
  183. * / \
  184. * v --- w ==> v --- w In the original graph, u has 2 outgoing edges, v has 1 outgoing edge,
  185. * and w has 2 incoming edges, u can be eliminated into v.
  186. * 'eliminated_op_strategy_' is for u, 'eliminated_op_cost_' is for u, 'eliminated_left_edge_' is for edge u --> v,
  187. * 'eliminated_right_edge_' is for edge u --> w.
  188. */
  189. struct TriangleEliminationDecision : public Decision {
  190. TriangleEliminationDecision(StrategyPtr elimi_stra, CostPtr elimi_op_cost, CostPtr l_edge_cost, CostPtr r_edge_cost,
  191. StrategyPtr left_stra, CostPtr l_node_cost)
  192. : eliminated_op_strategy_(std::move(elimi_stra)),
  193. eliminated_op_cost_(std::move(elimi_op_cost)),
  194. left_edge_cost_(std::move(l_edge_cost)),
  195. right_edge_cost_(std::move(r_edge_cost)),
  196. left_node_strategy_(std::move(left_stra)),
  197. left_node_cost_(std::move(l_node_cost)) {
  198. type_ = DecisionType::TRIANGLE_ELIMINATION;
  199. }
  200. StrategyPtr eliminated_op_strategy_;
  201. CostPtr eliminated_op_cost_;
  202. CostPtr left_edge_cost_;
  203. CostPtr right_edge_cost_;
  204. StrategyPtr left_node_strategy_;
  205. CostPtr left_node_cost_;
  206. MS_DECLARE_PARENT(TriangleEliminationDecision, Decision);
  207. };
  208. /* 'StarEliminationDecision' is for the Star Elimination in DP algorithm:
  209. *
  210. * v <--- u ---> w ==> v w In the original graph, u has 0 incoming edges, and multiple outgoing edges.
  211. * In addition, v and w have other complicated connections, resulting in v and w can not be performed other
  212. * eliminations. After the StarElimination, u is merged into v, and the resulting graph is splitted into multiple
  213. * connected components.
  214. * NOTE: this elimination MUST be performed only when the above 5 operation cannot be applied.
  215. */
  216. struct StarEliminationDecision : public Decision {
  217. StarEliminationDecision(StrategyPtr elimi_op_stra, CostPtr elimi_op_cost, CostPtrList succ_edges_clist,
  218. std::vector<StrategyPtr> succ_ops_stra_list, CostPtrList succ_ops_clist)
  219. : eliminated_op_strategy_(std::move(elimi_op_stra)),
  220. eliminated_op_cost_(std::move(elimi_op_cost)),
  221. succ_edges_cost_list_(std::move(succ_edges_clist)),
  222. succ_ops_stra_list_(std::move(succ_ops_stra_list)),
  223. succ_ops_cost_list_(std::move(succ_ops_clist)) {
  224. type_ = DecisionType::STAR_ELIMINATION;
  225. }
  226. StrategyPtr eliminated_op_strategy_;
  227. CostPtr eliminated_op_cost_;
  228. CostPtrList succ_edges_cost_list_;
  229. std::vector<StrategyPtr> succ_ops_stra_list_;
  230. CostPtrList succ_ops_cost_list_;
  231. MS_DECLARE_PARENT(StarEliminationDecision, Decision);
  232. };
  233. // This data structure records the decision for the graph which contains two nodes: u --> v. This includes
  234. // the strategy 'u_strategy_' for 'u', the strategy 'v_strategy_' for 'v', the cost 'left_cost_' for 'u'.
  235. struct FinalDecision : public Decision {
  236. FinalDecision(StrategyPtr u_stra, StrategyPtr v_stra, CostPtr l_cost, CostPtr m_cost, CostPtr r_cost)
  237. : u_strategy_(std::move(u_stra)),
  238. v_strategy_(std::move(v_stra)),
  239. left_cost_(std::move(l_cost)),
  240. middle_cost_(std::move(m_cost)),
  241. right_cost_(std::move(r_cost)) {
  242. type_ = DecisionType::FINAL_TYPE;
  243. }
  244. StrategyPtr u_strategy_;
  245. StrategyPtr v_strategy_;
  246. CostPtr left_cost_;
  247. CostPtr middle_cost_;
  248. CostPtr right_cost_;
  249. MS_DECLARE_PARENT(FinalDecision, Decision);
  250. };
  251. // This data structure records the final decision for the graph containing a single node: u. This includes
  252. // the strategy 'u_strategy_' for 'u', the cost 'u_cost_' for 'u'.
  253. struct FinalSingleDecision : public Decision {
  254. FinalSingleDecision(StrategyPtr u_stra, CostPtr u_cost) : u_strategy_(std::move(u_stra)), u_cost_(std::move(u_cost)) {
  255. type_ = DecisionType::FINAL_SINGLE;
  256. }
  257. StrategyPtr u_strategy_;
  258. CostPtr u_cost_;
  259. MS_DECLARE_PARENT(FinalSingleDecision, Decision);
  260. };
  261. using DecisionPtr = std::shared_ptr<Decision>;
  262. using OpEliminationDecisionPtr = std::shared_ptr<OpEliminationDecision>;
  263. using EdgeEliminationDecisionPtr = std::shared_ptr<EdgeEliminationDecision>;
  264. using MergeEliminationDecisionPtr = std::shared_ptr<MergeEliminationDecision>;
  265. using ContractEliminationDecisionPtr = std::shared_ptr<ContractEliminationDecision>;
  266. using TriangleEliminationDecisionPtr = std::shared_ptr<TriangleEliminationDecision>;
  267. using StarEliminationDecisionPtr = std::shared_ptr<StarEliminationDecision>;
  268. using FinalDecisionPtr = std::shared_ptr<FinalDecision>;
  269. using FinalSingleDecisionPtr = std::shared_ptr<FinalSingleDecision>;
  270. void Simplify(CostPtrList *clist);
  271. void SimplifyForDreasingCommunicationWithPartialPara(CostPtrList *clist);
  272. void RefineForPracticalCost(const CostPtr &, bool is_redistribution);
  273. } // namespace parallel
  274. } // namespace mindspore
  275. #endif // MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_