You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

costmodel.h 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_
  17. #define MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_
  18. #include <algorithm>
  19. #include <memory>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "parallel/strategy.h"
  24. #include "parallel/tensor_layout/tensor_info.h"
  25. namespace mindspore {
  26. namespace parallel {
  27. struct Decision;
  28. using OperatorName = std::string;
  29. using Attr = std::pair<std::string, ValuePtr>;
  30. using Param = std::pair<std::pair<std::string, ValuePtr>, int32_t>;
  31. using OperatorParams = std::vector<Param>;
  32. using OperatorAttrs = std::vector<Attr>;
  33. // OutPutInfo.fist: true if the operator's output is a tuple
  34. // OutPutInfo.second: elements number of the tuple output. Only meaningful if OutPutInfo.fist is true.
  35. using OutPutInfo = std::pair<bool, uint32_t>;
  36. using OutPutInfoVector = std::vector<OutPutInfo>;
  37. using OperatorArgs = std::pair<OperatorAttrs, OperatorParams>;
  38. using Operator = std::pair<OperatorName, OperatorArgs>;
  39. using OperatorVector = std::vector<Operator>;
  40. using RedistributionOpListPtr = std::shared_ptr<std::pair<OperatorVector, OutPutInfoVector>>;
  41. struct Cost {
  42. Cost();
  43. Cost(double computation, double commuication, const std::shared_ptr<Decision> &decision_ = nullptr)
  44. : computation_cost_(computation), communication_cost_(commuication), decision_ptr_(std::move(decision_)) {
  45. memory_with_reuse_ = 0.0;
  46. communication_without_parameter_ = 0.0;
  47. communication_with_partial_para_ = 0.0;
  48. communication_redis_forward_ = 0.0;
  49. communication_redis_backward_ = 0.0;
  50. communication_forward_ = 0.0;
  51. }
  52. // 'memory_with_reuse_' calculates the peak memory usage in a training (or inference) phase
  53. double memory_with_reuse_;
  54. // 'computation_cost_' models the training time of an iteration in a training phase. Currently, this is calculated
  55. // by ONLY forward phase
  56. double computation_cost_;
  57. // 'communication_cost_' includes communications from operators (forward and backward) and edges (redistribution)
  58. double communication_cost_;
  59. // communication_without_parameter_ = communication_cost_ - (backward communication from operators)
  60. double communication_without_parameter_;
  61. // communication_with_partial_para_ =
  62. // communication_without_parameter_ + COST_MODEL_GAMMA * (communication_cost_ - communication_without_parameter_ )
  63. double communication_with_partial_para_;
  64. // communication_forward_ = communication cost from operators (only forward phase) and forward redistribution.
  65. double communication_forward_;
  66. double communication_redis_forward_;
  67. double communication_redis_backward_;
  68. std::shared_ptr<Decision> decision_ptr_;
  69. };
  70. using CostPtr = std::shared_ptr<Cost>;
  71. using CostPtrList = std::vector<std::shared_ptr<Cost>>;
  72. class StrategyWithCost {
  73. public:
  74. StrategyWithCost(StrategyPtr strategy, std::vector<TensorInfo> inputs_, std::vector<TensorInfo> outputs_)
  75. : strategy_ptr(std::move(strategy)), inputs_ptr(std::move(inputs_)), outputs_ptr(std::move(outputs_)) {}
  76. StrategyWithCost(const StrategyWithCost &swc) = delete;
  77. StrategyWithCost(StrategyWithCost &&swc)
  78. : strategy_ptr(swc.strategy_ptr),
  79. inputs_ptr(swc.inputs_ptr),
  80. outputs_ptr(swc.outputs_ptr),
  81. cost_list(swc.cost_list) {}
  82. ~StrategyWithCost() = default;
  83. StrategyPtr strategy_ptr;
  84. std::vector<TensorInfo> inputs_ptr;
  85. std::vector<TensorInfo> outputs_ptr;
  86. CostPtrList cost_list;
  87. };
  // Tag stored in 'Decision::type_'; each concrete decision struct sets its own enumerator in its constructor.
  // The values are spelled out explicitly and are the same ones the compiler assigned implicitly before.
  enum DecisionType {
    OP_ELIMINATION = 0,        // set by OpEliminationDecision
    EDGE_ELIMINATION = 1,      // set by EdgeEliminationDecision
    MERGE_ELIMINATION = 2,     // set by MergeEliminationDecision
    CONTRACT_ELIMINATION = 3,  // set by ContractEliminationDecision
    TRIANGLE_ELIMINATION = 4,  // set by TriangleEliminationDecision
    STAR_ELIMINATION = 5,      // set by StarEliminationDecision
    FINAL_TYPE = 6,            // set by FinalDecision
    FINAL_SINGLE = 7           // set by FinalSingleDecision
  };
  98. struct Decision : public Base {
  99. ~Decision() override = default;
  100. DecisionType type_;
  101. };
  102. // 'OpEliminationDecision' is for the Operator Elimination in DP algorithm: u --> v --> w ==> u --> w.
  103. // This data structure records the strategy 'op_strategy_' for v, the edge cost 'left_cost_' for 'u --> v', the
  104. // operator cost 'middle_cost_' for v, and the edge cost 'right_cost_' for 'v --> w'
  105. struct OpEliminationDecision : public Decision {
  106. OpEliminationDecision(StrategyPtr op_stra, CostPtr l_cost, CostPtr m_cost, CostPtr r_cost)
  107. : op_strategy_(std::move(op_stra)),
  108. left_cost_(std::move(l_cost)),
  109. middle_cost_(std::move(m_cost)),
  110. right_cost_(std::move(r_cost)) {
  111. type_ = DecisionType::OP_ELIMINATION;
  112. }
  113. StrategyPtr op_strategy_;
  114. CostPtr left_cost_;
  115. CostPtr middle_cost_;
  116. CostPtr right_cost_;
  117. MS_DECLARE_PARENT(OpEliminationDecision, Decision);
  118. };
  119. /* 'EdgeEliminationDecision' is for the Edge Elimination in DP algorithm:
  120. ____
  121. / \
  122. u v ==> u --> v, which replace the multi-edges by a single edge.
  123. \____/
  124. This data structure records the cost list for all edges 'edges_cost_list_'
  125. */
  126. struct EdgeEliminationDecision : public Decision {
  127. explicit EdgeEliminationDecision(CostPtrList cost_list) : edges_cost_list_(std::move(cost_list)) {
  128. type_ = DecisionType::EDGE_ELIMINATION;
  129. }
  130. CostPtrList edges_cost_list_;
  131. MS_DECLARE_PARENT(EdgeEliminationDecision, Decision);
  132. };
  133. // 'MergeEliminationDecision' is for the Merge Elimination in DP algorithm:
  134. // w
  135. // |
  136. // | ==> u --> v
  137. // u --> v In the original graph, v has two alive incoming edges, w has one alive outgoing edge,
  138. // and w has zero alive incoming edges. After the Merge Elimination, the result graph contains only 'u -- >v'.
  139. // This data structure records the strategy 'merged_op_strategy_' for operator 'w',
  140. // the cost 'merged_op_cost_' for operator 'w', and the edge cost 'edge_cost_' for 'w --> v'.
  141. struct MergeEliminationDecision : public Decision {
  142. MergeEliminationDecision(StrategyPtr op_stra, CostPtr op_cost, CostPtr edge_c, StrategyPtr tar_op_stra,
  143. CostPtr target_op_c)
  144. : merged_op_strategy_(std::move(op_stra)),
  145. merged_op_cost_(std::move(op_cost)),
  146. edge_cost_(std::move(edge_c)),
  147. target_op_strategy_(std::move(tar_op_stra)),
  148. target_op_cost_(std::move(target_op_c)) {
  149. type_ = DecisionType::MERGE_ELIMINATION;
  150. }
  151. StrategyPtr merged_op_strategy_;
  152. CostPtr merged_op_cost_;
  153. CostPtr edge_cost_;
  154. StrategyPtr target_op_strategy_;
  155. CostPtr target_op_cost_;
  156. MS_DECLARE_PARENT(MergeEliminationDecision, Decision);
  157. };
  158. // 'ContractEliminationDecision' is for the Contract Elimination in DP algorithm:
  159. // u --> v
  160. // |
  161. // | ==> u --> w
  162. // w In the original graph, u has two alive outgoing edges, v has one alive incoming edge,
  163. // and v has zero outgoing edge. After the Contract Elimination, the result graph contains only 'u --> w'.
  164. // This data structure records the strategy 'contracted_op_strategy_' for operator 'v', the cost for
  165. // operator 'contracted_op_cost_', and the edge cost for 'edge_cost_'.
  166. struct ContractEliminationDecision : public Decision {
  167. ContractEliminationDecision(StrategyPtr contra_stra, CostPtr contra_op_cost, CostPtr edge_cost,
  168. StrategyPtr target_stra, CostPtr tar_cost)
  169. : contracted_op_strategy_(std::move(contra_stra)),
  170. contracted_op_cost_(std::move(contra_op_cost)),
  171. edge_cost_(std::move(edge_cost)),
  172. target_op_strategy_(std::move(target_stra)),
  173. target_cost_(std::move(tar_cost)) {
  174. type_ = DecisionType::CONTRACT_ELIMINATION;
  175. }
  176. StrategyPtr contracted_op_strategy_;
  177. CostPtr contracted_op_cost_;
  178. CostPtr edge_cost_;
  179. StrategyPtr target_op_strategy_;
  180. CostPtr target_cost_;
  181. MS_DECLARE_PARENT(ContractEliminationDecision, Decision);
  182. };
  183. /* 'TriangleEliminationDecision' is for the Triangle Elimination in DP algorithm:
  184. *
  185. * u
  186. * / \
  187. * / \
  188. * v --- w ==> v --- w In the original graph, u has 2 outgoing edges, v has 1 outgoing edge,
  189. * and w has 2 incoming edges, u can be eliminated into v.
  190. * 'eliminated_op_strategy_' is for u, 'eliminated_op_cost_' is for u, 'eliminated_left_edge_' is for edge u --> v,
  191. * 'eliminated_right_edge_' is for edge u --> w.
  192. */
  193. struct TriangleEliminationDecision : public Decision {
  194. TriangleEliminationDecision(StrategyPtr elimi_stra, CostPtr elimi_op_cost, CostPtr l_edge_cost, CostPtr r_edge_cost,
  195. StrategyPtr left_stra, CostPtr l_node_cost, StrategyPtr right_stra)
  196. : eliminated_op_strategy_(std::move(elimi_stra)),
  197. eliminated_op_cost_(std::move(elimi_op_cost)),
  198. left_edge_cost_(std::move(l_edge_cost)),
  199. right_edge_cost_(std::move(r_edge_cost)),
  200. left_node_strategy_(std::move(left_stra)),
  201. left_node_cost_(std::move(l_node_cost)),
  202. right_node_strategy_(std::move(right_stra)) {
  203. type_ = DecisionType::TRIANGLE_ELIMINATION;
  204. }
  205. StrategyPtr eliminated_op_strategy_;
  206. CostPtr eliminated_op_cost_;
  207. CostPtr left_edge_cost_;
  208. CostPtr right_edge_cost_;
  209. StrategyPtr left_node_strategy_;
  210. CostPtr left_node_cost_;
  211. StrategyPtr right_node_strategy_;
  212. MS_DECLARE_PARENT(TriangleEliminationDecision, Decision);
  213. };
  214. /* 'StarEliminationDecision' is for the Star Elimination in DP algorithm:
  215. *
  216. * v <--- u ---> w ==> v w In the original graph, u has 0 incoming edges, and multiple outgoing edges.
  217. * In addition, v and w have other complicated connections, resulting in v and w can not be performed other
  218. * eliminations. After the StarElimination, u is merged into v, and the resulting graph is splitted into multiple
  219. * connected components.
  220. * NOTE: this elimination MUST be performed only when the above 5 operation cannot be applied.
  221. */
  222. struct StarEliminationDecision : public Decision {
  223. StarEliminationDecision(StrategyPtr elimi_op_stra, CostPtr elimi_op_cost, CostPtrList succ_edges_clist,
  224. std::vector<StrategyPtr> succ_ops_stra_list, CostPtrList succ_ops_clist)
  225. : eliminated_op_strategy_(std::move(elimi_op_stra)),
  226. eliminated_op_cost_(std::move(elimi_op_cost)),
  227. succ_edges_cost_list_(std::move(succ_edges_clist)),
  228. succ_ops_stra_list_(std::move(succ_ops_stra_list)),
  229. succ_ops_cost_list_(std::move(succ_ops_clist)) {
  230. type_ = DecisionType::STAR_ELIMINATION;
  231. }
  232. StrategyPtr eliminated_op_strategy_;
  233. CostPtr eliminated_op_cost_;
  234. CostPtrList succ_edges_cost_list_;
  235. std::vector<StrategyPtr> succ_ops_stra_list_;
  236. CostPtrList succ_ops_cost_list_;
  237. MS_DECLARE_PARENT(StarEliminationDecision, Decision);
  238. };
  239. // This data structure records the decision for the graph which contains two nodes: u --> v. This includes
  240. // the strategy 'u_strategy_' for 'u', the strategy 'v_strategy_' for 'v', the cost 'left_cost_' for 'u'.
  241. struct FinalDecision : public Decision {
  242. FinalDecision(StrategyPtr u_stra, StrategyPtr v_stra, CostPtr l_cost, CostPtr m_cost, CostPtr r_cost)
  243. : u_strategy_(std::move(u_stra)),
  244. v_strategy_(std::move(v_stra)),
  245. left_cost_(std::move(l_cost)),
  246. middle_cost_(std::move(m_cost)),
  247. right_cost_(std::move(r_cost)) {
  248. type_ = DecisionType::FINAL_TYPE;
  249. }
  250. StrategyPtr u_strategy_;
  251. StrategyPtr v_strategy_;
  252. CostPtr left_cost_;
  253. CostPtr middle_cost_;
  254. CostPtr right_cost_;
  255. MS_DECLARE_PARENT(FinalDecision, Decision);
  256. };
  257. // This data structure records the final decision for the graph containing a single node: u. This includes
  258. // the strategy 'u_strategy_' for 'u', the cost 'u_cost_' for 'u'.
  259. struct FinalSingleDecision : public Decision {
  260. FinalSingleDecision(StrategyPtr u_stra, CostPtr u_cost) : u_strategy_(std::move(u_stra)), u_cost_(std::move(u_cost)) {
  261. type_ = DecisionType::FINAL_SINGLE;
  262. }
  263. StrategyPtr u_strategy_;
  264. CostPtr u_cost_;
  265. MS_DECLARE_PARENT(FinalSingleDecision, Decision);
  266. };
  267. using DecisionPtr = std::shared_ptr<Decision>;
  268. using OpEliminationDecisionPtr = std::shared_ptr<OpEliminationDecision>;
  269. using EdgeEliminationDecisionPtr = std::shared_ptr<EdgeEliminationDecision>;
  270. using MergeEliminationDecisionPtr = std::shared_ptr<MergeEliminationDecision>;
  271. using ContractEliminationDecisionPtr = std::shared_ptr<ContractEliminationDecision>;
  272. using TriangleEliminationDecisionPtr = std::shared_ptr<TriangleEliminationDecision>;
  273. using StarEliminationDecisionPtr = std::shared_ptr<StarEliminationDecision>;
  274. using FinalDecisionPtr = std::shared_ptr<FinalDecision>;
  275. using FinalSingleDecisionPtr = std::shared_ptr<FinalSingleDecision>;
  276. void Simplify(CostPtrList *clist);
  277. void SimplifyForDecreasingCommunicationForward(CostPtrList *clist);
  278. void SimplifyForDecreasingCommunicationWithPartialPara(CostPtrList *clist);
  279. void RefineForPracticalCost(const CostPtr &, bool is_redistribution);
  280. } // namespace parallel
  281. } // namespace mindspore
  282. #endif // MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_