You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

costmodel.h 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_
  17. #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_
  18. #include <algorithm>
  19. #include <memory>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "frontend/parallel/strategy.h"
  24. #include "frontend/parallel/tensor_layout/tensor_info.h"
  25. #include "frontend/parallel/costmodel_context.h"
  26. namespace mindspore {
  27. namespace parallel {
  28. struct Decision;
  29. using OperatorName = std::string;
  30. using Attr = std::pair<std::string, ValuePtr>;
  31. using Param = std::pair<std::pair<std::string, ValuePtr>, int64_t>;
  32. using OperatorParams = std::vector<Param>;
  33. using OperatorAttrs = std::vector<Attr>;
  34. // OutPutInfo.fist: true if the operator's output is a tuple
  35. // OutPutInfo.second: elements number of the tuple output. Only meaningful if OutPutInfo.fist is true.
  36. using OutPutInfo = std::pair<bool, uint64_t>;
  37. using OutPutInfoVector = std::vector<OutPutInfo>;
  38. using OperatorArgs = std::pair<OperatorAttrs, OperatorParams>;
  39. using Operator = std::pair<OperatorName, OperatorArgs>;
  40. using OperatorVector = std::vector<Operator>;
  41. using RedistributionOpListPtr = std::shared_ptr<std::pair<OperatorVector, OutPutInfoVector>>;
  42. struct Cost {
  43. Cost();
  44. Cost(double computation, double communication, const std::shared_ptr<Decision> &decision_ = nullptr)
  45. : computation_cost_(computation), communication_cost_(communication), decision_ptr_(std::move(decision_)) {
  46. memory_with_reuse_ = 0.0;
  47. communication_without_parameter_ = 0.0;
  48. communication_with_partial_para_ = 0.0;
  49. communication_redis_forward_ = 0.0;
  50. communication_redis_backward_ = 0.0;
  51. communication_forward_ = 0.0;
  52. }
  53. // 'memory_with_reuse_' calculates the peak memory usage in a training (or inference) phase
  54. double memory_with_reuse_;
  55. // 'computation_cost_' models the training time of an iteration in a training phase. Currently, this is calculated
  56. // by ONLY forward phase
  57. double computation_cost_;
  58. // 'communication_cost_' includes communications from operators (forward and backward) and edges (redistribution)
  59. double communication_cost_;
  60. // communication_without_parameter_ = communication_cost_ - (backward communication from operators)
  61. double communication_without_parameter_;
  62. // communication_with_partial_para_ =
  63. // communication_without_parameter_ + COST_MODEL_GAMMA * (communication_cost_ - communication_without_parameter_ )
  64. double communication_with_partial_para_;
  65. // communication_forward_ = communication cost from operators (only forward phase) and forward redistribution.
  66. double communication_forward_;
  67. double communication_redis_forward_;
  68. double communication_redis_backward_;
  69. std::shared_ptr<Decision> decision_ptr_;
  70. };
  71. using CostPtr = std::shared_ptr<Cost>;
  72. using CostPtrList = std::vector<std::shared_ptr<Cost>>;
  73. class StrategyWithCost {
  74. public:
  75. StrategyWithCost(StrategyPtr strategy, std::vector<TensorInfo> inputs_, std::vector<TensorInfo> outputs_)
  76. : strategy_ptr(std::move(strategy)), inputs_ptr(std::move(inputs_)), outputs_ptr(std::move(outputs_)) {}
  77. StrategyWithCost(StrategyPtr strategy, CostPtrList c_list)
  78. : strategy_ptr(std::move(strategy)), cost_list(std::move(c_list)) {}
  79. StrategyWithCost(const StrategyWithCost &swc) = delete;
  80. StrategyWithCost(StrategyWithCost &&swc)
  81. : strategy_ptr(swc.strategy_ptr),
  82. inputs_ptr(swc.inputs_ptr),
  83. outputs_ptr(swc.outputs_ptr),
  84. cost_list(swc.cost_list) {}
  85. ~StrategyWithCost() = default;
  86. StrategyPtr strategy_ptr;
  87. std::vector<TensorInfo> inputs_ptr;
  88. std::vector<TensorInfo> outputs_ptr;
  89. CostPtrList cost_list;
  90. };
  91. enum DecisionType {
  92. OP_ELIMINATION,
  93. EDGE_ELIMINATION,
  94. MERGE_ELIMINATION,
  95. CONTRACT_ELIMINATION,
  96. SOURCE_ELIMINATION,
  97. TRIANGLE_ELIMINATION,
  98. STAR_ELIMINATION,
  99. FINAL_TYPE,
  100. FINAL_SINGLE
  101. };
  102. struct Decision : public Base {
  103. ~Decision() override = default;
  104. DecisionType type_;
  105. };
  106. // 'OpEliminationDecision' is for the Operator Elimination in DP algorithm: u --> v --> w ==> u --> w.
  107. // This data structure records the strategy 'op_strategy_' for v, the edge cost 'left_cost_' for 'u --> v', the
  108. // operator cost 'middle_cost_' for v, and the edge cost 'right_cost_' for 'v --> w'
  109. struct OpEliminationDecision : public Decision {
  110. OpEliminationDecision(StrategyPtr op_stra, CostPtr l_cost, CostPtr m_cost, CostPtr r_cost)
  111. : op_strategy_(std::move(op_stra)),
  112. left_cost_(std::move(l_cost)),
  113. middle_cost_(std::move(m_cost)),
  114. right_cost_(std::move(r_cost)) {
  115. type_ = DecisionType::OP_ELIMINATION;
  116. }
  117. StrategyPtr op_strategy_;
  118. CostPtr left_cost_;
  119. CostPtr middle_cost_;
  120. CostPtr right_cost_;
  121. MS_DECLARE_PARENT(OpEliminationDecision, Decision);
  122. };
  123. /* 'EdgeEliminationDecision' is for the Edge Elimination in DP algorithm:
  124. ____
  125. / \
  126. u v ==> u --> v, which replace the multi-edges by a single edge.
  127. \____/
  128. This data structure records the cost list for all edges 'edges_cost_list_'
  129. */
  130. struct EdgeEliminationDecision : public Decision {
  131. explicit EdgeEliminationDecision(CostPtrList cost_list) : edges_cost_list_(std::move(cost_list)) {
  132. type_ = DecisionType::EDGE_ELIMINATION;
  133. }
  134. CostPtrList edges_cost_list_;
  135. MS_DECLARE_PARENT(EdgeEliminationDecision, Decision);
  136. };
  137. // 'MergeEliminationDecision' is for the Merge Elimination in DP algorithm:
  138. // w
  139. // |
  140. // | ==> u --> v
  141. // u --> v In the original graph, v has two alive incoming edges, w has one alive outgoing edge,
  142. // and w has zero alive incoming edges. After the Merge Elimination, the result graph contains only 'u -- >v'.
  143. // This data structure records the strategy 'merged_op_strategy_' for operator 'w',
  144. // the cost 'merged_op_cost_' for operator 'w', and the edge cost 'edge_cost_' for 'w --> v'.
  145. struct MergeEliminationDecision : public Decision {
  146. MergeEliminationDecision(StrategyPtr op_stra, CostPtr op_cost, CostPtr edge_c, StrategyPtr tar_op_stra,
  147. CostPtr target_op_c)
  148. : merged_op_strategy_(std::move(op_stra)),
  149. merged_op_cost_(std::move(op_cost)),
  150. edge_cost_(std::move(edge_c)),
  151. target_op_strategy_(std::move(tar_op_stra)),
  152. target_op_cost_(std::move(target_op_c)) {
  153. type_ = DecisionType::MERGE_ELIMINATION;
  154. }
  155. StrategyPtr merged_op_strategy_;
  156. CostPtr merged_op_cost_;
  157. CostPtr edge_cost_;
  158. StrategyPtr target_op_strategy_;
  159. CostPtr target_op_cost_;
  160. MS_DECLARE_PARENT(MergeEliminationDecision, Decision);
  161. };
  162. // 'ContractEliminationDecision' is for the Contract Elimination in DP algorithm:
  163. // u --> v
  164. // |
  165. // | ==> u --> w
  166. // w In the original graph, u has two alive outgoing edges, v has one alive incoming edge,
  167. // and v has zero outgoing edge. After the Contract Elimination, the result graph contains only 'u --> w'.
  168. // This data structure records the strategy 'contracted_op_strategy_' for operator 'v', the cost for
  169. // operator 'contracted_op_cost_', and the edge cost for 'edge_cost_'.
  170. struct ContractEliminationDecision : public Decision {
  171. ContractEliminationDecision(StrategyPtr contra_stra, CostPtr contra_op_cost, CostPtr edge_cost,
  172. StrategyPtr target_stra, CostPtr tar_cost)
  173. : contracted_op_strategy_(std::move(contra_stra)),
  174. contracted_op_cost_(std::move(contra_op_cost)),
  175. edge_cost_(std::move(edge_cost)),
  176. target_op_strategy_(std::move(target_stra)),
  177. target_cost_(std::move(tar_cost)) {
  178. type_ = DecisionType::CONTRACT_ELIMINATION;
  179. }
  180. StrategyPtr contracted_op_strategy_;
  181. CostPtr contracted_op_cost_;
  182. CostPtr edge_cost_;
  183. StrategyPtr target_op_strategy_;
  184. CostPtr target_cost_;
  185. MS_DECLARE_PARENT(ContractEliminationDecision, Decision);
  186. };
  187. /* 'SourceEliminationDecision' is for the source Elimination in DP algorithm:
  188. * 1 1,5
  189. * / \ // \\
  190. * / \ // \\
  191. * / \ // \\
  192. * / \ // \\
  193. * 2 <- 5 -> 3 ==> 2 3
  194. * \ / \ /
  195. * \ / \ /
  196. * \ / \ /
  197. * 4 4
  198. *
  199. * In the original graph, '1' has two alive outgoing edges and no incoming edges. '5' has two alive outgoing edges and
  200. * no incoming edges. '4' has two alive incoming edges and no outgoing edges. Source Elimination will merge '5' into
  201. * '1' new edges are generated to replace the old ones incident to '1' and '5'.
  202. *
  203. */
  204. struct SourceEliminationDecision : public Decision {
  205. SourceEliminationDecision(StrategyPtr op1_stra, CostPtr op1_c, StrategyPtr op2_stra, CostPtr op2_c)
  206. : op1_strategy_(std::move(op1_stra)),
  207. op1_cost_(std::move(op1_c)),
  208. op2_strategy_(std::move(op2_stra)),
  209. op2_cost_(std::move(op2_c)) {
  210. type_ = DecisionType::SOURCE_ELIMINATION;
  211. }
  212. StrategyPtr op1_strategy_;
  213. CostPtr op1_cost_;
  214. StrategyPtr op2_strategy_;
  215. CostPtr op2_cost_;
  216. MS_DECLARE_PARENT(SourceEliminationDecision, Decision);
  217. };
  218. /* 'TriangleEliminationDecision' is for the Triangle Elimination in DP algorithm:
  219. *
  220. * u
  221. * / \
  222. * / \
  223. * v --- w ==> v --- w In the original graph, u has 2 outgoing edges, v has 1 outgoing edge,
  224. * and w has 2 incoming edges, u can be eliminated into v.
  225. * 'eliminated_op_strategy_' is for u, 'eliminated_op_cost_' is for u, 'eliminated_left_edge_' is for edge u --> v,
  226. * 'eliminated_right_edge_' is for edge u --> w.
  227. */
  228. struct TriangleEliminationDecision : public Decision {
  229. TriangleEliminationDecision(StrategyPtr elimi_stra, CostPtr elimi_op_cost, CostPtr l_edge_cost, CostPtr r_edge_cost,
  230. StrategyPtr left_stra, CostPtr l_node_cost, StrategyPtr right_stra, CostPtr r_node_cost)
  231. : eliminated_op_strategy_(std::move(elimi_stra)),
  232. eliminated_op_cost_(std::move(elimi_op_cost)),
  233. left_edge_cost_(std::move(l_edge_cost)),
  234. right_edge_cost_(std::move(r_edge_cost)),
  235. left_node_strategy_(std::move(left_stra)),
  236. left_node_cost_(std::move(l_node_cost)),
  237. right_node_strategy_(std::move(right_stra)),
  238. right_node_cost_(std::move(r_node_cost)) {
  239. type_ = DecisionType::TRIANGLE_ELIMINATION;
  240. }
  241. StrategyPtr eliminated_op_strategy_;
  242. CostPtr eliminated_op_cost_;
  243. CostPtr left_edge_cost_;
  244. CostPtr right_edge_cost_;
  245. StrategyPtr left_node_strategy_;
  246. CostPtr left_node_cost_;
  247. StrategyPtr right_node_strategy_;
  248. CostPtr right_node_cost_;
  249. MS_DECLARE_PARENT(TriangleEliminationDecision, Decision);
  250. };
  251. /* 'StarEliminationDecision' is for the Star Elimination in DP algorithm:
  252. *
  253. * v <--- u ---> w ==> v w In the original graph, u has 0 incoming edges, and multiple outgoing edges.
  254. * In addition, v and w have other complicated connections, resulting in v and w can not be performed other
  255. * eliminations. After the StarElimination, u is merged into v, and the resulting graph is split into multiple
  256. * connected components.
  257. * NOTE: this elimination MUST be performed only when the above 5 operation cannot be applied.
  258. */
  259. struct StarEliminationDecision : public Decision {
  260. StarEliminationDecision(StrategyPtr elimi_op_stra, CostPtr elimi_op_cost, CostPtrList succ_edges_clist,
  261. std::vector<StrategyPtr> succ_ops_stra_list, CostPtrList succ_ops_clist)
  262. : eliminated_op_strategy_(std::move(elimi_op_stra)),
  263. eliminated_op_cost_(std::move(elimi_op_cost)),
  264. succ_edges_cost_list_(std::move(succ_edges_clist)),
  265. succ_ops_stra_list_(std::move(succ_ops_stra_list)),
  266. succ_ops_cost_list_(std::move(succ_ops_clist)) {
  267. type_ = DecisionType::STAR_ELIMINATION;
  268. }
  269. StrategyPtr eliminated_op_strategy_;
  270. CostPtr eliminated_op_cost_;
  271. CostPtrList succ_edges_cost_list_;
  272. std::vector<StrategyPtr> succ_ops_stra_list_;
  273. CostPtrList succ_ops_cost_list_;
  274. MS_DECLARE_PARENT(StarEliminationDecision, Decision);
  275. };
  276. // This data structure records the decision for the graph which contains two nodes: u --> v. This includes
  277. // the strategy 'u_strategy_' for 'u', the strategy 'v_strategy_' for 'v', the cost 'left_cost_' for 'u'.
  278. struct FinalDecision : public Decision {
  279. FinalDecision(StrategyPtr u_stra, StrategyPtr v_stra, CostPtr l_cost, CostPtr m_cost, CostPtr r_cost)
  280. : u_strategy_(std::move(u_stra)),
  281. v_strategy_(std::move(v_stra)),
  282. left_cost_(std::move(l_cost)),
  283. middle_cost_(std::move(m_cost)),
  284. right_cost_(std::move(r_cost)) {
  285. type_ = DecisionType::FINAL_TYPE;
  286. }
  287. StrategyPtr u_strategy_;
  288. StrategyPtr v_strategy_;
  289. CostPtr left_cost_;
  290. CostPtr middle_cost_;
  291. CostPtr right_cost_;
  292. MS_DECLARE_PARENT(FinalDecision, Decision);
  293. };
  294. // This data structure records the final decision for the graph containing a single node: u. This includes
  295. // the strategy 'u_strategy_' for 'u', the cost 'u_cost_' for 'u'.
  296. struct FinalSingleDecision : public Decision {
  297. FinalSingleDecision(StrategyPtr u_stra, CostPtr u_cost) : u_strategy_(std::move(u_stra)), u_cost_(std::move(u_cost)) {
  298. type_ = DecisionType::FINAL_SINGLE;
  299. }
  300. StrategyPtr u_strategy_;
  301. CostPtr u_cost_;
  302. MS_DECLARE_PARENT(FinalSingleDecision, Decision);
  303. };
  // Shared-pointer aliases: one for the Decision base and one for each concrete decision struct.
  304. using DecisionPtr = std::shared_ptr<Decision>;
  305. using OpEliminationDecisionPtr = std::shared_ptr<OpEliminationDecision>;
  306. using EdgeEliminationDecisionPtr = std::shared_ptr<EdgeEliminationDecision>;
  307. using MergeEliminationDecisionPtr = std::shared_ptr<MergeEliminationDecision>;
  308. using ContractEliminationDecisionPtr = std::shared_ptr<ContractEliminationDecision>;
  309. using SourceEliminationDecisionPtr = std::shared_ptr<SourceEliminationDecision>;
  310. using TriangleEliminationDecisionPtr = std::shared_ptr<TriangleEliminationDecision>;
  311. using StarEliminationDecisionPtr = std::shared_ptr<StarEliminationDecision>;
  312. using FinalDecisionPtr = std::shared_ptr<FinalDecision>;
  313. using FinalSingleDecisionPtr = std::shared_ptr<FinalSingleDecision>;
  // Free functions operating on costs; only the declarations are visible here.
  // NOTE(review): the three Simplify* functions receive the cost list by pointer, so they presumably modify it
  // in place -- confirm against the definitions.
  314. void Simplify(CostPtrList *clist);
  315. void SimplifyForDecreasingCommunicationForward(CostPtrList *clist);
  316. void SimplifyForDecreasingCommunicationWithPartialPara(CostPtrList *clist);
  // The first parameter (a cost pointer) is unnamed in this declaration.
  // NOTE(review): 'is_redistribution' presumably tells whether the cost belongs to a redistribution -- confirm.
  317. void RefineForPracticalCost(const CostPtr &, bool is_redistribution);
  318. } // namespace parallel
  319. } // namespace mindspore
  320. #endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_AUTO_PARALLEL_COSTMODEL_H_