You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

graph_costmodel.h 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_GRAPH_COSTMODEL_H_
  17. #define MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_GRAPH_COSTMODEL_H_
  18. #include <map>
  19. #include <memory>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "../../common.h"
  24. #include "common/utils.h"
  25. #include "parallel/auto_parallel/edge_costmodel.h"
  26. #include "parallel/costmodel_context.h"
  27. #include "parallel/ops_info/operator_info.h"
  28. #include "parallel/ops_info/tmp_identity_info.h"
  29. namespace mindspore {
  30. namespace parallel {
  // Separator string between operator names (presumably used when composing edge names -- confirm at the use sites).
  #define OPERATOR_TO_OPERATOR_CONNECTOR "-"

  // Default device memory capacity: 16 * 1024^3 (presumably bytes, i.e. 16 GiB -- confirm the unit).
  #define DEFAULT_DEVICE_MEMORY_CAPACITY (1024.0 * 1024.0 * 1024.0 * 16.0)

  // Default cost-model coefficients. alpha and beta seed CostGraph's costmodel_alpha_ / costmodel_beta_ members.
  #define DEFAULT_COST_MODEL_ALPHA 1.0
  #define DEFAULT_COST_MODEL_BETA 400.0
  #define DEFAULT_COST_MODEL_GAMMA 0.001

  // Default for the "simplify calculation" switch of the cost model.
  #define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true

  // Default communication-cost parameters (threshold, constant term and bias).
  #define DEFAULT_COST_MODEL_COMMUNI_THRESHOLD 2048.0
  #define DEFAULT_COST_MODEL_COMMUNI_CONST 3072.0
  #define DEFAULT_COST_MODEL_COMMUNI_BIAS 1024.0

  // Default tensor-slice alignment settings: disabled, with an alignment size of 16.
  #define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false
  #define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16

  // Default strategy-search switches.
  #define DEFAULT_FULLY_USE_DEVICES true
  #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false
  // Forward declaration so the shared-pointer alias can be formed before the class is defined.
  class CostGraph;
  using CostGraphPtr = std::shared_ptr<CostGraph>;

  // The declarations below are 'extern': the objects themselves are defined in another translation unit.

  // Shared cost-graph instance and an operator counter.
  extern CostGraphPtr entire_costgraph;
  extern size_t TOTAL_OPS;

  // Runtime cost-model parameters (presumably initialised from the matching DEFAULT_* macros -- confirm in the
  // defining source file).
  extern double DEVICE_MEMORY_CAPACITY;
  extern double COST_MODEL_GAMMA;
  extern double COST_MODEL_COMMUNI_THRESHOLD;
  extern double COST_MODEL_COMMUNI_CONST;
  extern double COST_MODEL_COMMUNI_BIAS;
  extern bool COST_MODEL_SIMPLIFY_CALCULATION;

  // Tensor-slice alignment settings.
  extern bool TENSOR_SLICE_ALIGNMENT_ENABLE;
  extern size_t TENSOR_SLICE_ALIGNMENT_SIZE;

  // Strategy-search switches.
  extern bool FULLY_USE_DEVICES;
  extern bool ELEMENTWISE_OP_STRA_FOLLOW;
  58. class CostGraph {
  59. // 'CostGraph' consists of Operators and edges between them. An edge is created between two Operators if they have
  60. // output-input dependency relationship.
  61. public:
  62. CostGraph() {
  63. dev_memory_ = DEFAULT_DEVICE_MEMORY_CAPACITY;
  64. costmodel_alpha_ = DEFAULT_COST_MODEL_ALPHA;
  65. costmodel_beta_ = DEFAULT_COST_MODEL_BETA;
  66. }
  67. ~CostGraph() = default;
  68. void AddOperator(const OperatorInfoPtr& op) { ops_.push_back(op); }
  69. OperatorInfoPtr FindOperatorByIndex(size_t index) {
  70. if (index >= ops_.size()) {
  71. MS_LOG(ERROR) << "The index: " << index << " is out of the range of ops_: " << ops_.size() << ".";
  72. return nullptr;
  73. }
  74. return ops_[index];
  75. }
  76. void RemoveOperator(const OperatorInfoPtr& op);
  77. bool IsOperatorInCostGraph(const OperatorInfoPtr& op);
  78. // the edge is in the form: u --> v
  79. void AddEdge(OperatorInfoPtr u_node, OperatorInfoPtr v_node, const EdgePtr& edge) {
  80. std::vector<EdgePtr> curr_edges(edges_[{u_node, v_node}]);
  81. curr_edges.push_back(edge);
  82. edges_[{u_node, v_node}] = curr_edges;
  83. }
  84. // An edge is uniquely identified by its name, and its output index and input index.
  85. bool IsEdgeInCostGraph(const std::string&, size_t, size_t);
  86. void SetDeviceMemoryAndCostParameter();
  87. std::vector<std::shared_ptr<CostGraph>> ConstructConnectedComponents(std::vector<OperatorInfoPtr>);
  88. void DFS(const OperatorInfoPtr& current_op, std::map<OperatorInfoPtr, bool>* visited,
  89. const std::shared_ptr<CostGraph>& component);
  90. CostPtrList CreateFinalCostList(const OperatorInfoPtr& u, const EdgePtr& e, const OperatorInfoPtr& v);
  91. CostPtrList CreateFinalSingleCostList(const OperatorInfoPtr& u);
  92. CostPtr SelectCostWithMemoryConstraint(const CostPtrList& cost_list, double memory);
  93. CostPtr SelectCostWithMinTrainingTime(const CostPtrList& cost_list, double memory);
  94. CostPtrList SelectCostListWithMinTrainingTimeMultiple(const std::vector<CostPtrList>& all_costlist, double memory);
  95. Status SearchStrategyForMultiNodeFinalGraph(const std::vector<OperatorInfoPtr>&);
  96. std::vector<std::shared_ptr<Edge>> GetOriginalEdgeBetweenOperators(OperatorInfoPtr u_node, OperatorInfoPtr v_node) {
  97. return edges_[{u_node, v_node}];
  98. }
  99. double GetDeviceMemory() const { return dev_memory_; }
  100. // Search the cost_list in the final graph, and determine the optimal one
  101. Status SearchStrategy();
  102. // Given a graph which contains the following subgraph: u --> v --> w, the node v can be eliminated
  103. OperatorInfoPtr CheckOpElimination() const;
  104. // Given a graph which contains the following subgraph where there are multiple edges between u and v, these edges
  105. // can be eliminated into one
  106. std::vector<EdgePtr> CheckEdgeElimination() const;
  107. // Given a graph which contains the following subgraph:
  108. // u
  109. // |
  110. // w --- v --- x
  111. // where u has 0 incoming edge, u has 1 outgoing edge, and v has > 1 incoming edges, u can be merged into v.
  112. // u is returned.
  113. OperatorInfoPtr CheckMergeElimination() const;
  114. // Given a graph which contains the following subgraph:
  115. // u
  116. // |
  117. // v --- x
  118. // where v has 2 outgoing edges, and u has 1 incoming edges and no outgoing edges. In this case, u can be contracted
  119. // into v. u is returned.
  120. OperatorInfoPtr CheckContractElimination() const;
  121. /* Given a graph which contains the following subgraph:
  122. * u
  123. * / \
  124. * / \
  125. * v --- w
  126. * where u has 2 outgoing edges, v has 1 outgoing edge, and w has 2 incoming edges, u can be eliminated into v.
  127. * The returned value includes u and the edge <u, <v, w>>.
  128. */
  129. std::pair<OperatorInfoPtr, EdgePtr> CheckTriangleElimination() const;
  130. /* Given a graph which contains the following subgraph:
  131. * v <--- u ---> w
  132. * where u has 0 incoming edges, and multiple outgoing edges. In addition, v and w have other complicated connections,
  133. * resulting in v and w can not be performed ContractElimination. u is returned.
  134. * NOTE: this elimination MUST be performed only when the above 5 operation cannot be applied.
  135. */
  136. OperatorInfoPtr CheckStarElimination() const;
  137. // Applying Operator Elimination in DP algorithm
  138. EdgePtr EliminationOp(const OperatorInfoPtr& op);
  139. // Applying Edge Elimination in DP algorithm
  140. EdgePtr EliminationEdges(const std::vector<EdgePtr>& edges);
  141. // Applying Merge Elimination in DP algorithm
  142. OperatorInfoPtr EliminationMerge(const OperatorInfoPtr& op);
  143. void CreateMergeEliminationSubCostList(StrategyPtr op_strategy, const CostPtrList& op_cost_list,
  144. const CostPtrList& edge_cost_list, StrategyPtr tar_op_strategy,
  145. const CostPtrList& tar_cost_list, CostPtrList* tar_cost_list_new);
  146. // Applying Contract Elimination in DP algorithm
  147. OperatorInfoPtr EliminationContract(const OperatorInfoPtr& op);
  148. void CreateContractEliminationSubCostList(StrategyPtr, const CostPtrList&, const CostPtrList&, StrategyPtr,
  149. const CostPtrList&, CostPtrList*);
  150. // Applying Triangle Elimination in DP algorithm. return the left_node
  151. OperatorInfoPtr EliminationTriangle(const OperatorInfoPtr& elimi_op, const EdgePtr& edge_left_right);
  152. void CreateTriangleEliminationCostList(const OperatorInfoPtr&, const CostPtrList&, const CostPtrList&,
  153. const StrategyPtr&, const StrategyPtr&, const StrategyPtr&, const CostPtrList&,
  154. const CostPtrList&, const CostPtrList&, CostPtrList*);
  155. // Given the relevant costlist, create the TriangleElimination cost
  156. void CreateTriangleEliminationSubCostList(StrategyPtr, StrategyPtr, StrategyPtr, const CostPtr&, const CostPtrList&,
  157. const CostPtrList&, const CostPtr&, const CostPtrList&, CostPtrList*);
  158. // Applying the Star Elimination in DP algorithm. Return the successive edges of this merged_op
  159. // NOTE: this elimination MUST be performed only when the above 5 operation cannot be applied.
  160. std::vector<EdgePtr> EliminationStar(const OperatorInfoPtr& op);
  161. void CreateStarEliminationCostList(std::vector<EdgePtr>&, const StrategyPtr&, const CostPtrList&, const CostPtrList&,
  162. const StrategyPtr&, const CostPtrList&, CostPtrList*);
  163. void CreateStarEliminationSubCostList(const StrategyPtr&, const CostPtrList&, const CostPtrList&, const StrategyPtr&,
  164. const CostPtrList&, std::vector<StrategyPtr>, CostPtrList&, CostPtrList&,
  165. CostPtrList*);
  166. // When the input of a operator is neither a WEIGHT, nor a output of a subsequent operator involving WEIGHT, then
  167. // the memory cost can be resused.
  168. Status CalculateOpsMemoryCost();
  169. // When the input of the edge is neither a WEIGHT, nor a output of a subsequent operator involving WEIGHT, then
  170. // the memory cost can be resused.
  171. Status CalculateEdgesMemoryCost();
  172. Status ComputeOpsAndEdgesParameterInvolved();
  173. std::vector<OperatorInfoPtr> GetOperators() const { return ops_; }
  174. size_t GetNumPairs() const { return edges_.size(); }
  175. Status InitSelectedStrategy();
  176. OperatorInfoPtr FindTmpIdentityByParameterName(std::string&) const;
  177. // When TmpIdentity is used by mulitple operators, the corresponding parameter's memory cost should be calculated only
  178. // once (instead of multiple times), this method is used to correct this.
  179. Status CorrectOpsMemoryCost();
  180. // Needed by rec_parser
  181. void add_inputs_tensor_name(const std::vector<std::string>& inputs_tensor_name) {
  182. inputs_tensor_name_list_.push_back(inputs_tensor_name);
  183. }
  184. const std::vector<std::vector<std::string>> get_inputs_tensor_name_list() const { return inputs_tensor_name_list_; }
  185. void add_tuple_getitem(const std::pair<std::string, std::string>& tuple_getitem) {
  186. auto ret = tuple_getitem_list_.insert(tuple_getitem);
  187. if (ret.second == false) {
  188. MS_LOG(EXCEPTION) << "The insert item is already exist.";
  189. }
  190. }
  191. const std::map<std::string, std::string> get_tuple_getitem_list() const { return tuple_getitem_list_; }
  192. private:
  193. // Needed by rec_parser
  194. std::vector<std::vector<std::string>> inputs_tensor_name_list_;
  195. std::map<std::string, std::string> tuple_getitem_list_;
  196. double dev_memory_;
  197. double costmodel_alpha_;
  198. double costmodel_beta_;
  199. std::vector<OperatorInfoPtr> ops_;
  200. std::map<std::pair<OperatorInfoPtr, OperatorInfoPtr>, std::vector<EdgePtr>> edges_;
  201. std::vector<std::shared_ptr<CostGraph>> connected_compoents_;
  202. };
  203. } // namespace parallel
  204. } // namespace mindspore
  205. #endif // MINDSPORE_CCSRC_PARALLEL_AUTO_PARALLEL_GRAPH_COSTMODEL_H_