You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

kernel_graph.h 11 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_SESSION_KERNEL_GRAPH_H
  17. #define MINDSPORE_CCSRC_SESSION_KERNEL_GRAPH_H
  18. #include <vector>
  19. #include <unordered_map>
  20. #include <memory>
  21. #include <utility>
  22. #include <string>
  23. #include <queue>
  24. #include <map>
  25. #include <set>
  26. #include <unordered_set>
  27. #include "ir/func_graph.h"
  28. #include "ir/anf.h"
  29. #include "utils/graph_utils.h"
  30. #include "utils/contract.h"
  31. #include "device/kernel_info.h"
  32. namespace mindspore {
  33. namespace session {
  34. using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>;
  35. class KernelGraph : public FuncGraph {
  36. public:
  37. KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) {
  38. inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
  39. execution_order_ = {};
  40. executable_ = true;
  41. summary_node_exist_ = false;
  42. stream_distinction_label_ = kInvalidDistincLabel;
  43. }
  44. ~KernelGraph() override;
  45. MS_DECLARE_PARENT(KernelGraph, FuncGraph);
  46. const std::vector<AnfNodePtr> &inputs() const;
  47. std::vector<AnfNodePtr> *MutableInputs() const { return inputs_.get(); }
  48. std::vector<AnfNodePtr> outputs() const;
  49. CNodePtr NewCNode(const std::vector<AnfNodePtr> &inputs) override;
  50. void CreateKernelInfoFromNewParameter(const CNodePtr &cnode);
  51. CNodePtr NewCNode(const CNodePtr &cnode);
  52. ParameterPtr NewParameter(const ParameterPtr &parameter = nullptr);
  53. ValueNodePtr NewValueNode(const ValueNodePtr &value_node = nullptr);
  54. std::vector<AnfNodePtr> SplitTupleValueNodeToNodeList(const ValueNodePtr &value_node);
  55. void set_execution_order(const std::vector<CNodePtr> &order) { execution_order_ = order; }
  56. const std::vector<CNodePtr> &execution_order() const { return execution_order_; }
  57. void SetExecOrderByDefault();
  58. uint32_t graph_id() const { return graph_id_; }
  59. void set_graph_id(uint32_t graph_id) { graph_id_ = graph_id; }
  60. // and a new front to backend anf relation to maop
  61. void FrontBackendlMapAdd(const AnfNodePtr &front_anf, const AnfNodePtr &backend_anf);
  62. // replace old backend anf with new backend anf
  63. void FrontBackendlMapUpdate(const AnfNodePtr &old_backend_anf, const AnfNodePtr &new_backend_anf);
  64. // get backend anf by front anf
  65. AnfNodePtr GetBackendAnfByFrontAnf(const AnfNodePtr &front_anf);
  66. // check backend node whether exist in map
  67. bool BackendNodeExistInFrontBackendMap(const AnfNodePtr &backend_anf);
  68. // get value node by tensor
  69. ValueNodePtr GetValueNodeByTensor(const tensor::TensorPtr &tensor);
  70. // add value node tensor relation map
  71. void TensorValueNodeMapAdd(const tensor::TensorPtr &tensor, const ValueNodePtr &value_node);
  72. // get all value nodes of graph
  73. const std::unordered_set<ValueNodePtr> graph_value_nodes() const { return graph_value_nodes_; }
  74. // add value node to graph
  75. void AddValueNodeToGraph(const ValueNodePtr &value_node);
  76. // ref output is in map
  77. bool IsInRefOutputMap(const AnfWithOutIndex &pair) const;
  78. // get ref correspond pairs
  79. AnfWithOutIndex GetRefCorrespondOutput(const AnfWithOutIndex &out_pair) const;
  80. // add ref correspond pairs
  81. void AddRefCorrespondPairs(const AnfWithOutIndex &final_pair, const AnfWithOutIndex &origin_pair);
  82. // get map
  83. std::map<AnfWithOutIndex, AnfWithOutIndex> GetRefMap() const { return ref_out_in_map_; }
  84. // checkout whether loop exist in graph
  85. void CheckLoop();
  86. // check whether graph is executable
  87. bool executable() const { return executable_; }
  88. // set executable of graph
  89. void set_executable(bool executable) { executable_ = executable; }
  90. // set summary_node of graph
  91. void set_summary_node_exist(bool summary_node_exist) { summary_node_exist_ = summary_node_exist; }
  92. // check whether exist summary node in graph
  93. bool summary_node_exist() const { return summary_node_exist_; }
  94. // set invalid inputs for control sink
  95. std::vector<bool> *MutableValidInputs() { return &valid_inputs_; }
  96. std::vector<bool> valid_inputs() const { return valid_inputs_; }
  97. // replace node in graph
  98. void ReplaceNode(NotNull<AnfNodePtr> old_anf_node, NotNull<AnfNodePtr> new_anf_node);
  99. // set stream label of graph
  100. void set_stream_distinction_label(uint32_t stream_label) { stream_distinction_label_ = stream_label; }
  101. // get stream label of graph
  102. uint32_t stream_distinction_label() { return stream_distinction_label_; }
  103. // refresh execute kernel stream label
  104. void UpdateExecuteKernelStreamLabel();
  105. // calculate the leaf graph order of root graph
  106. std::vector<std::shared_ptr<KernelGraph>> GetLeafGraphOrder();
  107. // the child graph of current graph
  108. const std::vector<std::shared_ptr<KernelGraph>> &child_graph_order() const { return child_graph_order_; }
  109. void set_child_graph_order(const std::vector<std::shared_ptr<KernelGraph>> &order) { child_graph_order_ = order; }
  110. // checkout whether current graph is leaf graph
  111. bool IsLeafGraph() const;
  112. // set input_tensors pointer of control parameter
  113. void set_input_ctrl_tensors(const std::shared_ptr<std::vector<tensor::TensorPtr>> &input_tensors_ptr) {
  114. input_ctrl_tensors_ = input_tensors_ptr;
  115. }
  116. // get input_tensors pointer of control parameter
  117. std::shared_ptr<std::vector<tensor::TensorPtr>> input_ctrl_tensors() const { return input_ctrl_tensors_; }
  118. // get parent kernel graph
  119. std::shared_ptr<KernelGraph> parent_graph() const { return parent_graph_; }
  120. // set parent kernel graph
  121. void set_parent_graph(const std::shared_ptr<KernelGraph> &parent_graph) { parent_graph_ = parent_graph; }
  122. // find anf node in graph
  123. std::vector<CNodePtr> FindNodeByPrimitive(const PrimitivePtr &primitive) const;
  124. // get real inputs
  125. const std::vector<std::pair<AnfNodePtr, std::vector<AnfNodePtr>>> &real_inputs() const { return real_inputs_; }
  126. void SetRealInput(const AnfNodePtr &parameter, const AnfNodePtr &arg);
  127. // mark unreused args
  128. void AddUnreuseArgs(const AnfNodePtr &arg, const std::shared_ptr<KernelGraph> &from_graph);
  129. const std::map<AnfNodePtr, std::shared_ptr<KernelGraph>> &unreuse_args() const { return unreuse_args_; }
  130. // used to dump ir
  131. std::string ToString() const override;
  132. // update the real input if the node is a call
  133. void UpdateCallRealInput();
  134. void set_start_label(const CNodePtr &start_label) { start_label_ = start_label; }
  135. CNodePtr get_start_label() { return start_label_; }
  136. void set_end_goto(const CNodePtr &end_goto) { end_goto_ = end_goto; }
  137. CNodePtr get_end_goto() { return end_goto_; }
  138. bool get_output_null() { return null_output_; }
  139. void set_output_null(bool is_output_null) { null_output_ = is_output_null; }
  140. void PrintGraphExecuteOrder() const;
  141. const std::map<std::string, std::pair<AnfNodePtr, int>> &summary_nodes() const { return summary_nodes_; }
  142. void set_summary_nodes(const std::map<std::string, std::pair<AnfNodePtr, int>> &nodes) { summary_nodes_ = nodes; }
  143. void AddInternalOutput(const AnfNodePtr &front_node, const AnfNodePtr &node);
  144. void ReplaceInternalOutput(const AnfNodePtr &node, const AnfNodePtr &new_node);
  145. AnfNodePtr GetInternalOutputByFrontNode(const AnfNodePtr &front_node) const;
  146. bool IsInternalOutput(const AnfNodePtr &node) const;
  147. AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const;
  148. void AddFinalOutputKernel(const AnfNodePtr &node);
  149. bool IsFinalOutputKernel(const AnfNodePtr &node) const;
  150. private:
  151. // remove value node form graph
  152. bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node);
  153. void VisitNodeDescendants(const AnfNodePtr &node, std::queue<AnfNodePtr> *visit_queue,
  154. std::unordered_set<AnfNodePtr> *visited_nodes);
  155. // update node edge list
  156. void UpdateNodeEdgeList(std::queue<AnfNodePtr> *seed_nodes);
  157. // add node depend edge by data edge or control depend
  158. void AddDependEdge(const AnfNodePtr &node, const AnfNodePtr &input, size_t depend_edge_num);
  159. // handle control depend
  160. std::vector<AnfNodePtr> GetOutputNodes(const AnfNodePtr &node);
  161. bool HandleControlDependNode(const AnfNodePtr &node, std::queue<AnfNodePtr> *que,
  162. std::unordered_set<AnfNodePtr> *visited_nodes);
  163. void UpdateControlDependRelations(const std::vector<AnfNodePtr> &depends);
  164. std::shared_ptr<std::vector<AnfNodePtr>> inputs_;
  165. std::vector<CNodePtr> execution_order_;
  166. uint32_t graph_id_;
  167. uint32_t stream_distinction_label_;
  168. // record map bettween front anf and backend anf,use two map implement bidirectional map
  169. std::unordered_map<AnfNodePtr, AnfNodePtr> front_backend_anf_map_;
  170. std::unordered_map<AnfNodePtr, AnfNodePtr> backend_front_anf_map_;
  171. // there may be a tensor from ME backend ,a value ndoe will be create according the tensor,map record
  172. std::unordered_map<tensor::TensorPtr, ValueNodePtr> tensor_to_value_node_map_;
  173. // include all value nodes
  174. std::unordered_set<ValueNodePtr> graph_value_nodes_;
  175. std::unordered_map<AnfNodePtr, size_t> node_input_num_;
  176. std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_input_edges_;
  177. // record map between ref final output anf with index and ref origin input with index
  178. std::map<AnfWithOutIndex, AnfWithOutIndex> ref_out_in_map_;
  179. std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_;
  180. std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_;
  181. // graph needn't execute
  182. bool executable_;
  183. // exist summary node in graph
  184. bool summary_node_exist_;
  185. // valid inputs
  186. std::vector<bool> valid_inputs_;
  187. // new members for control sink process
  188. // all child grahs refers to partial node
  189. std::map<AnfNodePtr, std::shared_ptr<KernelGraph>> node_to_child_graphs_;
  190. // child graph execute order in root graph
  191. std::vector<std::shared_ptr<KernelGraph>> child_graph_order_;
  192. // input_tensors of control parameter
  193. std::shared_ptr<std::vector<tensor::TensorPtr>> input_ctrl_tensors_;
  194. // parameter graph
  195. std::shared_ptr<KernelGraph> parent_graph_;
  196. // record real parameters,inputs_ is the formal parameters
  197. std::vector<std::pair<AnfNodePtr, std::vector<AnfNodePtr>>> real_inputs_;
  198. std::map<AnfNodePtr, std::shared_ptr<KernelGraph>> unreuse_args_;
  199. CNodePtr start_label_;
  200. CNodePtr end_goto_;
  201. bool null_output_;
  202. std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
  203. std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_;
  204. std::set<AnfNodePtr> final_output_kernels_;
  205. };
  206. } // namespace session
  207. using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
  208. } // namespace mindspore
  209. #endif // MINDSPORE_CCSRC_SESSION_KERNEL_GRAPH_H