Merge pull request !7510 from kisnwang/optimize-find-input-need-lock-tensortags/v1.1.0
| @@ -187,6 +187,8 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) { | |||||
| MemoryAlloc(root_graph.get()); | MemoryAlloc(root_graph.get()); | ||||
| // generate and load task into device | // generate and load task into device | ||||
| Load(root_graph); | Load(root_graph); | ||||
| root_graph->SetInputNodes(); | |||||
| root_graph->SetOptimizerFlag(); | |||||
| DumpAllGraphs(all_graphs); | DumpAllGraphs(all_graphs); | ||||
| // return the root_graph id to backend | // return the root_graph id to backend | ||||
| auto graph_id = root_graph->graph_id(); | auto graph_id = root_graph->graph_id(); | ||||
| @@ -271,7 +271,7 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id, | |||||
| task->session_ = session; | task->session_ = session; | ||||
| task->graph_id_ = graph_id; | task->graph_id_ = graph_id; | ||||
| task->input_tensors_ = inputs; | task->input_tensors_ = inputs; | ||||
| task->input_need_lock_tensors_ = session->GetNeedLockInputTensors(graph_id, inputs); | |||||
| task->input_need_lock_tensors_ = session->GetInputNeedLockTensors(graph_id, inputs); | |||||
| for (auto &tensor : inputs) { | for (auto &tensor : inputs) { | ||||
| if (tensor->NeedWait()) { | if (tensor->NeedWait()) { | ||||
| if (tensor->IsGraphOutput()) { | if (tensor->IsGraphOutput()) { | ||||
| @@ -177,11 +177,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||||
| const std::vector<tensor::TensorPtr> &inputs_const) const { | const std::vector<tensor::TensorPtr> &inputs_const) const { | ||||
| std::vector<tensor::TensorPtr> inputs(inputs_const); | std::vector<tensor::TensorPtr> inputs(inputs_const); | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| std::vector<AnfNodePtr> input_nodes; | |||||
| for (const auto &input_node : kernel_graph->inputs()) { | |||||
| auto params = AnfAlgo::GetAllOutput(input_node); | |||||
| std::copy(params.begin(), params.end(), std::back_inserter(input_nodes)); | |||||
| } | |||||
| auto &input_nodes = kernel_graph->input_nodes(); | |||||
| auto ms_context = MsContext::GetInstance(); | auto ms_context = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(ms_context); | MS_EXCEPTION_IF_NULL(ms_context); | ||||
| if (inputs.size() != input_nodes.size()) { | if (inputs.size() != input_nodes.size()) { | ||||
| @@ -1212,6 +1212,25 @@ void KernelGraph::UpdateGraphDynamicAttr() { | |||||
| is_dynamic_shape_ = false; | is_dynamic_shape_ = false; | ||||
| } | } | ||||
// Rebuilds the cached flat input list (input_nodes_) from the graph's current inputs().
// The cache is cleared first, then for every entry of inputs() all nodes returned by
// AnfAlgo::GetAllOutput(entry) are appended in order. The result is a snapshot of
// inputs() at call time, so it has to be recomputed if inputs() changes afterwards.
| void KernelGraph::SetInputNodes() { | |||||
// Drop any previously cached entries so repeated calls do not accumulate duplicates.
| input_nodes_.clear(); | |||||
| for (const auto &input_node : inputs()) { | |||||
// One graph input may contribute several nodes; keep all of them, in order.
| auto params = AnfAlgo::GetAllOutput(input_node); | |||||
| std::copy(params.begin(), params.end(), std::back_inserter(input_nodes_)); | |||||
| } | |||||
| } | |||||
// Recomputes has_optimizer_: true when at least one cnode in execution_order_ has a
// name (AnfAlgo::GetCNodeName) contained in kOptOperatorSet, false otherwise.
// The flag is a snapshot of execution_order_ at call time.
| void KernelGraph::SetOptimizerFlag() { | |||||
// Start from "no optimizer" so a stale true value never survives a re-scan.
| has_optimizer_ = false; | |||||
| for (const auto &cnode : execution_order_) { | |||||
// NOTE(review): presumably raises on a null entry in the execution order - confirm macro semantics.
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) { | |||||
// First match is enough; stop scanning.
| has_optimizer_ = true; | |||||
| return; | |||||
| } | |||||
| } | |||||
| } | |||||
| std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } | std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } | ||||
| KernelGraph::~KernelGraph() { | KernelGraph::~KernelGraph() { | ||||
| @@ -185,6 +185,10 @@ class KernelGraph : public FuncGraph { | |||||
| void UpdateGraphDynamicAttr(); | void UpdateGraphDynamicAttr(); | ||||
| bool is_dynamic_shape() const { return is_dynamic_shape_; } | bool is_dynamic_shape() const { return is_dynamic_shape_; } | ||||
| void SetOptimizerFlag(); | |||||
| void SetInputNodes(); | |||||
| const std::vector<AnfNodePtr> &input_nodes() const { return input_nodes_; } | |||||
| bool has_optimizer() const { return has_optimizer_; } | |||||
| private: | private: | ||||
| // remove value node from graph | // remove value node from graph | ||||
| @@ -234,9 +238,9 @@ class KernelGraph : public FuncGraph { | |||||
| std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_; | std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_; | ||||
| std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_; | std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_; | ||||
| // graph needn't execute | // graph needn't execute | ||||
| bool executable_; | |||||
| bool executable_{false}; | |||||
| // whether a summary node exists in the graph | // whether a summary node exists in the graph | ||||
| bool summary_node_exist_; | |||||
| bool summary_node_exist_{false}; | |||||
| // valid inputs | // valid inputs | ||||
| std::vector<bool> valid_inputs_; | std::vector<bool> valid_inputs_; | ||||
| @@ -251,7 +255,7 @@ class KernelGraph : public FuncGraph { | |||||
| CNodePtr start_label_; | CNodePtr start_label_; | ||||
| CNodePtr end_goto_; | CNodePtr end_goto_; | ||||
| bool null_output_; | |||||
| bool null_output_{false}; | |||||
| std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_; | std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_; | ||||
| std::unordered_map<AnfNodePtr, std::unordered_map<int, std::pair<AnfNodePtr, bool>>> internal_outputs_to_front_map_; | std::unordered_map<AnfNodePtr, std::unordered_map<int, std::pair<AnfNodePtr, bool>>> internal_outputs_to_front_map_; | ||||
| std::unordered_map<AnfNodePtr, std::unordered_map<int, tensor::TensorPtr>> internal_outputs_tensor_map_; | std::unordered_map<AnfNodePtr, std::unordered_map<int, tensor::TensorPtr>> internal_outputs_tensor_map_; | ||||
| @@ -260,7 +264,9 @@ class KernelGraph : public FuncGraph { | |||||
| std::set<AnfNodePtr> visited_nodes_; | std::set<AnfNodePtr> visited_nodes_; | ||||
| std::map<AnfNodePtr, AnfNodePtr> edge_to_; | std::map<AnfNodePtr, AnfNodePtr> edge_to_; | ||||
| std::stack<AnfNodePtr> loop_nodes_; | std::stack<AnfNodePtr> loop_nodes_; | ||||
| bool is_dynamic_shape_; | |||||
| std::vector<AnfNodePtr> input_nodes_; | |||||
| bool has_optimizer_{false}; | |||||
| bool is_dynamic_shape_{false}; | |||||
| }; | }; | ||||
| } // namespace session | } // namespace session | ||||
| using KernelGraphPtr = std::shared_ptr<session::KernelGraph>; | using KernelGraphPtr = std::shared_ptr<session::KernelGraph>; | ||||
| @@ -852,6 +852,8 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con | |||||
| graph->set_summary_node_exist(true); | graph->set_summary_node_exist(true); | ||||
| } | } | ||||
| opt::BackendCommonOptimization(graph); | opt::BackendCommonOptimization(graph); | ||||
| graph->SetInputNodes(); | |||||
| graph->SetOptimizerFlag(); | |||||
| return graph; | return graph; | ||||
| } | } | ||||
| @@ -971,11 +973,8 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap | |||||
| if (kernel_graph->input_ctrl_tensors()) { | if (kernel_graph->input_ctrl_tensors()) { | ||||
| input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); | input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); | ||||
| } | } | ||||
| std::vector<AnfNodePtr> input_nodes; | |||||
| for (const auto &input_node : kernel_graph->inputs()) { | |||||
| auto params = AnfAlgo::GetAllOutput(input_node); | |||||
| std::copy(params.begin(), params.end(), std::back_inserter(input_nodes)); | |||||
| } | |||||
| auto &input_nodes = kernel_graph->input_nodes(); | |||||
| if ((inputs.size() + input_ctrl_size) - 3 != input_nodes.size()) { | if ((inputs.size() + input_ctrl_size) - 3 != input_nodes.size()) { | ||||
| MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() | MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() | ||||
| << ", input_ctrl_size:" << input_ctrl_size; | << ", input_ctrl_size:" << input_ctrl_size; | ||||
| @@ -1026,19 +1025,11 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_grap | |||||
| } | } | ||||
| } | } | ||||
| std::vector<tensor::TensorPtr> SessionBasic::GetNeedLockInputTensors(const GraphId &graph_id, | |||||
| std::vector<tensor::TensorPtr> SessionBasic::GetInputNeedLockTensors(const GraphId &graph_id, | |||||
| const std::vector<tensor::TensorPtr> &inputs) { | const std::vector<tensor::TensorPtr> &inputs) { | ||||
| auto graph = GetGraph(graph_id); | auto graph = GetGraph(graph_id); | ||||
| MS_EXCEPTION_IF_NULL(graph); | MS_EXCEPTION_IF_NULL(graph); | ||||
| bool has_optimizer = false; | |||||
| for (const auto &cnode : graph->execution_order()) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) { | |||||
| has_optimizer = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (!has_optimizer) { | |||||
| if (!graph->has_optimizer()) { | |||||
| return {}; | return {}; | ||||
| } | } | ||||
| std::vector<tensor::TensorPtr> result; | std::vector<tensor::TensorPtr> result; | ||||
| @@ -1339,6 +1330,7 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInf | |||||
| graph->set_execution_order(exe_order); | graph->set_execution_order(exe_order); | ||||
| // set output | // set output | ||||
| CreateOutputNode(cnode, graph); | CreateOutputNode(cnode, graph); | ||||
| graph->SetInputNodes(); | |||||
| return graph; | return graph; | ||||
| } | } | ||||
| @@ -98,7 +98,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | |||||
| return true; | return true; | ||||
| } | } | ||||
| virtual void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {} | virtual void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {} | ||||
| std::vector<tensor::TensorPtr> GetNeedLockInputTensors(const GraphId &graph_id, | |||||
| std::vector<tensor::TensorPtr> GetInputNeedLockTensors(const GraphId &graph_id, | |||||
| const std::vector<tensor::TensorPtr> &inputs); | const std::vector<tensor::TensorPtr> &inputs); | ||||
| #ifdef ENABLE_DEBUGGER | #ifdef ENABLE_DEBUGGER | ||||
| // set debugger | // set debugger | ||||