
!7510 optimize-find-input-need-lock-tensor

Merge pull request !7510 from kisnwang/optimize-find-input-need-lock-tensor
tags/v1.1.0
mindspore-ci-bot merged on Gitee 5 years ago, commit b6c88d8f85
7 changed files with 41 additions and 26 deletions
  1. mindspore/ccsrc/backend/session/ascend_session.cc (+2, -0)
  2. mindspore/ccsrc/backend/session/executor.cc (+1, -1)
  3. mindspore/ccsrc/backend/session/gpu_session.cc (+1, -5)
  4. mindspore/ccsrc/backend/session/kernel_graph.cc (+19, -0)
  5. mindspore/ccsrc/backend/session/kernel_graph.h (+10, -4)
  6. mindspore/ccsrc/backend/session/session_basic.cc (+7, -15)
  7. mindspore/ccsrc/backend/session/session_basic.h (+1, -1)
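
Seen as a whole, the change replaces two pieces of per-run work with attributes cached on the KernelGraph: the flattened input-node list (previously rebuilt from kernel_graph->inputs() on every LoadInputData call) and the has-optimizer check (previously a scan of the execution order on every RunGraphAsync). Both are now computed once at graph-compile time via KernelGraph::SetInputNodes() and KernelGraph::SetOptimizerFlag() and read back through cheap accessors. A minimal standalone sketch of the pattern, using illustrative stand-in types rather than the real MindSpore ones:

#include <algorithm>
#include <set>
#include <string>
#include <vector>

// Stand-in for MindSpore's AnfNodePtr / CNode; purely illustrative.
struct Node {
  std::string name;
};

// Hypothetical optimizer-op set; the real kOptOperatorSet is defined in MindSpore.
const std::set<std::string> kOptOperatorSet = {"Adam", "SGD", "Momentum"};

class Graph {
 public:
  // Compile time: flatten the graph inputs once and cache the result.
  void SetInputNodes(const std::vector<Node> &flattened_inputs) { input_nodes_ = flattened_inputs; }

  // Compile time: scan the execution order once for any optimizer op.
  void SetOptimizerFlag(const std::vector<Node> &execution_order) {
    has_optimizer_ = std::any_of(execution_order.begin(), execution_order.end(),
                                 [](const Node &n) { return kOptOperatorSet.count(n.name) > 0; });
  }

  // Run time: O(1) reads replace the per-run recomputation.
  const std::vector<Node> &input_nodes() const { return input_nodes_; }
  bool has_optimizer() const { return has_optimizer_; }

 private:
  std::vector<Node> input_nodes_;
  bool has_optimizer_{false};
};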

mindspore/ccsrc/backend/session/ascend_session.cc (+2, -0)

@@ -187,6 +187,8 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
   MemoryAlloc(root_graph.get());
   // generate and load task into device
   Load(root_graph);
+  root_graph->SetInputNodes();
+  root_graph->SetOptimizerFlag();
   DumpAllGraphs(all_graphs);
   // return the root_graph id to backend
   auto graph_id = root_graph->graph_id();


mindspore/ccsrc/backend/session/executor.cc (+1, -1)

@@ -271,7 +271,7 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id,
   task->session_ = session;
   task->graph_id_ = graph_id;
   task->input_tensors_ = inputs;
-  task->input_need_lock_tensors_ = session->GetNeedLockInputTensors(graph_id, inputs);
+  task->input_need_lock_tensors_ = session->GetInputNeedLockTensors(graph_id, inputs);
   for (auto &tensor : inputs) {
     if (tensor->NeedWait()) {
       if (tensor->IsGraphOutput()) {


mindspore/ccsrc/backend/session/gpu_session.cc (+1, -5)

@@ -177,11 +177,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
                                const std::vector<tensor::TensorPtr> &inputs_const) const {
   std::vector<tensor::TensorPtr> inputs(inputs_const);
   MS_EXCEPTION_IF_NULL(kernel_graph);
-  std::vector<AnfNodePtr> input_nodes;
-  for (const auto &input_node : kernel_graph->inputs()) {
-    auto params = AnfAlgo::GetAllOutput(input_node);
-    std::copy(params.begin(), params.end(), std::back_inserter(input_nodes));
-  }
+  auto &input_nodes = kernel_graph->input_nodes();
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
   if (inputs.size() != input_nodes.size()) {


mindspore/ccsrc/backend/session/kernel_graph.cc (+19, -0)

@@ -1212,6 +1212,25 @@ void KernelGraph::UpdateGraphDynamicAttr() {
   is_dynamic_shape_ = false;
 }
 
+void KernelGraph::SetInputNodes() {
+  input_nodes_.clear();
+  for (const auto &input_node : inputs()) {
+    auto params = AnfAlgo::GetAllOutput(input_node);
+    std::copy(params.begin(), params.end(), std::back_inserter(input_nodes_));
+  }
+}
+
+void KernelGraph::SetOptimizerFlag() {
+  has_optimizer_ = false;
+  for (const auto &cnode : execution_order_) {
+    MS_EXCEPTION_IF_NULL(cnode);
+    if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) {
+      has_optimizer_ = true;
+      return;
+    }
+  }
+}
+
 std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); }
 
 KernelGraph::~KernelGraph() {


mindspore/ccsrc/backend/session/kernel_graph.h (+10, -4)

@@ -185,6 +185,10 @@ class KernelGraph : public FuncGraph {
 
   void UpdateGraphDynamicAttr();
   bool is_dynamic_shape() const { return is_dynamic_shape_; }
+  void SetOptimizerFlag();
+  void SetInputNodes();
+  const std::vector<AnfNodePtr> &input_nodes() const { return input_nodes_; }
+  bool has_optimizer() const { return has_optimizer_; }
 
  private:
   // remove value node form graph
@@ -234,9 +238,9 @@ class KernelGraph : public FuncGraph {
   std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_;
   std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_;
   // graph needn't execute
-  bool executable_;
+  bool executable_{false};
   // exist summary node in graph
-  bool summary_node_exist_;
+  bool summary_node_exist_{false};
   // valid inputs
   std::vector<bool> valid_inputs_;
@@ -251,7 +255,7 @@ class KernelGraph : public FuncGraph {
 
   CNodePtr start_label_;
   CNodePtr end_goto_;
-  bool null_output_;
+  bool null_output_{false};
   std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
   std::unordered_map<AnfNodePtr, std::unordered_map<int, std::pair<AnfNodePtr, bool>>> internal_outputs_to_front_map_;
   std::unordered_map<AnfNodePtr, std::unordered_map<int, tensor::TensorPtr>> internal_outputs_tensor_map_;
@@ -260,7 +264,9 @@ class KernelGraph : public FuncGraph {
   std::set<AnfNodePtr> visited_nodes_;
   std::map<AnfNodePtr, AnfNodePtr> edge_to_;
   std::stack<AnfNodePtr> loop_nodes_;
-  bool is_dynamic_shape_;
+  std::vector<AnfNodePtr> input_nodes_;
+  bool has_optimizer_{false};
+  bool is_dynamic_shape_{false};
 };
 }  // namespace session
 using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;


mindspore/ccsrc/backend/session/session_basic.cc (+7, -15)

@@ -852,6 +852,8 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con
     graph->set_summary_node_exist(true);
   }
   opt::BackendCommonOptimization(graph);
+  graph->SetInputNodes();
+  graph->SetOptimizerFlag();
   return graph;
 }


@@ -971,11 +973,8 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
   if (kernel_graph->input_ctrl_tensors()) {
     input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs);
   }
-  std::vector<AnfNodePtr> input_nodes;
-  for (const auto &input_node : kernel_graph->inputs()) {
-    auto params = AnfAlgo::GetAllOutput(input_node);
-    std::copy(params.begin(), params.end(), std::back_inserter(input_nodes));
-  }
+  auto &input_nodes = kernel_graph->input_nodes();
 
   if ((inputs.size() + input_ctrl_size) - 3 != input_nodes.size()) {
     MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
                       << ", input_ctrl_size:" << input_ctrl_size;
@@ -1026,19 +1025,11 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_grap
   }
 }
 
-std::vector<tensor::TensorPtr> SessionBasic::GetNeedLockInputTensors(const GraphId &graph_id,
+std::vector<tensor::TensorPtr> SessionBasic::GetInputNeedLockTensors(const GraphId &graph_id,
                                                                      const std::vector<tensor::TensorPtr> &inputs) {
   auto graph = GetGraph(graph_id);
   MS_EXCEPTION_IF_NULL(graph);
-  bool has_optimizer = false;
-  for (const auto &cnode : graph->execution_order()) {
-    MS_EXCEPTION_IF_NULL(cnode);
-    if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) {
-      has_optimizer = true;
-      break;
-    }
-  }
-  if (!has_optimizer) {
+  if (!graph->has_optimizer()) {
     return {};
   }
   std::vector<tensor::TensorPtr> result;
@@ -1339,6 +1330,7 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInf
   graph->set_execution_order(exe_order);
   // set output
   CreateOutputNode(cnode, graph);
+  graph->SetInputNodes();
   return graph;
 }
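
The net effect on the run path: GetInputNeedLockTensors no longer rescans the execution order on every call, and when the cached flag reports no optimizer op it returns immediately. A condensed sketch of that fast path, with simplified hypothetical types (the real method goes on to collect the input tensors that must stay locked while optimizer ops update them):

#include <memory>
#include <vector>

struct Tensor {};
using TensorPtr = std::shared_ptr<Tensor>;

struct Graph {
  bool has_optimizer{false};  // set once by SetOptimizerFlag() at compile time
};

std::vector<TensorPtr> GetInputNeedLockTensors(const Graph &graph,
                                               const std::vector<TensorPtr> &inputs) {
  if (!graph.has_optimizer) {
    return {};  // fast path: no optimizer op in the graph, nothing needs locking
  }
  std::vector<TensorPtr> result;
  // ... filter `inputs` down to the tensors that need locking ...
  return result;
}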




mindspore/ccsrc/backend/session/session_basic.h (+1, -1)

@@ -98,7 +98,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
     return true;
   }
   virtual void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {}
-  std::vector<tensor::TensorPtr> GetNeedLockInputTensors(const GraphId &graph_id,
+  std::vector<tensor::TensorPtr> GetInputNeedLockTensors(const GraphId &graph_id,
                                                          const std::vector<tensor::TensorPtr> &inputs);
 #ifdef ENABLE_DEBUGGER
   // set debugger

