
!9284 Move BuildOp into RunOp

From: @jojobugfree
Reviewed-by: 
Signed-off-by:
tags/v1.1.0
mindspore-ci-bot (5 years ago)
commit e706cb2512
9 changed files with 58 additions and 77 deletions
  1. mindspore/ccsrc/backend/session/ascend_session.cc (+11, -5)
  2. mindspore/ccsrc/backend/session/ascend_session.h (+2, -1)
  3. mindspore/ccsrc/backend/session/executor.cc (+5, -19)
  4. mindspore/ccsrc/backend/session/executor.h (+4, -15)
  5. mindspore/ccsrc/backend/session/gpu_session.cc (+9, -4)
  6. mindspore/ccsrc/backend/session/gpu_session.h (+2, -1)
  7. mindspore/ccsrc/backend/session/session_basic.cc (+19, -9)
  8. mindspore/ccsrc/backend/session/session_basic.h (+5, -5)
  9. mindspore/ccsrc/pipeline/pynative/pynative_execute.cc (+1, -18)
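
In short, this change folds the separate build step into the run step: SessionBasic::BuildOp is removed and SessionBasic::RunOp now receives the tensors mask and a mutable pointer to the input tensors, so value-node tensors are erased inside the session instead of by every caller. A minimal before/after sketch of the caller side, taken from the pynative_execute.cc change below (variable names as in that file):

    // Before: caller builds the op, erases value-node tensors, then runs it
    session->BuildOp(&op_run_info, graph_info, input_tensors, tensors_mask);
    EraseValueNodeTensor(tensors_mask, &input_tensors);
    VectorRef outputs;
    session->RunOp(&op_run_info, graph_info, input_tensors, &outputs);

    // After: a single call; build and value-node erasure happen inside RunOp
    VectorRef outputs;
    session->RunOp(&op_run_info, graph_info, &input_tensors, &outputs, tensors_mask);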

mindspore/ccsrc/backend/session/ascend_session.cc (+11, -5)

@@ -691,22 +691,27 @@ void AscendSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &g
 }
 
 void AscendSession::RunOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-                              const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) {
+                              std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+                              const std::vector<int64_t> &tensors_mask) {
+  MS_EXCEPTION_IF_NULL(input_tensors);
+  BuildOpImpl(op_run_info, graph_info, *input_tensors, tensors_mask);
+  EraseValueNodeTensor(tensors_mask, input_tensors);
+
   auto graph = run_op_graphs_[graph_info];
   MS_EXCEPTION_IF_NULL(graph);
   MS_LOG(INFO) << "Run op " << op_run_info.op_name << " start!";
   // malloc mem
-  RunOpMemoryAlloc(input_tensors, graph.get());
+  RunOpMemoryAlloc(*input_tensors, graph.get());
   // Build dynamic kernel
   if (op_run_info.is_dynamic_shape) {
     BuildDynamicKernel(graph);
   }
   // load input data to device
-  LoadInputData(graph, input_tensors);
+  LoadInputData(graph, *input_tensors);
   // run op
   Execute(graph, false);
   // get output
-  UpdateOutputs(graph, outputs, input_tensors);
+  UpdateOutputs(graph, outputs, *input_tensors);
   RunOpMemoryClear(graph.get());
   MS_LOG(INFO) << "Run op " << op_run_info.op_name << " finish!";
 }
@@ -736,7 +741,8 @@ void AscendSession::RunOpsInGraphImpl(const GraphId &graph_id, const std::vector
     // Build and run current single op
     BuildOpImpl(run_info, graph_info, input_tensor_info.input_tensors, input_tensor_info.input_tensors_mask);
     VectorRef op_outputs;
-    RunOpImpl(run_info, graph_info, input_tensor_info.input_tensors, &op_outputs);
+    RunOpImpl(run_info, graph_info, &input_tensor_info.input_tensors, &op_outputs,
+              input_tensor_info.input_tensors_mask);
 
     // Handle inputs and outputs of current op
     HandleOpInputs(input_tensor_info.input_kernel, &cnode_ref, &op_output_map);
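
Note the ordering inside the merged RunOpImpl: BuildOpImpl is still called with the full tensor list (including the tensors flagged by tensors_mask), and only afterwards does EraseValueNodeTensor trim input_tensors, presumably so the remaining entries line up with the graph's runtime inputs used by RunOpMemoryAlloc, LoadInputData, and UpdateOutputs. The GPU session below follows the same pattern.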


mindspore/ccsrc/backend/session/ascend_session.h (+2, -1)

@@ -60,7 +60,8 @@ class AscendSession : public SessionBasic {
                     const std::vector<tensor::TensorPtr> &input_tensors,
                     const std::vector<int64_t> &tensors_mask) override;
   void RunOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-                 const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) override;
+                 std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+                 const std::vector<int64_t> &tensors_mask) override;
   void RunOpsInGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
                          VectorRef *outputs) override;



mindspore/ccsrc/backend/session/executor.cc (+5, -19)

@@ -125,14 +125,9 @@ void RunGraphTask::Run() {
   ExecutorManager::Instance().OnRunGraphFinished();
 }
 
-void BuildOpTask::Run() {
-  MS_EXCEPTION_IF_NULL(session_);
-  session_->BuildOpImpl(*op_run_info_, graph_info_, input_tensors_, tensors_mask_);
-}
-
 void RunOpTask::Run() {
   MS_EXCEPTION_IF_NULL(session_);
-  session_->RunOpImpl(*op_run_info_, graph_info_, input_tensors_, &outputs_);
+  session_->RunOpImpl(*op_run_info_, graph_info_, input_tensors_, &outputs_, tensors_mask_);
 }
 
 void RunOpsInGraphTask::Run() {
@@ -340,25 +335,16 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id,
   task_cond_var_.notify_all();
 }
 
-void Executor::BuildOp(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
-                       const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int64_t> &tensors_mask) {
-  auto task = std::make_shared<BuildOpTask>();
-  task->session_ = session;
-  task->op_run_info_ = op_run_info;
-  task->graph_info_ = graph_info;
-  task->input_tensors_ = input_tensors;
-  task->tensors_mask_ = tensors_mask;
-  SyncRunTask(task);
-}
-
 void Executor::RunOp(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
-                     const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) {
+                     std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+                     const std::vector<int64_t> &tensors_mask) {
   auto task = std::make_shared<RunOpTask>();
   task->session_ = session;
   task->op_run_info_ = op_run_info;
   task->graph_info_ = graph_info;
   task->input_tensors_ = input_tensors;
-  for (auto &tensor : input_tensors) {
+  task->tensors_mask_ = tensors_mask;
+  for (auto &tensor : *input_tensors) {
     if (tensor->NeedWait()) {
       tensor->Wait();
     }
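
Aside from the signature change, Executor::RunOp keeps its existing behavior of waiting on any input tensor that is still being produced (NeedWait/Wait) before dispatching the task; the only additions are storing tensors_mask_ on the task and iterating over the dereferenced input_tensors pointer.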


mindspore/ccsrc/backend/session/executor.h (+4, -15)

@@ -110,17 +110,6 @@ class RunOpsInGraphTask : public Task {
   GraphId graph_id_{0};
 };
 
-class BuildOpTask : public Task {
- public:
-  BuildOpTask() { type_ = kBuildOp; }
-  ~BuildOpTask() override = default;
-  void Run() override;
-  OpRunInfo *op_run_info_{nullptr};
-  GraphInfo graph_info_;
-  std::vector<tensor::TensorPtr> input_tensors_;
-  std::vector<int64_t> tensors_mask_;
-};
-
 class RunOpTask : public Task {
  public:
   RunOpTask() { type_ = kRunOp; }
@@ -128,8 +117,9 @@ class RunOpTask : public Task {
   void Run() override;
   OpRunInfo *op_run_info_{nullptr};
   GraphInfo graph_info_;
-  std::vector<tensor::TensorPtr> input_tensors_;
+  std::vector<tensor::TensorPtr> *input_tensors_;
   VectorRef outputs_;
+  std::vector<int64_t> tensors_mask_;
 };
 
 class CreateCommGroupTask : public Task {
@@ -170,10 +160,9 @@ class Executor {
                 VectorRef *outputs);
   void RunGraphAsync(const SessionPtr &session, const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
                      VectorRef *outputs);
-  void BuildOp(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
-               const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int64_t> &tensors_mask);
   void RunOp(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
-             const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs);
+             std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+             const std::vector<int64_t> &tensors_mask);
   void RunOpsInGraph(const SessionPtr &session, const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
                      VectorRef *outputs);
   void OnRunGraphFinished();


mindspore/ccsrc/backend/session/gpu_session.cc (+9, -4)

@@ -398,17 +398,22 @@ void GPUSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &grap
 }
 
 void GPUSession::RunOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-                           const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) {
+                           std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+                           const std::vector<int64_t> &tensors_mask) {
+  MS_EXCEPTION_IF_NULL(input_tensors);
+  BuildOpImpl(op_run_info, graph_info, *input_tensors, tensors_mask);
+  EraseValueNodeTensor(tensors_mask, input_tensors);
+
   auto kernel_graph = run_op_graphs_[graph_info];
   MS_EXCEPTION_IF_NULL(kernel_graph);
   // Remove NopOp from execution graph
   opt::RemoveNopNode(kernel_graph.get());
-  RunOpAllocateMemory(input_tensors, kernel_graph.get());
+  RunOpAllocateMemory(*input_tensors, kernel_graph.get());
   // Execute the computation
-  LoadInputData(kernel_graph, input_tensors);
+  LoadInputData(kernel_graph, *input_tensors);
   Execute(kernel_graph);
   // Fetch outputs
-  UpdateOutputs(kernel_graph, outputs, input_tensors);
+  UpdateOutputs(kernel_graph, outputs, *input_tensors);
   RunOpClearMemory(kernel_graph.get());
 }



mindspore/ccsrc/backend/session/gpu_session.h (+2, -1)

@@ -40,7 +40,8 @@ class GPUSession : public SessionBasic {
                     const std::vector<tensor::TensorPtr> &input_tensors,
                     const std::vector<int64_t> &tensors_mask) override;
   void RunOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-                 const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) override;
+                 std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+                 const std::vector<int64_t> &tensors_mask) override;
 
  private:
   void SelectKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;


mindspore/ccsrc/backend/session/session_basic.cc (+19, -9)

@@ -1593,17 +1593,11 @@ void SessionBasic::BuildGraph(GraphId graph_id) {
   executor_->BuildGraph(shared_from_this(), graph_id);
 }
 
-void SessionBasic::BuildOp(OpRunInfo *op_run_info, const GraphInfo &graph_info,
-                           const std::vector<tensor::TensorPtr> &input_tensors,
-                           const std::vector<int64_t> &tensors_mask) {
-  MS_EXCEPTION_IF_NULL(executor_);
-  executor_->BuildOp(shared_from_this(), op_run_info, graph_info, input_tensors, tensors_mask);
-}
-
 void SessionBasic::RunOp(OpRunInfo *op_run_info, const GraphInfo &graph_info,
-                         const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) {
+                         std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+                         const std::vector<int64_t> &tensors_mask) {
   MS_EXCEPTION_IF_NULL(executor_);
-  executor_->RunOp(shared_from_this(), op_run_info, graph_info, input_tensors, outputs);
+  executor_->RunOp(shared_from_this(), op_run_info, graph_info, input_tensors, outputs, tensors_mask);
 }
 
 void SessionBasic::RunOpsInGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
@@ -1623,6 +1617,22 @@ void SessionBasic::RunGraphAsync(const GraphId &graph_id, const std::vector<tens
   executor_->RunGraphAsync(shared_from_this(), graph_id, inputs, outputs);
 }
 
+void SessionBasic::EraseValueNodeTensor(const std::vector<int64_t> &tensors_mask,
+                                        std::vector<tensor::TensorPtr> *input_tensors) {
+  MS_EXCEPTION_IF_NULL(input_tensors);
+  if (input_tensors->size() != tensors_mask.size()) {
+    MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors->size() << " should be equal to tensors mask size "
+                      << tensors_mask.size();
+  }
+  std::vector<tensor::TensorPtr> new_input_tensors;
+  for (size_t index = 0; index < tensors_mask.size(); ++index) {
+    if (tensors_mask[index] != kValueNodeTensorMask) {
+      new_input_tensors.emplace_back(input_tensors->at(index));
+    }
+  }
+  *input_tensors = new_input_tensors;
+}
+
 void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector<KernelGraphPtr> &all_graphs) {
   bool is_dynamic = false;
   for (const auto &graph : all_graphs) {
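
The EraseValueNodeTensor helper added here is the same function removed from pynative_execute.cc later in this diff; hoisting it into SessionBasic lets both the Ascend and GPU RunOpImpl call it after building the op. A small illustrative example of its effect, with hypothetical tensors a, b, c:

    // tensors_mask = {0, kValueNodeTensorMask, 0}, input_tensors = {a, b, c}
    // EraseValueNodeTensor(tensors_mask, &input_tensors) keeps only the entries
    // whose mask is not kValueNodeTensorMask:
    //   input_tensors = {a, c}
    // If the two sizes differ, MS_LOG(EXCEPTION) is raised up front.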


mindspore/ccsrc/backend/session/session_basic.h (+5, -5)

@@ -76,9 +76,8 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
   void BuildGraph(GraphId graphId);
   void RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs);
   void RunGraphAsync(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs);
-  void BuildOp(OpRunInfo *, const GraphInfo &, const std::vector<tensor::TensorPtr> &input_tensors,
-               const std::vector<int64_t> &tensors_mask);
-  void RunOp(OpRunInfo *, const GraphInfo &, const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs);
+  void RunOp(OpRunInfo *, const GraphInfo &, std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+             const std::vector<int64_t> &tensors_mask);
   void RunOpsInGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs);
 
   virtual void RegisterSummaryCallBackFunc(const CallBackFunc &callback);
@@ -137,7 +136,6 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
   friend class CompileGraphTask;
   friend class BuildGraphTask;
   friend class RunGraphTask;
-  friend class BuildOpTask;
   friend class RunOpTask;
   friend class RunOpsInGraphTask;
   virtual bool IsSupportSummary() { return true; }
@@ -156,7 +154,8 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
                            const std::vector<tensor::TensorPtr> &input_tensors,
                            const std::vector<int64_t> &tensors_mask) {}
   virtual void RunOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-                         const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) {}
+                         std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
+                         const std::vector<int64_t> &tensors_mask) {}
   virtual void RunOpsInGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
                                  VectorRef *outputs) {}
   void RunInfer(NotNull<FuncGraphPtr> func_graph, const std::vector<tensor::TensorPtr> &inputs);
@@ -165,6 +164,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
 
   virtual void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
                              const std::vector<tensor::TensorPtr> &inputs_const) const;
+  void EraseValueNodeTensor(const std::vector<int64_t> &tensors_mask, std::vector<tensor::TensorPtr> *input_tensors);
   void UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_graph, VectorRef *const outputs,
                      const std::vector<tensor::TensorPtr> &input_tensors) const;
   void Reorder(std::vector<CNodePtr> *node_list);


mindspore/ccsrc/pipeline/pynative/pynative_execute.cc (+1, -18)

@@ -471,21 +471,6 @@ void ConstructInputTensor(const OpExecInfoPtr &op_run_info, std::vector<int64_t>
   op_prim->EndRecordAddAttr();
 }
 
-void EraseValueNodeTensor(const std::vector<int64_t> &tensors_mask, std::vector<tensor::TensorPtr> *input_tensors) {
-  MS_EXCEPTION_IF_NULL(input_tensors);
-  if (input_tensors->size() != tensors_mask.size()) {
-    MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors->size() << " should be equal to tensors mask size "
-                      << tensors_mask.size();
-  }
-  std::vector<tensor::TensorPtr> new_input_tensors;
-  for (size_t index = 0; index < tensors_mask.size(); ++index) {
-    if (tensors_mask[index] != kValueNodeTensorMask) {
-      new_input_tensors.emplace_back(input_tensors->at(index));
-    }
-  }
-  *input_tensors = new_input_tensors;
-}
-
 BaseRef TransformBaseRefListToTuple(const BaseRef &base_ref) {
   if (utils::isa<VectorRef>(base_ref)) {
     auto ref_list = utils::cast<VectorRef>(base_ref);
@@ -1302,10 +1287,8 @@ py::object PynativeExecutor::RunOpInMs(const OpExecInfoPtr &op_exec_info, Pynati
                            op_exec_info->is_mixed_precision_cast,
                            op_exec_info->next_op_name,
                            op_exec_info->next_input_index};
-  session->BuildOp(&op_run_info, graph_info, input_tensors, tensors_mask);
-  EraseValueNodeTensor(tensors_mask, &input_tensors);
   VectorRef outputs;
-  session->RunOp(&op_run_info, graph_info, input_tensors, &outputs);
+  session->RunOp(&op_run_info, graph_info, &input_tensors, &outputs, tensors_mask);
   auto result = BaseRefToPyData(outputs);
   ms_context->set_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER, false);
   *status = PYNATIVE_SUCCESS;

