From 664634d0708eac7d677af140dc9d2851e66ba781 Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 31 Dec 2020 12:33:06 +0800 Subject: [PATCH 1/8] Handle EndOfSequence --- .../executor/hybrid_execution_context.cc | 27 ++++++++++++++++++ ge/hybrid/executor/hybrid_execution_context.h | 2 ++ .../executor/hybrid_model_async_executor.cc | 4 +-- ge/hybrid/executor/hybrid_model_executor.cc | 1 + ge/hybrid/executor/hybrid_model_executor.h | 1 + ge/hybrid/executor/subgraph_executor.cc | 11 +++++++- .../aicpu/aicpu_node_executor.cc | 15 +++++++++- .../node_executor/aicpu/aicpu_node_executor.h | 1 + .../compiledsubgraph/known_node_executor.cc | 28 +++++++++++++++++-- .../compiledsubgraph/known_node_executor.h | 6 ++-- ge/hybrid/node_executor/task_context.cc | 4 +++ ge/hybrid/node_executor/task_context.h | 2 ++ 12 files changed, 94 insertions(+), 8 deletions(-) diff --git a/ge/hybrid/executor/hybrid_execution_context.cc b/ge/hybrid/executor/hybrid_execution_context.cc index 491220be..77089e15 100644 --- a/ge/hybrid/executor/hybrid_execution_context.cc +++ b/ge/hybrid/executor/hybrid_execution_context.cc @@ -18,6 +18,12 @@ namespace ge { namespace hybrid { +namespace { +const uint32_t kEndOfSequence = 0x0704000a; +const uint32_t kEndOfSequenceNew = 507005; +const int32_t kModelAbortNormal = 0x0704000e; +const int32_t kModelAbortNormalNew = 507024; +} // namespace void GraphExecutionContext::SetErrorCode(Status error_code) { std::lock_guard lk(mu); this->status = error_code; @@ -27,5 +33,26 @@ Status GraphExecutionContext::GetStatus() const { std::lock_guard lk(mu); return this->status; } + +Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) { + auto rt_ret = rtStreamSynchronize(rt_stream); + if (rt_ret == SUCCESS) { + return SUCCESS; + } + + if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { + GELOGI("Got end of sequence"); + is_eos_ = true; + return SUCCESS; + } + + if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { + GELOGI("The model with multiple datasets aborts normally"); + return SUCCESS; + } + + GELOGE(RT_FAILED, "Failed to invoke rtStreamSynchronize, ret = %d", rt_ret); + return RT_FAILED; +} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index f1c25290..49c54d2f 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -36,6 +36,7 @@ namespace hybrid { struct GraphExecutionContext { void SetErrorCode(Status error_code); Status GetStatus() const; + Status Synchronize(rtStream_t rt_stream); uint64_t session_id = 0; const HybridModel *model = nullptr; @@ -49,6 +50,7 @@ struct GraphExecutionContext { DumpProperties dump_properties; bool trace_enabled = false; bool dump_enabled = false; + std::atomic_bool is_eos_; long profiling_level = 0; long iteration = 0; Status status = SUCCESS; diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index ba717a2d..c17ff0d9 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -24,7 +24,7 @@ namespace ge { namespace hybrid { namespace { -int kDataOutputIndex = 0; +const int kDataOutputIndex = 0; } HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) : model_(model), run_flag_(false) { @@ -162,7 +162,7 @@ Status HybridModelAsyncExecutor::HandleResult(Status exec_ret, OutputData *output_data) { GELOGD("Start to handle result. model id = %u, data index = %u, execution ret = %u", model_id_, data_id, exec_ret); std::vector output_tensor_info_list; - if (exec_ret == END_OF_SEQUENCE) { + if (args.is_eos) { GELOGW("End of sequence, model id = %u", model_id_); return OnComputeDone(data_id, END_OF_SEQUENCE, output_tensor_info_list); } diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index e17998db..ee933090 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -58,6 +58,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { context_.profiler->Reset(); } + args.is_eos = context_.is_eos_; context_.iteration += 1; return SUCCESS; } diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 04aef6a5..6299d4ff 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -31,6 +31,7 @@ class HybridModelExecutor { std::vector input_desc; std::vector outputs; std::vector output_desc; + bool is_eos = false; }; HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream); diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 4b6dddab..1b2024c7 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -240,6 +240,10 @@ Status SubgraphExecutor::PrepareNodes() { } if (!ready_queue_.Push(p_node_state)) { + if (context_->is_eos_) { + GELOGD("Got end of sequence"); + return SUCCESS; + } GELOGE(INTERNAL_ERROR, "[%s] Error occurs while launching tasks. quit from preparing nodes.", graph_item_->GetName().c_str()); return INTERNAL_ERROR; @@ -295,6 +299,11 @@ Status SubgraphExecutor::LaunchTasks() { "[%s] Execute node failed.", node_state->GetName().c_str()); + if (context_->is_eos_) { + GELOGD("Got end of sequence"); + ready_queue_.Stop(); + return SUCCESS; + } GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str()); } } @@ -350,7 +359,7 @@ Status SubgraphExecutor::GetOutputs(vector &outputs, std::vectorGetName().c_str()); - GE_CHK_RT_RET(rtStreamSynchronize(context_->stream)); + GE_CHK_STATUS_RET_NOLOG(context_->Synchronize(context_->stream)); GELOGD("[%s] Done synchronizing successfully.", graph_item_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 43f4f6d2..0b34ecc3 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -19,6 +19,7 @@ #include "common/formats/formats.h" #include "aicpu/common/aicpu_task_struct.h" #include "graph/load/new_model_manager/model_manager.h" +#include "graph/utils/node_utils.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" @@ -188,6 +189,10 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionis_eos_) { + GELOGD("[%s] Got end of sequence", node_name_.c_str()); + return SUCCESS; + } uint32_t task_id = 0; uint32_t stream_id = 0; @@ -346,7 +351,11 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); - + auto node_type = NodeUtils::GetNodeType(node_item_->node); + if (node_type.find(GETNEXT) != string::npos) { + GELOGD("[%s] Is GetNext, set need sync to true, node type = %s", node_name_.c_str(), node_type.c_str()); + need_sync_ = true; + } GELOGI("Node[%s] init end.", node_name_.c_str()); return SUCCESS; } @@ -616,6 +625,10 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream())); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); GELOGD("Node[%s] launch end.", node_name_.c_str()); + if (need_sync_) { + GELOGD("[%s] Task needs sync", node_name_.c_str()); + GE_CHK_STATUS_RET_NOLOG(context.Synchronize()); + } return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 1205b190..8f0b1d0a 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -144,6 +144,7 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { std::unique_ptr copy_input_data_size_dev_; std::unique_ptr copy_input_src_dev_; std::unique_ptr copy_input_dst_dev_; + bool need_sync_ = false; }; class AicpuNodeTask : public AicpuNodeTaskBase { diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index c914ac1b..3ef0a50f 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -21,6 +21,8 @@ #include "common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" #include "graph/load/new_model_manager/model_utils.h" #include "graph/load/new_model_manager/model_manager.h" #include "hybrid/executor/hybrid_execution_context.h" @@ -29,7 +31,7 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::COMPILED_SUBGRAPH, KnownNodeExecutor); -Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::function done_callback) { +Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] Start"); GELOGD("[%s] KnownNodeTask::ExecuteAsync in.", context.GetNodeName()); if (davinci_model_->GetTaskList().empty()) { @@ -58,6 +60,10 @@ Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::functionAssign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed."); - task = MakeShared(davinci_model); + bool need_sync = false; + GE_CHK_STATUS_RET_NOLOG(NeedSync(*ge_model, need_sync)); + task = MakeShared(davinci_model, need_sync); GE_CHECK_NOTNULL(task); GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); return SUCCESS; @@ -186,5 +194,21 @@ Status KnownNodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorExecuteTask] End"); return SUCCESS; } + +Status KnownNodeExecutor::NeedSync(GeModel &ge_model, bool &need_sync) { + auto compute_graph = GraphUtils::GetComputeGraph(ge_model.GetGraph()); + GE_CHECK_NOTNULL(compute_graph); + for (auto &node : compute_graph->GetAllNodes()) { + auto type = NodeUtils::GetNodeType(node); + if (type == GETNEXT) { + GELOGD("Contains GetNext node: %s", node->GetName().c_str()); + need_sync = true; + return SUCCESS; + } + } + + need_sync = false; + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 2dde993b..dfd6bbd0 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -27,8 +27,8 @@ class HybridModel; class KnownNodeTask : public NodeTask { public: - explicit KnownNodeTask(std::shared_ptr davinci_model) - : davinci_model_(davinci_model) + explicit KnownNodeTask(std::shared_ptr davinci_model, bool need_sync) + : davinci_model_(davinci_model), need_sync_(need_sync) {} ~KnownNodeTask() {} @@ -39,6 +39,7 @@ class KnownNodeTask : public NodeTask { private: std::shared_ptr davinci_model_ = nullptr; bool load_flag_ = false; + bool need_sync_; }; class KnownNodeExecutor : public NodeExecutor { @@ -48,6 +49,7 @@ class KnownNodeExecutor : public NodeExecutor { Status ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const; ~KnownNodeExecutor() {} private: + static Status NeedSync(GeModel &ge_model, bool &need_sync); std::shared_ptr davinci_model_ = nullptr; }; } // namespace hybrid diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index d15ea978..6488fbbe 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -494,5 +494,9 @@ const DumpProperties &TaskContext::GetDumpProperties() const { bool TaskContext::NeedCallback() { return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0; } + +Status TaskContext::Synchronize() { + return execution_context_->Synchronize(GetStream()); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 0e85a8e3..9ddde322 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -102,6 +102,8 @@ class TaskContext { uint32_t GetStreamId() const; void SetStreamId(uint32_t stream_id); + Status Synchronize(); + bool IsForceInferShape() const; void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; From 84cd741be403e9af4174007e65dff4e4cf81e5e0 Mon Sep 17 00:00:00 2001 From: chuxing Date: Mon, 4 Jan 2021 15:06:06 +0800 Subject: [PATCH 2/8] handle eos --- .../executor/hybrid_model_async_executor.cc | 5 +- ge/hybrid/executor/subgraph_executor.cc | 52 ++++++++++--------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index c17ff0d9..ccef0669 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -163,8 +163,9 @@ Status HybridModelAsyncExecutor::HandleResult(Status exec_ret, GELOGD("Start to handle result. model id = %u, data index = %u, execution ret = %u", model_id_, data_id, exec_ret); std::vector output_tensor_info_list; if (args.is_eos) { - GELOGW("End of sequence, model id = %u", model_id_); - return OnComputeDone(data_id, END_OF_SEQUENCE, output_tensor_info_list); + GELOGI("End of sequence, model id = %u", model_id_); + GE_CHK_STATUS_RET_NOLOG(OnComputeDone(data_id, END_OF_SEQUENCE, output_tensor_info_list)); + return SUCCESS; } if (exec_ret != SUCCESS) { diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 1b2024c7..6286ea8c 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -211,31 +211,26 @@ Status SubgraphExecutor::PrepareNodes() { GE_CHECK_NOTNULL(node_state); auto p_node_state = node_state.get(); - if (node_item.node_type == NETOUTPUT) { - // Wait for all inputs become valid - // after PrepareNodes returned. all output tensors and shapes are valid - GE_CHK_STATUS_RET_NOLOG(p_node_state->GetShapeInferenceState().AwaitShapesReady(*context_)); - GE_CHK_STATUS_RET_NOLOG(p_node_state->AwaitInputTensors(*context_)); - continue; - } - - // only do shape inference and compilation for nodes with dynamic shapes. - if (node_item.is_dynamic) { - auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { - GetContext().SetSessionId(context_->session_id); - GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); - return PrepareForExecution(context_, *p_node_state); - }); - - p_node_state->SetPrepareFuture(std::move(prepare_future)); - } else { - GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", node_item.NodeName().c_str()); - if (node_item.kernel_task == nullptr) { - GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str()); - GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), - "[%s] Failed to create task.", p_node_state->GetName().c_str()); + if (node_item.node_type != NETOUTPUT) { + // only do shape inference and compilation for nodes with dynamic shapes. + if (node_item.is_dynamic) { + auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { + GetContext().SetSessionId(context_->session_id); + GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); + return PrepareForExecution(context_, *p_node_state); + }); + + p_node_state->SetPrepareFuture(std::move(prepare_future)); } else { - node_state->SetKernelTask(node_item.kernel_task); + GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", + node_item.NodeName().c_str()); + if (node_item.kernel_task == nullptr) { + GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str()); + GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), + "[%s] Failed to create task.", p_node_state->GetName().c_str()); + } else { + node_state->SetKernelTask(node_item.kernel_task); + } } } @@ -288,6 +283,15 @@ Status SubgraphExecutor::LaunchTasks() { return SUCCESS; } + if (node_state->GetType() == NETOUTPUT) { + // Wait for all inputs become valid + // after PrepareNodes returned. all output tensors and shapes are valid + GE_CHK_STATUS_RET_NOLOG(node_state->GetShapeInferenceState().AwaitShapesReady(*context_)); + GE_CHK_STATUS_RET_NOLOG(node_state->AwaitInputTensors(*context_)); + GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str()); + continue; + } + GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone()); GELOGD("[%s] Start to execute.", node_state->GetName().c_str()); From 68bbf9e41c1898df4c967fe60bb84dd9c2d8fba8 Mon Sep 17 00:00:00 2001 From: chuxing Date: Tue, 5 Jan 2021 18:30:49 +0800 Subject: [PATCH 3/8] update --- ge/hybrid/executor/hybrid_model_executor.cc | 1 + ge/hybrid/model/node_item.h | 1 + .../compiledsubgraph/known_node_executor.cc | 28 ++----------------- .../compiledsubgraph/known_node_executor.h | 6 ++-- 4 files changed, 6 insertions(+), 30 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index ee933090..b2ad4eb6 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -95,6 +95,7 @@ Status HybridModelExecutor::InitExecutionContext() { context_.stream = stream_; context_.model = model_; + context_.is_eos_ = false; context_.session_id = ::ge::GetContext().SessionId(); context_.ge_context = &GetThreadLocalContext(); GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id); diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index a34227c5..2290dc09 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -82,6 +82,7 @@ struct NodeItem { bool has_observer = false; bool has_optional_inputs = false; bool is_output_shape_static = true; + bool may_trigger_eos_ = false; UnknowShapeOpType shape_inference_type = DEPEND_IN_SHAPE; std::string node_name; std::string node_type; diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 3ef0a50f..c914ac1b 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -21,8 +21,6 @@ #include "common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/node_utils.h" #include "graph/load/new_model_manager/model_utils.h" #include "graph/load/new_model_manager/model_manager.h" #include "hybrid/executor/hybrid_execution_context.h" @@ -31,7 +29,7 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::COMPILED_SUBGRAPH, KnownNodeExecutor); -Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { +Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] Start"); GELOGD("[%s] KnownNodeTask::ExecuteAsync in.", context.GetNodeName()); if (davinci_model_->GetTaskList().empty()) { @@ -60,10 +58,6 @@ Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function d GELOGE(rt_ret, "rtModelExecute error, ret: hybrid_model_executorOx%X", rt_ret); return FAILED;); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] End"); - if (need_sync_) { - GELOGD("[%s] model need sync", context.GetNodeName()); - GE_CHK_STATUS_RET_NOLOG(context.Synchronize()); - } GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(done_callback)); GELOGD("[%s] KnownNodeTask::ExecuteAsync success.", context.GetNodeName()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] End"); @@ -177,9 +171,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed."); - bool need_sync = false; - GE_CHK_STATUS_RET_NOLOG(NeedSync(*ge_model, need_sync)); - task = MakeShared(davinci_model, need_sync); + task = MakeShared(davinci_model); GE_CHECK_NOTNULL(task); GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); return SUCCESS; @@ -194,21 +186,5 @@ Status KnownNodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorExecuteTask] End"); return SUCCESS; } - -Status KnownNodeExecutor::NeedSync(GeModel &ge_model, bool &need_sync) { - auto compute_graph = GraphUtils::GetComputeGraph(ge_model.GetGraph()); - GE_CHECK_NOTNULL(compute_graph); - for (auto &node : compute_graph->GetAllNodes()) { - auto type = NodeUtils::GetNodeType(node); - if (type == GETNEXT) { - GELOGD("Contains GetNext node: %s", node->GetName().c_str()); - need_sync = true; - return SUCCESS; - } - } - - need_sync = false; - return SUCCESS; -} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index dfd6bbd0..2dde993b 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -27,8 +27,8 @@ class HybridModel; class KnownNodeTask : public NodeTask { public: - explicit KnownNodeTask(std::shared_ptr davinci_model, bool need_sync) - : davinci_model_(davinci_model), need_sync_(need_sync) + explicit KnownNodeTask(std::shared_ptr davinci_model) + : davinci_model_(davinci_model) {} ~KnownNodeTask() {} @@ -39,7 +39,6 @@ class KnownNodeTask : public NodeTask { private: std::shared_ptr davinci_model_ = nullptr; bool load_flag_ = false; - bool need_sync_; }; class KnownNodeExecutor : public NodeExecutor { @@ -49,7 +48,6 @@ class KnownNodeExecutor : public NodeExecutor { Status ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const; ~KnownNodeExecutor() {} private: - static Status NeedSync(GeModel &ge_model, bool &need_sync); std::shared_ptr davinci_model_ = nullptr; }; } // namespace hybrid From 396f29cad93c5726993d1430de60c214e13e0f7e Mon Sep 17 00:00:00 2001 From: chuxing Date: Tue, 5 Jan 2021 18:33:44 +0800 Subject: [PATCH 4/8] update --- ge/hybrid/model/node_item.h | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index 2290dc09..a34227c5 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -82,7 +82,6 @@ struct NodeItem { bool has_observer = false; bool has_optional_inputs = false; bool is_output_shape_static = true; - bool may_trigger_eos_ = false; UnknowShapeOpType shape_inference_type = DEPEND_IN_SHAPE; std::string node_name; std::string node_type; From 922e42931d98b6569d97845f209d9ff1ba883ffa Mon Sep 17 00:00:00 2001 From: chuxing Date: Tue, 5 Jan 2021 20:14:36 +0800 Subject: [PATCH 5/8] update --- ge/hybrid/executor/hybrid_execution_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/executor/hybrid_execution_context.cc b/ge/hybrid/executor/hybrid_execution_context.cc index 77089e15..13a6c9ec 100644 --- a/ge/hybrid/executor/hybrid_execution_context.cc +++ b/ge/hybrid/executor/hybrid_execution_context.cc @@ -36,7 +36,7 @@ Status GraphExecutionContext::GetStatus() const { Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) { auto rt_ret = rtStreamSynchronize(rt_stream); - if (rt_ret == SUCCESS) { + if (rt_ret == RT_ERROR_NONE) { return SUCCESS; } From 2993411f60c0f93c53f2ec4cce58676dd5626b91 Mon Sep 17 00:00:00 2001 From: chuxing Date: Tue, 5 Jan 2021 22:20:13 +0800 Subject: [PATCH 6/8] update --- .../executor/hybrid_execution_context.cc | 2 +- ge/hybrid/executor/hybrid_execution_context.h | 14 ++++++++ ge/hybrid/executor/node_state.cc | 33 ++++++++++--------- ge/hybrid/executor/node_state.h | 2 +- ge/hybrid/executor/subgraph_context.cc | 20 ++++++++--- ge/hybrid/executor/subgraph_context.h | 6 ++-- ge/hybrid/executor/subgraph_executor.cc | 29 +++++++--------- ge/hybrid/executor/worker/execution_engine.cc | 12 +++---- .../executor/worker/shape_inference_engine.cc | 6 +--- .../aicpu/aicpu_node_executor.cc | 6 +--- ge/hybrid/node_executor/node_executor.cc | 7 ++-- 11 files changed, 75 insertions(+), 62 deletions(-) diff --git a/ge/hybrid/executor/hybrid_execution_context.cc b/ge/hybrid/executor/hybrid_execution_context.cc index 13a6c9ec..87207e94 100644 --- a/ge/hybrid/executor/hybrid_execution_context.cc +++ b/ge/hybrid/executor/hybrid_execution_context.cc @@ -43,7 +43,7 @@ Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) { if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { GELOGI("Got end of sequence"); is_eos_ = true; - return SUCCESS; + return END_OF_SEQUENCE; } if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 49c54d2f..c398e83d 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -31,6 +31,20 @@ #include "hybrid/executor/rt_callback_manager.h" #include "hybrid/model/hybrid_model.h" +// If expr is not SUCCESS, print the log and return the same value +#define HYBRID_CHK_STATUS_RET(expr, ...) \ + do { \ + const ge::Status _status = (expr); \ + if (_status != ge::SUCCESS) { \ + if (_status == ge::END_OF_SEQUENCE) { \ + GELOGD("Got end of sequence"); \ + } else { \ + GELOGE(_status, __VA_ARGS__); \ + } \ + return _status; \ + } \ + } while (0) + namespace ge { namespace hybrid { struct GraphExecutionContext { diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index ceed40b0..93c6c58c 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -98,6 +98,11 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex break; } + if (context.is_eos_) { + GELOGD("[%s] Await pending shape cancelled due to end of sequence", node_item.NodeName().c_str()); + return END_OF_SEQUENCE; + } + if (context.GetStatus() != SUCCESS) { GELOGE(FAILED, "[%s] Await pending shape cancelled", node_item.NodeName().c_str()); break; @@ -114,7 +119,8 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex auto idx = p.first; auto &future = p.second; RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); - auto src_tensor_desc = future.GetTensorDesc(); + GeTensorDescPtr src_tensor_desc; + GE_CHK_STATUS_RET_NOLOG(future.GetTensorDesc(src_tensor_desc)); GE_CHECK_NOTNULL(src_tensor_desc); RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); @@ -156,10 +162,11 @@ Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { node_item_->NodeName().c_str(), "[AwaitNodeDone] [%s] Start", src_node->GetName().c_str()); - if (!subgraph_context_->Await(src_node)) { - GELOGE(INTERNAL_ERROR, "[%s] Await node [%s] failed.", GetName().c_str(), src_node->GetName().c_str()); - return INTERNAL_ERROR; - } + + HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node), + "[%s] Await node [%s] failed.", + GetName().c_str(), + src_node->GetName().c_str()); RECORD_EXECUTION_EVENT(&context, node_item_->NodeName().c_str(), @@ -183,24 +190,18 @@ Status NodeState::WaitForPrepareDone() { Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); - if (!subgraph_context_->Await(src_node_)) { - GELOGE(INTERNAL_ERROR, "cancelled"); - return INTERNAL_ERROR; - } - + HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled"); shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->MutableShape(); ori_shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->GetOriginShape(); GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); return SUCCESS; } -GeTensorDescPtr ShapeFuture::GetTensorDesc() { +Status ShapeFuture::GetTensorDesc(GeTensorDescPtr &tensor_desc) { GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); - if (!subgraph_context_->Await(src_node_)) { - GELOGE(INTERNAL_ERROR, "cancelled"); - return nullptr; - } - return src_node_->GetOpDesc()->MutableOutputDesc(src_index_); + HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled"); + tensor_desc = src_node_->GetOpDesc()->MutableOutputDesc(src_index_); + return SUCCESS; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 312e177f..02a362b4 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -35,7 +35,7 @@ class ShapeFuture { ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); ~ShapeFuture() = default; Status Get(GeShape &ori_shape, GeShape &shape); - GeTensorDescPtr GetTensorDesc(); + Status GetTensorDesc(GeTensorDescPtr &tensor_desc); private: NodePtr src_node_; diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index 923c2aa3..0889e51e 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -20,8 +20,8 @@ namespace ge { namespace hybrid { -SubgraphContext::SubgraphContext(const GraphItem *graph_item) : graph_item_(graph_item) { - +SubgraphContext::SubgraphContext(const GraphItem *graph_item, const GraphExecutionContext *execution_context) + : graph_item_(graph_item), execution_context_(execution_context) { } Status SubgraphContext::Init() { @@ -111,12 +111,22 @@ Status SubgraphContext::GetOutputs(std::vector &outputs) { return SUCCESS; } -bool SubgraphContext::Await(const NodePtr &node) { - return node_done_manager_.Await(node); +Status SubgraphContext::Await(const NodePtr &node) { + if (node_done_manager_.Await(node)) { + return SUCCESS; + } + + if (execution_context_->is_eos_) { + return END_OF_SEQUENCE; + } + + return FAILED; } void SubgraphContext::OnError(Status error) { - GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str()); + if (error != END_OF_SEQUENCE) { + GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str()); + } node_done_manager_.Destroy(); } diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index b86765f7..3eb66b02 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -20,6 +20,7 @@ #include #include "hybrid/common/tensor_value.h" +#include "hybrid/executor/hybrid_model_executor.h" #include "hybrid/executor/node_state.h" #include "hybrid/executor/node_done_manager.h" #include "hybrid/model/graph_item.h" @@ -29,7 +30,7 @@ namespace ge { namespace hybrid { class SubgraphContext { public: - explicit SubgraphContext(const GraphItem *graph_item); + explicit SubgraphContext(const GraphItem *graph_item, const GraphExecutionContext *execution_context); ~SubgraphContext() = default; Status Init(); @@ -43,11 +44,12 @@ class SubgraphContext { Status GetInput(int index, TensorValue &tensor); Status GetOutputs(std::vector &outputs); - bool Await(const NodePtr &node); + Status Await(const NodePtr &node); void NodeDone(const NodePtr &node); private: friend class TaskContext; + const GraphExecutionContext *execution_context_; const GraphItem *graph_item_; std::mutex mu_; std::vector all_inputs_; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 6286ea8c..b59f1acb 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -163,10 +163,10 @@ Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vectorGetName().c_str(), - known_shape_task_context_->GetNodeName()); + HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, known_shape_task_context_, *context_), + "[%s] Failed to execute node [%s] for known subgraph.", + graph_item_->GetName().c_str(), + known_shape_task_context_->GetNodeName()); GELOGD("[%s] Done execute non-dynamic subgraph successfully.", graph_item_->GetName().c_str()); return SUCCESS; @@ -252,10 +252,10 @@ Status SubgraphExecutor::PrepareNodes() { Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) { const auto &node_item = *node_state.GetNodeItem(); - GE_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), - "[%s] Failed to InferShape.", node_state.GetName().c_str()); - GE_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item), - "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); + HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), + "[%s] Failed to InferShape.", node_state.GetName().c_str()); + HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item), + "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); return SUCCESS; } @@ -299,15 +299,9 @@ Status SubgraphExecutor::LaunchTasks() { GE_CHECK_NOTNULL(task_context); task_context->SetForceInferShape(force_infer_shape_); auto shared_task_context = std::shared_ptr(task_context.release()); - GE_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), - "[%s] Execute node failed.", - node_state->GetName().c_str()); - - if (context_->is_eos_) { - GELOGD("Got end of sequence"); - ready_queue_.Stop(); - return SUCCESS; - } + HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), + "[%s] Execute node failed.", + node_state->GetName().c_str()); GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str()); } } @@ -324,7 +318,6 @@ Status SubgraphExecutor::ScheduleTasks() { GELOGD("[%s] Start to execute subgraph.", graph_item_->GetName().c_str()); auto ret = LaunchTasks(); if (ret != SUCCESS) { - GELOGE(ret, "[%s] Failed to execute subgraph.", graph_item_->GetName().c_str()); subgraph_context_->OnError(ret); context_->SetErrorCode(ret); ready_queue_.Stop(); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 21dd8e4b..ea70ad69 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -406,9 +406,9 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, // Wait for dependent nodes(DEPEND_COMPUTE), so that the input tensors are valid. RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[AwaitDependents] Start"); - GE_CHK_STATUS_RET(node_state.AwaitInputTensors(context), - "[%s] Failed to wait for dependent nodes.", - node_state.GetName().c_str()); + HYBRID_CHK_STATUS_RET(node_state.AwaitInputTensors(context), + "[%s] Failed to wait for dependent nodes.", + node_state.GetName().c_str()); const auto &node_item = *node_state.GetNodeItem(); auto executor = node_item.node_executor; @@ -438,9 +438,9 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, }); } RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] Start"); - GE_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback), - "[%s] Failed to execute task", - node_state.GetName().c_str()); + HYBRID_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback), + "[%s] Failed to execute task", + node_state.GetName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] End"); GELOGD("[%s] Done task launch successfully.", node_state.GetName().c_str()); diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 66d0ede2..56ae3ea3 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -99,11 +99,7 @@ Status ShapeInferenceEngine::AwaitDependentNodes(NodeState &node_state) { node_item.NodeName().c_str(), "[AwaitNodeDone] [%s] Start", src_node->GetName().c_str()); - if (!subgraph_context_->Await(src_node)) { - GELOGE(INTERNAL_ERROR, "[%s] Await node failed.", src_node->GetName().c_str()); - return INTERNAL_ERROR; - } - + HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node), "[%s] Await node failed.", src_node->GetName().c_str()); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[AwaitNodeDone] [%s] End", diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 0b34ecc3..63ce65e9 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -188,11 +188,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionis_eos_) { - GELOGD("[%s] Got end of sequence", node_name_.c_str()); - return SUCCESS; - } + HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); uint32_t task_id = 0; uint32_t stream_id = 0; diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index fe89464b..02427b91 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -20,6 +20,7 @@ #include "graph/utils/node_utils.h" #include "init/gelib.h" #include "graph/utils/tensor_utils.h" +#include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" #include "graph/debug/ge_attr_define.h" #include "opskernel_manager/ops_kernel_builder_manager.h" @@ -44,9 +45,9 @@ Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { } Status NodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const { - GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), - "Failed to execute task. node = %s", - context.GetNodeItem().NodeName().c_str()); + HYBRID_CHK_STATUS_RET(task.ExecuteAsync(context, callback), + "Failed to execute task. node = %s", + context.GetNodeItem().NodeName().c_str()); return SUCCESS; } From 37f2b85c774c237854db70cbbd923ab20e196983 Mon Sep 17 00:00:00 2001 From: chuxing Date: Wed, 6 Jan 2021 11:22:35 +0800 Subject: [PATCH 7/8] update --- ge/hybrid/executor/subgraph_context.cc | 1 + ge/hybrid/executor/subgraph_context.h | 4 ++-- ge/hybrid/executor/subgraph_executor.cc | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index 0889e51e..0fa112a4 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -17,6 +17,7 @@ #include "subgraph_context.h" #include "common/debug/log.h" +#include "hybrid/executor/hybrid_model_executor.h" namespace ge { namespace hybrid { diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index 3eb66b02..8ce33f23 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -20,7 +20,7 @@ #include #include "hybrid/common/tensor_value.h" -#include "hybrid/executor/hybrid_model_executor.h" +#include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/node_state.h" #include "hybrid/executor/node_done_manager.h" #include "hybrid/model/graph_item.h" @@ -49,8 +49,8 @@ class SubgraphContext { private: friend class TaskContext; - const GraphExecutionContext *execution_context_; const GraphItem *graph_item_; + const GraphExecutionContext *execution_context_; std::mutex mu_; std::vector all_inputs_; std::vector all_outputs_; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index b59f1acb..30a99ac4 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -40,7 +40,7 @@ SubgraphExecutor::~SubgraphExecutor() { Status SubgraphExecutor::Init(const std::vector &inputs, const std::vector &input_desc) { - subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_)); + subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_, context_)); GE_CHECK_NOTNULL(subgraph_context_); GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str()); From c225cbe16d356d6dcc958dcdccb2f4e985090829 Mon Sep 17 00:00:00 2001 From: chuxing Date: Wed, 6 Jan 2021 17:27:55 +0800 Subject: [PATCH 8/8] update --- ge/hybrid/executor/hybrid_model_executor.cc | 14 ++++++++------ ge/hybrid/executor/subgraph_executor.cc | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index b2ad4eb6..c47dafc1 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -50,16 +50,18 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { auto ret = ExecuteGraphInternal(executor, args); Cleanup(); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End"); - GE_CHK_STATUS_RET(ret, "Failed to execute model"); GELOGD("Model executed successfully."); - if (context_.profiler != nullptr) { context_.profiler->Dump(std::cout); context_.profiler->Reset(); } - args.is_eos = context_.is_eos_; context_.iteration += 1; + if (ret == END_OF_SEQUENCE) { + args.is_eos = true; + } else { + GE_CHK_STATUS_RET(ret, "Failed to execute model"); + } return SUCCESS; } @@ -69,13 +71,13 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); - GE_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call."); + HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call."); RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); - GE_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); + HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); - GE_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); + HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); return SUCCESS; } diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 30a99ac4..f7b063c7 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -139,7 +139,7 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector &inputs, return ExecuteAsyncForKnownShape(inputs); } - GE_CHK_STATUS_RET(ScheduleTasks(), "[%s] Failed to execute tasks.", graph_item_->GetName().c_str()); + HYBRID_CHK_STATUS_RET(ScheduleTasks(), "[%s] Failed to execute tasks.", graph_item_->GetName().c_str()); GELOGD("[%s] Done executing subgraph successfully.", graph_item_->GetName().c_str()); return SUCCESS; }