From 92bfbe47aad88f160f2c9914c74c72b5274db9ce Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 9 Jan 2021 11:32:08 +0800 Subject: [PATCH] Remove gentask in DEPEND_COMPUTE task executor. --- .../aicpu/aicpu_node_executor.cc | 106 +++++++++--------- .../node_executor/aicpu/aicpu_node_executor.h | 11 +- 2 files changed, 61 insertions(+), 56 deletions(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 63ce65e9..e4cefe65 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -356,6 +356,38 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { return SUCCESS; } +Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) { + const domi::KernelExDef &kernel_def = task_def.kernel_ex(); + if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { + GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", + sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); + return PARAM_INVALID; + } + GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), copy_workspace_buf_), + "Node[%s] alloc copy task workspace buf failed, size=%zu.", + node_name_.c_str(), kernel_def.task_info_size()); + + GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(), + kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); + + STR_FWK_OP_KERNEL aicpu_task = {0}; + auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), + kernel_def.args().data(), kernel_def.args().size()); + if (sec_ret != EOK) { + GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + return FAILED; + } + + aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); + aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(copy_workspace_buf_->GetData()); + aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; + 
aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; + + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + return SUCCESS; +} + uint64_t AicpuTfNodeTask::GetStepIdAddr(const HybridModel &model) { // get step_id_addr auto var_tensor = model.GetVariable(NODE_NAME_GLOBAL_STEP); @@ -410,30 +442,6 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, uint64_t copy_num = 0; GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num)); - STR_FWK_OP_KERNEL aicpu_task = {0}; - std::string task_info; - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), - "[GenMemCopyTask] Start"); - GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info)); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), - "[GenMemCopyTask] End"); - - std::unique_ptr kernel_workspace_buf; - GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), kernel_workspace_buf), - "Node[%s] alloc copy task workspace buf failed, size=%zu.", - node_name_.c_str(), task_info.size()); - - GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), - task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); - - aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); - aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData()); - aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; - aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; - - GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), - &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, context.GetStream())); @@ -458,25 
+466,19 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, node_name_.c_str(), i, summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); - if (summary.raw_data_size > 0) { - auto output = context.GetOutput(i); - GE_CHECK_NOTNULL(output); - GE_CHECK_NOTNULL(output->GetData()); - copy_input_release_flag.emplace_back(kReleaseFlag); - copy_input_data_size.emplace_back(summary.raw_data_size); - copy_input_src.emplace_back(summary.raw_data_ptr); - copy_input_dst.emplace_back(reinterpret_cast(output->GetData())); - } - - if (summary.shape_data_size > 0) { - const auto &shape_buffer = out_shape_hbm[i]; - GE_CHECK_NOTNULL(shape_buffer); - GE_CHECK_NOTNULL(shape_buffer->GetData()); - copy_input_release_flag.emplace_back(kReleaseFlag); - copy_input_data_size.emplace_back(summary.shape_data_size); - copy_input_src.emplace_back(summary.shape_data_ptr); - copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData())); - } + auto output = context.GetOutput(i); + GE_CHECK_NOTNULL(output); + copy_input_release_flag.emplace_back(kReleaseFlag); + copy_input_data_size.emplace_back(summary.raw_data_size); + copy_input_src.emplace_back(summary.raw_data_ptr); + copy_input_dst.emplace_back(reinterpret_cast(output->GetData())); + + const auto &shape_buffer = out_shape_hbm[i]; + GE_CHECK_NOTNULL(shape_buffer); + copy_input_release_flag.emplace_back(kReleaseFlag); + copy_input_data_size.emplace_back(summary.shape_data_size); + copy_input_src.emplace_back(summary.shape_data_ptr); + copy_input_dst.emplace_back(reinterpret_cast(shape_buffer->GetData())); } copy_num = copy_input_release_flag.size(); @@ -498,15 +500,6 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, return SUCCESS; } -Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, std::string &task_info) { - static constexpr const char *const kKernelLibName = "aicpu_tf_kernel"; - auto kernel_builder = 
OpsKernelBuilderManager::Instance().GetOpsKernelBuilder(kKernelLibName); - GE_CHK_BOOL_RET_STATUS(kernel_builder != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName); - auto ret = kernel_builder->GenMemCopyTask(copy_num, task, task_info); - GE_CHK_STATUS_RET(ret, "Call aicpu GenMemCopyTask failed, copy_num=%lu, ret=%u", copy_num, ret); - return SUCCESS; -} - Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm) { GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), @@ -813,9 +806,9 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", node->GetName().c_str(), (*task_defs).size()); } else { - // The number of tasks of the fourth type operator may be 2 - GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID, - "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", + // The number of tasks of the fourth type operator must be 2 + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 2, PARAM_INVALID, + "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 2", node->GetName().c_str(), (*task_defs).size()); } const auto &task_def = (*task_defs)[0]; @@ -836,6 +829,9 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, "Load task for node %s failed.", node->GetName().c_str()); GE_CHK_STATUS_RET(aicpu_task->Init(model), "Node[%s] task init failed.", node->GetName().c_str()); + if (node_item->shape_inference_type == DEPEND_COMPUTE) { + GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask((*task_defs)[1])); + } task = std::move(aicpu_task); GELOGD("Node[%s] load task end.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 8f0b1d0a..401d22a6 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ 
b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -41,6 +41,8 @@ class AicpuNodeTaskBase : public NodeTask { virtual Status Init(const HybridModel &model) = 0; + virtual Status SetMemCopyTask(const domi::TaskDef &task_def) = 0; + Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; @@ -89,6 +91,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { Status Init(const HybridModel &model) override; + Status SetMemCopyTask(const domi::TaskDef &task_def) override; + protected: Status LaunchTask(TaskContext &context) override; @@ -121,7 +125,6 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { uint64_t &copy_num); static Status EnsureSessionCreated(uint64_t session_id); - static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info); static uint64_t GetStepIdAddr(const HybridModel &model); private: // kernel buf, device mem @@ -145,6 +148,8 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { std::unique_ptr copy_input_src_dev_; std::unique_ptr copy_input_dst_dev_; bool need_sync_ = false; + + std::unique_ptr copy_workspace_buf_; }; class AicpuNodeTask : public AicpuNodeTaskBase { @@ -156,6 +161,10 @@ class AicpuNodeTask : public AicpuNodeTaskBase { Status Init(const HybridModel &model) override; + Status SetMemCopyTask(const domi::TaskDef &task_def) override { + return UNSUPPORTED; + } + protected: Status LaunchTask(TaskContext &context) override;