From 38664ebcc98441aae60fda8832c08c9c34209ecb Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 9 Jan 2021 14:33:20 +0800 Subject: [PATCH] Remove gentask in DEPEND_COMPUTE task executor. --- .../aicpu/aicpu_node_executor.cc | 37 +++++++++---------- .../node_executor/aicpu/aicpu_node_executor.h | 3 +- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index e4cefe65..f47f0774 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -357,27 +357,33 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { } Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) { + if (node_item_->num_outputs == 0) { + GELOGD("Node[%s] type[%s] has no output, no need set mem_copy task.", + node_name_.c_str(), node_item_->node_type.c_str()); + return SUCCESS; + } + const domi::KernelExDef &kernel_def = task_def.kernel_ex(); if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); return PARAM_INVALID; } - GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), copy_workspace_buf_), - "Node[%s] alloc copy task workspace buf failed, size=%zu.", - node_name_.c_str(), kernel_def.task_info_size()); - - GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(), - kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); - STR_FWK_OP_KERNEL aicpu_task = {0}; auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), - kernel_def.args().data(), kernel_def.args().size()); + kernel_def.args().data(), kernel_def.args_size()); if (sec_ret != EOK) { GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); return FAILED; } + GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), 
copy_workspace_buf_), + "Node[%s] alloc copy task workspace buf failed, size=%zu.", + node_name_.c_str(), kernel_def.task_info_size()); + + GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(), + kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); + aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast<uintptr_t>(copy_ioaddr_dev_->GetData()); aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast<uintptr_t>(copy_workspace_buf_->GetData()); aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; @@ -439,8 +445,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, "Node[%s] has %d outputs but out shape is %zu.", node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); - uint64_t copy_num = 0; - GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num)); + GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), @@ -453,8 +458,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, } Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, - const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm, - uint64_t &copy_num) { + const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) { std::vector<uint64_t> copy_input_release_flag; std::vector<uint64_t> copy_input_data_size; std::vector<uint64_t> copy_input_src; @@ -481,13 +485,8 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(shape_buffer->GetData())); } - copy_num = copy_input_release_flag.size(); - - GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR, - "Node[%s] need copy num is 0", node_name_.c_str()); - - // copy task need copy output and output shape - const size_t copy_input_buf_len = copy_num * sizeof(uint64_t); + // copy task need copy all output_data and output_shape, len is 2 * output_num 
+ const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t); GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_->GetData(), copy_input_release_flag_dev_->GetSize(), &copy_input_release_flag[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index bbe60ee8..c6e63ee0 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -124,8 +124,7 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm); Status PrepareCopyInputs(const TaskContext &context, - const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm, - uint64_t &copy_num); + const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm); static Status EnsureSessionCreated(uint64_t session_id); static uint64_t GetStepIdAddr(const HybridModel &model);