From 3df21332c5365d0865a931f14ed9eb659156c6ad Mon Sep 17 00:00:00 2001
From: wuweikang
Date: Tue, 26 Jan 2021 16:10:10 +0800
Subject: [PATCH] invoke sub kernel with tiling_key in dynamic shape scene

---
Review notes (free text in this area is ignored when the patch is applied):

* Purpose: adds handling of RT_MODEL_TASK_ALL_KERNEL tasks. IndexTaskDefs reads
  the op index from kernel_with_handle().context(); AiCoreOpTask registers the
  binary through rtRegisterAllKernel and launches it through
  rtKernelLaunchWithHandle using the key
  original_kernel_key_ + "_" + std::to_string(tiling_key_).
* Change relative to the original commit: in RegisterKernelHandle,
  "handle_ = bin_handle;" now runs after the TBEHandleStore lookup instead of
  only on the newly-registered branch. Previously a store hit left handle_ as
  nullptr and LaunchKernel fell back to rtKernelLaunch. Hunk line counts are
  unchanged by this move.
* Template arguments (static_cast<int>, reinterpret_cast<const uint16_t *>,
  std::vector<int>, ...) and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; confirm against the target tree
  before applying.
* NOTE(review): InitWithKernelDefWithHandle does not assign stub_name_ in this
  patch, yet RegisterKernelHandle uses it as the TBEHandleStore key. Confirm
  that the key is populated elsewhere.
* NOTE(review): memcpy_s copies args_size_ bytes from args().data() without
  checking args().size() against args_size_. Confirm the producer guarantees
  this.

 ge/hybrid/model/hybrid_model_builder.cc       |   4 +-
 .../node_executor/aicore/aicore_op_task.cc    | 143 +++++++++++++++---
 .../node_executor/aicore/aicore_op_task.h     |   7 +
 3 files changed, 134 insertions(+), 20 deletions(-)

diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index b314c6a7..0e7f2681 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -1083,6 +1083,8 @@ Status HybridModelBuilder::IndexTaskDefs() {
         op_index = task_def.kernel_ex().op_index();
       } else if (task_type == RT_MODEL_TASK_HCCL) {
         op_index = task_def.kernel_hccl().op_index();
+      } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
+        op_index = task_def.kernel_with_handle().context().op_index();
       } else {
         GELOGD("Skip task type: %d", static_cast<int>(task_type));
         continue;
@@ -1095,7 +1097,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
       }
 
       auto &node = iter->second;
-      if (task_type == RT_MODEL_TASK_KERNEL) {
+      if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
         ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
       }
 
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index f1bd6466..c2b1b879 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -68,7 +68,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
   if (rt_ret != RT_ERROR_NONE) {
     void *bin_handle = nullptr;
     if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
-      GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
+      GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
       rtDevBinary_t binary;
       std::string json_string;
       GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
@@ -95,7 +95,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
       GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
       kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
     } else {
-      GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str());
+      GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
       kernel_store.ReferTBEHandle(stub_name_.c_str());
     }
     std::string kernel_name;
@@ -107,26 +107,58 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
   return SUCCESS;
 }
 
-Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
-  GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
-                    "[%s] Failed to validate task def: [%s]",
-                    op_desc.GetName().c_str(),
-                    task_def.DebugString().c_str());
+Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
+  auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
+  if (tbe_kernel == nullptr) {
+    GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str());
+    return INTERNAL_ERROR;
+  }
+  TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
+  void *bin_handle = nullptr;
+  if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
+    GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
+    rtDevBinary_t binary;
+    std::string json_string;
+    GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string),
+                    GELOGI("Get original type of session_graph_id."));
+    if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
+      binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
+    } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
+      binary.magic = RT_DEV_BINARY_MAGIC_ELF;
+    } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
+      binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
+    } else {
+      GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
+      return PARAM_INVALID;
+    }
+    binary.version = 0;
+    binary.data = tbe_kernel->GetBinData();
+    binary.length = tbe_kernel->GetBinDataSize();
+    GELOGI("TBE: binary.length: %lu", binary.length);
+    GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
+    kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
+  } else {
+    GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
+    kernel_store.ReferTBEHandle(stub_name_.c_str());
+  }
+  handle_ = bin_handle;
+
+  return SUCCESS;
+}
 
+Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
   const domi::KernelDef &kernel_def = task_def.kernel();
   const domi::KernelContext &context = kernel_def.context();
   stub_name_ = kernel_def.stub_func();
-
   GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc));
-
   GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_));
   args_size_ = kernel_def.args_size();
   block_dim_ = kernel_def.block_dim();
-
   // malloc args memory
   args_.reset(new(std::nothrow) uint8_t[args_size_]);
   GE_CHECK_NOTNULL(args_);
-  errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);
+
+  errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);
   if (err != EOK) {
     GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
     return INTERNAL_ERROR;
@@ -156,23 +188,88 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
          block_dim_,
          arg_base_,
          args_size_);
+  return SUCCESS;
+}
+
+Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) {
+  const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
+  const domi::KernelContext &context = kernel_with_handle.context();
+
+  GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc));
+  original_kernel_key_ = kernel_with_handle.original_kernel_key();
+  node_info_ = kernel_with_handle.node_info();
+  args_size_ = kernel_with_handle.args_size();
+  block_dim_ = kernel_with_handle.block_dim();
+  // malloc args memory
+  args_.reset(new(std::nothrow) uint8_t[args_size_]);
+  GE_CHECK_NOTNULL(args_);
+  errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_);
+
+  if (err != EOK) {
+    GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
+    return INTERNAL_ERROR;
+  }
+
+  if (context.args_offset().size() < sizeof(uint16_t)) {
+    GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
+    return INTERNAL_ERROR;
+  }
+
+  const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
+  uint32_t offset = *args_offset_buffer;
+  if (offset > args_size_) {
+    GELOGE(INTERNAL_ERROR,
+           "[%s] Arg offset out of range. offset = %u, arg size = %u",
+           GetName().c_str(),
+           offset,
+           args_size_);
+    return INTERNAL_ERROR;
+  }
+  arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
+  max_arg_count_ = (args_size_ - offset) / sizeof(void *);
+  return SUCCESS;
+}
+
+Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
+  GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
+                    "[%s] Failed to validate task def: [%s]",
+                    op_desc.GetName().c_str(),
+                    task_def.DebugString().c_str());
+
+  if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) {
+    GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def));
+  } else {
+    GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def));
+  }
 
   return SUCCESS;
 }
 
 Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
   auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
-  if (task_type != RT_MODEL_TASK_KERNEL) {
+  if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) {
     GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type));
     return INTERNAL_ERROR;
   }
 
-  const domi::KernelDef &kernel_def = task_def.kernel();
-  const domi::KernelContext &context = kernel_def.context();
-  auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
-  if (kernel_type != ccKernelType::TE) {
-    GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
-    return INTERNAL_ERROR;
+  if (task_type == RT_MODEL_TASK_KERNEL) {
+    const domi::KernelDef &kernel_def = task_def.kernel();
+    const domi::KernelContext &context = kernel_def.context();
+    auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+    if (kernel_type != ccKernelType::TE) {
+      GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
+      return INTERNAL_ERROR;
+    }
+  }
+
+  if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
+    const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
+    const domi::KernelContext &context = kernel_with_handle.context();
+    auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+    if (kernel_type != ccKernelType::TE) {
+      GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
+      return INTERNAL_ERROR;
+    }
   }
 
   return SUCCESS;
@@ -208,6 +305,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
   clear_atomic_ = tiling_info.clear_atomic;
 
   tiling_data_ = tiling_info.tiling_data.str();
+  tiling_key_ = tiling_info.tiling_key;
+  GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
   if (tiling_data_.empty()) {
     GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str());
     return INTERNAL_ERROR;
@@ -293,7 +392,13 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
 
 Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
   GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
-  GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
+  if (handle_ != nullptr) {
+    GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, (original_kernel_key_ + "_" + std::to_string(tiling_key_)).c_str(),
+                                           block_dim_, args_.get(), args_size_, nullptr, stream,
+                                           (node_info_ + "/" + std::to_string(tiling_key_)).c_str()));
+  } else {
+    GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
+  }
   GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
   return SUCCESS;
 }
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 3f350531..ebfdafac 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -65,6 +65,9 @@ class AiCoreOpTask {
   Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
   Status InitTilingInfo(const OpDesc &op_desc);
   Status RegisterTbeHandle(const OpDesc &op_desc);
+  Status RegisterKernelHandle(const OpDesc &op_desc);
+  Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def);
+  Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def);
 
   std::string stub_name_;
   void *stub_func_ = nullptr;
@@ -73,6 +76,10 @@ class AiCoreOpTask {
   uint32_t block_dim_ = 1;
   bool clear_atomic_ = true;
   std::vector<int> output_indices_to_skip_;
+  string original_kernel_key_;
+  string node_info_;
+  uint32_t tiling_key_ = 0;
+  void *handle_ = nullptr;
 };
 
 class AtomicAddrCleanOpTask : public AiCoreOpTask {