|
|
|
@@ -69,7 +69,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { |
|
|
|
if (rt_ret != RT_ERROR_NONE || is_single_op_) { |
|
|
|
void *bin_handle = nullptr; |
|
|
|
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { |
|
|
|
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); |
|
|
|
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); |
|
|
|
rtDevBinary_t binary; |
|
|
|
std::string json_string; |
|
|
|
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), |
|
|
|
@@ -96,7 +96,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { |
|
|
|
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); |
|
|
|
kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); |
|
|
|
} else { |
|
|
|
GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); |
|
|
|
GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str()); |
|
|
|
kernel_store.ReferTBEHandle(stub_name_.c_str()); |
|
|
|
} |
|
|
|
std::string kernel_name; |
|
|
|
@@ -108,26 +108,58 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { |
|
|
|
GE_CHK_STATUS_RET(ValidateTaskDef(task_def), |
|
|
|
"[%s] Failed to validate task def: [%s]", |
|
|
|
op_desc.GetName().c_str(), |
|
|
|
task_def.DebugString().c_str()); |
|
|
|
/// Registers the op's TBE kernel binary through rtRegisterAllKernel and records the
/// resulting binary handle in handle_, which LaunchKernel uses to pick the
/// handle-based launch path.
///
/// Handles are shared through TBEHandleStore, keyed by stub_name_: the first task to
/// ask for a key registers the binary and stores the handle, later tasks only add a
/// reference to the stored one.
///
/// @param op_desc  Op description; must carry the TBE kernel as the
///                 OP_EXTATTR_NAME_TBE_KERNEL ext attribute and the binary magic as
///                 the TVM_ATTR_NAME_MAGIC string attribute.
/// @return SUCCESS on success; INTERNAL_ERROR when no TBE kernel is attached;
///         PARAM_INVALID when the magic string is not one of the recognised values;
///         otherwise whatever GE_CHK_RT_RET yields for a failed runtime call.
Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
  auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  if (tbe_kernel == nullptr) {
    GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str());
    return INTERNAL_ERROR;
  }

  // NOTE(review): stub_name_ is the store key here, but the handle-based init path
  // visible in this file never assigns it - confirm the key is populated elsewhere,
  // otherwise every handle-based kernel would share one store entry.
  TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
  void *bin_handle = nullptr;
  if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
    GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());

    rtDevBinary_t binary;
    std::string json_string;
    GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string),
                    GELOGI("Get original type of session_graph_id."));
    // Translate the textual magic attribute into the runtime's binary magic value.
    if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
      binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
    } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
      binary.magic = RT_DEV_BINARY_MAGIC_ELF;
    } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
      binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
    } else {
      GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
      return PARAM_INVALID;
    }

    binary.version = 0;
    binary.data = tbe_kernel->GetBinData();
    binary.length = tbe_kernel->GetBinDataSize();
    GELOGI("TBE: binary.length: %lu", binary.length);

    GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
    kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
  } else {
    GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
    kernel_store.ReferTBEHandle(stub_name_.c_str());
  }

  // Record the handle on BOTH paths. Previously only a fresh registration set
  // handle_, so a task that reused a stored handle kept handle_ == nullptr and
  // LaunchKernel fell back to the stub-function launch.
  // Assumes FindTBEHandle fills bin_handle on a hit - TODO confirm.
  handle_ = bin_handle;

  return SUCCESS;
}
|
|
|
Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { |
|
|
|
const domi::KernelDef &kernel_def = task_def.kernel(); |
|
|
|
const domi::KernelContext &context = kernel_def.context(); |
|
|
|
stub_name_ = kernel_def.stub_func(); |
|
|
|
|
|
|
|
GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); |
|
|
|
|
|
|
|
GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); |
|
|
|
args_size_ = kernel_def.args_size(); |
|
|
|
block_dim_ = kernel_def.block_dim(); |
|
|
|
|
|
|
|
// malloc args memory |
|
|
|
args_.reset(new(std::nothrow) uint8_t[args_size_]); |
|
|
|
GE_CHECK_NOTNULL(args_); |
|
|
|
errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); |
|
|
|
|
|
|
|
errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); |
|
|
|
if (err != EOK) { |
|
|
|
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); |
|
|
|
return INTERNAL_ERROR; |
|
|
|
@@ -157,23 +189,88 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef |
|
|
|
block_dim_, |
|
|
|
arg_base_, |
|
|
|
args_size_); |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// Initializes this task from a handle-based kernel definition
/// (task_def.kernel_with_handle()).
///
/// Registers the kernel binary, copies the serialized launch arguments into a
/// task-owned buffer (args_) and derives arg_base_ / max_arg_count_ from the first
/// 16-bit entry of the context's args_offset blob.
///
/// @param op_desc   Op description forwarded to RegisterKernelHandle.
/// @param task_def  Task definition whose kernel_with_handle() section is read.
/// @return SUCCESS on success; INTERNAL_ERROR when the serialized args are shorter
///         than the declared args_size, the copy fails, or args_offset is missing or
///         out of range; otherwise the status propagated from RegisterKernelHandle
///         or GE_CHECK_NOTNULL.
Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) {
  const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
  const domi::KernelContext &context = kernel_with_handle.context();

  GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc));
  original_kernel_key_ = kernel_with_handle.original_kernel_key();
  node_info_ = kernel_with_handle.node_info();
  args_size_ = kernel_with_handle.args_size();
  block_dim_ = kernel_with_handle.block_dim();

  // args_size is declared separately from the args blob; refuse a blob that is
  // shorter than declared, because the copy below reads args_size_ bytes from it.
  if (kernel_with_handle.args().size() < args_size_) {
    GELOGE(INTERNAL_ERROR,
           "[%s] Args size (%zu) of kernel_with_handle is smaller than args_size (%u).",
           GetName().c_str(),
           kernel_with_handle.args().size(),
           args_size_);
    return INTERNAL_ERROR;
  }

  // malloc args memory
  args_.reset(new(std::nothrow) uint8_t[args_size_]);
  GE_CHECK_NOTNULL(args_);
  errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_);
  if (err != EOK) {
    GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
    return INTERNAL_ERROR;
  }

  // The blob must hold at least one uint16_t: only its first entry is consumed.
  if (context.args_offset().size() < sizeof(uint16_t)) {
    GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
    return INTERNAL_ERROR;
  }

  const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
  uint32_t offset = *args_offset_buffer;
  if (offset > args_size_) {
    GELOGE(INTERNAL_ERROR,
           "[%s] Arg offset out of range. offset = %u, arg size = %u",
           GetName().c_str(),
           offset,
           args_size_);
    return INTERNAL_ERROR;
  }

  // Pointer-sized argument slots start at `offset` bytes into the copied buffer.
  arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
  max_arg_count_ = (args_size_ - offset) / sizeof(void *);
  return SUCCESS;
}
|
|
|
|
|
|
|
/// Validates the task definition and initializes the task from it.
///
/// Tasks of type RT_MODEL_TASK_ALL_KERNEL are initialized from the handle-based
/// kernel definition; every other type that passes validation is initialized from
/// the plain kernel definition.
///
/// @param op_desc   Op description the task belongs to.
/// @param task_def  Task definition to validate and read.
/// @return SUCCESS, or the first failing status from validation / initialization.
Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
  GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
                    "[%s] Failed to validate task def: [%s]",
                    op_desc.GetName().c_str(),
                    task_def.DebugString().c_str());

  const bool launched_by_handle = (task_def.type() == RT_MODEL_TASK_ALL_KERNEL);
  if (launched_by_handle) {
    GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def));
    return SUCCESS;
  }

  GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def));
  return SUCCESS;
}
|
|
|
|
|
|
|
Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { |
|
|
|
auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); |
|
|
|
if (task_type != RT_MODEL_TASK_KERNEL) { |
|
|
|
if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) { |
|
|
|
GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type)); |
|
|
|
return INTERNAL_ERROR; |
|
|
|
} |
|
|
|
|
|
|
|
const domi::KernelDef &kernel_def = task_def.kernel(); |
|
|
|
const domi::KernelContext &context = kernel_def.context(); |
|
|
|
auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); |
|
|
|
if (kernel_type != ccKernelType::TE) { |
|
|
|
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); |
|
|
|
return INTERNAL_ERROR; |
|
|
|
if (task_type == RT_MODEL_TASK_KERNEL) { |
|
|
|
const domi::KernelDef &kernel_def = task_def.kernel(); |
|
|
|
const domi::KernelContext &context = kernel_def.context(); |
|
|
|
auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); |
|
|
|
if (kernel_type != ccKernelType::TE) { |
|
|
|
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); |
|
|
|
return INTERNAL_ERROR; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (task_type == RT_MODEL_TASK_ALL_KERNEL) { |
|
|
|
const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle(); |
|
|
|
const domi::KernelContext &context = kernel_with_handle.context(); |
|
|
|
auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); |
|
|
|
if (kernel_type != ccKernelType::TE) { |
|
|
|
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); |
|
|
|
return INTERNAL_ERROR; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return SUCCESS; |
|
|
|
@@ -212,6 +309,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { |
|
|
|
clear_atomic_ = tiling_info.clear_atomic; |
|
|
|
|
|
|
|
tiling_data_ = tiling_info.tiling_data.str(); |
|
|
|
tiling_key_ = tiling_info.tiling_key; |
|
|
|
GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); |
|
|
|
if (tiling_data_.empty()) { |
|
|
|
GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str()); |
|
|
|
return INTERNAL_ERROR; |
|
|
|
@@ -297,7 +396,13 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { |
|
|
|
|
|
|
|
/// Launches the kernel on the given stream exactly once.
///
/// When a binary handle is present (handle_ != nullptr) the kernel is launched with
/// rtKernelLaunchWithHandle, selecting the variant named
/// "<original_kernel_key_>_<tiling_key_>"; otherwise it is launched through the
/// resolved stub function with rtKernelLaunch.
///
/// @param stream  Runtime stream to launch on.
/// @return SUCCESS, or whatever GE_CHK_RT_RET yields for a failed runtime call.
Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
  GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
  // Only one of the two launch paths may run: an unconditional rtKernelLaunch ahead
  // of this dispatch would run the kernel twice, and would use stub_func_ even for
  // handle-based tasks.
  if (handle_ != nullptr) {
    const std::string tiling_suffix = std::to_string(tiling_key_);
    const std::string kernel_key = original_kernel_key_ + "_" + tiling_suffix;
    const std::string kernel_info = node_info_ + "/" + tiling_suffix;
    GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, kernel_key.c_str(),
                                           block_dim_, args_.get(), args_size_, nullptr, stream,
                                           kernel_info.c_str()));
  } else {
    GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
  }
  GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
  return SUCCESS;
}
|
|
|
|