Browse Source

Pre Merge pull request !783 from HW_KK/development

pull/783/MERGE
HW_KK Gitee 5 years ago
parent
commit
2f7bfa2e5a
3 changed files with 134 additions and 20 deletions
  1. +3
    -1
      ge/hybrid/model/hybrid_model_builder.cc
  2. +124
    -19
      ge/hybrid/node_executor/aicore/aicore_op_task.cc
  3. +7
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.h

+ 3
- 1
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() {
op_index = task_def.kernel_ex().op_index();
} else if (task_type == RT_MODEL_TASK_HCCL) {
op_index = task_def.kernel_hccl().op_index();
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
op_index = task_def.kernel_with_handle().context().op_index();
} else {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
continue;
@@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
}

auto &node = iter->second;
if (task_type == RT_MODEL_TASK_KERNEL) {
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
}



+ 124
- 19
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -69,7 +69,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
if (rt_ret != RT_ERROR_NONE || is_single_op_) {
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
@@ -96,7 +96,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
} else {
GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
kernel_store.ReferTBEHandle(stub_name_.c_str());
}
std::string kernel_name;
@@ -108,26 +108,58 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());
// Registers the op's TBE binary with the runtime for launch-by-handle and
// records the resulting binary handle in handle_.
//
// The binary is looked up in the process-wide TBEHandleStore under stub_name_.
// If it is not there yet, it is registered through rtRegisterAllKernel and
// stored; otherwise the existing entry gets an extra reference.
//
// @param op_desc  Op description carrying the TBE kernel ext-attr and the
//                 TVM_ATTR_NAME_MAGIC attribute.
// @return SUCCESS on success; INTERNAL_ERROR when the op has no TBE kernel
//         attached; PARAM_INVALID when the magic attribute is not one of the
//         three supported values; a runtime error if registration fails.
Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
  auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  if (tbe_kernel == nullptr) {
    GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str());
    return INTERNAL_ERROR;
  }
  TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();

  void *bin_handle = nullptr;
  if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
    GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
    rtDevBinary_t binary;
    std::string json_string;
    GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string),
                    GELOGI("Get original type of session_graph_id."));
    // Translate the textual magic attribute into the runtime's binary magic.
    if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
      binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
    } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
      binary.magic = RT_DEV_BINARY_MAGIC_ELF;
    } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
      binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
    } else {
      GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
      return PARAM_INVALID;
    }
    binary.version = 0;
    binary.data = tbe_kernel->GetBinData();
    binary.length = tbe_kernel->GetBinDataSize();
    GELOGI("TBE: binary.length: %lu", binary.length);
    GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
    kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
  } else {
    GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
    kernel_store.ReferTBEHandle(stub_name_.c_str());
  }

  // Record the handle on BOTH paths. Previously handle_ was only assigned after
  // a fresh registration, so a task that reused an already-stored binary kept
  // handle_ == nullptr and LaunchKernel fell back to the stub-function launch.
  // NOTE(review): this relies on FindTBEHandle writing the stored handle into
  // bin_handle when it returns true — confirm against TBEHandleStore.
  handle_ = bin_handle;

  return SUCCESS;
}
Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
stub_name_ = kernel_def.stub_func();

GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc));

GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_));
args_size_ = kernel_def.args_size();
block_dim_ = kernel_def.block_dim();

// malloc args memory
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);

errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);
if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
return INTERNAL_ERROR;
@@ -157,23 +189,88 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
block_dim_,
arg_base_,
args_size_);
return SUCCESS;
}

// Initializes the task from a KernelDefWithHandle task definition (the
// launch-by-handle flavour of an AiCore kernel).
//
// Registers the kernel binary, copies the launch parameters
// (original_kernel_key_, node_info_, args_size_, block_dim_) out of the task
// definition, takes a private copy of the serialized args buffer, and derives
// arg_base_ / max_arg_count_ from the first uint16_t of the context's
// args_offset.
//
// @param op_desc   Op description used to register the kernel binary.
// @param task_def  Task definition; its kernel_with_handle() part is read.
// @return SUCCESS on success; INTERNAL_ERROR when the args buffer or the
//         args_offset in the task definition is inconsistent, or when the copy
//         fails; any error returned by RegisterKernelHandle.
Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) {
  const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
  const domi::KernelContext &context = kernel_with_handle.context();

  // NOTE(review): RegisterKernelHandle keys the handle store by stub_name_,
  // which this function does not assign — confirm it is set elsewhere.
  GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc));
  original_kernel_key_ = kernel_with_handle.original_kernel_key();
  node_info_ = kernel_with_handle.node_info();
  args_size_ = kernel_with_handle.args_size();
  block_dim_ = kernel_with_handle.block_dim();

  // The copy below reads args_size_ bytes from the serialized args string, so
  // reject a task def whose args payload is shorter than it claims instead of
  // reading past the end of that string.
  if (kernel_with_handle.args().size() < args_size_) {
    GELOGE(INTERNAL_ERROR,
           "[%s] Args buffer is shorter than args size. buffer size = %zu, args size = %u",
           GetName().c_str(),
           kernel_with_handle.args().size(),
           args_size_);
    return INTERNAL_ERROR;
  }

  // malloc args memory
  args_.reset(new(std::nothrow) uint8_t[args_size_]);
  GE_CHECK_NOTNULL(args_);
  errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_);

  if (err != EOK) {
    GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
    return INTERNAL_ERROR;
  }

  // args_offset is a byte string; only its first uint16_t is consumed here.
  if (context.args_offset().size() < sizeof(uint16_t)) {
    GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
    return INTERNAL_ERROR;
  }

  const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
  uint32_t offset = *args_offset_buffer;
  if (offset > args_size_) {
    GELOGE(INTERNAL_ERROR,
           "[%s] Arg offset out of range. offset = %u, arg size = %u",
           GetName().c_str(),
           offset,
           args_size_);
    return INTERNAL_ERROR;
  }

  // Pointer-sized argument slots start at `offset` inside the private copy.
  arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
  max_arg_count_ = (args_size_ - offset) / sizeof(void *);
  return SUCCESS;
}

// Validates the task definition and then initializes the task from it.
// RT_MODEL_TASK_ALL_KERNEL task defs are initialized through
// InitWithKernelDefWithHandle; every other accepted type goes through
// InitWithKernelDef. Returns the first failing status, or SUCCESS.
Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
  GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
                    "[%s] Failed to validate task def: [%s]",
                    op_desc.GetName().c_str(),
                    task_def.DebugString().c_str());

  const bool launches_by_handle = (task_def.type() == RT_MODEL_TASK_ALL_KERNEL);
  if (launches_by_handle) {
    GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def));
    return SUCCESS;
  }

  GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def));
  return SUCCESS;
}

Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type != RT_MODEL_TASK_KERNEL) {
if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) {
GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type));
return INTERNAL_ERROR;
}

const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
return INTERNAL_ERROR;
if (task_type == RT_MODEL_TASK_KERNEL) {
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
return INTERNAL_ERROR;
}
}

if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
const domi::KernelContext &context = kernel_with_handle.context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
return INTERNAL_ERROR;
}
}

return SUCCESS;
@@ -212,6 +309,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
clear_atomic_ = tiling_info.clear_atomic;

tiling_data_ = tiling_info.tiling_data.str();
tiling_key_ = tiling_info.tiling_key;
GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
if (tiling_data_.empty()) {
GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str());
return INTERNAL_ERROR;
@@ -297,7 +396,13 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {

Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
if (handle_ != nullptr) {
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, (original_kernel_key_ + "_" + std::to_string(tiling_key_)).c_str(),
block_dim_, args_.get(), args_size_, nullptr, stream,
(node_info_ + "/" + std::to_string(tiling_key_)).c_str()));
} else {
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
}
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
return SUCCESS;
}


+ 7
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -67,6 +67,9 @@ class AiCoreOpTask {
Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
Status InitTilingInfo(const OpDesc &op_desc);
Status RegisterTbeHandle(const OpDesc &op_desc);
// Registers the op's TBE binary through rtRegisterAllKernel (or reuses the
// entry already held by TBEHandleStore) for the launch-by-handle path.
Status RegisterKernelHandle(const OpDesc &op_desc);
// Initializes the task from task_def.kernel(): registers the TBE handle,
// resolves the stub function by name and copies the kernel args.
Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def);
// Initializes the task from task_def.kernel_with_handle(): registers the
// kernel handle and copies the kernel key, node info and kernel args.
Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def);

std::string stub_name_;
void *stub_func_ = nullptr;
@@ -76,6 +79,10 @@ class AiCoreOpTask {
bool clear_atomic_ = true;
bool is_single_op_ = false;
std::vector<int> output_indices_to_skip_;
// Copied from KernelDefWithHandle::original_kernel_key(); LaunchKernel appends
// "_" + tiling_key_ to it to form the kernel name passed to the runtime.
string original_kernel_key_;
// Copied from KernelDefWithHandle::node_info(); LaunchKernel appends
// "/" + tiling_key_ to it and passes the result as the last launch argument.
string node_info_;
// Tiling key taken from the tiling result in UpdateTilingInfo.
uint32_t tiling_key_ = 0;
// Binary handle produced by rtRegisterAllKernel. When non-null, LaunchKernel
// uses rtKernelLaunchWithHandle instead of rtKernelLaunch.
void *handle_ = nullptr;
};

class AtomicAddrCleanOpTask : public AiCoreOpTask {


Loading…
Cancel
Save