@@ -466,18 +466,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootModel(const ge::ModelData &model_data) {
   if (model_data.model_data == nullptr || model_data.model_len == 0) {
-    GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0");
-    return GE_EXEC_MODEL_DATA_SIZE_INVALID;
+    GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0");
+    return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
   }
   if (is_assign_model_) {
-    GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
-    return GE_EXEC_LOAD_MODEL_REPEATED;
+    GELOGE(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
+    return ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED;
   }
   if (ReleaseLocalModelData() != SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed.");
-    return INTERNAL_ERROR;
+    GELOGE(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "ReleaseLocalModelData failed.");
+    return ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA;
   }
   Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
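This hunk, like most of the ones below, swaps internal GE_* status codes for the externally documented ACL_ERROR_GE_* codes at validation points, so failures surfaced through the ACL API match the public error table. A minimal sketch of the same guard-clause pattern; the code value and the LOG_AND_RETURN macro here are hypothetical, not GE's actual API:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

using Status = uint32_t;
constexpr Status SUCCESS = 0;
constexpr Status ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006;  // hypothetical value

#define LOG_AND_RETURN(code, msg)           \
  do {                                      \
    fprintf(stderr, "[ERROR] %s\n", (msg)); \
    return (code);                          \
  } while (0)

Status LoadRootModel(const void *model_data, size_t model_len) {
  if (model_data == nullptr || model_len == 0) {
    // Return the externally documented ACL code rather than an internal GE one,
    // so callers of the public API can match it against the public error table.
    LOG_AND_RETURN(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "model data is null or empty");
  }
  return SUCCESS;
}
```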
@@ -563,7 +563,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {
     GeModelPtr cur_model = ge::MakeShared<ge::GeModel>();
     Status ret = LoadModelData(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
     }
     if (is_first_model) {
@@ -576,22 +576,22 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {
     ret = LoadWeights(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
     }
     ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
     }
     ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
     }
     ret = LoadTask(om_load_helper, cur_model, mode_index);
     if (ret != SUCCESS) {
-      return GE_EXEC_LOAD_TASK_PARTITION_FAILED;
+      return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED;
     }
     root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model);
   }
@@ -207,9 +207,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
          "ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
          index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
   if (model_data_size <= cur_offset) {
-    GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
+    GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
            partition_table->num, model_data_size);
-    return GE_EXEC_MODEL_DATA_SIZE_INVALID;
+    return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
   }
   for (uint32_t i = 0; i < partition_table->num; i++) {
@@ -231,9 +231,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
     }
     if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) {
-      GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
+      GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
             partition.size + cur_offset, model_data_size);
-      return GE_EXEC_MODEL_DATA_SIZE_INVALID;
+      return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
     }
     cur_offset += partition.size;
     GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast<int>(partition.type), partition.size, index);
@@ -34,7 +34,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro
                                                                                ge::ModelData &model_data) {
   std::string real_path = RealPath(model_path);
   if (real_path.empty()) {
-    GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path);
+    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path);
     return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
   }
@@ -1743,7 +1743,7 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
   GE_CHECK_NOTNULL(aipp_params);
   ge::GeAttrValue::NAMED_ATTRS aipp_attr;
-  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
+  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST,
                          "Data node do not contain param aipp!");
   GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
   GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u",
@@ -2838,19 +2838,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
   return SUCCESS;
 }

-Status DavinciModel::UpdateKnownZeroCopyAddr() {
-  for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
-    auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
+Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
+  for (size_t i = 0; i < total_io_addrs.size(); ++i) {
+    auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
     if (it_in != knonw_input_data_info_.end()) {
-      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
-             knonw_input_data_info_.at(total_io_addrs_[i]));
-      total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
+      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
+             knonw_input_data_info_.at(total_io_addrs[i]));
+      total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
     }
-    auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
+    auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
     if (it_out != knonw_output_data_info_.end()) {
-      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
-             knonw_output_data_info_.at(total_io_addrs_[i]));
-      total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
+      GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
+             knonw_output_data_info_.at(total_io_addrs[i]));
+      total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
     }
   }
   GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -2879,7 +2879,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
   } else {
     total_io_addrs_ = orig_total_io_addrs_;
   }
-  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
+  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

   if (total_args_size_ == 0) {
     GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -2946,7 +2946,14 @@ Status DavinciModel::MallocKnownArgs() {
     GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
+  // malloc dynamic and static hybrid memory
+  if (total_hybrid_args_size_ != 0) {
+    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+  }
   // malloc fixed addr memory, eg: rts op
   if (total_fixed_addr_size_ != 0) {
     GELOGI("Begin to allocate fixed addr.");
@@ -490,6 +490,14 @@ class DavinciModel {
   void SetTotalIOAddrs(vector<void *> &io_addrs) {
     total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
   }
+  void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
+  uint32_t GetHybridArgsSize() {
+    return total_hybrid_args_size_;
+  }
+  void *GetCurrentHybridArgsAddr(uint32_t offset) {
+    void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
+    return cur_args;
+  }
   void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
   int64_t GetFixedAddrsSize(string tensor_name);
   void *GetCurrentFixedAddr(int64_t offset) const {
@@ -508,7 +516,7 @@ class DavinciModel {
   Status MallocKnownArgs();
   Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
   Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
-  Status UpdateKnownZeroCopyAddr();
+  Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
   void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }
   Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -946,6 +954,8 @@ class DavinciModel {
   void *args_ = nullptr;
   void *args_host_ = nullptr;
   void *fixed_addrs_ = nullptr;
+  void *hybrid_addrs_ = nullptr;
+  uint32_t total_hybrid_args_size_ = 0;
   int64_t total_fixed_addr_size_ = 0;
   std::map<const void *, void *> knonw_input_data_info_;
   std::map<const void *, void *> knonw_output_data_info_;
@@ -1232,7 +1232,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
-                         "Invalid model id %u, check weather model has been loaded or not.", model_id);
+                         "Invalid model id %u, check whether model has been loaded or not.", model_id);

   if (davinci_model->NeedDestroyAicpuKernel()) {
     GELOGI("Start to destroy specified aicpu kernel.");
@@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() {
 Status KernelTaskInfo::Distribute() {
   GELOGD("KernelTaskInfo Distribute Start.");
   if (davinci_model_->IsKnownNode()) {
-    args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
+    if (kernel_type_ == ccKernelType::TE) {
+      args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
+    } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
+      args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
+    }
     GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
   }
   rtError_t rt_ret = RT_ERROR_NONE;
@@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() {
   const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
   vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
   vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
-  vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
   vector<void *> io_addrs;
-  if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
-    io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
-    io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
+  io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
+  io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
+  if (kernel_type_ == ccKernelType::TE) {
+    vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
+    io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
-  } else {
-    string peer_input_name;
-    if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
-      uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
-      if (output_index > output_data_addrs.size()) {
-        GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
-               output_data_addrs.size(), output_index);
-        return FAILED;
-      }
-      io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
-      for (size_t i = 0; i < output_data_addrs.size(); ++i) {
-        if (i == output_index) {
-          void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
-          io_addrs.emplace_back(fixed_addr);
-          continue;
-        }
-        io_addrs.emplace_back(output_data_addrs[i]);
-      }
-    }
-  }
-  io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
+    davinci_model_->SetTotalIOAddrs(io_addrs);
+  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
+    davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
+    uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
+    auto addrs_size = sizeof(uint64_t) * io_addrs.size();
+    errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
+    if (sec_ret != EOK) {
+      GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
+      return FAILED;
+    }
+    // copy args to device
+    rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+  }
-  davinci_model_->SetTotalIOAddrs(io_addrs);
   GELOGI("KernelTaskInfo::UpdateArgs success.");
   return SUCCESS;
 }
@@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
 }

 Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
-  domi::KernelDef kernel_def = task_def.kernel();
-  uint32_t args_size = kernel_def.args_size();
-  args_offset_ = davinci_model->GetTotalArgsSize();
-  davinci_model->SetTotalArgsSize(args_size);
-  GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
+  const domi::KernelDef &kernel_def = task_def.kernel();
   // get opcontext stored in model
   const domi::KernelContext &context = kernel_def.context();
   // get opdesc
   op_desc_ = davinci_model->GetOpByIndex(context.op_index());
   GE_CHECK_NOTNULL(op_desc_);
-  // alloc fixed addr
-  string peer_input_name;
-  if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
-    uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
-    if (output_index > op_desc_->GetOutputsSize()) {
-      GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
-             output_index);
-      return FAILED;
-    }
-    fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
-    auto tensor_desc = op_desc_->GetOutputDesc(output_index);
-    int64_t tensor_size = 0;
-    GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
-    davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
-    GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
-           fixed_addr_offset_);
+  kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
+  if (kernel_type_ == ccKernelType::TE) {
+    uint32_t args_size = kernel_def.args_size();
+    args_offset_ = davinci_model->GetTotalArgsSize();
+    davinci_model->SetTotalArgsSize(args_size);
+    GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
+  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
+    hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
+    davinci_model->SetHybridArgsSize(kernel_def.args_size());
+    GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
   }
   return SUCCESS;
 }
@@ -882,7 +866,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
   }

   // copy args to new host memory
-  std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
+  args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
   GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
   errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
   if (sec_ret != EOK) {
| @@ -890,8 +874,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| return FAILED; | |||
| } | |||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||
| if (init_ret != SUCCESS) { | |||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||
| return init_ret; | |||
| } | |||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||
| if (davinci_model_->IsKnownNode()) { | |||
| return SUCCESS; | |||
| } | |||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
| vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | |||
| vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | |||
| vector<void *> io_addrs; | |||
@@ -908,19 +907,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
     }
   }

-  auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
-  const auto &ext_info = kernel_def.kernel_ext_info();
-  auto init_ret = InitAicpuTaskExtInfo(ext_info);
-  if (init_ret != SUCCESS) {
-    GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
-    return init_ret;
-  }
-  GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
-         op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);
-  aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
-  aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());
-
   // malloc device memory for args
   rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
@@ -161,7 +161,9 @@ class KernelTaskInfo : public TaskInfo {
   OpDescPtr op_desc_;
   DavinciModel *davinci_model_;
   uint32_t args_offset_ = 0;
+  uint32_t hybrid_args_offset_ = 0;
   int64_t fixed_addr_offset_ = 0;
+  std::unique_ptr<uint8_t[]> args_addr = nullptr;
   bool call_save_dump_ = false;
   // aicpu ext_info device mem
@@ -540,7 +540,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std:
   std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams());
   ge::GeAttrValue::NAMED_ATTRS aipp_attr;
-  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
+  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST,
                          "Data node do not contain param aipp!");
   GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");