| @@ -466,18 +466,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootModel(const ge::ModelData &model_data) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootModel(const ge::ModelData &model_data) { | ||||
| if (model_data.model_data == nullptr || model_data.model_len == 0) { | if (model_data.model_data == nullptr || model_data.model_len == 0) { | ||||
| GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0"); | |||||
| return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0"); | |||||
| return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
| } | } | ||||
| if (is_assign_model_) { | if (is_assign_model_) { | ||||
| GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||||
| return GE_EXEC_LOAD_MODEL_REPEATED; | |||||
| GELOGE(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||||
| return ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED; | |||||
| } | } | ||||
| if (ReleaseLocalModelData() != SUCCESS) { | if (ReleaseLocalModelData() != SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed."); | |||||
| return INTERNAL_ERROR; | |||||
| GELOGE(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "ReleaseLocalModelData failed."); | |||||
| return ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA; | |||||
| } | } | ||||
| Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | ||||
| @@ -563,7 +563,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||||
| GeModelPtr cur_model = ge::MakeShared<ge::GeModel>(); | GeModelPtr cur_model = ge::MakeShared<ge::GeModel>(); | ||||
| Status ret = LoadModelData(om_load_helper, cur_model, mode_index); | Status ret = LoadModelData(om_load_helper, cur_model, mode_index); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||||
| return ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||||
| } | } | ||||
| if (is_first_model) { | if (is_first_model) { | ||||
| @@ -576,22 +576,22 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||||
| ret = LoadWeights(om_load_helper, cur_model, mode_index); | ret = LoadWeights(om_load_helper, cur_model, mode_index); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||||
| return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||||
| } | } | ||||
| ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); | ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
| return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
| } | } | ||||
| ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); | ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
| return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
| } | } | ||||
| ret = LoadTask(om_load_helper, cur_model, mode_index); | ret = LoadTask(om_load_helper, cur_model, mode_index); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||||
| return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||||
| } | } | ||||
| root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); | root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); | ||||
| } | } | ||||
| @@ -207,9 +207,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m | |||||
| "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | ||||
| index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); | index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); | ||||
| if (model_data_size <= cur_offset) { | if (model_data_size <= cur_offset) { | ||||
| GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||||
| partition_table->num, model_data_size); | partition_table->num, model_data_size); | ||||
| return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
| return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
| } | } | ||||
| for (uint32_t i = 0; i < partition_table->num; i++) { | for (uint32_t i = 0; i < partition_table->num; i++) { | ||||
| @@ -231,9 +231,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m | |||||
| } | } | ||||
| if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) { | if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) { | ||||
| GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", | |||||
| partition.size + cur_offset, model_data_size); | partition.size + cur_offset, model_data_size); | ||||
| return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
| return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
| } | } | ||||
| cur_offset += partition.size; | cur_offset += partition.size; | ||||
| GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast<int>(partition.type), partition.size, index); | GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast<int>(partition.type), partition.size, index); | ||||
| @@ -34,7 +34,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||||
| ge::ModelData &model_data) { | ge::ModelData &model_data) { | ||||
| std::string real_path = RealPath(model_path); | std::string real_path = RealPath(model_path); | ||||
| if (real_path.empty()) { | if (real_path.empty()) { | ||||
| GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | ||||
| } | } | ||||
| @@ -1743,7 +1743,7 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||||
| GE_CHECK_NOTNULL(aipp_params); | GE_CHECK_NOTNULL(aipp_params); | ||||
| ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | |||||
| "Data node do not contain param aipp!"); | "Data node do not contain param aipp!"); | ||||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | ||||
| GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", | GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", | ||||
| @@ -2838,19 +2838,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::UpdateKnownZeroCopyAddr() { | |||||
| for (size_t i = 0; i < total_io_addrs_.size(); ++i) { | |||||
| auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); | |||||
| Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) { | |||||
| for (size_t i = 0; i < total_io_addrs.size(); ++i) { | |||||
| auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); | |||||
| if (it_in != knonw_input_data_info_.end()) { | if (it_in != knonw_input_data_info_.end()) { | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| knonw_input_data_info_.at(total_io_addrs_[i])); | |||||
| total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); | |||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||||
| knonw_input_data_info_.at(total_io_addrs[i])); | |||||
| total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); | |||||
| } | } | ||||
| auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); | |||||
| auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); | |||||
| if (it_out != knonw_output_data_info_.end()) { | if (it_out != knonw_output_data_info_.end()) { | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| knonw_output_data_info_.at(total_io_addrs_[i])); | |||||
| total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); | |||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||||
| knonw_output_data_info_.at(total_io_addrs[i])); | |||||
| total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); | |||||
| } | } | ||||
| } | } | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | ||||
| @@ -2879,7 +2879,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
| } else { | } else { | ||||
| total_io_addrs_ = orig_total_io_addrs_; | total_io_addrs_ = orig_total_io_addrs_; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
| if (total_args_size_ == 0) { | if (total_args_size_ == 0) { | ||||
| GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | ||||
| @@ -2946,7 +2946,14 @@ Status DavinciModel::MallocKnownArgs() { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| // malloc dynamic and static hybrid memory | |||||
| if (total_hybrid_args_size_ != 0) { | |||||
| rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| } | |||||
| // malloc fixed addr memory, eg: rts op | // malloc fixed addr memory, eg: rts op | ||||
| if (total_fixed_addr_size_ != 0) { | if (total_fixed_addr_size_ != 0) { | ||||
| GELOGI("Begin to allocate fixed addr."); | GELOGI("Begin to allocate fixed addr."); | ||||
| @@ -490,6 +490,14 @@ class DavinciModel { | |||||
| void SetTotalIOAddrs(vector<void *> &io_addrs) { | void SetTotalIOAddrs(vector<void *> &io_addrs) { | ||||
| total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | ||||
| } | } | ||||
| void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; } | |||||
| uint32_t GetHybridArgsSize() { | |||||
| return total_hybrid_args_size_; | |||||
| } | |||||
| void *GetCurrentHybridArgsAddr(uint32_t offset) { | |||||
| void *cur_args = static_cast<char *>(hybrid_addrs_) + offset; | |||||
| return cur_args; | |||||
| } | |||||
| void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | ||||
| int64_t GetFixedAddrsSize(string tensor_name); | int64_t GetFixedAddrsSize(string tensor_name); | ||||
| void *GetCurrentFixedAddr(int64_t offset) const { | void *GetCurrentFixedAddr(int64_t offset) const { | ||||
| @@ -508,7 +516,7 @@ class DavinciModel { | |||||
| Status MallocKnownArgs(); | Status MallocKnownArgs(); | ||||
| Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status UpdateKnownZeroCopyAddr(); | |||||
| Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs); | |||||
| void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | ||||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | ||||
| @@ -946,6 +954,8 @@ class DavinciModel { | |||||
| void *args_ = nullptr; | void *args_ = nullptr; | ||||
| void *args_host_ = nullptr; | void *args_host_ = nullptr; | ||||
| void *fixed_addrs_ = nullptr; | void *fixed_addrs_ = nullptr; | ||||
| void *hybrid_addrs_ = nullptr; | |||||
| uint32_t total_hybrid_args_size_ = 0; | |||||
| int64_t total_fixed_addr_size_ = 0; | int64_t total_fixed_addr_size_ = 0; | ||||
| std::map<const void *, void *> knonw_input_data_info_; | std::map<const void *, void *> knonw_input_data_info_; | ||||
| std::map<const void *, void *> knonw_output_data_info_; | std::map<const void *, void *> knonw_output_data_info_; | ||||
| @@ -1232,7 +1232,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
| "Invalid model id %u, check weather model has been loaded or not.", model_id); | |||||
| "Invalid model id %u, check whether model has been loaded or not.", model_id); | |||||
| if (davinci_model->NeedDestroyAicpuKernel()) { | if (davinci_model->NeedDestroyAicpuKernel()) { | ||||
| GELOGI("Start to destroy specified aicpu kernel."); | GELOGI("Start to destroy specified aicpu kernel."); | ||||
| @@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||||
| Status KernelTaskInfo::Distribute() { | Status KernelTaskInfo::Distribute() { | ||||
| GELOGD("KernelTaskInfo Distribute Start."); | GELOGD("KernelTaskInfo Distribute Start."); | ||||
| if (davinci_model_->IsKnownNode()) { | if (davinci_model_->IsKnownNode()) { | ||||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| if (kernel_type_ == ccKernelType::TE) { | |||||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
| args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); | |||||
| } | |||||
| GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | ||||
| } | } | ||||
| rtError_t rt_ret = RT_ERROR_NONE; | rtError_t rt_ret = RT_ERROR_NONE; | ||||
| @@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() { | |||||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | ||||
| vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | ||||
| vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | ||||
| vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||||
| vector<void *> io_addrs; | vector<void *> io_addrs; | ||||
| if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
| io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
| io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||||
| if (kernel_type_ == ccKernelType::TE) { | |||||
| vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||||
| io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | ||||
| } else { | |||||
| string peer_input_name; | |||||
| if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { | |||||
| uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); | |||||
| if (output_index > output_data_addrs.size()) { | |||||
| GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", | |||||
| output_data_addrs.size(), output_index); | |||||
| return FAILED; | |||||
| } | |||||
| io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
| for (size_t i = 0; i < output_data_addrs.size(); ++i) { | |||||
| if (i == output_index) { | |||||
| void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||||
| io_addrs.emplace_back(fixed_addr); | |||||
| continue; | |||||
| } | |||||
| io_addrs.emplace_back(output_data_addrs[i]); | |||||
| } | |||||
| io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||||
| davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||||
| uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||||
| auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||||
| errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| // copy args to device | |||||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| } | } | ||||
| davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
| GELOGI("KernelTaskInfo::UpdateArgs success."); | GELOGI("KernelTaskInfo::UpdateArgs success."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||||
| } | } | ||||
| Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
| domi::KernelDef kernel_def = task_def.kernel(); | |||||
| uint32_t args_size = kernel_def.args_size(); | |||||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
| davinci_model->SetTotalArgsSize(args_size); | |||||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||||
| // get opcontext stored in model | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| const domi::KernelContext &context = kernel_def.context(); | const domi::KernelContext &context = kernel_def.context(); | ||||
| // get opdesc | |||||
| op_desc_ = davinci_model->GetOpByIndex(context.op_index()); | |||||
| GE_CHECK_NOTNULL(op_desc_); | |||||
| // alloc fixed addr | |||||
| string peer_input_name; | |||||
| if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { | |||||
| uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); | |||||
| if (output_index > op_desc_->GetOutputsSize()) { | |||||
| GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), | |||||
| output_index); | |||||
| return FAILED; | |||||
| } | |||||
| fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); | |||||
| auto tensor_desc = op_desc_->GetOutputDesc(output_index); | |||||
| int64_t tensor_size = 0; | |||||
| GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||||
| davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); | |||||
| GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, | |||||
| fixed_addr_offset_); | |||||
| kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | |||||
| if (kernel_type_ == ccKernelType::TE) { | |||||
| uint32_t args_size = kernel_def.args_size(); | |||||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
| davinci_model->SetTotalArgsSize(args_size); | |||||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||||
| davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||||
| GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -882,7 +866,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| } | } | ||||
| // copy args to new host memory | // copy args to new host memory | ||||
| std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]); | |||||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||||
| GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | ||||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | ||||
| if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
| @@ -890,8 +874,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||||
| if (init_ret != SUCCESS) { | |||||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
| return init_ret; | |||||
| } | |||||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||||
| if (davinci_model_->IsKnownNode()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||||
| vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | ||||
| vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | ||||
| vector<void *> io_addrs; | vector<void *> io_addrs; | ||||
| @@ -908,19 +907,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| } | } | ||||
| } | } | ||||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||||
| if (init_ret != SUCCESS) { | |||||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
| return init_ret; | |||||
| } | |||||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||||
| // malloc device memory for args | // malloc device memory for args | ||||
| rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -161,7 +161,9 @@ class KernelTaskInfo : public TaskInfo { | |||||
| OpDescPtr op_desc_; | OpDescPtr op_desc_; | ||||
| DavinciModel *davinci_model_; | DavinciModel *davinci_model_; | ||||
| uint32_t args_offset_ = 0; | uint32_t args_offset_ = 0; | ||||
| uint32_t hybrid_args_offset_ = 0; | |||||
| int64_t fixed_addr_offset_ = 0; | int64_t fixed_addr_offset_ = 0; | ||||
| std::unique_ptr<uint8_t[]> args_addr = nullptr; | |||||
| bool call_save_dump_ = false; | bool call_save_dump_ = false; | ||||
| // aicpu ext_info device mem | // aicpu ext_info device mem | ||||
| @@ -540,7 +540,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std: | |||||
| std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | ||||
| ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | |||||
| "Data node do not contain param aipp!"); | "Data node do not contain param aipp!"); | ||||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | ||||