| @@ -721,8 +721,10 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| GeModelPtr &ge_model = name_to_ge_model.begin()->second; | GeModelPtr &ge_model = name_to_ge_model.begin()->second; | ||||
| GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | ||||
| bool dynamic_flag = false; | |||||
| if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) { | |||||
| bool all_shape = false; | |||||
| (void)AttrUtils::GetBool(op_desc, "_AllShape", all_shape); | |||||
| if (all_shape) { | |||||
| GELOGD("Get aicpu all_shape kernel!"); | |||||
| vector<GeTensor> inputs_dynamic; | vector<GeTensor> inputs_dynamic; | ||||
| vector<GeTensor> outputs_dynamic; | vector<GeTensor> outputs_dynamic; | ||||
| GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic)); | GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic)); | ||||
| @@ -28,6 +28,47 @@ | |||||
| #include "graph/load/new_model_manager/model_manager.h" | #include "graph/load/new_model_manager/model_manager.h" | ||||
| namespace ge { | namespace ge { | ||||
| // Prepares the aicpu kernel ext_info blob for this task and uploads it to the device. | |||||
| // | |||||
| // The blob is first duplicated into a host scratch buffer so that it can be patched | |||||
| // without touching the caller's string. The buffer is then walked as a sequence of | |||||
| // aicpu::FWKAdapter::ExtInfo records (header followed by infoLen payload bytes); for | |||||
| // every FWK_ADPT_EXT_BITMAP record bit 0 of its 64-bit payload is set. Finally the | |||||
| // patched bytes are copied into memory obtained from rtMalloc, whose address is kept | |||||
| // in ext_info_addr_. | |||||
| // | |||||
| // @param ext_info serialized ext_info bytes; an empty string is accepted and ignored. | |||||
| // @return SUCCESS on success or empty input; FAILED when memcpy_s fails; | |||||
| //         PARAM_INVALID is passed as the failure status for a malformed bitmap record; | |||||
| //         RT_ERROR_TO_GE_STATUS(rt_ret) when rtMalloc or rtMemcpy fails. | |||||
| Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info) { | |||||
| if (ext_info.empty()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| // Work on a private host copy: the bitmap payload is modified below. | |||||
| std::unique_ptr<uint8_t[]> copy_ext_info; | |||||
| copy_ext_info.reset(new(std::nothrow)uint8_t[ext_info.size()]); | |||||
| GE_CHECK_NOTNULL(copy_ext_info); | |||||
| auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size()); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| auto ext_info_data = copy_ext_info.get(); | |||||
| size_t offset = 0; | |||||
| // The loop condition only guarantees that a complete record header is available. | |||||
| while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) { | |||||
| auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset); | |||||
| GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||||
| if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP) { | |||||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID, | |||||
| "Parse bit_map info failed as infoLen must be %zu but %u.", | |||||
| sizeof(uint64_t), aicpu_ext_info->infoLen); | |||||
| // The payload itself must also lie inside the copied buffer, otherwise the | |||||
| // read-modify-write below would run past the end of the host allocation. | |||||
| // The subtraction cannot wrap because of the loop condition above. | |||||
| const size_t payload_room = ext_info.size() - offset - sizeof(aicpu::FWKAdapter::ExtInfo); | |||||
| GE_CHK_BOOL_RET_STATUS(payload_room >= sizeof(uint64_t), PARAM_INVALID, | |||||
| "Parse bit_map info failed as remaining size %zu is less than %zu.", | |||||
| payload_room, sizeof(uint64_t)); | |||||
| // NOTE(review): infoMsg is accessed through a uint64_t pointer; this assumes the | |||||
| // payload is suitably aligned for such an access - confirm against the ExtInfo layout. | |||||
| uint64_t *bit_map = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg); | |||||
| *(bit_map) |= 1; | |||||
| GELOGD("Update aicpu_task ext_info bit_map to 1."); | |||||
| } | |||||
| // Advance to the next record: header plus payload. | |||||
| offset += sizeof(aicpu::FWKAdapter::ExtInfo); | |||||
| offset += aicpu_ext_info->infoLen; | |||||
| } | |||||
| // Upload the patched host copy (not the caller's original string) to the device. | |||||
| auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
| // NOTE(review): ext_info_addr_ is not freed here when the copy fails; presumably the | |||||
| // owner releases it elsewhere - confirm. | |||||
| rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
| return SUCCESS; | |||||
| } | |||||
| Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
| GELOGI("KernelExTaskInfo Init Start."); | GELOGI("KernelExTaskInfo Init Start."); | ||||
| GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
| @@ -63,16 +104,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| } | } | ||||
| const auto &ext_info = kernel_ex_def.kernel_ext_info(); | const auto &ext_info = kernel_ex_def.kernel_ext_info(); | ||||
| if (!ext_info.empty()) { | |||||
| auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
| rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
| } | |||||
| GE_CHK_STATUS_RET(InitTaskExtInfo(ext_info), | |||||
| "Init aicpu tf_task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(), | GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(), | ||||
| op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | ||||
| @@ -62,6 +62,7 @@ class KernelExTaskInfo : public TaskInfo { | |||||
| void SetIoAddrs(const OpDescPtr &op_desc); | void SetIoAddrs(const OpDescPtr &op_desc); | ||||
| void InitDumpTask(void *addr, const OpDescPtr &op_desc); | void InitDumpTask(void *addr, const OpDescPtr &op_desc); | ||||
| Status InitTaskExtInfo(const std::string &ext_info); | |||||
| uint32_t task_id_; | uint32_t task_id_; | ||||
| uint32_t stream_id_; | uint32_t stream_id_; | ||||
| @@ -986,6 +986,13 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||||
| session_info->sessionId = davinci_model_->GetSessionId(); | session_info->sessionId = davinci_model_->GetSessionId(); | ||||
| session_info->sessFlag = true; | session_info->sessFlag = true; | ||||
| GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId); | GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId); | ||||
| } else if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP) { | |||||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID, | |||||
| "Parse bit_map info failed as infoLen must be %zu but %u.", | |||||
| sizeof(uint64_t), aicpu_ext_info->infoLen); | |||||
| uint64_t *bit_map = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg); | |||||
| *(bit_map) |= 1; | |||||
| GELOGD("Update aicpu_task ext_info bit_map to 1."); | |||||
| } | } | ||||
| offset += sizeof(aicpu::FWKAdapter::ExtInfo); | offset += sizeof(aicpu::FWKAdapter::ExtInfo); | ||||
| offset += aicpu_ext_info->infoLen; | offset += aicpu_ext_info->infoLen; | ||||
| @@ -643,22 +643,22 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ | |||||
| Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { | Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { | ||||
| GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
| PassManager pass_for_dynamic_shape_reset_optimize; | |||||
| GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass( | |||||
| "SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass)) | |||||
| GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize); | |||||
| Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph); | |||||
| GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph"); | |||||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||||
| GELOGE(ret, "Run passes when optimize subgraph failed"); | |||||
| return ret; | |||||
| } | |||||
| // PassManager pass_for_dynamic_shape_reset_optimize; | |||||
| // GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass( | |||||
| // "SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass)) | |||||
| // GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize); | |||||
| // Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph); | |||||
| // GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph"); | |||||
| // if (ret != SUCCESS && ret != NOT_CHANGED) { | |||||
| // GELOGE(ret, "Run passes when optimize subgraph failed"); | |||||
| // return ret; | |||||
| // } | |||||
| auto sub_graph_map = partitioner.GetSubGraphMap(); | auto sub_graph_map = partitioner.GetSubGraphMap(); | ||||
| GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.", | GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.", | ||||
| options_.build_mode.c_str(), | options_.build_mode.c_str(), | ||||
| options_.build_step.c_str()); | options_.build_step.c_str()); | ||||
| ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); | |||||
| Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Multiply optimize subgraph failed"); | GELOGE(ret, "Multiply optimize subgraph failed"); | ||||
| return ret; | return ret; | ||||
| @@ -63,6 +63,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||||
| case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: | case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: | ||||
| GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed."); | GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed."); | ||||
| break; | break; | ||||
| case aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP: | |||||
| GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "Parse ext bit map failed."); | |||||
| break; | |||||
| default: | default: | ||||
| GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | ||||
| node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | ||||
| @@ -139,6 +142,29 @@ Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| // Handles a FWK_ADPT_EXT_BITMAP record found while parsing the ext_info buffer. | |||||
| // The record is checked to carry exactly sizeof(uint64_t) payload bytes (PARAM_INVALID | |||||
| // is handed to the check macro as the failure status); on success the address of the | |||||
| // payload is stored in bit_map_ so that it can be modified later, and SUCCESS is returned. | |||||
| Status AicpuExtInfoHandler::ParseExtBitMap(AicpuExtInfo *aicpu_ext_info) { | |||||
| // Read the declared payload length once; it is used for validation and logging. | |||||
| const auto bit_map_len = aicpu_ext_info->infoLen; | |||||
| GE_CHK_BOOL_RET_STATUS(bit_map_len == sizeof(uint64_t), PARAM_INVALID, | |||||
| "Node[%s] parse bit_map info failed as infoLen must be %zu but %u.", | |||||
| node_name_.c_str(), sizeof(uint64_t), bit_map_len); | |||||
| // Keep a pointer into the record itself rather than a copy of the value. | |||||
| bit_map_ = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg); | |||||
| GELOGI("Node[%s] bit_map info success infoLen=%u.", node_name_.c_str(), bit_map_len); | |||||
| return SUCCESS; | |||||
| } | |||||
| // Sets (flag == true) or clears (flag == false) bit 0 of the 64-bit word that bit_map_ | |||||
| // points to; all other bits are preserved. When bit_map_ is null nothing is modified. | |||||
| // SUCCESS is returned in every case. | |||||
| Status AicpuExtInfoHandler::UpdateBitMap(bool flag) { | |||||
| if (bit_map_ != nullptr) { | |||||
| const uint64_t current = *bit_map_; | |||||
| // ~1 is an int with value -2; converting it to uint64_t yields a mask with every | |||||
| // bit set except bit 0, so only the lowest bit is cleared. | |||||
| *bit_map_ = flag ? (current | 1) : (current & ~1); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGD("There is no bit_map in ext_info, no need update."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) { | Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) { | ||||
| if (session_info_ == nullptr) { | if (session_info_ == nullptr) { | ||||
| GELOGD("There is no session info in ext_info, no need update."); | GELOGD("There is no session info in ext_info, no need update."); | ||||
| @@ -57,6 +57,8 @@ class AicpuExtInfoHandler { | |||||
| Status UpdateSessionInfoSessionId(uint64_t session_id); | Status UpdateSessionInfoSessionId(uint64_t session_id); | ||||
| Status UpdateBitMap(bool flag); | |||||
| Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | ||||
| private: | private: | ||||
| @@ -65,6 +67,7 @@ class AicpuExtInfoHandler { | |||||
| Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | ||||
| Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | ||||
| Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); | Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); | ||||
| Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info); | |||||
| static Status UpdateShapeAndType(const GeShape &shape, | static Status UpdateShapeAndType(const GeShape &shape, | ||||
| DataType data_type, | DataType data_type, | ||||
| @@ -80,6 +83,7 @@ class AicpuExtInfoHandler { | |||||
| const uint32_t output_num_; | const uint32_t output_num_; | ||||
| UnknowShapeOpType unknown_type_; | UnknowShapeOpType unknown_type_; | ||||
| AicpuSessionInfo *session_info_ = nullptr; | AicpuSessionInfo *session_info_ = nullptr; | ||||
| uint64_t *bit_map_ = nullptr; | |||||
| std::unique_ptr<uint8_t[]> ext_info_; | std::unique_ptr<uint8_t[]> ext_info_; | ||||
| size_t ext_info_len_ = 0; | size_t ext_info_len_ = 0; | ||||
| @@ -61,6 +61,7 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_ | |||||
| GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id); | GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id); | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | ||||
| "UpdateSessionInfoSessionId failed."); | "UpdateSessionInfoSessionId failed."); | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateBitMap(true), "UpdateBitMap failed."); | |||||
| // copy task args buf | // copy task args buf | ||||
| GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | ||||
| @@ -137,6 +138,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateBitMap(false), "UpdateBitMap failed."); | |||||
| for (auto i = 0; i < node_item_->num_inputs; ++i) { | for (auto i = 0; i < node_item_->num_inputs; ++i) { | ||||
| auto input_desc = node_item_->MutableInputDesc(i); | auto input_desc = node_item_->MutableInputDesc(i); | ||||
| GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
| @@ -366,6 +366,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), | GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), | ||||
| "UpdateSessionInfo failed."); | "UpdateSessionInfo failed."); | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateBitMap(true), "UpdateBitMap failed."); | |||||
| GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); | GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); | ||||
| GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), | GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), | ||||
| @@ -403,6 +404,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
| } | } | ||||
| GE_CHECK_NOTNULL(aicpu_ext_handle_); | GE_CHECK_NOTNULL(aicpu_ext_handle_); | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateBitMap(false), "UpdateBitMap failed."); | |||||
| size_t non_const_index = 0; | size_t non_const_index = 0; | ||||
| for (size_t input_index = 0; input_index < num_inputs_; input_index++) { | for (size_t input_index = 0; input_index < num_inputs_; input_index++) { | ||||
| @@ -60,6 +60,7 @@ enum FWKTaskExtInfoType { | |||||
| FWK_ADPT_EXT_UPDATE_ADDR, | FWK_ADPT_EXT_UPDATE_ADDR, | ||||
| FWK_ADPT_EXT_OP_NAME, | FWK_ADPT_EXT_OP_NAME, | ||||
| FWK_ADPT_EXT_SESSION_INFO, | FWK_ADPT_EXT_SESSION_INFO, | ||||
| FWK_ADPT_EXT_BITMAP, | |||||
| FWK_ADPT_EXT_INVALID | FWK_ADPT_EXT_INVALID | ||||
| }; | }; | ||||