diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc
index d032965b..d7e6cd0c 100644
--- a/ge/generator/ge_generator.cc
+++ b/ge/generator/ge_generator.cc
@@ -721,8 +721,10 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in
   GeModelPtr &ge_model = name_to_ge_model.begin()->second;
   GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
-  bool dynamic_flag = false;
-  if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) {
+  bool all_shape = false;
+  (void)AttrUtils::GetBool(op_desc, "_AllShape", all_shape);
+  if (all_shape) {
+    GELOGD("Get aicpu all_shape kernel!");
     vector inputs_dynamic;
     vector outputs_dynamic;
     GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic));
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
index 98d9cb78..14b86683 100644
--- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
@@ -28,6 +28,61 @@
 #include "graph/load/new_model_manager/model_manager.h"
 namespace ge {
+// Uploads the AICPU kernel ext info to device memory and stores the device address in ext_info_addr_.
+// The records are first copied to a host buffer where bit 0 of every FWK_ADPT_EXT_BITMAP payload is
+// set; that patched copy is what gets uploaded.
+// @param ext_info serialized aicpu::FWKAdapter::ExtInfo records; an empty string is a no-op.
+// @return SUCCESS on success or empty input; FAILED if the host copy fails; PARAM_INVALID if a bitmap
+//         record has the wrong infoLen or does not fit in the buffer; otherwise the status reported
+//         by GE_CHECK_NOTNULL or mapped from the failing rtMalloc/rtMemcpy call.
+Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info) {
+  if (ext_info.empty()) {
+    return SUCCESS;
+  }
+  // Patch a private copy: ext_info itself is const.
+  std::unique_ptr<uint8_t[]> copy_ext_info;
+  copy_ext_info.reset(new(std::nothrow)uint8_t[ext_info.size()]);
+  GE_CHECK_NOTNULL(copy_ext_info);
+  auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size());
+  if (sec_ret != EOK) {
+    GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
+    return FAILED;
+  }
+
+  auto ext_info_data = copy_ext_info.get();
+  size_t offset = 0;
+  // The loop condition only guarantees that a complete record header is readable at offset.
+  while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) {
+    auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset);
+    GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen);
+    if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP) {
+      GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID,
+                             "Parse bit_map info failed as infoLen must be %zu but %u.",
+                             sizeof(uint64_t), aicpu_ext_info->infoLen);
+      // The payload is written below, so it must lie completely inside the copied buffer.
+      const size_t payload_room = ext_info.size() - offset - sizeof(aicpu::FWKAdapter::ExtInfo);
+      GE_CHK_BOOL_RET_STATUS(payload_room >= sizeof(uint64_t), PARAM_INVALID,
+                             "Parse bit_map info failed as only %zu bytes left but %zu needed.",
+                             payload_room, sizeof(uint64_t));
+      uint64_t *bit_map = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg);
+      *(bit_map) |= 1;
+      GELOGD("Update aicpu_task ext_info bit_map to 1.");
+    }
+    offset += sizeof(aicpu::FWKAdapter::ExtInfo);
+    offset += aicpu_ext_info->infoLen;
+  }
+
+  auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
+  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
+                  GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
+                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
+  rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
+  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
+                  GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
+                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
+  return SUCCESS;
+}
+
 Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
   GELOGI("KernelExTaskInfo Init Start.");
   GE_CHECK_NOTNULL(davinci_model);
@@ -63,16 +118,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
   }
   const auto &ext_info = kernel_ex_def.kernel_ext_info();
-  if (!ext_info.empty()) {
-    auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
-    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
-                    GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
-                    return RT_ERROR_TO_GE_STATUS(rt_ret);)
-    rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
-    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
-                    GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
-                    return RT_ERROR_TO_GE_STATUS(rt_ret);)
-  }
+  GE_CHK_STATUS_RET(InitTaskExtInfo(ext_info),
+                    "Init aicpu tf_task ext info failed, ext_info size=%zu", ext_info.size());
   GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(),
          op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_);
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
index f6873c6c..de394889 100644
--- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
@@ -62,6 +62,7 @@ class KernelExTaskInfo : public TaskInfo {
   void SetIoAddrs(const OpDescPtr &op_desc);
   void InitDumpTask(void *addr, const OpDescPtr &op_desc);
+  Status InitTaskExtInfo(const std::string &ext_info);
   uint32_t task_id_;
   uint32_t stream_id_;
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
index 83bf2779..0eee25c6 100755
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -986,6 +986,13 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
       session_info->sessionId = davinci_model_->GetSessionId();
       session_info->sessFlag = true;
       GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId);
+    } else if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP) {
+      GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID,
+                             "Parse bit_map info failed as infoLen must be %zu but %u.",
+                             sizeof(uint64_t), aicpu_ext_info->infoLen);
+      uint64_t *bit_map = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg);
+      *(bit_map) |= 1;
+      GELOGD("Update aicpu_task ext_info bit_map to 1.");
     }
     offset += sizeof(aicpu::FWKAdapter::ExtInfo);
     offset += aicpu_ext_info->infoLen;
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index 322ceecc..0d394243 100755
--- 
a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -643,22 +643,12 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_
 Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) {
   GE_CHECK_NOTNULL(compute_graph);
-  PassManager pass_for_dynamic_shape_reset_optimize;
-  GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass(
-    "SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass))
-  GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize);
-  Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph);
-  GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph");
-  if (ret != SUCCESS && ret != NOT_CHANGED) {
-    GELOGE(ret, "Run passes when optimize subgraph failed");
-    return ret;
-  }
   auto sub_graph_map = partitioner.GetSubGraphMap();
   GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.", options_.build_mode.c_str(),
          options_.build_step.c_str());
-  ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id);
+  Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id);
   if (ret != SUCCESS) {
     GELOGE(ret, "Multiply optimize subgraph failed");
     return ret;
diff --git 
a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
index b8acbf0e..2b43b984 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
@@ -63,6 +63,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
       case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO:
         GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed.");
         break;
+      case aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP:
+        GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "Parse ext bit map failed.");
+        break;
       default:
         GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.",
                node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen);
@@ -139,6 +142,35 @@ Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) {
   return SUCCESS;
 }
+// Remembers where the FWK_ADPT_EXT_BITMAP payload lives so UpdateBitMap() can patch it later.
+// @param aicpu_ext_info record whose infoMsg holds the 64-bit bitmap; infoLen must be sizeof(uint64_t).
+// @return SUCCESS, or PARAM_INVALID when infoLen does not match.
+Status AicpuExtInfoHandler::ParseExtBitMap(AicpuExtInfo *aicpu_ext_info) {
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID,
+                         "Node[%s] parse bit_map info failed as infoLen must be %zu but %u.",
+                         node_name_.c_str(), sizeof(uint64_t), aicpu_ext_info->infoLen);
+
+  bit_map_ = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg);
+  GELOGI("Node[%s] bit_map info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
+  return SUCCESS;
+}
+
+// Sets (flag == true) or clears (flag == false) bit 0 of the bitmap recorded by ParseExtBitMap().
+// Does nothing when the parsed ext info carried no bitmap record (bit_map_ is still nullptr).
+// @return always SUCCESS.
+Status AicpuExtInfoHandler::UpdateBitMap(bool flag) {
+  if (bit_map_ == nullptr) {
+    GELOGD("There is no bit_map in ext_info, no need update.");
+    return SUCCESS;
+  }
+  if (flag) {
+    *(bit_map_) |= 1;
+  } else {
+    *(bit_map_) &= ~1ULL;
+  }
+  return SUCCESS;
+}
+
 Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) {
   if (session_info_ == nullptr) {
     GELOGD("There is no session info in ext_info, no need update.");
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h
index 2defba8f..cc50d59a 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h
+++ 
b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h
@@ -57,6 +57,8 @@ class AicpuExtInfoHandler {
   Status UpdateSessionInfoSessionId(uint64_t session_id);
+  Status UpdateBitMap(bool flag);
+
   Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type);
  private:
@@ -65,6 +67,7 @@ class AicpuExtInfoHandler {
   Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info);
   Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info);
   Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info);
+  Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info);
   static Status UpdateShapeAndType(const GeShape &shape,
                                    DataType data_type,
@@ -80,6 +83,7 @@ class AicpuExtInfoHandler {
   const uint32_t output_num_;
   UnknowShapeOpType unknown_type_;
   AicpuSessionInfo *session_info_ = nullptr;
+  uint64_t *bit_map_ = nullptr;
   std::unique_ptr ext_info_;
   size_t ext_info_len_ = 0;
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 63ce65e9..5ef71ef4 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -61,6 +61,7 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_
   GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id);
   GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id),
                     "UpdateSessionInfoSessionId failed.");
+  GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateBitMap(true), "UpdateBitMap failed.");
   // copy task args buf
   GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_),
@@ -137,6 +138,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() {
     return SUCCESS;
   }
+  GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateBitMap(false), "UpdateBitMap failed.");
   for (auto i = 0; i < node_item_->num_inputs; ++i) {
     auto input_desc = node_item_->MutableInputDesc(i);
     GE_CHECK_NOTNULL(input_desc);
diff --git a/ge/single_op/task/op_task.cc 
b/ge/single_op/task/op_task.cc
index cc63e811..5b9f073a 100755
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -366,6 +366,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint
   GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false),
                     "UpdateSessionInfo failed.");
+  GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateBitMap(true), "UpdateBitMap failed.");
   GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM));
   GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(),
@@ -403,6 +404,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc,
   }
   GE_CHECK_NOTNULL(aicpu_ext_handle_);
+  GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateBitMap(false), "UpdateBitMap failed.");
   size_t non_const_index = 0;
   for (size_t input_index = 0; input_index < num_inputs_; input_index++) {
diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
index 50b39d91..7a2cbc50 100644
--- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
+++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
@@ -60,6 +60,7 @@ enum FWKTaskExtInfoType {
   FWK_ADPT_EXT_UPDATE_ADDR,
   FWK_ADPT_EXT_OP_NAME,
   FWK_ADPT_EXT_SESSION_INFO,
+  FWK_ADPT_EXT_BITMAP,
   FWK_ADPT_EXT_INVALID
 };