From b9152c4738293da8ab5636262432b482714f0973 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Fri, 8 Jan 2021 17:51:34 +0800 Subject: [PATCH 01/41] fix cmake args --- build.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index a3a6f6af..5222ab5c 100644 --- a/build.sh +++ b/build.sh @@ -134,11 +134,7 @@ build_graphengine() mk_dir "${BUILD_PATH}" cd "${BUILD_PATH}" - if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" - else - CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}" - fi + CMAKE_ARGS="-DBUILD_PATH=$BUILD_PATH" if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_COV=ON" @@ -156,7 +152,13 @@ build_graphengine() if [[ "X$ENABLE_GITEE" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON" fi - CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" + + if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" + else + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}" + fi + echo "${CMAKE_ARGS}" cmake ${CMAKE_ARGS} .. if [ $? 
-ne 0 ] From 5daea034392b5c0d176a95010bea2c44d8bd1ce7 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Sat, 9 Jan 2021 14:23:46 +0800 Subject: [PATCH 02/41] bugfix for l1 data dump --- .../load/new_model_manager/davinci_model.cc | 35 +++++++++++-------- .../load/new_model_manager/davinci_model.h | 2 ++ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 49abe17c..44f46785 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -520,6 +520,8 @@ Status DavinciModel::DoTaskSink() { GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); + GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); + GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); @@ -716,19 +718,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed"); GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); - auto all_dump_model = GetDumpProperties().GetAllDumpModel(); - bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); - bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); - bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || - findByOmName || findByModelName; - if (dump_l1fusion_op) { - // malloc 2M for dump l1fusion op - GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); - - // send l1fusion dump addr to rts - GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); - } - /// In zero copy model, if a aicpu operator is connected to the first or last layer, before model execution, /// the aicpu opertor needs to destroy history record, and update operator memory address. 
/// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). @@ -3951,7 +3940,6 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map &variable_by_name); + Status InitL1DataDumperArgs(); + Status InitModelProfile(); Status SinkModelProfile(); From f51a80f3abc508df08cde3e08c60a7510a6f5282 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Sat, 9 Jan 2021 19:32:36 +0800 Subject: [PATCH 03/41] bugfix for l1 data dump --- ge/graph/load/new_model_manager/davinci_model.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 44f46785..c531fe13 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -4160,7 +4160,12 @@ Status DavinciModel::InitL1DataDumperArgs() { GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); // send l1fusion dump addr to rts - GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); + if (rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion) != + RT_ERROR_NONE) { + GELOGE(FAILED, "Call rtDumpAddrSet failed"); + GE_CHK_RT(rtFree(l1_fusion_addr_)); + return FAILED; + } // set addr for l1 data dump data_dumper_.SetL1FusionAddr(l1_fusion_addr_); From fbdc97709006296ec7ab9902b66a1e988a9a16d7 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Mon, 11 Jan 2021 10:16:00 +0800 Subject: [PATCH 04/41] bugfix for l1 data dump --- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index c531fe13..35844b2d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ 
b/ge/graph/load/new_model_manager/davinci_model.cc @@ -4162,8 +4162,8 @@ Status DavinciModel::InitL1DataDumperArgs() { // send l1fusion dump addr to rts if (rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion) != RT_ERROR_NONE) { + // l1_fusion_addr_ will be free when DavinciModel destruct GELOGE(FAILED, "Call rtDumpAddrSet failed"); - GE_CHK_RT(rtFree(l1_fusion_addr_)); return FAILED; } From 77eecae44026fcb5960c3f4392e2eecc334b93cd Mon Sep 17 00:00:00 2001 From: lichun Date: Mon, 11 Jan 2021 14:21:12 +0800 Subject: [PATCH 05/41] only check EXPERIMENTAL_DYNAMIC_PARTITION in some special scenes --- ge/graph/partition/dynamic_shape_partition.cc | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 95f13b6f..81295c84 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -44,18 +44,36 @@ #define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) #define REQUIRE_GRAPH_SUCCESS(cond, ...) 
REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) -bool IsExperimental() { - const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr); - return kIsExperimental; -} - namespace ge { using Cluster = DynamicShapePartitioner::Cluster; using ClusterPtr = std::shared_ptr; +static bool IsContainResourceOp(const ComputeGraphPtr &root_graph) { + for (const auto &node : root_graph->GetAllNodes()) { + GE_CHECK_NOTNULL(node->GetOpDesc()); + for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) { + auto type = input_desc.GetDataType(); + if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { + if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { + return false; + } + } + } + for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDesc()) { + auto type = output_desc.GetDataType(); + if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { + if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { + return false; + } + } + } + } + return true; +} + Status DynamicShapePartitioner::Partition() { REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); - if (!IsExperimental()) { + if (!IsContainResourceOp(root_graph_)) { GELOGD("Skip dynamic shape partition as not in experimental mode."); REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), "Failed set dynamic shape partitioned flag on root graph."); From c4496510d07241045db35abe5d6a3ecc4ff24403 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 11 Jan 2021 16:56:11 +0800 Subject: [PATCH 06/41] Remove subgraph control only const --- .../passes/subgraph_const_migration_pass.cc | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index f131942c..864fcec1 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ 
b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -145,6 +145,7 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra return GE_GRAPH_EMPTY_SUBGRAPH; } + set ctrl_only_const_nodes; auto &data_nodes = all_data_nodes[subgraph]; auto &const_nodes = all_const_nodes[subgraph]; for (auto &node : subgraph->GetDirectNode()) { @@ -178,15 +179,26 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx())); } + if (peer_name_list.empty()) { + ctrl_only_const_nodes.insert(node); + GELOGI("%s, Const: %s, no data link will removed", subgraph->GetName().c_str(), node->GetName().c_str()); + continue; + } + string key_of_const; for (const string &name : peer_name_list) { key_of_const += (key_of_const.empty() ? name : "_" + name); } const_nodes[key_of_const] = node; - GELOGD("%s, Key: %s, Const: %s", subgraph->GetName().c_str(), key_of_const.c_str(), node->GetName().c_str()); + GELOGD("%s, Const: %s, Key: %s", subgraph->GetName().c_str(), node->GetName().c_str(), key_of_const.c_str()); } } + + for (auto &node : ctrl_only_const_nodes) { + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, node), + "Remove node without relink failed, node: %s", node->GetName().c_str()); + } } return SUCCESS; @@ -352,7 +364,8 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra const auto owner_node = out_anchor->GetOwnerNode(); GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), const_node->GetName().c_str()); if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty() && owner_node != data_node) { - graph->RemoveNode(owner_node); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node), + "Remove node without relink failed, node: %s", owner_node->GetName().c_str()); } } @@ -414,7 +427,8 @@ Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr 
&gra const auto owner_node = out_anchor->GetOwnerNode(); GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str()); if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty()) { - graph->RemoveNode(owner_node); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node), + "Remove node without relink failed, node: %s", owner_node->GetName().c_str()); } } GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(const_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed"); @@ -472,7 +486,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph return FAILED; } - GE_CHK_GRAPH_STATUS_RET(subgraph->RemoveNode(move_node), "Remove node failed"); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, move_node), + "Remove node without relink failed, node: %s", move_node->GetName().c_str()); GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str()); } From be95290b4efce419f68e47a239b5f34f0638a2b6 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 11 Jan 2021 17:48:26 +0800 Subject: [PATCH 07/41] Remove subgraph control only const. 
--- ge/graph/passes/subgraph_const_migration_pass.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index 864fcec1..d2effd44 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -180,8 +180,12 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra } if (peer_name_list.empty()) { - ctrl_only_const_nodes.insert(node); - GELOGI("%s, Const: %s, no data link will removed", subgraph->GetName().c_str(), node->GetName().c_str()); + GELOGI("%s, Const: %s, no data output", subgraph->GetName().c_str(), node->GetName().c_str()); + const auto in_all_nodes = node->GetInAllNodes(); + if (in_all_nodes.empty() || std::all_of(in_all_nodes.begin(), in_all_nodes.end(), + [](const NodePtr &n) { return n->GetType() == DATA; })) { + ctrl_only_const_nodes.insert(node); + } continue; } @@ -456,7 +460,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph const map> &all_data_nodes, const string &node_key, uint32_t parent_index) { if (node_key.empty() || parent_index == kInvalidParent) { - GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str()); + GELOGE(FAILED, "Graph: %s, node key: %s, parent index: %u invalid", + graph->GetName().c_str(), node_key.c_str(), parent_index); return FAILED; } From 9b93ff3cd5b3bcf1476f202fe879fec9df699280 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 11 Jan 2021 22:07:49 +0800 Subject: [PATCH 08/41] Add dependence PROTO_HEADER_HDRS --- ge/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 58b6a999..117f8cf2 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -35,6 +35,7 @@ protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST}) if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) 
############ libge_proto_common.a ############ add_library(ge_proto_common STATIC + ${PROTO_HEADER_HDRS} ${PROTO_SRCS} ) @@ -55,6 +56,7 @@ target_link_libraries(ge_proto_common PRIVATE ############ libge_proto_client.a ############ add_library(ge_proto_client STATIC + ${PROTO_HEADER_HDRS} ${PROTO_CLIENT_SRCS} ) From 87b78662a27edff21175ff6849fb79f9e7dc0901 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 12 Jan 2021 16:57:37 +0800 Subject: [PATCH 09/41] Add cc_task task_info log. --- ge/single_op/task/aicpu_kernel_task_builder.cc | 4 ++++ ge/single_op/task/aicpu_task_builder.cc | 2 +- ge/single_op/task/op_task.cc | 3 ++- ge/single_op/task/op_task.h | 2 ++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 34f1ba7b..2a5f968f 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -109,6 +109,10 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons aicpu_param_head->extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); } + task.op_type_ = op_desc_->GetName(); + task.kernel_id_ = kernel_id; + auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); + GELOGI("[TASK_INFO] %lu/%s %s", kernel_id, task.op_type_.c_str(), debug_info.c_str()); return SUCCESS; } } // namespace ge \ No newline at end of file diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 5fd4879e..1bfbcb3c 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -123,7 +123,7 @@ namespace ge { task.kernel_id_ = kernel_id; auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); - GELOGI("[TASK_INFO] %s/%s %s", std::to_string(kernel_id).c_str(), task.op_type_.c_str(), debug_info.c_str()); + GELOGI("[TASK_INFO] %lu/%s %s", kernel_id, task.op_type_.c_str(), debug_info.c_str()); return SUCCESS; } } // namespace ge 
diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 51c3e845..cc63e811 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -567,7 +567,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); return RT_FAILED; } - GELOGI("[TASK_INFO] %s/%s", std::to_string(kernel_id_).c_str(), op_type_.c_str()); + GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); auto status = OpenDump(stream); if (status != SUCCESS) { @@ -840,6 +840,7 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { GELOGE(ret, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); return ret; } + GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); GELOGD("Invoke rtCpuKernelLaunch succeeded"); auto status = OpenDump(stream); if (status != SUCCESS) { diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index bf78557c..2d0740a6 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -227,6 +227,8 @@ private: size_t io_addr_num_ = 0; bool is_custom_ = false; uint32_t dump_flag_ = RT_KERNEL_DEFAULT; + std::string op_type_; + uint64_t kernel_id_ = 0; }; } // namespace ge From e5c0bd1b97c1cdd01f7616a98a25fe00a1c7e4ad Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Tue, 12 Jan 2021 17:21:04 +0800 Subject: [PATCH 10/41] add no-deprecated and no-common for mindspore mode --- ge/common/CMakeLists.txt | 2 ++ ge/ge_runtime/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index aad85654..0172628c 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -187,6 +187,8 @@ target_compile_options(ge_common PRIVATE -fvisibility=hidden -O2 -Werror + -Wno-deprecated-declarations + -fno-common ) target_include_directories(ge_common PRIVATE diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index ded8fd39..56b5ab41 
100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -23,6 +23,8 @@ add_library(ge_runtime SHARED ${GE_SRC_LIST}) target_compile_options(ge_runtime PRIVATE -Werror -O2 + -Wno-deprecated-declarations + -fno-common ) target_compile_definitions(ge_runtime PRIVATE From b183dd97f934ca795c1b17f49321493d8041da66 Mon Sep 17 00:00:00 2001 From: lwx897429 Date: Tue, 12 Jan 2021 16:33:54 +0800 Subject: [PATCH 11/41] Add atc params help information. --- ge/offline/main.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ge/offline/main.cc b/ge/offline/main.cc index dc299ed7..363f9cda 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -292,11 +292,14 @@ class GFlagUtils { " --enable_small_channel Set enable small channel. 0(default): disable; 1: enable\n" " --enable_compress_weight Enable compress weight. true: enable; false(default): disable\n" " --compress_weight_conf Config file to compress weight\n" - " --buffer_optimize Set buffer optimize. \"l2_optimize\" (default). Set \"off_optimize\" to close\n" + " --buffer_optimize Set buffer optimize. Support \"l2_optimize\" (default), " + "\"l1_optimize\", \"off_optimize\"\n" " --mdl_bank_path Set the path of the custom repository generated after model tuning.\n" "\n[Operator Tuning]\n" " --precision_mode precision mode, support force_fp16(default), allow_mix_precision, " "allow_fp32_to_fp16, must_keep_origin_dtype.\n" + " --keep_dtype Retains the precision of certain operators in inference " + "scenarios by using a configuration file.\n" " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" " --op_bank_path Set the path of the custom repository generated after operator tuning with Auto Tune.\n" " --op_select_implmode Set op select implmode. Support high_precision, high_performance. 
" From 6f10a03c59a89df7a1bdd6690277be32159935be Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Thu, 7 Jan 2021 14:12:29 +0800 Subject: [PATCH 12/41] fix infer time and mem when online infer dynamic --- ge/CMakeLists.txt | 4 + ge/ge_inference.mk | 2 + ge/ge_runner.mk | 2 + .../load/new_model_manager/zero_copy_offset.h | 2 +- ge/graph/manager/graph_manager.cc | 32 +++- .../fuse_data_nodes_with_common_input_pass.cc | 119 +++++++++++++ .../fuse_data_nodes_with_common_input_pass.h | 38 +++++ .../no_data_out_const_elimination_pass.cc | 36 ++++ .../no_data_out_const_elimination_pass.h | 31 ++++ tests/ut/ge/CMakeLists.txt | 4 + ...a_nodes_with_common_input_pass_unittest.cc | 156 ++++++++++++++++++ ...ata_out_const_elimination_pass_unittest.cc | 75 +++++++++ 12 files changed, 495 insertions(+), 6 deletions(-) create mode 100644 ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc create mode 100755 ge/graph/passes/fuse_data_nodes_with_common_input_pass.h create mode 100644 ge/graph/passes/no_data_out_const_elimination_pass.cc create mode 100644 ge/graph/passes/no_data_out_const_elimination_pass.h create mode 100644 tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc create mode 100644 tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 317ff00a..17e8e80a 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -157,7 +157,9 @@ set(TRAIN_SRC_LIST "graph/passes/compile_nodes_pass.cc" "graph/passes/constant_folding_pass.cc" "graph/passes/constant_fuse_same_pass.cc" + "graph/passes/fuse_data_nodes_with_common_input_pass.cc" "graph/passes/remove_same_const_pass.cc" + "graph/passes/no_data_out_const_elimination_pass.cc" "graph/passes/useless_control_out_remove_pass.cc" "graph/passes/control_trigger_pass.cc" "graph/passes/dimension_adjust_pass.cc" @@ -439,6 +441,7 @@ set(INFER_SRC_LIST "graph/passes/net_output_pass.cc" "graph/passes/replace_transshape_pass.cc" 
"graph/passes/constant_fuse_same_pass.cc" + "graph/passes/fuse_data_nodes_with_common_input_pass.cc" "graph/passes/print_op_pass.cc" "graph/passes/no_use_reshape_remove_pass.cc" "graph/passes/iterator_op_pass.cc" @@ -535,6 +538,7 @@ set(INFER_SRC_LIST "graph/passes/addn_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc" "graph/passes/remove_same_const_pass.cc" + "graph/passes/no_data_out_const_elimination_pass.cc" "graph/passes/useless_control_out_remove_pass.cc" "graph/passes/transop_symmetry_elimination_pass.cc" "graph/passes/save_pass.cc" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 74d09404..1830e847 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -103,6 +103,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/net_output_pass.cc \ graph/passes/replace_transshape_pass.cc \ graph/passes/constant_fuse_same_pass.cc \ + graph/passes/fuse_data_nodes_with_common_input_pass.cc \ graph/passes/print_op_pass.cc \ graph/passes/no_use_reshape_remove_pass.cc \ graph/passes/iterator_op_pass.cc \ @@ -193,6 +194,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/cond_pass.cc \ graph/passes/cond_remove_pass.cc \ graph/passes/remove_same_const_pass.cc \ + graph/passes/no_data_out_const_elimination_pass.cc \ graph/passes/useless_control_out_remove_pass.cc \ graph/passes/for_pass.cc \ graph/passes/enter_pass.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 5a99dc8c..9dcac211 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -127,7 +127,9 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/compile_nodes_pass.cc \ graph/passes/constant_folding_pass.cc \ graph/passes/constant_fuse_same_pass.cc \ + graph/passes/fuse_data_nodes_with_common_input_pass.cc \ graph/passes/remove_same_const_pass.cc \ + graph/passes/no_data_out_const_elimination_pass.cc \ graph/passes/useless_control_out_remove_pass.cc \ graph/passes/control_trigger_pass.cc \ graph/passes/dimension_adjust_pass.cc \ diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h 
b/ge/graph/load/new_model_manager/zero_copy_offset.h index 8ead742d..66fcd887 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -65,7 +65,7 @@ class ZeroCopyOffset { // data_size of Data/Netoutput int64_t GetDataSize() const { return data_size_; } // value of *outside_addrs_ from davinci_model - std::vector>> &GetOutsideAddrs() { return outside_addrs_; } + const std::vector>> &GetOutsideAddrs() { return outside_addrs_; } // name of op std::string GetOpName() const { return op_name_; } diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index aec811e4..ae516a8f 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -53,6 +53,7 @@ #include "graph/passes/dimension_adjust_pass.h" #include "graph/passes/dimension_compute_pass.h" #include "graph/passes/flow_ctrl_pass.h" +#include "graph/passes/fuse_data_nodes_with_common_input_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/input_output_connection_identify_pass.h" #include "graph/passes/iterator_op_pass.h" @@ -70,6 +71,7 @@ #include "graph/passes/remove_same_const_pass.h" #include "graph/passes/reshape_recovery_pass.h" #include "graph/passes/reshape_remove_pass.h" +#include "graph/passes/no_data_out_const_elimination_pass.h" #include "graph/passes/same_transdata_breadth_fusion_pass.h" #include "graph/passes/subgraph_pass.h" #include "graph/passes/switch_data_edges_bypass.h" @@ -2104,6 +2106,24 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); GE_CHK_STATUS_RET( after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); + /* + * Do CSE before FuseDataNodesWithCommonInputPass to resolve the scene in bertlarge as following: + * const + * / | \ + * cast1 cast2 cast3 + * \ | 
/ + * case + * the node `const` is the fused const node after ConstantFuseSamePass + * the nodes `cast1`, `cast2` and 'cast3' will be fused by CSE. + * in order to eliminate hard code in FuseDataNodesWithCommonInputPass, + * we do CSE before FuseDataNodesWithCommonInputPass + * But it is a temp solution, this CSE will be deleted after change pass from graph pass to node pass + */ + GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CSEBeforeFuseDataNodesWithCommonInputPass", + new (std::nothrow) CommonSubexpressionEliminationPass)); + // FuseDataNodesWithCommonInputPass: fuse same data with common input in same graph + GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::FuseDataNodesWithCommonInputPass", + new (std::nothrow) FuseDataNodesWithCommonInputPass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", new (std::nothrow) CommonSubexpressionEliminationPass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::PermutePass", new (std::nothrow) PermutePass)) @@ -2226,12 +2246,14 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GELOGE(ret, "Run passes when OptimizeStage1_3 failed, ret:%u.", ret); return ret; } - NamesToPass identity_remove_pass; - GE_TIMESTAMP_START(identity_remove_pass); + NamesToPass node_pass; + GE_TIMESTAMP_START(node_pass); IdentityPass identity_force_pass(false); // after SwitchToStreamSwitchPass - identity_remove_pass.emplace_back("IdentityPass", &identity_force_pass); - ret = GEPass(compute_graph).Run(identity_remove_pass); - GE_TIMESTAMP_END(identity_remove_pass, "GraphPrepare::IdentityRemovePass"); + NoDataOutConstEliminationPass no_data_out_const_elimination_pass; + node_pass.emplace_back("IdentityPass", &identity_force_pass); + node_pass.emplace_back("NoDataOutConstEliminationPass", &no_data_out_const_elimination_pass); + ret = GEPass(compute_graph).Run(node_pass); + GE_TIMESTAMP_END(node_pass, 
"GraphPrepare::node_pass"); if (ret != SUCCESS) { GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret); return ret; diff --git a/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc new file mode 100644 index 00000000..ab8fc39b --- /dev/null +++ b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc @@ -0,0 +1,119 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/passes/fuse_data_nodes_with_common_input_pass.h" + +#include +#include +#include +#include +#include +#include "common/ge_inner_error_codes.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/type_utils.h" +#include "graph/utils/node_utils.h" + +using std::map; +using std::vector; +using std::set; +using std::string; + +namespace ge { +Status FuseDataNodesWithCommonInputPass::Run(ge::ComputeGraphPtr graph) { + if (graph == nullptr) { + GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); + return GE_GRAPH_PARAM_NULLPTR; + } + GELOGD("FuseDataNodesWithCommonInputPass in."); + // key: subgraph, value:--key: peer out anchor to parent node, --value: parent indexes to parent node + map>> subgraphs_to_need_fuse_nodes_info; + if (InitNeedFuseNodesInfo(graph, subgraphs_to_need_fuse_nodes_info) != SUCCESS) { + GELOGE(FAILED, "InitNeedFuseNodesInfo failed."); + return FAILED; + } + return FuseDataNodes(subgraphs_to_need_fuse_nodes_info); +} + +Status FuseDataNodesWithCommonInputPass::InitNeedFuseNodesInfo(ComputeGraphPtr &graph, + map>> &subgraphs_to_need_fuse_nodes_info) { + for (const auto &subgraph : graph->GetAllSubgraphs()) { + GE_CHECK_NOTNULL(subgraph); + auto parent_node = subgraph->GetParentNode(); + GE_CHECK_NOTNULL(parent_node); + if (parent_node->GetType() == CASE || parent_node->GetType() == IF) { + auto &peer_out_anchors_to_parent_indexes = subgraphs_to_need_fuse_nodes_info[subgraph]; + for (const auto &in_data_anchor : parent_node->GetAllInDataAnchors()) { + GE_CHECK_NOTNULL(in_data_anchor); + OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + uint32_t parent_index = static_cast(in_data_anchor->GetIdx()); + GE_CHECK_NOTNULL(peer_out_anchor); + peer_out_anchors_to_parent_indexes[peer_out_anchor].insert(parent_index); + GELOGD("Peer node %s is the %d input of parent node %s in %s.", + peer_out_anchor->GetOwnerNode()->GetName().c_str(), parent_index, parent_node->GetName().c_str(), + 
subgraph->GetName().c_str()); + } + } + } + return SUCCESS; +} + +Status FuseDataNodesWithCommonInputPass::FuseDataNodes( + const map>> &subgraphs_to_need_fuse_nodes_info) { + for (const auto &subgraph_to_need_fuse_nodes_info : subgraphs_to_need_fuse_nodes_info) { + auto subgraph = subgraph_to_need_fuse_nodes_info.first; + for (const auto &peer_out_anchors_to_parent_indexes : subgraph_to_need_fuse_nodes_info.second) { + if (peer_out_anchors_to_parent_indexes.second.size() <= 1) { + continue; + } + // key: out anchor, value: data nodes with common input will be fused + map> peer_out_anchors_to_need_fuse_nodes; + for (const auto &node : subgraph->GetDirectNode()) { + if (node->GetType() != DATA) { + continue; + } + GE_CHECK_NOTNULL(node->GetOpDesc()); + uint32_t parent_index = 0; + if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + if (peer_out_anchors_to_parent_indexes.second.count(parent_index) > 0) { + peer_out_anchors_to_need_fuse_nodes[peer_out_anchors_to_parent_indexes.first].emplace_back(node); + } + } + } + for (const auto &peer_out_anchor_to_need_fuse_nodes : peer_out_anchors_to_need_fuse_nodes) { + auto need_fuse_data_nodes = peer_out_anchor_to_need_fuse_nodes.second; + auto first_node = need_fuse_data_nodes.at(0); + for (size_t i = 1; i < need_fuse_data_nodes.size(); ++i) { + auto node = need_fuse_data_nodes.at(i); + GELOGI("Replace redundant data node %s by %s exist in graph: %s.", node->GetName().c_str(), + first_node->GetName().c_str(), subgraph->GetName().c_str()); + // the data node which can be fused has none input(both data and control in) + if (GraphUtils::MoveOutCtrlEdges(node, first_node) != SUCCESS) { + return FAILED; + } + if (GraphUtils::ReplaceNodeDataAnchors(first_node, node, {}, {0}) != SUCCESS) { + return FAILED; + } + if (GraphUtils::RemoveNodeWithoutRelink(subgraph, node) != SUCCESS) { + GELOGE(FAILED, "[%s] RemoveNodeWithoutRelink failed.", node->GetName().c_str()); + return FAILED; + } + } + } + } 
+ } + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/passes/fuse_data_nodes_with_common_input_pass.h b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.h new file mode 100755 index 00000000..9ff6ab89 --- /dev/null +++ b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.h @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ +#define GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ + +#include +#include +#include +#include "graph/types.h" +#include "inc/graph_pass.h" + +namespace ge { +class FuseDataNodesWithCommonInputPass : public GraphPass { + public: + Status Run(ge::ComputeGraphPtr graph) override; + + private: + Status InitNeedFuseNodesInfo(ComputeGraphPtr &graph, + map>> &subgraphs_to_need_fuse_nodes_info); + Status FuseDataNodes( + const map>> &subgraphs_to_need_fuse_nodes_info); +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ diff --git a/ge/graph/passes/no_data_out_const_elimination_pass.cc b/ge/graph/passes/no_data_out_const_elimination_pass.cc new file mode 100644 index 00000000..c55148bd --- /dev/null +++ b/ge/graph/passes/no_data_out_const_elimination_pass.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/no_data_out_const_elimination_pass.h" + +namespace ge { +Status NoDataOutConstEliminationPass::Run(NodePtr &node) { + GE_CHECK_NOTNULL(node); + GELOGD("RemoveConstWithoutDataPass running of %s.", node->GetName().c_str()); + if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) { + GE_CHECK_NOTNULL(node->GetOpDesc()); + // delete const which has no input and no output of data + if (node->GetOpDesc()->GetInputsSize() == 0 && node->GetOutDataNodes().size() == 0) { + GELOGI("Remove const %s.", node->GetName().c_str()); + if (IsolateAndDeleteNode(node, {}) != SUCCESS) { + GELOGE(FAILED, "IsolateAndDeleteNode %s failed.", node->GetName().c_str()); + return FAILED; + } + } + } + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/passes/no_data_out_const_elimination_pass.h b/ge/graph/passes/no_data_out_const_elimination_pass.h new file mode 100644 index 00000000..112c4867 --- /dev/null +++ b/ge/graph/passes/no_data_out_const_elimination_pass.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_REMOVE_CONST_WITHOUT_DATA_PASS_H_ +#define GE_GRAPH_PASSES_REMOVE_CONST_WITHOUT_DATA_PASS_H_ + +#include "graph/passes/base_pass.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" + +namespace ge { +class NoDataOutConstEliminationPass : public BaseNodePass { + public: + Status Run(ge::NodePtr &node) override; +}; +} // namespace ge + +#endif // GE_GRAPH_PASSES_REMOVE_CONST_WITHOUT_DATA_PASS_H_ diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 2ebe9fc9..0d4f6a66 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -178,6 +178,8 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/net_output_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/replace_transshape_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/constant_fuse_same_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/no_data_out_const_elimination_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/print_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/no_use_reshape_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/iterator_op_pass.cc" @@ -616,6 +618,8 @@ set(PASS_TEST_FILES "graph/passes/trans_op_depth_fusion_pass_unittest.cc" "graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc" "graph/passes/constant_folding_pass_unittest.cc" + "graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc" + "graph/passes/no_data_out_const_elimination_pass_unittest.cc" "graph/passes/stop_gradient_pass_unittest.cc" "graph/passes/prevent_gradient_pass_unittest.cc" "graph/passes/identity_pass_unittest.cc" diff --git a/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc new file mode 100644 index 00000000..1660b3c6 --- /dev/null +++ 
b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc @@ -0,0 +1,156 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/fuse_data_nodes_with_common_input_pass.h" + +#include +#include +#include +#include + +#include "inc/pass_manager.h" +#include "common/ge_inner_error_codes.h" +#include "graph_builder_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/type_utils.h" +#include "graph/utils/node_utils.h" + +namespace ge { + +class UtestFuseDataNodesWithCommonInputPass : public testing::Test { +protected: + void SetUp() {} + void TearDown() {} + +public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } +}; + +/// graph with subgraph +/// const +/// | | | +/// case +/// | +/// netoutput +/// ... 
+/// data0 data1 data2 +/// | \ / +/// conv add +TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { + PassManager pass_manager; + pass_manager.AddPass("FuseDataNodesWithCommonInputPass", new (std::nothrow) FuseDataNodesWithCommonInputPass); + ComputeGraphPtr parent_graph = std::make_shared("parent_graph"); + auto parent_const = MakeNode(parent_graph, 0, 1, "parent_const", "Const"); + auto parent_case = MakeNode(parent_graph, 3, 1, "parent_case", "Case"); + auto parent_output = MakeNode(parent_graph, 1, 0, "parent_output", "NetOutput"); + + GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + + parent_const->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(1, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(2, tensor_desc); + parent_case->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_case->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_case->GetInDataAnchor(1)); + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_case->GetInDataAnchor(2)); + GraphUtils::AddEdge(parent_case->GetOutDataAnchor(0), parent_output->GetInDataAnchor(0)); + + ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); + auto data0 = MakeNode(parent_graph, 1, 1, "data0", "Data"); + data0->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data0->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + auto data1 = MakeNode(parent_graph, 1, 1, "data1", "Data"); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + auto data2 = MakeNode(parent_graph, 1, 1, "data2", "Data"); + data2->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data2->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + (void)AttrUtils::SetInt(data0->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 0); + 
(void)AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1); + (void)AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 2); + + sub_graph->SetParentNode(parent_case); + sub_graph->SetParentGraph(parent_graph); + EXPECT_EQ(pass_manager.Run(sub_graph), SUCCESS); +} + +/// graph with subgraph +/// const +/// / \ +/// cast1 cast2 +/// \ / +/// case +/// | +/// netoutput +/// ... +/// data1 data2 +/// \ / +/// add +TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph2) { + PassManager pass_manager; + pass_manager.AddPass("FuseDataNodesWithCommonInputPass", new (std::nothrow) FuseDataNodesWithCommonInputPass); + ComputeGraphPtr parent_graph = std::make_shared("parent_graph"); + auto parent_const = MakeNode(parent_graph, 0, 1, "parent_const", "Const"); + auto parent_cast1 = MakeNode(parent_graph, 1, 1, "parent_cast1", "Cast"); + auto parent_cast2 = MakeNode(parent_graph, 1, 1, "parent_cast2", "Cast"); + auto parent_case = MakeNode(parent_graph, 2, 1, "parent_case", "Case"); + auto parent_output = MakeNode(parent_graph, 1, 0, "parent_output", "NetOutput"); + + GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + + parent_const->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + parent_cast1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + parent_cast1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + parent_cast2->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + parent_cast2->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(1, tensor_desc); + parent_case->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_cast1->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_cast1->GetOutDataAnchor(0), parent_case->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_cast2->GetInDataAnchor(0)); + 
GraphUtils::AddEdge(parent_cast2->GetOutDataAnchor(0), parent_case->GetInDataAnchor(1)); + GraphUtils::AddEdge(parent_case->GetOutDataAnchor(0), parent_output->GetInDataAnchor(0)); + + ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); + auto data0 = MakeNode(parent_graph, 1, 1, "data0", "Data"); + data0->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data0->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + auto data1 = MakeNode(parent_graph, 1, 1, "data1", "Data"); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + (void)AttrUtils::SetInt(data0->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 0); + (void)AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1); + + sub_graph->SetParentNode(parent_case); + sub_graph->SetParentGraph(parent_graph); + EXPECT_EQ(pass_manager.Run(sub_graph), SUCCESS); +} +} // namespace ge diff --git a/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc b/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc new file mode 100644 index 00000000..c102f5c2 --- /dev/null +++ b/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/passes/no_data_out_const_elimination_pass.h" + +#include +#include +#include +#include + +#include "common/ge_inner_error_codes.h" +#include "graph/utils/graph_utils.h" + +namespace ge { + +class UtestNoDataOutConstEliminationPass : public testing::Test { +protected: + void SetUp() {} + void TearDown() {} + +public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } +}; + +/// graph with subgraph +/// const1 +/// |(control) +/// const2 +/// | +/// output +TEST_F(UtestNoDataOutConstEliminationPass, succ_graph1) { + ComputeGraphPtr graph = std::make_shared("test"); + auto const_node1 = MakeNode(graph, 0, 1, "const_node1", "Const"); + auto const_node2 = MakeNode(graph, 1, 1, "const_node2", "Const"); + auto output_node = MakeNode(graph, 1, 0, "output_node", "NetOutput"); + GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + + const_node1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + const_node2->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + const_node2->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + output_node->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + + GraphUtils::AddEdge(const_node1->GetOutControlAnchor(), const_node2->GetInControlAnchor()); + GraphUtils::AddEdge(const_node2->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + + GEPass pass(graph); + NamesToPass node_pass; + NoDataOutConstEliminationPass no_data_out_const_elimination_pass; + node_pass.emplace_back("NoDataOutConstEliminationPass", &no_data_out_const_elimination_pass); + EXPECT_EQ(pass.Run(node_pass), SUCCESS); +} +} // namespace ge From 8ad9ea921a355ed07b013cd1763a530b33426720 
Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Wed, 13 Jan 2021 11:17:09 +0800 Subject: [PATCH 13/41] add check of ut --- ...data_nodes_with_common_input_pass_unittest.cc | 16 ++++++++++++++-- ...o_data_out_const_elimination_pass_unittest.cc | 7 +++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc index 1660b3c6..aa69f6a3 100644 --- a/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc @@ -58,8 +58,6 @@ public: /// netoutput /// ... /// data0 data1 data2 -/// | \ / -/// conv add TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { PassManager pass_manager; pass_manager.AddPass("FuseDataNodesWithCommonInputPass", new (std::nothrow) FuseDataNodesWithCommonInputPass); @@ -81,6 +79,11 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_case->GetInDataAnchor(2)); GraphUtils::AddEdge(parent_case->GetOutDataAnchor(0), parent_output->GetInDataAnchor(0)); + auto case_node = parent_graph->FindNode("parent_case"); + EXPECT_NE(case_node, nullptr); + size_t input_data_node_num = case_node->GetInDataNodes().size(); + EXPECT_EQ(input_data_node_num, 3); + ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); auto data0 = MakeNode(parent_graph, 1, 1, "data0", "Data"); data0->GetOpDesc()->UpdateInputDesc(0, tensor_desc); @@ -98,6 +101,12 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { sub_graph->SetParentNode(parent_case); sub_graph->SetParentGraph(parent_graph); EXPECT_EQ(pass_manager.Run(sub_graph), SUCCESS); + // after pass, data1 and data2 are fused to data0 + auto data1_node = sub_graph->FindNode("data1"); + EXPECT_EQ(data1_node, nullptr); + auto data2_node = 
sub_graph->FindNode("data2"); + EXPECT_EQ(data2_node, nullptr); + } /// graph with subgraph @@ -152,5 +161,8 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph2) { sub_graph->SetParentNode(parent_case); sub_graph->SetParentGraph(parent_graph); EXPECT_EQ(pass_manager.Run(sub_graph), SUCCESS); + // after pass, data1 is fused to data0 + auto data1_node = sub_graph->FindNode("data1"); + EXPECT_EQ(data1_node, nullptr); } } // namespace ge diff --git a/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc b/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc index c102f5c2..2fa80e2f 100644 --- a/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc @@ -70,6 +70,13 @@ TEST_F(UtestNoDataOutConstEliminationPass, succ_graph1) { NamesToPass node_pass; NoDataOutConstEliminationPass no_data_out_const_elimination_pass; node_pass.emplace_back("NoDataOutConstEliminationPass", &no_data_out_const_elimination_pass); + auto const1 = graph->FindNode("const_node1"); + EXPECT_NE(const1, nullptr); + EXPECT_TRUE(const1->GetInDataNodes().empty()); + EXPECT_TRUE(const1->GetOutDataNodes().empty()); EXPECT_EQ(pass.Run(node_pass), SUCCESS); + // after pass, const1 will be delete + const1 = graph->FindNode("const_node1"); + EXPECT_EQ(const1, nullptr); } } // namespace ge From 0db227b67f74685261223478be8a486cfc77c5ab Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Wed, 13 Jan 2021 15:03:20 +0800 Subject: [PATCH 14/41] add check of ut --- ...a_nodes_with_common_input_pass_unittest.cc | 48 ++++++++++++------- .../passes/multi_batch_clone_pass_unittest.cc | 3 ++ 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc index aa69f6a3..8c3469c8 100644 --- 
a/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc @@ -85,13 +85,13 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { EXPECT_EQ(input_data_node_num, 3); ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); - auto data0 = MakeNode(parent_graph, 1, 1, "data0", "Data"); + auto data0 = MakeNode(sub_graph, 1, 1, "data0", "Data"); data0->GetOpDesc()->UpdateInputDesc(0, tensor_desc); data0->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); - auto data1 = MakeNode(parent_graph, 1, 1, "data1", "Data"); + auto data1 = MakeNode(sub_graph, 1, 1, "data1", "Data"); data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); - auto data2 = MakeNode(parent_graph, 1, 1, "data2", "Data"); + auto data2 = MakeNode(sub_graph, 1, 1, "data2", "Data"); data2->GetOpDesc()->UpdateInputDesc(0, tensor_desc); data2->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); (void)AttrUtils::SetInt(data0->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 0); @@ -100,19 +100,28 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { sub_graph->SetParentNode(parent_case); sub_graph->SetParentGraph(parent_graph); - EXPECT_EQ(pass_manager.Run(sub_graph), SUCCESS); - // after pass, data1 and data2 are fused to data0 + parent_graph->AddSubgraph(sub_graph->GetName(), sub_graph); + size_t sub_graph_num = parent_graph->GetAllSubgraphs().size(); + EXPECT_EQ(sub_graph_num, 1); + auto data1_node = sub_graph->FindNode("data1"); - EXPECT_EQ(data1_node, nullptr); + EXPECT_NE(data1_node, nullptr); auto data2_node = sub_graph->FindNode("data2"); - EXPECT_EQ(data2_node, nullptr); + EXPECT_NE(data2_node, nullptr); + EXPECT_EQ(pass_manager.Run(parent_graph), SUCCESS); + + // after pass, data1 and data2 are fused to data0 + data1_node = sub_graph->FindNode("data1"); + EXPECT_EQ(data1_node, nullptr); + data2_node = 
sub_graph->FindNode("data2"); + EXPECT_EQ(data2_node, nullptr); } /// graph with subgraph /// const /// / \ -/// cast1 cast2 +/// cast1 cast1 /// \ / /// case /// | @@ -127,7 +136,6 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph2) { ComputeGraphPtr parent_graph = std::make_shared("parent_graph"); auto parent_const = MakeNode(parent_graph, 0, 1, "parent_const", "Const"); auto parent_cast1 = MakeNode(parent_graph, 1, 1, "parent_cast1", "Cast"); - auto parent_cast2 = MakeNode(parent_graph, 1, 1, "parent_cast2", "Cast"); auto parent_case = MakeNode(parent_graph, 2, 1, "parent_case", "Case"); auto parent_output = MakeNode(parent_graph, 1, 0, "parent_output", "NetOutput"); @@ -136,23 +144,21 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph2) { parent_const->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); parent_cast1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); parent_cast1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); - parent_cast2->GetOpDesc()->UpdateInputDesc(0, tensor_desc); - parent_cast2->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); parent_case->GetOpDesc()->UpdateInputDesc(0, tensor_desc); parent_case->GetOpDesc()->UpdateInputDesc(1, tensor_desc); parent_case->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_cast1->GetInDataAnchor(0)); GraphUtils::AddEdge(parent_cast1->GetOutDataAnchor(0), parent_case->GetInDataAnchor(0)); - GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_cast2->GetInDataAnchor(0)); - GraphUtils::AddEdge(parent_cast2->GetOutDataAnchor(0), parent_case->GetInDataAnchor(1)); + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_cast1->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_cast1->GetOutDataAnchor(0), parent_case->GetInDataAnchor(1)); GraphUtils::AddEdge(parent_case->GetOutDataAnchor(0), parent_output->GetInDataAnchor(0)); ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); - auto data0 = 
MakeNode(parent_graph, 1, 1, "data0", "Data"); + auto data0 = MakeNode(sub_graph, 1, 1, "data0", "Data"); data0->GetOpDesc()->UpdateInputDesc(0, tensor_desc); data0->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); - auto data1 = MakeNode(parent_graph, 1, 1, "data1", "Data"); + auto data1 = MakeNode(sub_graph, 1, 1, "data1", "Data"); data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); (void)AttrUtils::SetInt(data0->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 0); @@ -160,9 +166,17 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph2) { sub_graph->SetParentNode(parent_case); sub_graph->SetParentGraph(parent_graph); - EXPECT_EQ(pass_manager.Run(sub_graph), SUCCESS); - // after pass, data1 is fused to data0 + parent_graph->AddSubgraph(sub_graph->GetName(), sub_graph); + + size_t sub_graph_num = parent_graph->GetAllSubgraphs().size(); + EXPECT_EQ(sub_graph_num, 1); auto data1_node = sub_graph->FindNode("data1"); + EXPECT_NE(data1_node, nullptr); + + EXPECT_EQ(pass_manager.Run(parent_graph), SUCCESS); + + // after pass, data1 is fused to data0 + data1_node = sub_graph->FindNode("data1"); EXPECT_EQ(data1_node, nullptr); } } // namespace ge diff --git a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc index b1cd6d4d..1b75a613 100644 --- a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc @@ -194,6 +194,9 @@ TEST_F(UtestMultiBatchClonePass, graph_with_subgraph) { auto func_node = MakeNode(owner, 3, 1, "test_if", "If"); graph->SetParentNode(func_node); graph->SetParentGraph(owner); + owner->AddSubgraph(graph->GetName(), graph); + size_t sub_graph_num = owner->GetAllSubgraphs().size(); + EXPECT_EQ(sub_graph_num, 1); EXPECT_EQ(pass_manager.Run(graph), SUCCESS); } From 0a719fc4b7727301d1e6f8126fa40a3bb6c2feea Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 13 Jan 
2021 15:27:04 +0800 Subject: [PATCH 15/41] abandon using EXPERIMENTAL_DYNAMIC_PARTITION --- ge/graph/partition/dynamic_shape_partition.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 81295c84..2ec501a8 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -56,6 +56,11 @@ static bool IsContainResourceOp(const ComputeGraphPtr &root_graph) { if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { return false; + } else { + GELOGE(FAILED, "In dynamic shape scene, model contains data type:" + "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " + "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); + break; } } } @@ -64,6 +69,11 @@ static bool IsContainResourceOp(const ComputeGraphPtr &root_graph) { if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { return false; + } else { + GELOGE(FAILED, "In dynamic shape scene, model contains data type:" + "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " + "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); + break; } } } From 4e391fe74180f05d82393e8e689840c04f5921c2 Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 13 Jan 2021 15:34:44 +0800 Subject: [PATCH 16/41] abandon using EXPERIMENTAL_DYNAMIC_PARTITION --- ge/graph/partition/dynamic_shape_partition.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 2ec501a8..71a4b560 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -48,7 +48,7 @@ namespace ge { using Cluster = 
DynamicShapePartitioner::Cluster; using ClusterPtr = std::shared_ptr; -static bool IsContainResourceOp(const ComputeGraphPtr &root_graph) { +static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { for (const auto &node : root_graph->GetAllNodes()) { GE_CHECK_NOTNULL(node->GetOpDesc()); for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) { @@ -83,7 +83,7 @@ static bool IsContainResourceOp(const ComputeGraphPtr &root_graph) { Status DynamicShapePartitioner::Partition() { REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); - if (!IsContainResourceOp(root_graph_)) { + if (!IsInExperimentalMode(root_graph_)) { GELOGD("Skip dynamic shape partition as not in experimental mode."); REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), "Failed set dynamic shape partitioned flag on root graph."); From de4c65bb94880c0e6f1ea0f21f82212fc360e838 Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 13 Jan 2021 16:17:43 +0800 Subject: [PATCH 17/41] abandon using EXPERIMENTAL_DYNAMIC_PARTITION --- ge/graph/partition/dynamic_shape_partition.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 71a4b560..6c81b21f 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -57,7 +57,7 @@ static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { return false; } else { - GELOGE(FAILED, "In dynamic shape scene, model contains data type:" + GEEVENT("In dynamic shape scene, model contains data type:" "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); break; @@ -70,7 +70,7 @@ static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == 
nullptr) { return false; } else { - GELOGE(FAILED, "In dynamic shape scene, model contains data type:" + GEEVENT("In dynamic shape scene, model contains data type:" "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); break; From ebe407e79fd666f8c61cc23f99a7e4b31ac344e1 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Thu, 14 Jan 2021 16:31:26 +0800 Subject: [PATCH 18/41] gensessionid add pid prefix --- .../load/new_model_manager/model_manager.cc | 18 ++++++++++---- tests/depends/mmpa/src/mmpa_stub.cc | 5 ++++ ...el_manager_model_manager_aicpu_unittest.cc | 24 +++++++++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index e73c0a36..edc60e50 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -18,6 +18,7 @@ #include +#include "mmpa/mmpa_api.h" #include "aicpu/aicpu_schedule/aicpu_op_type_list.h" #include "common/dump/dump_manager.h" #include "common/l2_cache_optimize.h" @@ -53,7 +54,6 @@ const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; const char *const kDeleteCustOp = "deleteCustOp"; const int kTimeSpecNano = 1000000000; const int kTimeSpecMiro = 1000000; -const int kSessionMaxBias = 100; const int kOpNameMaxSize = 100; struct CustAicpuSoBuf { uint64_t kernelSoBuf; @@ -1023,6 +1023,12 @@ Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippTyp } Status ModelManager::GenSessionId(uint64_t &session_id) { + const uint64_t kSessionTimeMask = 0xffffffffffff0000; + const uint64_t kSessionPidMask = 0x000000000000ff00; + const uint64_t kSessionBiasMask = 0x00000000000000ff; + + const uint64_t kMaskPerOffset = 8; + std::lock_guard lock(session_id_create_mutex_); mmTimeval tv; @@ -1030,12 +1036,14 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { 
GELOGE(INTERNAL_ERROR, "Failed to get current time."); return INTERNAL_ERROR; } - session_id = static_cast(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us + uint64_t timestamp = static_cast(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us + + static uint32_t pid = mmGetPid(); session_id_bias_++; - // max bais 100. - session_id_bias_ = session_id_bias_ % kSessionMaxBias; - session_id = session_id * kSessionMaxBias + session_id_bias_; + + session_id = ((timestamp< #include +#include +#include #include "common/debug/log.h" #include "common/l2_cache_optimize.h" @@ -75,4 +77,26 @@ TEST_F(UtestModelManagerModelManagerAicpu, DestroyAicpuKernel) { // EXPECT_EQ(ge::FAILED, mm.LoadModelOffline(model_id, data, nullptr, nullptr)); } +// test GenSessionId +TEST_F(UtestModelManagerModelManagerAicpu, gen_session_id) { + ModelManager manager; + uint64_t session_id; + manager.GenSessionId(session_id); + + struct timeval tv; + gettimeofday(&tv, nullptr); + uint64_t timestamp = static_cast(tv.tv_sec * 1000000); + + const uint64_t kSessionTimeMask = 0xfffffff000000000; // 不比us + const uint64_t kSessionPidMask = 0x000000000000ff00; + const uint64_t kSessionBiasMask = 0x00000000000000ff; + + uint32_t pid = getpid(); + + EXPECT_EQ(1, kSessionBiasMask & session_id); + EXPECT_EQ(pid<<8 & kSessionPidMask, kSessionPidMask & session_id); + //EXPECT_EQ(timestamp<<16 & kSessionTimeMask, kSessionTimeMask & session_id); +} + + } // namespace ge From ad5bc1bdcce060e857b1c000bbf63460ca21e3b0 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Fri, 15 Jan 2021 14:00:24 +0800 Subject: [PATCH 19/41] iterator case, control edge move up to switch --- ge/graph/build/stream_allocator.cc | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 63112ea8..88ffda02 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1013,6 +1013,24 @@ bool 
StreamAllocator::IsActivated(int64_t stream_id) const { return false; } +// Iteraotor loop : +// StreamSwitch -> StreamActive +// FpBp loop: +// StreamSwitch -> AssignAdd -> StreamActive +NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { + for (auto pre_node : active_node->GetInControlNodes()) { + if (pre_node->GetType() == STREAMSWITCH) { + return pre_node; + } + for (auto pre_pre_node : pre_node->GetInControlNodes()) { + if (pre_pre_node->GetType() == STREAMSWITCH) { + return pre_pre_node; + } + } + } + return nullptr; +} + Status StreamAllocator::SetActiveStreamsForLoop() { vector loop_active_streams; for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { @@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { bool is_loop_active = false; if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { vector activated_label_list; + + NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); + if (pre_switch_node == nullptr) { + GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str()); + return FAILED; + } + if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || activated_label_list.empty()) { GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), @@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { // it may cause some stream actived by iterator next step when this stream still alive. // If above situation happen, active message will lose, cause process block in next iteration. 
// In order to avoid this abnormal happen, - // add event between each last node and iterator active node in target active stream + // add event between each last node and iterator switch node GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); for (auto iter : stream_id_to_last_node) { if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { @@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { continue; } AddSendEventId(iter.second, event_num_); - AddRecvEventId(node, event_num_); + AddRecvEventId(pre_switch_node, event_num_); event_num_++; } From 7d4f981f92ddd8ae33493697799535e9e7e6b6f8 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Fri, 15 Jan 2021 16:00:12 +0800 Subject: [PATCH 20/41] Fix aclmdlGetOutputNameByIndex --- ge/graph/load/new_model_manager/davinci_model.cc | 9 +++++---- ge/graph/load/new_model_manager/davinci_model.h | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 35844b2d..cf2d9c5f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -722,7 +722,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size /// the aicpu opertor needs to destroy history record, and update operator memory address. /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). 
need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); - (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); string fp_ceiling_mode; if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { @@ -2068,6 +2067,8 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { GELOGD("Output node size: %zu", output_op_list.size()); + vector out_node_name; + (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); for (const auto &op_desc : output_op_list) { uint32_t out_size = static_cast(op_desc->GetInputsSize()); for (uint32_t index = 0; index < out_size; index++) { @@ -2081,11 +2082,11 @@ Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, "construct output_name failed."); // forward compatbility, if old om has no out_node_name, need to return output follow origin way - if (out_size == out_node_name_.size()) { + if (out_size == out_node_name.size()) { // neweast plan, the index will add to name during generate model. - bool contains_colon = out_node_name_[index].find(":") != std::string::npos; + bool contains_colon = out_node_name[index].find(":") != std::string::npos; output_name = - contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); + contains_colon ? 
out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); } else { output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 4108f2c7..e9804dc5 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -883,7 +883,6 @@ class DavinciModel { GeModelPtr ge_model_; // release after DavinciModel::Init bool need_destroy_aicpu_kernel_{false}; - vector out_node_name_; map op_list_; // release after DavinciModel::Init From d75417c9d4b6b9f995d958c23b308d79ef7eb65b Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Fri, 15 Jan 2021 17:00:20 +0800 Subject: [PATCH 21/41] action of remove const data has be done by subgraph_const_migration_pass.cc --- ge/CMakeLists.txt | 2 - ge/ge_inference.mk | 1 - ge/ge_runner.mk | 1 - ge/graph/manager/graph_manager.cc | 3 - .../no_data_out_const_elimination_pass.cc | 36 -------- .../no_data_out_const_elimination_pass.h | 31 ------- tests/ut/ge/CMakeLists.txt | 2 - ...ata_out_const_elimination_pass_unittest.cc | 82 ------------------- 8 files changed, 158 deletions(-) delete mode 100644 ge/graph/passes/no_data_out_const_elimination_pass.cc delete mode 100644 ge/graph/passes/no_data_out_const_elimination_pass.h delete mode 100644 tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 436c30ea..a8eabf05 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -204,7 +204,6 @@ set(TRAIN_SRC_LIST "graph/passes/constant_fuse_same_pass.cc" "graph/passes/fuse_data_nodes_with_common_input_pass.cc" "graph/passes/remove_same_const_pass.cc" - "graph/passes/no_data_out_const_elimination_pass.cc" "graph/passes/useless_control_out_remove_pass.cc" "graph/passes/control_trigger_pass.cc" 
"graph/passes/dimension_adjust_pass.cc" @@ -583,7 +582,6 @@ set(INFER_SRC_LIST "graph/passes/addn_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc" "graph/passes/remove_same_const_pass.cc" - "graph/passes/no_data_out_const_elimination_pass.cc" "graph/passes/useless_control_out_remove_pass.cc" "graph/passes/transop_symmetry_elimination_pass.cc" "graph/passes/save_pass.cc" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 1830e847..6f9e60db 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -194,7 +194,6 @@ OMG_HOST_SRC_FILES := \ graph/passes/cond_pass.cc \ graph/passes/cond_remove_pass.cc \ graph/passes/remove_same_const_pass.cc \ - graph/passes/no_data_out_const_elimination_pass.cc \ graph/passes/useless_control_out_remove_pass.cc \ graph/passes/for_pass.cc \ graph/passes/enter_pass.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 9dcac211..460d5068 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -129,7 +129,6 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/constant_fuse_same_pass.cc \ graph/passes/fuse_data_nodes_with_common_input_pass.cc \ graph/passes/remove_same_const_pass.cc \ - graph/passes/no_data_out_const_elimination_pass.cc \ graph/passes/useless_control_out_remove_pass.cc \ graph/passes/control_trigger_pass.cc \ graph/passes/dimension_adjust_pass.cc \ diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 322ceecc..b0d412dc 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -71,7 +71,6 @@ #include "graph/passes/remove_same_const_pass.h" #include "graph/passes/reshape_recovery_pass.h" #include "graph/passes/reshape_remove_pass.h" -#include "graph/passes/no_data_out_const_elimination_pass.h" #include "graph/passes/same_transdata_breadth_fusion_pass.h" #include "graph/passes/subgraph_pass.h" #include "graph/passes/switch_data_edges_bypass.h" @@ -2249,9 +2248,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { 
NamesToPass node_pass; GE_TIMESTAMP_START(node_pass); IdentityPass identity_force_pass(false); // after SwitchToStreamSwitchPass - NoDataOutConstEliminationPass no_data_out_const_elimination_pass; node_pass.emplace_back("IdentityPass", &identity_force_pass); - node_pass.emplace_back("NoDataOutConstEliminationPass", &no_data_out_const_elimination_pass); ret = GEPass(compute_graph).Run(node_pass); GE_TIMESTAMP_END(node_pass, "GraphPrepare::node_pass"); if (ret != SUCCESS) { diff --git a/ge/graph/passes/no_data_out_const_elimination_pass.cc b/ge/graph/passes/no_data_out_const_elimination_pass.cc deleted file mode 100644 index c55148bd..00000000 --- a/ge/graph/passes/no_data_out_const_elimination_pass.cc +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/no_data_out_const_elimination_pass.h" - -namespace ge { -Status NoDataOutConstEliminationPass::Run(NodePtr &node) { - GE_CHECK_NOTNULL(node); - GELOGD("RemoveConstWithoutDataPass running of %s.", node->GetName().c_str()); - if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) { - GE_CHECK_NOTNULL(node->GetOpDesc()); - // delete const which has no input and no output of data - if (node->GetOpDesc()->GetInputsSize() == 0 && node->GetOutDataNodes().size() == 0) { - GELOGI("Remove const %s.", node->GetName().c_str()); - if (IsolateAndDeleteNode(node, {}) != SUCCESS) { - GELOGE(FAILED, "IsolateAndDeleteNode %s failed.", node->GetName().c_str()); - return FAILED; - } - } - } - return SUCCESS; -} -} // namespace ge diff --git a/ge/graph/passes/no_data_out_const_elimination_pass.h b/ge/graph/passes/no_data_out_const_elimination_pass.h deleted file mode 100644 index 112c4867..00000000 --- a/ge/graph/passes/no_data_out_const_elimination_pass.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_PASSES_REMOVE_CONST_WITHOUT_DATA_PASS_H_ -#define GE_GRAPH_PASSES_REMOVE_CONST_WITHOUT_DATA_PASS_H_ - -#include "graph/passes/base_pass.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/util.h" - -namespace ge { -class NoDataOutConstEliminationPass : public BaseNodePass { - public: - Status Run(ge::NodePtr &node) override; -}; -} // namespace ge - -#endif // GE_GRAPH_PASSES_REMOVE_CONST_WITHOUT_DATA_PASS_H_ diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 0d4f6a66..91a6620d 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -179,7 +179,6 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/replace_transshape_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/constant_fuse_same_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/no_data_out_const_elimination_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/print_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/no_use_reshape_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/iterator_op_pass.cc" @@ -619,7 +618,6 @@ set(PASS_TEST_FILES "graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc" "graph/passes/constant_folding_pass_unittest.cc" "graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc" - "graph/passes/no_data_out_const_elimination_pass_unittest.cc" "graph/passes/stop_gradient_pass_unittest.cc" "graph/passes/prevent_gradient_pass_unittest.cc" "graph/passes/identity_pass_unittest.cc" diff --git a/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc b/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc deleted file mode 100644 index 2fa80e2f..00000000 --- a/tests/ut/ge/graph/passes/no_data_out_const_elimination_pass_unittest.cc +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file 
except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/no_data_out_const_elimination_pass.h" - -#include -#include -#include -#include - -#include "common/ge_inner_error_codes.h" -#include "graph/utils/graph_utils.h" - -namespace ge { - -class UtestNoDataOutConstEliminationPass : public testing::Test { -protected: - void SetUp() {} - void TearDown() {} - -public: - NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { - GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); - auto op_desc = std::make_shared(name, type); - for (auto i = 0; i < in_num; ++i) { - op_desc->AddInputDesc(test_desc); - } - for (auto i = 0; i < out_num; ++i) { - op_desc->AddOutputDesc(test_desc); - } - return graph->AddNode(op_desc); - } -}; - -/// graph with subgraph -/// const1 -/// |(control) -/// const2 -/// | -/// output -TEST_F(UtestNoDataOutConstEliminationPass, succ_graph1) { - ComputeGraphPtr graph = std::make_shared("test"); - auto const_node1 = MakeNode(graph, 0, 1, "const_node1", "Const"); - auto const_node2 = MakeNode(graph, 1, 1, "const_node2", "Const"); - auto output_node = MakeNode(graph, 1, 0, "output_node", "NetOutput"); - GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); - - const_node1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); - const_node2->GetOpDesc()->UpdateInputDesc(0, tensor_desc); - const_node2->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); - output_node->GetOpDesc()->UpdateInputDesc(0, tensor_desc); - - 
GraphUtils::AddEdge(const_node1->GetOutControlAnchor(), const_node2->GetInControlAnchor()); - GraphUtils::AddEdge(const_node2->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); - - GEPass pass(graph); - NamesToPass node_pass; - NoDataOutConstEliminationPass no_data_out_const_elimination_pass; - node_pass.emplace_back("NoDataOutConstEliminationPass", &no_data_out_const_elimination_pass); - auto const1 = graph->FindNode("const_node1"); - EXPECT_NE(const1, nullptr); - EXPECT_TRUE(const1->GetInDataNodes().empty()); - EXPECT_TRUE(const1->GetOutDataNodes().empty()); - EXPECT_EQ(pass.Run(node_pass), SUCCESS); - // after pass, const1 will be delete - const1 = graph->FindNode("const_node1"); - EXPECT_EQ(const1, nullptr); -} -} // namespace ge From 14732acd6f17294ceae50de750792c62a1aaa143 Mon Sep 17 00:00:00 2001 From: TangQunzhang Date: Mon, 11 Jan 2021 11:01:41 +0800 Subject: [PATCH 22/41] Continuous memory optimization, code refactoring --- .../build/memory/binary_block_mem_assigner.cc | 4 +- ge/graph/build/memory/block_mem_assigner.cc | 264 ++++-- ge/graph/build/memory/block_mem_assigner.h | 54 +- ge/graph/build/memory/graph_mem_assigner.cc | 855 ++++++------------ ge/graph/build/memory/graph_mem_assigner.h | 24 +- .../load/new_model_manager/davinci_model.cc | 6 +- metadef | 2 +- parser | 2 +- 8 files changed, 503 insertions(+), 708 deletions(-) diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index fff589f3..97a0aed6 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGW("Vector all_memory_size is empty!"); return SUCCESS; } - if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) { - GELOGE(FAILED, "dividend is 0!"); + if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { + GELOGE(FAILED, "Memory size:%ld is invalid.", 
all_memory_size.front()); return FAILED; } // Memory size is 512 aligned, so it is not necessary to take less than 512 diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 76e7efbe..21d6a49e 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -65,10 +65,7 @@ void AlignMemOffset(size_t &mem_align_size) { } static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { - auto left_node_op_desc = left.node->GetOpDesc(); - auto right_node_op_desc = right.node->GetOpDesc(); - if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) - && (left_node_op_desc->GetId() < right_node_op_desc->GetId())) { + if (left.GetLifeBegin() < right.GetLifeBegin()) { return true; } return false; @@ -100,14 +97,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { auto left_node_op_desc = left.node->GetOpDesc(); auto right_node_op_desc = right.node->GetOpDesc(); if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { - if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { - if (left.life_time_end >= static_cast(right_node_op_desc->GetId())) { + if (left.GetLifeBegin() < right.GetLifeBegin()) { + if (left.life_time_end >= right.GetLifeBegin()) { return true; } - } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { + } else if (left.GetLifeBegin() == right.GetLifeBegin()) { return true; } else { - if (right.life_time_end >= static_cast(left_node_op_desc->GetId())) { + if (right.life_time_end >= left.GetLifeBegin()) { return true; } } @@ -325,12 +322,7 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ size_t MemoryBlock::GetLifeBegin() { size_t life_time = 0; if (!node_type_index_list_.empty()) { - if (node_type_index_list_.front().node != nullptr) { - auto node_op_desc = node_type_index_list_.front().node->GetOpDesc(); - if (node_op_desc 
!= nullptr) { - life_time = node_op_desc->GetId(); - } - } + life_time = node_type_index_list_.front().GetLifeBegin(); } return life_time; } @@ -417,7 +409,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ depend_stream_life_[stream_id_] = GetLifeBegin(); } -size_t MemoryBlock::GetLifeEnd() { +size_t MemoryBlock::GetLifeEnd() const { if (!node_type_index_list_.empty()) { return node_type_index_list_.back().life_time_end; } @@ -571,32 +563,29 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { for (auto &out_anchor : n->GetAllOutDataAnchors()) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); - bool reuse_input = false; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS, - GELOGI("Get reuse_input failed")); - - if (!reuse_input) { - int64_t size = 0; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); - batch_all_memory_size[batch_label].emplace_back(size); - if (batch_total_size.find(batch_label) == batch_total_size.end()) { - batch_total_size[batch_label] = size; - } else { - batch_total_size[batch_label] += size; - } + int64_t size = 0; + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); + GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", + node_op_desc->GetName().c_str(), size); + return;); + batch_all_memory_size[batch_label].emplace_back(size); + if (batch_total_size.find(batch_label) == batch_total_size.end()) { + batch_total_size[batch_label] = size; + } else { + batch_total_size[batch_label] += size; + } - if (!anchor_to_symbol_.empty()) { - auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); - if (iter1 == anchor_to_symbol_.end()) { - continue; - } - const std::string &symbol = iter1->second; - auto iter2 = symbol_size_.find(symbol); - if 
(iter2 == symbol_size_.end()) { - symbol_size_[symbol] = size; - } else if (size > static_cast(iter2->second)) { - iter2->second = size; - } + if (!anchor_to_symbol_.empty()) { + auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); + if (iter1 == anchor_to_symbol_.end()) { + continue; + } + const std::string &symbol = iter1->second; + auto iter2 = symbol_size_.find(symbol); + if (iter2 == symbol_size_.end()) { + symbol_size_[symbol] = size; + } else if (size > static_cast(iter2->second)) { + iter2->second = size; } } } @@ -637,35 +626,17 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { return false; } -void AddReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { - string key = std::to_string(mem_block.Size()); - key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); - auto it = reusable_block_counts.find(key); - if (it != reusable_block_counts.end()) { - it->second++; - } else { - reusable_block_counts[key] = 1; - } -} - -void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { - string key = std::to_string(mem_block.Size()); - key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); - auto it = reusable_block_counts.find(key); - if (it != reusable_block_counts.end()) { - if (it->second > 0) { - it->second--; - } - } -} - -bool CanReuseBySize(const map &reusable_block_counts, const MemoryBlock &reusable_block, - size_t block_size, size_t real_size, bool continuous) { +bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { bool can_reuse = false; if (reusable_block.Size() == block_size) { - can_reuse = true; + // in some continuous input case, continuous first input node's is not same as topo first node. 
+ if (continuous_life_begin > 0) { + if (continuous_life_begin > reusable_block.GetLifeEnd()) { + can_reuse = true; + } + } else { + can_reuse = true; + } } return can_reuse; } @@ -676,6 +647,13 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } + auto node_desc = n->GetOpDesc(); + GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); + return false;); + std::vector offsets_for_fusion = {}; + bool has_lx_fusion_attr = + AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); + if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { auto out_anchor = n->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_anchor == nullptr, @@ -698,16 +676,17 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false;); // If GetBool fail, is_input_continuous is false. 
- bool is_input_continuous_no_padding = false; - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, - is_input_continuous_no_padding); - if (is_input_continuous_no_padding) { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous) { reset_zero_copy_flag = true; - return false; + has_lx_fusion_attr = true; + } else { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); } - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), + // lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later + GE_IF_BOOL_EXEC(is_input_continuous && + (CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))), GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); no_need_assign_memory = true; return false;); @@ -721,6 +700,10 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // Only set attr one times. 
if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + // lx fusion case assign max size for first block, so reuse as none continuous + GE_IF_BOOL_EXEC(has_lx_fusion_attr, + is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index); + return false;); node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); } peer_input_index = peer_in_anchor->GetIdx(); @@ -733,6 +716,95 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false; } +bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { + if (n == nullptr) { + return false; + } + + int64_t max_node_life_time = 0; + int64_t continuous_input_node_life_time = 0; + if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { + auto out_anchor = n->GetOutDataAnchor(out_index); + if(out_anchor == nullptr) { + return false; + } + + // continuous input node's life time should be max + for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { + if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)){ + return false; + } + auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); + GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, + GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); + return false;); + + if(peer_in_node_desc->GetId() > max_node_life_time) { + max_node_life_time = peer_in_node_desc->GetId(); + } + + // If GetBool fail, is_input_continuous is false. 
+ bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); + if (!is_input_continuous) { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + } + if (is_input_continuous) { + continuous_input_node_life_time = peer_in_node_desc->GetId(); + } + } + } + return ((max_node_life_time != 0) && (continuous_input_node_life_time == max_node_life_time)) ; +} + +/// +/// @ingroup GE +/// @brief Check continuous memory reuseable +/// @return void +/// +bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) { + // n,peer_node_desc have been checked + auto node_desc = n->GetOpDesc(); + auto peer_node_desc = peer_node->GetOpDesc(); + continuous_life_begin_ = static_cast(node_desc->GetId()); + // lx fusion case check all continuous input node, firt input node's life time should be min + for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) { + if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || + (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || + (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { + GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); + return false; + } + auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); + /// + /// node2 node1 node3 + /// | / / | + /// node5 node6 + /// firt input node's life time is not min + /// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse + /// + if (static_cast(peer_out_node_desc->GetId()) < continuous_life_begin_) { + continuous_life_begin_ = static_cast(peer_out_node_desc->GetId()); + GELOGI( + "Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time," + "min is node[%s] life[%zu]", + n->GetName().c_str(), 
node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), + peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_); + } + // when node3's output node5's life time is not max(node6 > node5), not reuse + if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(), + in_anchor->GetPeerOutAnchor()->GetIdx())) { + GELOGI( + "Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s " + "max life node", + n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), + peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx()); + return false; + } + } + return true; +} + /// /// @ingroup GE /// @brief Check pre_reuse flag & post_reuse glag for each symbol @@ -1018,8 +1090,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams - if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { - reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); + if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { + reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, + real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -1028,7 +1101,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; - ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } @@ -1041,8 +1113,7 @@ MemoryBlock 
*BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - - block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); + block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); block->stream_id_ = node_op_desc->GetStreamId(); block->ref_count_++; block->continuous_block_ = continuous; @@ -1142,8 +1213,23 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, std::string symbol; if (IsSymbolExist(node_index_io, symbol)) { block = symbol_blocks_[symbol]; - block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); + GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); + return nullptr); + // reduce old size + size_t align_size = block->Size(); + AlignMemOffset(align_size); + theory_memory_size_ -= align_size; + + auto block_size = GetBlockSize(size, ranges); + block->SetSize(block_size); + block->SetLifeTimeEnd(life_time_); + block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); block->ref_count_++; + + // add new size + align_size = block_size; + AlignMemOffset(align_size); + theory_memory_size_ += align_size; } else { int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; @@ -1196,7 +1282,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, GELOGI("Get dst_reuse_input_index failed")); if (dst_reuse_input && (dst_reuse_input_index == static_cast(in_anchor->GetIdx()))) { - block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); out_count_reuse_input += 1; reuse_input = true; } @@ -1237,7 +1322,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, 
uint32_t output_index, bool i if (static_cast(index) == output_index) { if (node->GetOwnerComputeGraph() != nullptr) { string graph_name = node->GetOwnerComputeGraph()->GetName(); - GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), + GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), op_desc->GetName().c_str(), index, op_desc->GetStreamId()); } return true; @@ -1275,7 +1360,6 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vectorsame_stream_) { to_release->SetLifeTimeEnd(life_time_); reusable_memory.emplace_back(to_release); - AddReusableBlockCount(*to_release, reusable_block_counts_); } } } @@ -1375,6 +1459,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector } is_op_reuse_mem_ = true; + continuous_life_begin_ = 0; if (op_reuse_env_valid_ == true) { vector::iterator it_name = std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); @@ -1426,7 +1511,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector continue; } // atomic can't be reused - bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; + bool need_change = is_op_reuse_mem_ && is_atomic; if (need_change) { is_op_reuse_mem_ = false; } @@ -1819,11 +1904,12 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, } op_desc->SetWorkspace(workspace_list); } - GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " - "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), - op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), - block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, - block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, 
+ GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " + "noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", + graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), + node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, + block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, + block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, block->batch_label_.c_str()); } diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 58bcda75..78584078 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -39,14 +39,15 @@ using DependStreamLife = std::map>; enum OpMemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) - : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} + NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) + : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} ge::NodePtr node = nullptr; OpMemoryType mem_type = kOutput; uint32_t index = 0; - size_t life_time_end = kMaxLifeTime; bool ref_input = false; + size_t life_time_begin = 0; + size_t life_time_end = kMaxLifeTime; const string GetMemType() const { if (mem_type == kOutput) { return "output"; @@ -55,6 +56,34 @@ struct NodeTypeIndex { } return "unknown"; } + + size_t GetLifeBegin() const { + if ((node == nullptr) || (node->GetOpDesc() == nullptr)) { + return 0; + } + + if ((life_time_begin > 0) && (life_time_begin < static_cast(node->GetOpDesc()->GetId()))) { + return 
life_time_begin; + } else { + return node->GetOpDesc()->GetId(); + } + } + + std::string GetLifeBeginDesc() const { + if (node == nullptr) { + return ""; + } + auto node_op_desc = node->GetOpDesc(); + if (node_op_desc != nullptr) { + auto life_begin = GetLifeBegin(); + if (life_begin != static_cast(node_op_desc->GetId())) { + return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId()); + } else { + return std::to_string(node_op_desc->GetId()); + } + } + return ""; + } }; class MemoryBlock { @@ -86,16 +115,13 @@ class MemoryBlock { symbol_list_.clear(); } - void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, - int64_t stream_id) { - real_size_list_.emplace_back(real_size); - no_align_size_list_.emplace_back(no_align_size); - node_type_index_list_.emplace_back(node, type, out_index, false); - if (stream_id != stream_id_) { - same_stream_ = false; + size_t Size() const { return block_size_; } + + void SetSize(size_t size) { + if (size > block_size_) { + block_size_ = size; } } - size_t Size() const { return block_size_; } size_t AlignSize() const; @@ -143,7 +169,7 @@ class MemoryBlock { size_t GetLifeBegin(); - size_t GetLifeEnd(); + size_t GetLifeEnd() const; void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); @@ -406,6 +432,7 @@ class BlockMemAssigner : public MemAssigner { bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); + bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index); /// /// @ingroup GE /// @|+++++++++block1++++++++| |+++++++++block1++++++++| @@ -425,8 +452,6 @@ class BlockMemAssigner : public MemAssigner { std::unordered_map>> reusable_blocks_; - std::map reusable_block_counts_; - std::unordered_map>> stream_workspace_blocks_; std::unordered_map> node_out_blocks_; @@ -456,6 +481,7 @@ class 
BlockMemAssigner : public MemAssigner { std::string max_batch_label_; + size_t continuous_life_begin_ = 0; /// /// @ [stream1][nodeid] /// @[nodeid] [stream2][nodeid] diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 98d073d4..f94eb275 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -35,10 +35,9 @@ namespace { const int kAllInputAddrIsAtomic = -1; const int kVirtualInputNodeMemoryReuse = 0; const int kVirtualOutputNodeMemoryReuse = 1; -const size_t kVirtualInputNodeOutputSize = 1; -const size_t kVirtualOutputNodeInputSize = 1; -const size_t kVirtualNodeDataIndex = 0; -const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; +// One state per bit cannot be repeated +enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 }; + int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, const std::map> &symbol_to_anchors, const ge::NodePtr &node, const uint32_t i) { @@ -136,7 +135,7 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } -ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, +ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); @@ -181,68 +180,6 @@ ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::Cons return SUCCESS; } -Status GraphMemoryAssigner::GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, - int32_t mem_reuse_model, string &max_batch_label) { - for (auto &i_map : mem_reuse_virtual_nodes_map) { - vector virtual_nodes_list = i_map.second; - vector max_shape_dims; - size_t max_batch_dim = 0; - bool max_batch_dim_find = false; - for (size_t i = 0; i < 
virtual_nodes_list.size(); ++i) { - GE_CHECK_NOTNULL(virtual_nodes_list[i]); - OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - ge::ConstGeTensorDescPtr input_output_desc; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHECK_NOTNULL(input_output_desc); - - if (i == 0) { - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); - max_shape_dims = input_output_desc->GetShape().GetDims(); - } else { - vector current_shape_dims = input_output_desc->GetShape().GetDims(); - if (current_shape_dims.size() != max_shape_dims.size()) { - std::string error = "The shape of several nodes between multiple batches does not match."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - for (size_t j = 0; j < current_shape_dims.size(); ++j) { - if (current_shape_dims[j] == max_shape_dims[j]) { - continue; - } - if (max_batch_dim_find && max_batch_dim != j) { - std::string error = "The shape of several nodes between multiple batches does not match."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - max_batch_dim_find = true; - max_batch_dim = j; - if (current_shape_dims[j] > max_shape_dims[j]) { - max_shape_dims[j] = current_shape_dims[j]; - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); - } - // Only compare the first different dim in shape. 
- break; - } - } - } - // In every element of virtual_input_nodes_map, the label of the max batch node is the same. - break; - } - return SUCCESS; -} - Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { GELOGE(FAILED, "memory_offset_ is empty."); @@ -250,13 +187,6 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, mapGetGraphMemoryMaxSize())}); + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), + iter.second, iter.first); } return ge::FAILED; } @@ -313,22 +245,137 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offse return SUCCESS; } +uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { + if (op_desc == nullptr) { + return 0; + }; + + bool is_continuous = false; + uint32_t continuous_type = 0; + // If GetBool fail, is_continuous is false. + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous); + if (is_continuous) { + continuous_type |= kTypeInput; + } else { + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous); + if (is_continuous) { + bool attr_reuse = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + if (attr_reuse) { + continuous_type |= kTypeInputNoPadding; + } + } + } + + is_continuous = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous); + if (is_continuous) { + continuous_type |= kTypeOutput; + } else { + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous); + if (is_continuous) { + bool attr_reuse = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + if (attr_reuse) { + continuous_type |= kTypeOutputNoPadding; + } + } + } + + if (continuous_type != 0) { + GELOGI("Current node %s continuous type %d.", op_desc->GetName().c_str(), continuous_type); + } + return 
continuous_type; +} + +Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, + int64_t &tensor_size, int64_t &nopadding_size) { + if ((op_desc == nullptr) || (output_desc == nullptr)) { + GELOGE(FAILED, "Input para is nullptr."); + return FAILED; + } + tensor_size = 0; + nopadding_size = 0; + bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); + if (is_nopadding) { + int64_t attr_dim_index; + bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); + if (!get_attr_dim_flag) { + GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); + return FAILED; + } + + // Calculate tensor real size of each piece of data and out size of complete data + int64_t batch_dim_num = 1; + if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != + SUCCESS) { + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str()); + return FAILED; + } + } else { + if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { + GELOGE(FAILED, "GetSize failed."); + return FAILED; + } + } + if ((tensor_size < 0) || (nopadding_size < 0)) { + GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str()); + return FAILED; + } + return SUCCESS; +} + +void AlignMemOffset(int64_t &mem_align_size) { + if (mem_align_size <= 0) { + return; + } + mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; +} + +bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) { + bool is_peer_output_continuous = false; + // If GetBool fail, is_peer_output_continuous is false. 
+ (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); + + // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and + // continuous output of the previous node is the same, we can support it. If size != 1, there may be + // conflict between the two, we can not support it. + auto peer_output_size = peer_op_desc->GetOutputsSize(); + GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return true;); + + bool is_peer_reference = false; + // If GetBool fail, is_peer_reference is false. + (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); + GE_IF_BOOL_EXEC(is_peer_reference, + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return true;); + return false; +} + Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { Status ret; for (auto &node : compute_graph_->GetAllNodes()) { - // Get the continuous input type of the node, default is false - bool is_input_continuous = false; - GE_CHECK_NOTNULL(node->GetOpDesc()); - // If GetBool fail, is_input_continuous is false. 
- (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + GE_CHECK_NOTNULL(node); + auto continuous_type = GetContinuousMemoryType(node->GetOpDesc()); // Assign continuous input memory - if (is_input_continuous) { - int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); + bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); + int64_t memory_type = RT_MEMORY_HBM; + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); + if (continuous_input) { int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; - ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); + ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; @@ -338,7 +385,6 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { vector input_indexes; // If GetListInt fail, input_indexes is empty. 
(void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); - if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { // check whether there is an atomic conflict between the current node and the peer out node if (!CheckInputIsSupportAtomic(node)) { @@ -350,9 +396,10 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { const auto &in_control_anchor = node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_control_anchor); for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { + GE_CHECK_NOTNULL(peer_out_control_anchor); auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); if (peer_out_node->GetType() == ATOMICADDRCLEAN) { - ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); + ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type); if (ret != SUCCESS) { GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); return ret; @@ -362,23 +409,12 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } } - // Get the reference type of the node, default is false - bool is_ref = false; - // If GetBool fail, is_ref is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - - // Get the continuous output type of the node, default is false - bool is_output_continuous = false; - // If GetBool fail, is_output_continuous is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); - - // If the output is ref type and refers to the ref of an input, the name of the output - // and the input are the same. 
Ge encounters ref type, finds matching relationship according - // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast - if (!is_ref && is_output_continuous) { // Assign continuous output memory - ret = AssignContinuousOutputMemory(node); + // Assign continuous output memory + bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); + if (continuous_output) { + ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); if (ret != ge::SUCCESS) { - GELOGE(ret, "Assign reference memory failed!"); + GELOGE(ret, "Assign continuous output memory failed!"); return ret; } } @@ -391,520 +427,181 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type) { + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); - bool continuous_input_alloc = false; - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); auto iter = memory_offset_.find(memory_type); if (iter == memory_offset_.end()) { std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + // The head and tail of hcom continuous input should be added 512 + iter->second.mem_offset_ += MEM_ALIGN_SIZE; continuous_mem_start = iter->second.mem_offset_; + int64_t mem_offset = iter->second.mem_offset_; + int64_t extra_memory_size = 0; + bool is_continuous_input_allocated = false; + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); for (auto &in_data_anchor : node->GetAllInDataAnchors()) { + GE_IF_BOOL_EXEC(in_data_anchor == 
nullptr, continue); auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); - auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); - bool is_peer_output_continuous = false; - // If GetBool fail, is_peer_output_continuous is false. - (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); - - // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and - // continuous output of the previous node is the same, we can support it. If size != 1, there may be - // conflict between the two, we can not support it. - auto peer_output_size = peer_op_desc->GetOutputsSize(); - GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return PARAM_INVALID;); - - bool is_peer_reference = false; - // If GetBool fail, is_peer_reference is false. - (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); - GE_IF_BOOL_EXEC(is_peer_reference, - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." 
+ - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return PARAM_INVALID;); - - vector output_list = peer_op_desc->GetOutputOffset(); - std::vector offsets_for_fusion = {}; - bool has_offset_attr = - AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); - if (peer_out_data_anchor->GetIdx() < static_cast(output_list.size())) { - if (continuous_input_alloc && !has_offset_attr) { - if (in_data_anchor->GetIdx() == 0) { - continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); - } - // can not use else if, incase only one input - if (in_data_anchor->GetIdx() == static_cast(node->GetAllInDataAnchors().size()) - 1) { - int64_t tensor_desc_size = 0; - Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), - tensor_desc_size); - GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - - tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; - continuous_mem_size = - output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; - } - GELOGI( - "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " - "real_size[%u].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), - 0, 0); - continue; - } - - output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; - } else { - std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); - return FAILED; - } - peer_op_desc->SetOutputOffset(output_list); - size_t pre_mem_offset = iter->second.mem_offset_; + 
GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;); int64_t tensor_desc_size = 0; - if (has_offset_attr) { - if (peer_out_data_anchor->GetIdx() < static_cast(offsets_for_fusion.size())) { - auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; - iter->second.mem_offset_ += offset_for_fusion; - } else { + int64_t nopadding_size = 0; + int64_t real_size = 0; + std::vector offsets_of_fusion = {}; + bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion); + lx_fusion = lx_fusion && !offsets_of_fusion.empty(); + if (lx_fusion) { + if (peer_out_data_anchor->GetIdx() >= static_cast(offsets_of_fusion.size())) { std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()]; + tensor_desc_size = nopadding_size; } else { - Status ret = - TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); - GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - - iter->second.mem_offset_ += tensor_desc_size; - } - - // If set tensor_actual_size, Memory alignment is not required. 
- int32_t is_tensor_actual_size = 0; - ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); - if (is_tensor_actual_size == 0) { - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()), + continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) { + return FAILED; + } } - GELOGI( - "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " - "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), - (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); - } - - iter->second.mem_offset_ += MEM_ALIGN_SIZE; - if (!continuous_input_alloc) { - continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; - } - return SUCCESS; -} - -Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) { - GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); - auto out_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); - vector output_list = out_op_desc->GetOutputOffset(); - if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { - GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", - out_op_desc->GetOutputsSize(), output_list.size()); - return ge::FAILED; - } - - size_t mem_offset = output_list[0]; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset; - int64_t tensor_desc_size = 0; - if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != - ge::SUCCESS) { - GELOGE(FAILED, "GetSize failed."); - return FAILED; - } - mem_offset += tensor_desc_size; - if 
(mem_offset <= 0) { + bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; + vector output_list = peer_op_desc->GetOutputOffset(); + if (peer_out_data_anchor->GetIdx() >= static_cast(output_list.size())) { + std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } - mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; - GELOGI( - "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); - } - out_op_desc->SetOutputOffset(output_list); - return ge::SUCCESS; -} -Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { - OpDescPtr op_desc = node->GetOpDesc(); - vector output_list = op_desc->GetOutputOffset(); - if (output_list.empty()) { - GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str()); - return FAILED; - } - output_list.at(0) = mem_offset_reuse; - op_desc->SetOutputOffset(output_list); - GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); - - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; - } - - size_t extra_memory_size = 0; - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); - auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_CHECK_NOTNULL(peer_op_desc); - vector 
output_offsets = peer_op_desc->GetOutputOffset(); - if (peer_out_data_anchor->GetIdx() >= static_cast(output_offsets.size())) { - GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); - return ge::FAILED; + // when continuous input has been allocated first input is beginning offset + bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); + if (is_allocated_first_input) { + mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); + continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); + } else { + // set offset for input + output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset; + peer_op_desc->SetOutputOffset(output_list); } - output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; - peer_op_desc->SetOutputOffset(output_offsets); - size_t pre_mem_offset = mem_offset_reuse; - // Calculate tensor real size of each piece of data and out size of complete data - ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()); - GE_CHECK_NOTNULL(output_desc); - int64_t output_mem_size; - int64_t batch_dim_num = 1; - int64_t out_size; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != - SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", - peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx()); - return FAILED; + int64_t align_size = tensor_desc_size; + if (is_nopadding) { + mem_offset += nopadding_size; + extra_memory_size += (tensor_desc_size - nopadding_size); + real_size = nopadding_size; + } else { + ge::AlignMemOffset(align_size); + mem_offset += align_size; + // The head and tail of hcom continuous input should be added 512 + extra_memory_size = MEM_ALIGN_SIZE; + real_size = tensor_desc_size; } - mem_offset_reuse += output_mem_size; - extra_memory_size = extra_memory_size + out_size - output_mem_size; - 
- GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, - output_mem_size); - } - mem_offset_reuse += extra_memory_size; - size_t after_mem_offset = mem_offset_reuse; - GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { - map> mem_reuse_virtual_input_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; - for (const auto &n : compute_graph_->GetAllNodes()) { - OpDescPtr op_desc = n->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - bool attr_continuous = false; - bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous); - GE_IF_BOOL_EXEC(!get_continuous_flag, continue); - bool attr_reuse = false; - bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - GE_IF_BOOL_EXEC(!get_reuse_flag, continue); - if (attr_reuse && attr_continuous) { - if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) { - // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse. 
- std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) + - " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - string batch_label_string; - // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string.empty()) { - size_t node_mem_offset = iter->second.mem_offset_; - // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. - Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset); - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str()); - return FAILED; - } - - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - } else { - // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
- string current_node_full_name = op_desc->GetName(); - size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); - if (pos == string::npos) { - GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.", - kMbatchNodeNameFlag, n->GetName().c_str()); - return FAILED; - } - string fixed_name = current_node_full_name.substr(0, pos); - vector parallel_virtual_input_nodes; - if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { - parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name]; - } - parallel_virtual_input_nodes.emplace_back(n); - mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes; - } - } + GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " + "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), + node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), + output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, + is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); } - int32_t mem_reuse_model = 0; - if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual input nodes failed."); - return FAILED; + mem_offset += extra_memory_size; + ge::AlignMemOffset(mem_offset); + continuous_mem_size = mem_offset - continuous_mem_start; + if (is_continuous_input_allocated) { + // not allocate memory here, so no need add 512 in header + iter->second.mem_offset_ -= MEM_ALIGN_SIZE; + } else { + iter->second.mem_offset_ = mem_offset; } return SUCCESS; } -Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { - OpDescPtr op_desc = node->GetOpDesc(); - - // 1. 
set memory of to be reused input tensor +Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { auto in_data_anchor_list = node->GetAllInDataAnchors(); + if (in_data_anchor_list.empty()) { + GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str()); + return FAILED; + } auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); + GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null."); + return ge::FAILED); auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_CHECK_NOTNULL(peer_op_desc); + GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED); vector in_node_output_offsets = peer_op_desc->GetOutputOffset(); if (peer_out_data_anchor->GetIdx() >= static_cast(in_node_output_offsets.size())) { GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); return FAILED; } - in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; - peer_op_desc->SetOutputOffset(in_node_output_offsets); - GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); + mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); + return SUCCESS; +} - // 2. 
set memory of output tensor - vector output_list = op_desc->GetOutputOffset(); - if (output_list.empty()) { - GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str()); - return FAILED; - } - if (op_desc->GetOutputsSize() > output_list.size()) { - GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(), - output_list.size()); - return FAILED; - } - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; +Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, + uint32_t continuous_type) { + GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); + auto out_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); + vector output_list = out_op_desc->GetOutputOffset(); + if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { + GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", + out_op_desc->GetOutputsSize(), output_list.size()); + return ge::FAILED; } - size_t extra_memory_size = 0; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset_reuse; - size_t pre_mem_offset = mem_offset_reuse; - - // calculate tensor real size of each piece of data and out size of complete data - ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()); - GE_CHECK_NOTNULL(output_desc); - int64_t output_mem_size; - int64_t batch_dim_num = 1; - int64_t out_size; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != - SUCCESS) { - GELOGE(FAILED, 
"CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", - op_desc->GetName().c_str(), out_data_anchor->GetIdx()); - return FAILED; + int64_t mem_offset = 0; + bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0); + if (is_nopadding) { + // out tensor memory must be reused input tensor memory + if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) { + return ge::FAILED; } + } else { + // Get the reference type of the node, default is false + bool is_ref = false; + // If GetBool fail, is_ref is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - mem_offset_reuse += output_mem_size; - extra_memory_size = extra_memory_size + out_size - output_mem_size; - - GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - pre_mem_offset, out_size, output_mem_size); - } - op_desc->SetOutputOffset(output_list); - mem_offset_reuse += extra_memory_size; - size_t after_mem_offset = mem_offset_reuse; - GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { - map> mem_reuse_virtual_output_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; - for (const auto &n : compute_graph_->GetAllNodes()) { - OpDescPtr op_desc = n->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - bool attr_continuous = false; - bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous); - GE_IF_BOOL_EXEC(!get_continuous_flag, continue); - bool attr_reuse = false; - bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - GE_IF_BOOL_EXEC(!get_reuse_flag, continue); - - if 
(attr_reuse && attr_continuous) { - auto in_data_anchor_list = n->GetAllInDataAnchors(); - if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) { - // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse. - std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) + - " has " + FmtToStr(in_data_anchor_list.size()) + " inputs."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - string batch_label_string; - // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string.empty()) { - size_t node_mem_offset = iter->second.mem_offset_; - // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. - Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset); - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str()); - return FAILED; - } - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - } else { - // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
- string current_node_full_name = op_desc->GetName(); - size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); - if (pos == string::npos) { - std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) + - " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName()); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - string fixed_name = current_node_full_name.substr(0, pos); - vector parallel_virtual_output_nodes; - if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { - parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name]; - } - parallel_virtual_output_nodes.emplace_back(n); - mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes; - } + // If the output is ref type and refers to the ref of an input, the name of the output + // and the input are the same. Ge encounters ref type, finds matching relationship according + // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast + if (is_ref) { + GELOGI("Current node %s no needs assign continuous output because reference input by name.", + node->GetName().c_str()); + return SUCCESS; } + mem_offset = output_list[0]; } - int32_t mem_reuse_model = 1; - if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual output nodes failed."); - return FAILED; - } - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, - int32_t mem_reuse_model) { - // Find max batch label value - string max_batch_label; - GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), - "Get max batch label failed."); - PrintMemoryOffset(); - vector nodes_mem_offset_list; - for (auto &i_map : mem_reuse_nodes_map) { - vector virtual_nodes_list = i_map.second; - int64_t memory_type = RT_MEMORY_HBM; - 
GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type), - "Get node list memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + output_list[out_data_anchor->GetIdx()] = mem_offset; + int64_t tensor_desc_size = 0; + int64_t nopadding_size = 0; + if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type, + tensor_desc_size, nopadding_size) != ge::SUCCESS) { return FAILED; } - size_t max_batch_node_mem_offset = iter->second.mem_offset_; - nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); - for (auto &i_node : virtual_nodes_list) { - // Op_desc is not nullptr, it has been checked. - OpDescPtr op_desc = i_node->GetOpDesc(); - string batch_label_string; - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. 
- (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string == max_batch_label) { - Status status = SUCCESS; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); - return FAILED; - } - iter->second.mem_offset_ = max_batch_node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - // Only assign memory of max batch nodes. 
- break; - } - } - } - PrintMemoryOffset(); - size_t memory_reuse_index = 0; - for (auto &i_map : mem_reuse_nodes_map) { - vector virtual_nodes_list = i_map.second; - for (auto &i_node : virtual_nodes_list) { - size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index]; - Status status = SUCCESS; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); - return FAILED; - } + if (is_nopadding) { + mem_offset += nopadding_size; + } else { + mem_offset += tensor_desc_size; + ge::AlignMemOffset(mem_offset); } - memory_reuse_index++; + GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" + " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), + node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, + is_nopadding ? 
nopadding_size : tensor_desc_size, is_nopadding); } - return SUCCESS; + out_op_desc->SetOutputOffset(output_list); + return ge::SUCCESS; } Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { @@ -946,7 +643,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { GE_CHECK_NOTNULL(mem_assigner_); GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger()); if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}), + GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); } } @@ -1084,7 +781,7 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector & } // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. - if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) { + if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) { GELOGE(FAILED, "Failed to set atomic attr separately."); return FAILED; } @@ -1231,9 +928,10 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve output_list[output_index] = iter->second.mem_offset_; std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld]" - " batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, - iter->second.mem_offset_, op_desc->GetStreamId(), size, size, batch_label.c_str()); + GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%ld] " + "size[%ld] real_size[%ld] batch[%s].", 
compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), + node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, + size, size, batch_label.c_str()); iter->second.mem_offset_ += size; AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); @@ -1309,10 +1007,10 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "size[%ld] real_size[%ld] batch[%s].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, + "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "memtype[%ld] size[%ld] real_size[%ld] batch[%s].", + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; @@ -1350,10 +1048,10 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, - batch_label.c_str()); + "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "memtype[%ld] ssize[%ld] real_size[%ld] batch[%s].", 
compute_graph_->GetName().c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_, + op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); @@ -1429,7 +1127,7 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { return FAILED; } for (auto pair : memory_offset_) { - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(), + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } @@ -1598,7 +1296,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { } Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const vector &mem_offset_end) { + const vector &mem_offset_end, int64_t memory_type) { GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start); // Parsing offset and size vectors @@ -1627,7 +1325,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { - if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) { + if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) { GELOGE(FAILED, "Set atomic clean attr failed."); return FAILED; } @@ -1638,7 +1336,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in } ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector 
&atomic_mem_start, - const vector &atomic_mem_size) { + const vector &atomic_mem_size, int64_t memory_type) { auto node_op_desc = node->GetOpDesc(); if (node_op_desc != nullptr) { GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); @@ -1677,9 +1375,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve } string atomic_mem_size_str = ss.str(); - GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]", - node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str()); + GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]" + " memtype[%ld] size[%s]",node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), + node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type, + atomic_mem_size_str.c_str()); } return SUCCESS; } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index def24287..a380e594 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -119,31 +119,15 @@ class GraphMemoryAssigner { /// ge::Status ReAssignContinuousMemory(bool is_loop_graph); - ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory(); - - ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory(); - - ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse); - - ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse); - - ge::Status ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, int32_t mem_reuse_model); - - ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, - int32_t mem_reuse_model, string &max_batch_label); - - ge::Status CalculateTensorRealSizeAndOutSize(const 
ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, - int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); - ge::Status ReAssignAtomicMemory(bool is_loop_graph); ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type); + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); - ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); + ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); /// /// @brief check the input of node whether support atomic attr @@ -169,10 +153,10 @@ class GraphMemoryAssigner { ge::Status AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes); ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const std::vector &mem_offset_end); + const std::vector &mem_offset_end, int64_t memory_type); ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector &atomic_mem_start, - const std::vector &atomic_mem_size); + const std::vector &atomic_mem_size, int64_t memory_type); ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 49abe17c..988f8d0e 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -1820,7 +1820,7 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector &user_inp /// Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { - GELOGW("there is not AIPP related with index %u.", index); + GELOGW("There is not AIPP related with index %u.", index); return SUCCESS; } @@ 
-1829,7 +1829,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, "Data node do not contain param aipp!"); GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); - GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u", + GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); AippConfigInfo aipp_info; @@ -2492,7 +2492,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r uint64_t buffer_length = buffer.length; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data)); - GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", + GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); idx++; diff --git a/metadef b/metadef index dc6cceb6..fcd0833c 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 +Subproject commit fcd0833cffcd201701f71d17db0c696c1bb01715 diff --git a/parser b/parser index 4e72aae4..1601d66b 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 +Subproject commit 1601d66b6187c83cbf38e762beb5538ce2c7c573 From 00cc4279444a3d50d652eab94cc22ff30e7222b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Mon, 18 Jan 2021 09:30:47 +0800 Subject: [PATCH 23/41] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!9?= 
=?UTF-8?q?53=20:=20Continuous=20memory=20optimization,=20code=20refactori?= =?UTF-8?q?ng'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../build/memory/binary_block_mem_assigner.cc | 4 +- ge/graph/build/memory/block_mem_assigner.cc | 264 ++---- ge/graph/build/memory/block_mem_assigner.h | 54 +- ge/graph/build/memory/graph_mem_assigner.cc | 855 ++++++++++++------ ge/graph/build/memory/graph_mem_assigner.h | 24 +- .../load/new_model_manager/davinci_model.cc | 6 +- metadef | 2 +- parser | 2 +- 8 files changed, 708 insertions(+), 503 deletions(-) diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 97a0aed6..fff589f3 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGW("Vector all_memory_size is empty!"); return SUCCESS; } - if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { - GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); + if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) { + GELOGE(FAILED, "dividend is 0!"); return FAILED; } // Memory size is 512 aligned, so it is not necessary to take less than 512 diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 21d6a49e..76e7efbe 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -65,7 +65,10 @@ void AlignMemOffset(size_t &mem_align_size) { } static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { - if (left.GetLifeBegin() < right.GetLifeBegin()) { + auto left_node_op_desc = left.node->GetOpDesc(); + auto right_node_op_desc = right.node->GetOpDesc(); + if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) + && (left_node_op_desc->GetId() < 
right_node_op_desc->GetId())) { return true; } return false; @@ -97,14 +100,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { auto left_node_op_desc = left.node->GetOpDesc(); auto right_node_op_desc = right.node->GetOpDesc(); if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { - if (left.GetLifeBegin() < right.GetLifeBegin()) { - if (left.life_time_end >= right.GetLifeBegin()) { + if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { + if (left.life_time_end >= static_cast(right_node_op_desc->GetId())) { return true; } - } else if (left.GetLifeBegin() == right.GetLifeBegin()) { + } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { return true; } else { - if (right.life_time_end >= left.GetLifeBegin()) { + if (right.life_time_end >= static_cast(left_node_op_desc->GetId())) { return true; } } @@ -322,7 +325,12 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ size_t MemoryBlock::GetLifeBegin() { size_t life_time = 0; if (!node_type_index_list_.empty()) { - life_time = node_type_index_list_.front().GetLifeBegin(); + if (node_type_index_list_.front().node != nullptr) { + auto node_op_desc = node_type_index_list_.front().node->GetOpDesc(); + if (node_op_desc != nullptr) { + life_time = node_op_desc->GetId(); + } + } } return life_time; } @@ -409,7 +417,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ depend_stream_life_[stream_id_] = GetLifeBegin(); } -size_t MemoryBlock::GetLifeEnd() const { +size_t MemoryBlock::GetLifeEnd() { if (!node_type_index_list_.empty()) { return node_type_index_list_.back().life_time_end; } @@ -563,29 +571,32 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { for (auto &out_anchor : n->GetAllOutDataAnchors()) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); - int64_t size = 0; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) 
!= SUCCESS, GELOGI("Get size failed")); - GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", - node_op_desc->GetName().c_str(), size); - return;); - batch_all_memory_size[batch_label].emplace_back(size); - if (batch_total_size.find(batch_label) == batch_total_size.end()) { - batch_total_size[batch_label] = size; - } else { - batch_total_size[batch_label] += size; - } - - if (!anchor_to_symbol_.empty()) { - auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); - if (iter1 == anchor_to_symbol_.end()) { - continue; + bool reuse_input = false; + GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS, + GELOGI("Get reuse_input failed")); + + if (!reuse_input) { + int64_t size = 0; + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); + batch_all_memory_size[batch_label].emplace_back(size); + if (batch_total_size.find(batch_label) == batch_total_size.end()) { + batch_total_size[batch_label] = size; + } else { + batch_total_size[batch_label] += size; } - const std::string &symbol = iter1->second; - auto iter2 = symbol_size_.find(symbol); - if (iter2 == symbol_size_.end()) { - symbol_size_[symbol] = size; - } else if (size > static_cast(iter2->second)) { - iter2->second = size; + + if (!anchor_to_symbol_.empty()) { + auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); + if (iter1 == anchor_to_symbol_.end()) { + continue; + } + const std::string &symbol = iter1->second; + auto iter2 = symbol_size_.find(symbol); + if (iter2 == symbol_size_.end()) { + symbol_size_[symbol] = size; + } else if (size > static_cast(iter2->second)) { + iter2->second = size; + } } } } @@ -626,17 +637,35 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { return false; } -bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { +void 
AddReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { + string key = std::to_string(mem_block.Size()); + key += "_" + std::to_string(mem_block.stream_id_); + key += "_" + std::to_string(mem_block.memory_type_); + auto it = reusable_block_counts.find(key); + if (it != reusable_block_counts.end()) { + it->second++; + } else { + reusable_block_counts[key] = 1; + } +} + +void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { + string key = std::to_string(mem_block.Size()); + key += "_" + std::to_string(mem_block.stream_id_); + key += "_" + std::to_string(mem_block.memory_type_); + auto it = reusable_block_counts.find(key); + if (it != reusable_block_counts.end()) { + if (it->second > 0) { + it->second--; + } + } +} + +bool CanReuseBySize(const map &reusable_block_counts, const MemoryBlock &reusable_block, + size_t block_size, size_t real_size, bool continuous) { bool can_reuse = false; if (reusable_block.Size() == block_size) { - // in some continuous input case, continuous first input node's is not same as topo first node. 
- if (continuous_life_begin > 0) { - if (continuous_life_begin > reusable_block.GetLifeEnd()) { - can_reuse = true; - } - } else { - can_reuse = true; - } + can_reuse = true; } return can_reuse; } @@ -647,13 +676,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } - auto node_desc = n->GetOpDesc(); - GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); - return false;); - std::vector offsets_for_fusion = {}; - bool has_lx_fusion_attr = - AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); - if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { auto out_anchor = n->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_anchor == nullptr, @@ -676,17 +698,16 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false;); // If GetBool fail, is_input_continuous is false. 
- (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); - if (is_input_continuous) { + bool is_input_continuous_no_padding = false; + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, + is_input_continuous_no_padding); + if (is_input_continuous_no_padding) { reset_zero_copy_flag = true; - has_lx_fusion_attr = true; - } else { - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + return false; } + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - // lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later - GE_IF_BOOL_EXEC(is_input_continuous && - (CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))), + GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); no_need_assign_memory = true; return false;); @@ -700,10 +721,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // Only set attr one times. 
if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); - // lx fusion case assign max size for first block, so reuse as none continuous - GE_IF_BOOL_EXEC(has_lx_fusion_attr, - is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index); - return false;); node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); } peer_input_index = peer_in_anchor->GetIdx(); @@ -716,95 +733,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false; } -bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { - if (n == nullptr) { - return false; - } - - int64_t max_node_life_time = 0; - int64_t continuous_input_node_life_time = 0; - if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { - auto out_anchor = n->GetOutDataAnchor(out_index); - if(out_anchor == nullptr) { - return false; - } - - // continuous input node's life time should be max - for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { - if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)){ - return false; - } - auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); - GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, - GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); - return false;); - - if(peer_in_node_desc->GetId() > max_node_life_time) { - max_node_life_time = peer_in_node_desc->GetId(); - } - - // If GetBool fail, is_input_continuous is false. 
- bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); - if (!is_input_continuous) { - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - } - if (is_input_continuous) { - continuous_input_node_life_time = peer_in_node_desc->GetId(); - } - } - } - return ((max_node_life_time != 0) && (continuous_input_node_life_time == max_node_life_time)) ; -} - -/// -/// @ingroup GE -/// @brief Check continuous memory reuseable -/// @return void -/// -bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) { - // n,peer_node_desc have been checked - auto node_desc = n->GetOpDesc(); - auto peer_node_desc = peer_node->GetOpDesc(); - continuous_life_begin_ = static_cast(node_desc->GetId()); - // lx fusion case check all continuous input node, firt input node's life time should be min - for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) { - if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || - (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || - (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { - GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); - return false; - } - auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); - /// - /// node2 node1 node3 - /// | / / | - /// node5 node6 - /// firt input node's life time is not min - /// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse - /// - if (static_cast(peer_out_node_desc->GetId()) < continuous_life_begin_) { - continuous_life_begin_ = static_cast(peer_out_node_desc->GetId()); - GELOGI( - "Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time," - "min is node[%s] life[%zu]", - n->GetName().c_str(), 
node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), - peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_); - } - // when node3's output node5's life time is not max(node6 > node5), not reuse - if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(), - in_anchor->GetPeerOutAnchor()->GetIdx())) { - GELOGI( - "Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s " - "max life node", - n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), - peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx()); - return false; - } - } - return true; -} - /// /// @ingroup GE /// @brief Check pre_reuse flag & post_reuse glag for each symbol @@ -1090,9 +1018,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams - if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { - reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, - real_size, no_align_size); + if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { + reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -1101,6 +1028,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; + ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } @@ -1113,7 +1041,8 @@ MemoryBlock 
*BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); + + block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); block->stream_id_ = node_op_desc->GetStreamId(); block->ref_count_++; block->continuous_block_ = continuous; @@ -1213,23 +1142,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, std::string symbol; if (IsSymbolExist(node_index_io, symbol)) { block = symbol_blocks_[symbol]; - GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); - return nullptr); - // reduce old size - size_t align_size = block->Size(); - AlignMemOffset(align_size); - theory_memory_size_ -= align_size; - - auto block_size = GetBlockSize(size, ranges); - block->SetSize(block_size); - block->SetLifeTimeEnd(life_time_); - block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); + block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); block->ref_count_++; - - // add new size - align_size = block_size; - AlignMemOffset(align_size); - theory_memory_size_ += align_size; } else { int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; @@ -1282,6 +1196,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, GELOGI("Get dst_reuse_input_index failed")); if (dst_reuse_input && (dst_reuse_input_index == static_cast(in_anchor->GetIdx()))) { + block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); out_count_reuse_input += 1; reuse_input = true; } @@ -1322,7 +1237,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, 
uint32_t output_index, bool i if (static_cast(index) == output_index) { if (node->GetOwnerComputeGraph() != nullptr) { string graph_name = node->GetOwnerComputeGraph()->GetName(); - GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), + GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), op_desc->GetName().c_str(), index, op_desc->GetStreamId()); } return true; @@ -1360,6 +1275,7 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vectorsame_stream_) { to_release->SetLifeTimeEnd(life_time_); reusable_memory.emplace_back(to_release); + AddReusableBlockCount(*to_release, reusable_block_counts_); } } } @@ -1459,7 +1375,6 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector } is_op_reuse_mem_ = true; - continuous_life_begin_ = 0; if (op_reuse_env_valid_ == true) { vector::iterator it_name = std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); @@ -1511,7 +1426,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector continue; } // atomic can't be reused - bool need_change = is_op_reuse_mem_ && is_atomic; + bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; if (need_change) { is_op_reuse_mem_ = false; } @@ -1904,12 +1819,11 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, } op_desc->SetWorkspace(workspace_list); } - GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " - "noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", - graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), - node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, - block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, - 
block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, + GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " + "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), + op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), + block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, + block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, block->batch_label_.c_str()); } diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 78584078..58bcda75 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -39,15 +39,14 @@ using DependStreamLife = std::map>; enum OpMemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) - : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} + NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) + : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} ge::NodePtr node = nullptr; OpMemoryType mem_type = kOutput; uint32_t index = 0; - bool ref_input = false; - size_t life_time_begin = 0; size_t life_time_end = kMaxLifeTime; + bool ref_input = false; const string GetMemType() const { if (mem_type == kOutput) { return "output"; @@ -56,34 +55,6 @@ struct NodeTypeIndex { } return "unknown"; } - - size_t GetLifeBegin() const { - if ((node == nullptr) || (node->GetOpDesc() == nullptr)) { - return 0; - } - - if ((life_time_begin > 0) && (life_time_begin < static_cast(node->GetOpDesc()->GetId()))) { - return 
life_time_begin; - } else { - return node->GetOpDesc()->GetId(); - } - } - - std::string GetLifeBeginDesc() const { - if (node == nullptr) { - return ""; - } - auto node_op_desc = node->GetOpDesc(); - if (node_op_desc != nullptr) { - auto life_begin = GetLifeBegin(); - if (life_begin != static_cast(node_op_desc->GetId())) { - return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId()); - } else { - return std::to_string(node_op_desc->GetId()); - } - } - return ""; - } }; class MemoryBlock { @@ -115,13 +86,16 @@ class MemoryBlock { symbol_list_.clear(); } - size_t Size() const { return block_size_; } - - void SetSize(size_t size) { - if (size > block_size_) { - block_size_ = size; + void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, + int64_t stream_id) { + real_size_list_.emplace_back(real_size); + no_align_size_list_.emplace_back(no_align_size); + node_type_index_list_.emplace_back(node, type, out_index, false); + if (stream_id != stream_id_) { + same_stream_ = false; } } + size_t Size() const { return block_size_; } size_t AlignSize() const; @@ -169,7 +143,7 @@ class MemoryBlock { size_t GetLifeBegin(); - size_t GetLifeEnd() const; + size_t GetLifeEnd(); void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); @@ -432,7 +406,6 @@ class BlockMemAssigner : public MemAssigner { bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); - bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index); /// /// @ingroup GE /// @|+++++++++block1++++++++| |+++++++++block1++++++++| @@ -452,6 +425,8 @@ class BlockMemAssigner : public MemAssigner { std::unordered_map>> reusable_blocks_; + std::map reusable_block_counts_; + std::unordered_map>> stream_workspace_blocks_; std::unordered_map> node_out_blocks_; @@ -481,7 +456,6 @@ class 
BlockMemAssigner : public MemAssigner { std::string max_batch_label_; - size_t continuous_life_begin_ = 0; /// /// @ [stream1][nodeid] /// @[nodeid] [stream2][nodeid] diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f94eb275..98d073d4 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -35,9 +35,10 @@ namespace { const int kAllInputAddrIsAtomic = -1; const int kVirtualInputNodeMemoryReuse = 0; const int kVirtualOutputNodeMemoryReuse = 1; -// One state per bit cannot be repeated -enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 }; - +const size_t kVirtualInputNodeOutputSize = 1; +const size_t kVirtualOutputNodeInputSize = 1; +const size_t kVirtualNodeDataIndex = 0; +const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, const std::map> &symbol_to_anchors, const ge::NodePtr &node, const uint32_t i) { @@ -135,7 +136,7 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } -ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, +ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); @@ -180,6 +181,68 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out return SUCCESS; } +Status GraphMemoryAssigner::GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, + int32_t mem_reuse_model, string &max_batch_label) { + for (auto &i_map : mem_reuse_virtual_nodes_map) { + vector virtual_nodes_list = i_map.second; + vector max_shape_dims; + size_t max_batch_dim = 0; + bool max_batch_dim_find = false; + for (size_t i = 0; i < 
virtual_nodes_list.size(); ++i) { + GE_CHECK_NOTNULL(virtual_nodes_list[i]); + OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + ge::ConstGeTensorDescPtr input_output_desc; + if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { + input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex); + } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { + input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex); + } else { + std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GE_CHECK_NOTNULL(input_output_desc); + + if (i == 0) { + // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + max_shape_dims = input_output_desc->GetShape().GetDims(); + } else { + vector current_shape_dims = input_output_desc->GetShape().GetDims(); + if (current_shape_dims.size() != max_shape_dims.size()) { + std::string error = "The shape of several nodes between multiple batches does not match."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + for (size_t j = 0; j < current_shape_dims.size(); ++j) { + if (current_shape_dims[j] == max_shape_dims[j]) { + continue; + } + if (max_batch_dim_find && max_batch_dim != j) { + std::string error = "The shape of several nodes between multiple batches does not match."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + max_batch_dim_find = true; + max_batch_dim = j; + if (current_shape_dims[j] > max_shape_dims[j]) { + max_shape_dims[j] = current_shape_dims[j]; + // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + } + // Only compare the first different dim in shape. 
+ break; + } + } + } + // In every element of virtual_input_nodes_map, the label of the max batch node is the same. + break; + } + return SUCCESS; +} + Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { GELOGE(FAILED, "memory_offset_ is empty."); @@ -187,6 +250,13 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, mapGetGraphMemoryMaxSize())}); - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), - iter.second, iter.first); } return ge::FAILED; } @@ -245,137 +313,22 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offse return SUCCESS; } -uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { - if (op_desc == nullptr) { - return 0; - }; - - bool is_continuous = false; - uint32_t continuous_type = 0; - // If GetBool fail, is_continuous is false. - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous); - if (is_continuous) { - continuous_type |= kTypeInput; - } else { - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous); - if (is_continuous) { - bool attr_reuse = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - if (attr_reuse) { - continuous_type |= kTypeInputNoPadding; - } - } - } - - is_continuous = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous); - if (is_continuous) { - continuous_type |= kTypeOutput; - } else { - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous); - if (is_continuous) { - bool attr_reuse = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - if (attr_reuse) { - continuous_type |= kTypeOutputNoPadding; - } - } - } - - if (continuous_type != 0) { - GELOGI("Current node %s continuous type %d.", op_desc->GetName().c_str(), continuous_type); - } - return 
continuous_type; -} - -Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, - int64_t &tensor_size, int64_t &nopadding_size) { - if ((op_desc == nullptr) || (output_desc == nullptr)) { - GELOGE(FAILED, "Input para is nullptr."); - return FAILED; - } - tensor_size = 0; - nopadding_size = 0; - bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); - if (is_nopadding) { - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; - } - - // Calculate tensor real size of each piece of data and out size of complete data - int64_t batch_dim_num = 1; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != - SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str()); - return FAILED; - } - } else { - if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { - GELOGE(FAILED, "GetSize failed."); - return FAILED; - } - } - if ((tensor_size < 0) || (nopadding_size < 0)) { - GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -void AlignMemOffset(int64_t &mem_align_size) { - if (mem_align_size <= 0) { - return; - } - mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; -} - -bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) { - bool is_peer_output_continuous = false; - // If GetBool fail, is_peer_output_continuous is false. 
- (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); - - // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and - // continuous output of the previous node is the same, we can support it. If size != 1, there may be - // conflict between the two, we can not support it. - auto peer_output_size = peer_op_desc->GetOutputsSize(); - GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return true;); - - bool is_peer_reference = false; - // If GetBool fail, is_peer_reference is false. - (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); - GE_IF_BOOL_EXEC(is_peer_reference, - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return true;); - return false; -} - Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { Status ret; for (auto &node : compute_graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - auto continuous_type = GetContinuousMemoryType(node->GetOpDesc()); + // Get the continuous input type of the node, default is false + bool is_input_continuous = false; + GE_CHECK_NOTNULL(node->GetOpDesc()); + // If GetBool fail, is_input_continuous is false. 
+ (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); // Assign continuous input memory - bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); - int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); - if (continuous_input) { + if (is_input_continuous) { + int64_t memory_type = RT_MEMORY_HBM; + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; - ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); + ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; @@ -385,6 +338,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { vector input_indexes; // If GetListInt fail, input_indexes is empty. 
(void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); + if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { // check whether there is an atomic conflict between the current node and the peer out node if (!CheckInputIsSupportAtomic(node)) { @@ -396,10 +350,9 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { const auto &in_control_anchor = node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_control_anchor); for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { - GE_CHECK_NOTNULL(peer_out_control_anchor); auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); if (peer_out_node->GetType() == ATOMICADDRCLEAN) { - ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type); + ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); if (ret != SUCCESS) { GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); return ret; @@ -409,12 +362,23 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } } - // Assign continuous output memory - bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); - if (continuous_output) { - ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); + // Get the reference type of the node, default is false + bool is_ref = false; + // If GetBool fail, is_ref is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); + + // Get the continuous output type of the node, default is false + bool is_output_continuous = false; + // If GetBool fail, is_output_continuous is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); + + // If the output is ref type and refers to the ref of an input, the name of the output + // and the input are the same. 
Ge encounters ref type, finds matching relationship according + // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast + if (!is_ref && is_output_continuous) { // Assign continuous output memory + ret = AssignContinuousOutputMemory(node); if (ret != ge::SUCCESS) { - GELOGE(ret, "Assign continuous output memory failed!"); + GELOGE(ret, "Assign reference memory failed!"); return ret; } } @@ -427,181 +391,520 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { + int64_t &continuous_mem_size, int64_t memory_type) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); + bool continuous_input_alloc = false; + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); auto iter = memory_offset_.find(memory_type); if (iter == memory_offset_.end()) { std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } - // The head and tail of hcom continuous input should be added 512 - iter->second.mem_offset_ += MEM_ALIGN_SIZE; continuous_mem_start = iter->second.mem_offset_; - int64_t mem_offset = iter->second.mem_offset_; - int64_t extra_memory_size = 0; - bool is_continuous_input_allocated = false; - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); for (auto &in_data_anchor : node->GetAllInDataAnchors()) { - GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); + auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); GE_IF_BOOL_EXEC(peer_op_desc == nullptr, 
continue); - GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;); + bool is_peer_output_continuous = false; + // If GetBool fail, is_peer_output_continuous is false. + (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); + + // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and + // continuous output of the previous node is the same, we can support it. If size != 1, there may be + // conflict between the two, we can not support it. + auto peer_output_size = peer_op_desc->GetOutputsSize(); + GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return PARAM_INVALID;); + + bool is_peer_reference = false; + // If GetBool fail, is_peer_reference is false. + (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); + GE_IF_BOOL_EXEC(is_peer_reference, + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." 
+ + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return PARAM_INVALID;); + + vector output_list = peer_op_desc->GetOutputOffset(); + std::vector offsets_for_fusion = {}; + bool has_offset_attr = + AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); + if (peer_out_data_anchor->GetIdx() < static_cast(output_list.size())) { + if (continuous_input_alloc && !has_offset_attr) { + if (in_data_anchor->GetIdx() == 0) { + continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); + } + // can not use else if, incase only one input + if (in_data_anchor->GetIdx() == static_cast(node->GetAllInDataAnchors().size()) - 1) { + int64_t tensor_desc_size = 0; + Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), + tensor_desc_size); + GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); + + tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; + continuous_mem_size = + output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; + } + GELOGI( + "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " + "real_size[%u].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), + 0, 0); + continue; + } + + output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; + } else { + std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); + return FAILED; + } + peer_op_desc->SetOutputOffset(output_list); + size_t pre_mem_offset = iter->second.mem_offset_; 
int64_t tensor_desc_size = 0; - int64_t nopadding_size = 0; - int64_t real_size = 0; - std::vector offsets_of_fusion = {}; - bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion); - lx_fusion = lx_fusion && !offsets_of_fusion.empty(); - if (lx_fusion) { - if (peer_out_data_anchor->GetIdx() >= static_cast(offsets_of_fusion.size())) { + if (has_offset_attr) { + if (peer_out_data_anchor->GetIdx() < static_cast(offsets_for_fusion.size())) { + auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; + iter->second.mem_offset_ += offset_for_fusion; + } else { std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } - nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()]; - tensor_desc_size = nopadding_size; } else { - if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()), - continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) { - return FAILED; - } + Status ret = + TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); + GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); + + iter->second.mem_offset_ += tensor_desc_size; } - bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; - vector output_list = peer_op_desc->GetOutputOffset(); - if (peer_out_data_anchor->GetIdx() >= static_cast(output_list.size())) { - std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + // If set tensor_actual_size, Memory alignment is not required. 
+ int32_t is_tensor_actual_size = 0; + ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); + if (is_tensor_actual_size == 0) { + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + } + GELOGI( + "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " + "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), + (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); + } + + iter->second.mem_offset_ += MEM_ALIGN_SIZE; + if (!continuous_input_alloc) { + continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; + } + return SUCCESS; +} + +Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) { + GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); + auto out_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); + vector output_list = out_op_desc->GetOutputOffset(); + + if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { + GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", + out_op_desc->GetOutputsSize(), output_list.size()); + return ge::FAILED; + } + + size_t mem_offset = output_list[0]; + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + output_list[out_data_anchor->GetIdx()] = mem_offset; + int64_t tensor_desc_size = 0; + if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != + ge::SUCCESS) { + GELOGE(FAILED, "GetSize failed."); + return FAILED; + } + mem_offset += tensor_desc_size; + if (mem_offset <= 0) { return FAILED; } + mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; + GELOGI( + "[IMAS]Continuous output : Set %s name[%s] output[%d] 
offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); + } + out_op_desc->SetOutputOffset(output_list); + return ge::SUCCESS; +} - // when continuous input has been allocated first input is beginning offset - bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); - if (is_allocated_first_input) { - mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); - continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); - } else { - // set offset for input - output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset; - peer_op_desc->SetOutputOffset(output_list); +Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { + OpDescPtr op_desc = node->GetOpDesc(); + vector output_list = op_desc->GetOutputOffset(); + if (output_list.empty()) { + GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str()); + return FAILED; + } + output_list.at(0) = mem_offset_reuse; + op_desc->SetOutputOffset(output_list); + GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); + + int64_t attr_dim_index; + bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); + if (!get_attr_dim_flag) { + GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); + return FAILED; + } + + size_t extra_memory_size = 0; + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_data_anchor); + auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); + GE_CHECK_NOTNULL(peer_op_desc); + vector output_offsets = peer_op_desc->GetOutputOffset(); + if 
(peer_out_data_anchor->GetIdx() >= static_cast(output_offsets.size())) { + GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); + return ge::FAILED; } + output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; + peer_op_desc->SetOutputOffset(output_offsets); + size_t pre_mem_offset = mem_offset_reuse; - int64_t align_size = tensor_desc_size; - if (is_nopadding) { - mem_offset += nopadding_size; - extra_memory_size += (tensor_desc_size - nopadding_size); - real_size = nopadding_size; - } else { - ge::AlignMemOffset(align_size); - mem_offset += align_size; - // The head and tail of hcom continuous input should be added 512 - extra_memory_size = MEM_ALIGN_SIZE; - real_size = tensor_desc_size; + // Calculate tensor real size of each piece of data and out size of complete data + ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()); + GE_CHECK_NOTNULL(output_desc); + int64_t output_mem_size; + int64_t batch_dim_num = 1; + int64_t out_size; + if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != + SUCCESS) { + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", + peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx()); + return FAILED; } - GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " - "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), - node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), - output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, - is_continuous_input_allocated ? 
0UL : align_size, real_size, is_nopadding); - } + mem_offset_reuse += output_mem_size; + extra_memory_size = extra_memory_size + out_size - output_mem_size; - mem_offset += extra_memory_size; - ge::AlignMemOffset(mem_offset); - continuous_mem_size = mem_offset - continuous_mem_start; - if (is_continuous_input_allocated) { - // not allocate memory here, so no need add 512 in header - iter->second.mem_offset_ -= MEM_ALIGN_SIZE; - } else { - iter->second.mem_offset_ = mem_offset; + GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, + output_mem_size); } + mem_offset_reuse += extra_memory_size; + size_t after_mem_offset = mem_offset_reuse; + GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); return SUCCESS; } -Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { - auto in_data_anchor_list = node->GetAllInDataAnchors(); - if (in_data_anchor_list.empty()) { - GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str()); +Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { + map> mem_reuse_virtual_input_nodes_map; + int64_t memory_type = RT_MEMORY_HBM; + for (const auto &n : compute_graph_->GetAllNodes()) { + OpDescPtr op_desc = n->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + bool attr_continuous = false; + bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous); + GE_IF_BOOL_EXEC(!get_continuous_flag, continue); + bool attr_reuse = false; + bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + GE_IF_BOOL_EXEC(!get_reuse_flag, 
continue); + if (attr_reuse && attr_continuous) { + if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) { + // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse. + std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) + + " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + string batch_label_string; + // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + if (batch_label_string.empty()) { + size_t node_mem_offset = iter->second.mem_offset_; + // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. + Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset); + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str()); + return FAILED; + } + + iter->second.mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + } else { + // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
+ string current_node_full_name = op_desc->GetName(); + size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); + if (pos == string::npos) { + GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.", + kMbatchNodeNameFlag, n->GetName().c_str()); + return FAILED; + } + string fixed_name = current_node_full_name.substr(0, pos); + vector parallel_virtual_input_nodes; + if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { + parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name]; + } + parallel_virtual_input_nodes.emplace_back(n); + mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes; + } + } + } + + int32_t mem_reuse_model = 0; + if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual input nodes failed."); return FAILED; } + return SUCCESS; +} + +Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { + OpDescPtr op_desc = node->GetOpDesc(); + + // 1. 
set memory of to be reused input tensor + auto in_data_anchor_list = node->GetAllInDataAnchors(); auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null."); - return ge::FAILED); + GE_CHECK_NOTNULL(peer_out_data_anchor); auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED); + GE_CHECK_NOTNULL(peer_op_desc); vector in_node_output_offsets = peer_op_desc->GetOutputOffset(); if (peer_out_data_anchor->GetIdx() >= static_cast(in_node_output_offsets.size())) { GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); return FAILED; } - mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); - return SUCCESS; -} + in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; + peer_op_desc->SetOutputOffset(in_node_output_offsets); + GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); -Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, - uint32_t continuous_type) { - GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); - auto out_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); - vector output_list = out_op_desc->GetOutputOffset(); - if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { - GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", - out_op_desc->GetOutputsSize(), output_list.size()); - return ge::FAILED; + // 2. 
set memory of output tensor + vector output_list = op_desc->GetOutputOffset(); + if (output_list.empty()) { + GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str()); + return FAILED; + } + if (op_desc->GetOutputsSize() > output_list.size()) { + GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(), + output_list.size()); + return FAILED; + } + int64_t attr_dim_index; + bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); + if (!get_attr_dim_flag) { + GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); + return FAILED; } - int64_t mem_offset = 0; - bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0); - if (is_nopadding) { - // out tensor memory must be reused input tensor memory - if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) { - return ge::FAILED; + size_t extra_memory_size = 0; + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + output_list[out_data_anchor->GetIdx()] = mem_offset_reuse; + size_t pre_mem_offset = mem_offset_reuse; + + // calculate tensor real size of each piece of data and out size of complete data + ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()); + GE_CHECK_NOTNULL(output_desc); + int64_t output_mem_size; + int64_t batch_dim_num = 1; + int64_t out_size; + if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != + SUCCESS) { + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", + op_desc->GetName().c_str(), out_data_anchor->GetIdx()); + return FAILED; } - } else { - // Get the reference type of the node, default is false - bool is_ref = false; - // If GetBool fail, is_ref is false. 
- (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - // If the output is ref type and refers to the ref of an input, the name of the output - // and the input are the same. Ge encounters ref type, finds matching relationship according - // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast - if (is_ref) { - GELOGI("Current node %s no needs assign continuous output because reference input by name.", - node->GetName().c_str()); - return SUCCESS; + mem_offset_reuse += output_mem_size; + extra_memory_size = extra_memory_size + out_size - output_mem_size; + + GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + pre_mem_offset, out_size, output_mem_size); + } + op_desc->SetOutputOffset(output_list); + mem_offset_reuse += extra_memory_size; + size_t after_mem_offset = mem_offset_reuse; + GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); + return SUCCESS; +} + +Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { + map> mem_reuse_virtual_output_nodes_map; + int64_t memory_type = RT_MEMORY_HBM; + for (const auto &n : compute_graph_->GetAllNodes()) { + OpDescPtr op_desc = n->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + bool attr_continuous = false; + bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous); + GE_IF_BOOL_EXEC(!get_continuous_flag, continue); + bool attr_reuse = false; + bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + GE_IF_BOOL_EXEC(!get_reuse_flag, continue); + + if (attr_reuse && attr_continuous) { + auto in_data_anchor_list = n->GetAllInDataAnchors(); + if 
(in_data_anchor_list.size() != kVirtualOutputNodeInputSize) { + // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse. + std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) + + " has " + FmtToStr(in_data_anchor_list.size()) + " inputs."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + string batch_label_string; + // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + if (batch_label_string.empty()) { + size_t node_mem_offset = iter->second.mem_offset_; + // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. + Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset); + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str()); + return FAILED; + } + iter->second.mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + } else { + // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
+ string current_node_full_name = op_desc->GetName(); + size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); + if (pos == string::npos) { + std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) + + " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName()); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + string fixed_name = current_node_full_name.substr(0, pos); + vector parallel_virtual_output_nodes; + if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { + parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name]; + } + parallel_virtual_output_nodes.emplace_back(n); + mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes; + } } - mem_offset = output_list[0]; } - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset; - int64_t tensor_desc_size = 0; - int64_t nopadding_size = 0; - if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type, - tensor_desc_size, nopadding_size) != ge::SUCCESS) { + int32_t mem_reuse_model = 1; + if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual output nodes failed."); + return FAILED; + } + return SUCCESS; +} + +Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, + int32_t mem_reuse_model) { + // Find max batch label value + string max_batch_label; + GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), + "Get max batch label failed."); + PrintMemoryOffset(); + vector nodes_mem_offset_list; + for (auto &i_map : mem_reuse_nodes_map) { + vector virtual_nodes_list = i_map.second; + int64_t memory_type = RT_MEMORY_HBM; + GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type), + "Get node list 
memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + size_t max_batch_node_mem_offset = iter->second.mem_offset_; + nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); + for (auto &i_node : virtual_nodes_list) { + // Op_desc is not nullptr, it has been checked. + OpDescPtr op_desc = i_node->GetOpDesc(); + string batch_label_string; + // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + if (batch_label_string == max_batch_label) { + Status status = SUCCESS; + if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { + status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset); + } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { + status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset); + } else { + std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } - if (is_nopadding) { - mem_offset += nopadding_size; - } else { - mem_offset += tensor_desc_size; - ge::AlignMemOffset(mem_offset); + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); + return FAILED; + } + iter->second.mem_offset_ = max_batch_node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); + // Only assign memory of max batch nodes. 
+ break; + } } - GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" - " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), - node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, - is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); } - out_op_desc->SetOutputOffset(output_list); - return ge::SUCCESS; + PrintMemoryOffset(); + size_t memory_reuse_index = 0; + for (auto &i_map : mem_reuse_nodes_map) { + vector virtual_nodes_list = i_map.second; + for (auto &i_node : virtual_nodes_list) { + size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index]; + Status status = SUCCESS; + if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { + status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset); + } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { + status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset); + } else { + std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + + if (status != SUCCESS) { + GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); + return FAILED; + } + } + memory_reuse_index++; + } + return SUCCESS; } Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { @@ -643,7 +946,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { GE_CHECK_NOTNULL(mem_assigner_); GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger()); if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), + 
GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}), "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); } } @@ -781,7 +1084,7 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector & } // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. - if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) { + if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) { GELOGE(FAILED, "Failed to set atomic attr separately."); return FAILED; } @@ -928,10 +1231,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve output_list[output_index] = iter->second.mem_offset_; std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%ld] " - "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), - node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, - size, size, batch_label.c_str()); + GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld]" + " batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, + iter->second.mem_offset_, op_desc->GetStreamId(), size, size, batch_label.c_str()); iter->second.mem_offset_ += size; AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); @@ -1007,10 +1309,10 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "memtype[%ld] size[%ld] 
real_size[%ld] batch[%s].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, + "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "size[%ld] real_size[%ld] batch[%s].", + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; @@ -1048,10 +1350,10 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "memtype[%ld] ssize[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_, - op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str()); + "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, + batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); @@ -1127,7 +1429,7 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { return FAILED; } for (auto pair : memory_offset_) { - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], 
memory type[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } @@ -1296,7 +1598,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { } Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const vector &mem_offset_end, int64_t memory_type) { + const vector &mem_offset_end) { GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start); // Parsing offset and size vectors @@ -1325,7 +1627,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { - if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) { + if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) { GELOGE(FAILED, "Set atomic clean attr failed."); return FAILED; } @@ -1336,7 +1638,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in } ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector &atomic_mem_start, - const vector &atomic_mem_size, int64_t memory_type) { + const vector &atomic_mem_size) { auto node_op_desc = node->GetOpDesc(); if (node_op_desc != nullptr) { GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); @@ -1375,10 +1677,9 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve } string atomic_mem_size_str = ss.str(); - GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]" - " memtype[%ld] size[%s]",node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - 
node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type, - atomic_mem_size_str.c_str()); + GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]", + node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), + atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str()); } return SUCCESS; } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index a380e594..def24287 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -119,15 +119,31 @@ class GraphMemoryAssigner { /// ge::Status ReAssignContinuousMemory(bool is_loop_graph); + ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory(); + + ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory(); + + ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse); + + ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse); + + ge::Status ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, int32_t mem_reuse_model); + + ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, + int32_t mem_reuse_model, string &max_batch_label); + + ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, + int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); + ge::Status ReAssignAtomicMemory(bool is_loop_graph); ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); + int64_t &continuous_mem_size, int64_t memory_type); - ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t 
continuous_type); + ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); /// /// @brief check the input of node whether support atomic attr @@ -153,10 +169,10 @@ class GraphMemoryAssigner { ge::Status AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes); ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const std::vector &mem_offset_end, int64_t memory_type); + const std::vector &mem_offset_end); ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector &atomic_mem_start, - const std::vector &atomic_mem_size, int64_t memory_type); + const std::vector &atomic_mem_size); ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 2afbdf30..35844b2d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -1809,7 +1809,7 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector &user_inp /// Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { - GELOGW("There is not AIPP related with index %u.", index); + GELOGW("there is not AIPP related with index %u.", index); return SUCCESS; } @@ -1818,7 +1818,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, "Data node do not contain param aipp!"); GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); - GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", + GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); 
AippConfigInfo aipp_info; @@ -2481,7 +2481,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r uint64_t buffer_length = buffer.length; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data)); - GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", + GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); idx++; diff --git a/metadef b/metadef index fcd0833c..dc6cceb6 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fcd0833cffcd201701f71d17db0c696c1bb01715 +Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 diff --git a/parser b/parser index 1601d66b..4e72aae4 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 1601d66b6187c83cbf38e762beb5538ce2c7c573 +Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 From 2a42c89921ce0e1de2941b433c82abc84f143670 Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Mon, 18 Jan 2021 10:19:08 +0800 Subject: [PATCH 24/41] profiling task desc info --- ge/common/profiling/profiling_manager.cc | 4 +- .../load/new_model_manager/davinci_model.cc | 99 ++++++++++++------- .../load/new_model_manager/davinci_model.h | 3 + ge/hybrid/executor/worker/execution_engine.cc | 25 +---- .../aicore/aicore_node_executor.cc | 12 +-- .../node_executor/aicore/aicore_op_task.h | 2 + .../aicpu/aicpu_node_executor.cc | 12 +-- ge/hybrid/node_executor/task_context.cc | 38 +++++++ ge/hybrid/node_executor/task_context.h | 6 ++ ge/single_op/single_op.cc | 1 + ge/single_op/task/op_task.cc | 6 ++ ge/single_op/task/op_task.h | 4 + inc/framework/common/ge_types.h | 13 +-- metadef | 2 +- parser | 2 +- 15 files changed, 142 insertions(+), 87 deletions(-) diff --git 
a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 1fc4dba6..9ca3aced 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin uint32_t stream_id = task.stream_id; std::string shape_type = task.shape_type; int64_t cur_iter_num = task.cur_iter_num; + uint32_t task_type = task.task_type; data = model_name.append(" ") .append(op_name).append(" ") .append(std::to_string(block_dim)).append(" ") @@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(std::to_string(stream_id)).append(" ") .append(std::to_string(model_id)).append(" ") .append(shape_type).append(" ") - .append(std::to_string(cur_iter_num)).append("\n"); + .append(std::to_string(cur_iter_num)).append(" ") + .append(std::to_string(task_type)).append("\n"); ReporterData reporter_data{}; reporter_data.deviceId = device_id; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 35844b2d..b0c2a0a4 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -3064,6 +3064,65 @@ Status DavinciModel::MallocKnownArgs() { return SUCCESS; } +void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index) { + task_desc_info_.clear(); + bool flag = GetL1FusionEnableOption(); + char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; + INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); + int64_t env_flag = (res == EN_OK) ? 
std::strtol(skt_enable_env, nullptr, kDecimal) : 0; + if (env_flag != 0) { + flag = true; + } + + TaskDescInfo task_desc_info; + if (!om_name_.empty()) { + task_desc_info.model_name = om_name_; + } else { + task_desc_info.model_name = name_; + } + task_desc_info.op_name = op->GetName(); + task_desc_info.block_dim = task_def.kernel().block_dim(); + task_desc_info.task_id = task->GetTaskID(); + task_desc_info.stream_id = task->GetStreamId(); + task_desc_info.shape_type = "static"; + task_desc_info.cur_iter_num = 0; + // task type + task_desc_info.task_type = kTaskTypeInvalid; + auto model_task_type = static_cast(task_def.type()); + if (model_task_type == RT_MODEL_TASK_KERNEL) { + const domi::KernelDef &kernel_def = task_def.kernel(); + const auto &context = kernel_def.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { + task_desc_info.task_type = kTaskTypeAicore; + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Other kernel type: %u", context.kernel_type()); + } + } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Skip task type: %d", static_cast(model_task_type)); + } + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + if (flag) { + if (task->GetSktTaskID() != 0xFFFFFFFF) { + TaskDescInfo task_desc_info; + string op_name = "super_kernel_" + to_string(task_index); + task_desc_info.op_name = op_name; + task_desc_info.task_id = task->GetSktTaskID(); + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + } + } + return; +} + Status DavinciModel::DistributeTask() { GELOGI("do Distribute."); for (auto &task : 
cpu_task_list_) { @@ -3074,19 +3133,11 @@ Status DavinciModel::DistributeTask() { GE_CHK_STATUS_RET(task->Distribute()); } - task_desc_info_.clear(); - bool flag = GetL1FusionEnableOption(); - char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; - INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); - int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; - if (env_flag != 0) { - flag = true; - } - const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); + GE_CHECK_NOTNULL(task); GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); // for data dump auto op_index = std::max(task_def.kernel().context().op_index(), @@ -3106,33 +3157,9 @@ Status DavinciModel::DistributeTask() { GE_IF_BOOL_EXEC(no_need_profiling, continue); SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); - // Load task info for profiling - TaskDescInfo task_desc_info; - if (!om_name_.empty()) { - task_desc_info.model_name = om_name_; - } else { - task_desc_info.model_name = name_; - } - task_desc_info.op_name = op->GetName(); - task_desc_info.block_dim = task_def.kernel().block_dim(); - task_desc_info.task_id = task->GetTaskID(); - task_desc_info.stream_id = task->GetStreamId(); - task_desc_info.shape_type = "static"; - task_desc_info.cur_iter_num = 0; - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - if (flag) { - if (task->GetSktTaskID() != 0xFFFFFFFF) { - TaskDescInfo task_desc_info; - string op_name = "super_kernel_" + to_string(task_index); - task_desc_info.op_name = op_name; - task_desc_info.task_id = task->GetSktTaskID(); - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, 
task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - } - } + + // save task info for profiling + SaveProfilingTaskDescInfo(op, task, task_def, task_index); } // launch dump kernel to aicpu GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 4108f2c7..582535cd 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -623,6 +623,9 @@ class DavinciModel { Status DistributeTask(); + void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index); + uint8_t *MallocFeatureMapMem(size_t data_size); uint8_t *MallocWeightsMem(size_t weights_size); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index b5de2a70..5e9d3607 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * } GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); - auto op_desc = node->GetOpDesc(); - std::string op_name = op_desc->GetName(); - std::string dynamic_model_name = model->GetModelName(); - uint32_t task_id = context_->GetTaskId(); - uint32_t stream_id = context_->GetStreamId(); - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = dynamic_model_name; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = 0; - auto task_defs = model->GetTaskDefs(node); - if (task_defs != nullptr && (*task_defs).size() > 0) { - const auto &task_def = (*task_defs)[0]; - tmp_task_desc_info.block_dim = task_def.kernel().block_dim(); - } - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; - tmp_task_desc_info.shape_type = "dynamic"; - 
tmp_task_desc_info.cur_iter_num = graph_context_->iteration; - GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", - node->GetName().c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); + task_desc_info = context_->GetProfilingTaskDescInfo(); + context_->ClearProfilingTaskDescInfo(); + return SUCCESS; } @@ -247,7 +229,6 @@ Status NodeDoneCallback::ProfilingReport() { GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); std::vector task_desc_info; - TaskDescInfo tmp_task_desc_info; auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); if (profiling_ret != RT_ERROR_NONE) { GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 2abc5b03..a8736154 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -182,16 +182,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function } RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); - uint32_t task_id = 0; - uint32_t stream_id = 0; - rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Get task_id and stream_id failed."); - return rt_ret; - } - context.SetTaskId(task_id); - context.SetStreamId(stream_id); - GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); + // save profiling data + (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), 
context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 5818f384..dd15c608 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -48,6 +48,8 @@ class AiCoreOpTask { bool GetClearAtomic() const {return clear_atomic_;} + uint32_t GetBlockDim() const {return block_dim_;} + protected: Status UpdateTilingInfo(TaskContext &context); virtual std::string GetKeyForOpParamSize() const; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 63ce65e9..2a7cbc67 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -190,16 +190,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionSynchronize(GetStream()); } + +Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + uint32_t task_id = 0; + uint32_t stream_id = 0; + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Get task_id and stream_id failed."); + return rt_ret; + } + GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); + + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string op_name = op_desc->GetName(); + std::string dynamic_model_name = model->GetModelName(); + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = dynamic_model_name; + tmp_task_desc_info.op_name = op_name; + 
tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_type = task_type; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + tmp_task_desc_info.shape_type = "dynamic"; + tmp_task_desc_info.cur_iter_num = iteration_; + task_desc_info.emplace_back(tmp_task_desc_info); + } + + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 6a4bcb8c..9a668f8c 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -22,6 +22,7 @@ #include #include "common/properties_manager.h" #include "external/ge/ge_api_error_codes.h" +#include "framework/common/ge_types.h" #include "hybrid/common/tensor_value.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/executor/rt_callback_manager.h" @@ -108,6 +109,10 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; + const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } + Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); + void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + private: TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, @@ -127,6 +132,7 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; + std::vector task_desc_info; }; } // namespace hybrid } // namespace ge diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 1f3fc5c5..081ce13b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -70,6 +70,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = shape_type; tmp_task_desc_info.cur_iter_num = 0; + tmp_task_desc_info.task_type = op_task->GetTaskType(); GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", 
op_name.c_str(), task_id, stream_id); task_desc_info.emplace_back(tmp_task_desc_info); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index cc63e811..1772ca88 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector &input_desc, return UNSUPPORTED; } +uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } + TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { (void)rtMemFreeManaged(sm_desc_); @@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } const std::string &TbeOpTask::GetStubName() const { return stub_name_; } +uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } + Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); @@ -802,6 +806,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) { return DoUpdateArgTable(param, false); } +uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } + void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { arg_base = reinterpret_cast(io_addr_host_.data()); arg_count = io_addr_host_.size(); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 2d0740a6..78e1f6f0 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -52,6 +52,7 @@ class OpTask { std::vector &output_desc, std::vector &output_buffers, rtStream_t stream); + virtual uint32_t GetTaskType() const; protected: Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace); @@ -85,6 +86,7 @@ class TbeOpTask : public OpTask { size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); + uint32_t GetTaskType() const override; private: friend class SingleOpModel; @@ -113,6
+115,8 @@ class AiCpuBaseTask : public OpTask { ~AiCpuBaseTask() override; UnknowShapeOpType GetUnknownType() const { return unknown_type_; } Status UpdateArgTable(const SingleOpModelParam &param) override; + uint32_t GetTaskType() const override; + protected: Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); Status SetInputConst(); diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index f7e6d679..9ca77f1c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -41,12 +41,7 @@ enum FrameworkType { }; const std::map kFwkTypeToStr = { - {"0", "Caffe"}, - {"1", "MindSpore"}, - {"3", "TensorFlow"}, - {"4", "Android_NN"}, - {"5", "Onnx"} -}; + {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; enum OpEngineType { ENGINE_SYS = 0, // default engine @@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; +// profiling data +const uint32_t kTaskTypeAicore = 0; +const uint32_t kTaskTypeAicpu = 1; +const uint32_t kTaskTypeInvalid = 0xFFFF; + // Data cache, including data address and length struct DataBuffer { public: @@ -256,6 +256,7 @@ struct TaskDescInfo { uint32_t stream_id; std::string shape_type; int64_t cur_iter_num; + uint32_t task_type; }; // Profiling info of graph diff --git a/metadef b/metadef index dc6cceb6..b00c50c2 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 +Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8 diff --git a/parser b/parser index 4e72aae4..f0109a2c 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 +Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5 From 4d1f43053c477868154d72fe21d6ad8ef56de6ca Mon Sep 17 00:00:00 2001 From: wxl Date:
Mon, 18 Jan 2021 15:50:59 +0800 Subject: [PATCH 25/41] infershape paralelly --- ge/hybrid/executor/worker/shape_inference_engine.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 56ae3ea3..46ee6bd6 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -68,7 +68,6 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); { - std::lock_guard lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "Invoke InferShapeAndType failed."); From 40463c84ab92312331159128c2f53f4be863afab Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Mon, 18 Jan 2021 16:39:53 +0800 Subject: [PATCH 26/41] profiling iter num start with 1 --- ge/hybrid/node_executor/task_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index a95fac13..8b7c623f 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -530,7 +530,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block tmp_task_desc_info.task_id = task_id; tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = "dynamic"; - tmp_task_desc_info.cur_iter_num = iteration_; + tmp_task_desc_info.cur_iter_num = iteration_ + 1; task_desc_info.emplace_back(tmp_task_desc_info); } From a3114f023d2384932fb1cadfc6b6a601a59dd8bf Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Mon, 18 Jan 2021 16:53:09 +0800 Subject: [PATCH 27/41] cache support --- ge/CMakeLists.txt | 1 + ge/common/types.cc | 1 + ge/executor/CMakeLists.txt | 1 
+ ge/executor/module.mk | 1 + ge/ge_runner.mk | 1 + ge/graph/build/memory/var_mem_assign_util.cc | 8 +- .../load/new_model_manager/model_utils.cc | 37 ++++-- ge/graph/load/new_model_manager/model_utils.h | 9 ++ ge/graph/manager/graph_var_manager.cc | 74 +++++++++--- ge/graph/manager/graph_var_manager.h | 29 ++++- ge/graph/manager/rdma_pool_allocator.h | 4 + ge/graph/partition/dynamic_shape_partition.cc | 27 ++++- ge/graph/partition/dynamic_shape_partition.h | 3 +- ge/graph/partition/stage_partition.cc | 38 +++++- ge/graph/passes/subgraph_pass.cc | 7 +- .../ops_kernel_store/op/host_op.cc | 3 + .../executor/hybrid_model_async_executor.cc | 7 +- ge/hybrid/model/hybrid_model_builder.cc | 7 +- .../node_executor/hccl/hccl_node_executor.cc | 114 +++++++++++++----- .../node_executor/hccl/hccl_node_executor.h | 2 + .../host_cpu/kernel/assign_kernel.cc | 4 +- .../host_cpu/kernel/data_kernel.cc | 41 +++++++ .../host_cpu/kernel/data_kernel.h | 42 +++++++ .../host_cpu/kernel/no_op_kernel.cc | 2 +- .../host_cpu/kernel/random_uniform_kernel.cc | 4 +- .../host_cpu/kernel/variable_kernel.cc | 4 +- inc/framework/common/types.h | 1 + inc/framework/omg/parser/parser_types.h | 2 + tests/ut/ge/CMakeLists.txt | 1 + .../ut/ge/graph/load/model_utils_unittest.cc | 70 +++++++++++ third_party/fwkacllib/inc/runtime/mem.h | 1 + 31 files changed, 459 insertions(+), 87 deletions(-) create mode 100644 ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc create mode 100644 ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h create mode 100644 tests/ut/ge/graph/load/model_utils_unittest.cc diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index a8eabf05..edbf837d 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -375,6 +375,7 @@ set(TRAIN_SRC_LIST "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" 
"hybrid/node_executor/controlop/control_op_executor.cc" "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "hybrid/node_executor/hccl/hccl_node_executor.cc" diff --git a/ge/common/types.cc b/ge/common/types.cc index 268e7caa..90ff9fe4 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 755bdf97..d7bca1fa 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -104,6 +104,7 @@ set(SRC_LIST "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "../hybrid/node_executor/controlop/control_op_executor.cc" "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "../hybrid/node_executor/rts/rts_node_executor.cc" diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 87abdade..7f2c1c53 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -95,6 +95,7 @@ local_ge_executor_src_files := \ ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ ../hybrid/node_executor/controlop/control_op_executor.cc \ ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ 
../hybrid/node_executor/rts/rts_node_executor.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 460d5068..af938686 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -300,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ hybrid/node_executor/controlop/control_op_executor.cc \ hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ hybrid/node_executor/hccl/hccl_node_executor.cc \ diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 639bfaa0..dfc633af 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -60,9 +60,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr return FAILED); ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); GE_CHECK_NOTNULL(tensor_desc); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) @@ -70,7 +75,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr } uint8_t *dev_ptr = nullptr; - rtMemType_t memory_type = RT_MEMORY_HBM; 
GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index 22a657ad..efd8c619 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -15,18 +15,10 @@ */ #include "graph/load/new_model_manager/model_utils.h" - #include - #include "common/debug/log.h" #include "common/op/ge_op_utils.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" -#include "runtime/base.h" -#include "runtime/kernel.h" - -#include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ @@ -342,8 +334,8 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); v_input_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); @@ -380,6 +372,27 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co return v_input_data_addr; } +/// +/// @ingroup ge +/// @brief Get variable address. 
+/// @return Status +/// +Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr) { + if (ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset) == RT_MEMORY_RDMA_HBM) { + if (offset < 0) { + GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + return PARAM_INVALID; + } + var_addr = reinterpret_cast(offset); + GE_CHECK_NOTNULL(var_addr); + } else { + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + var_addr = model_param.var_base + offset - model_param.logic_var_base; + } + return SUCCESS; +} + /// /// @ingroup ge /// @brief Get output data address. @@ -405,8 +418,8 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C } for (size_t i = 0; i < outputs_size; ++i) { GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); v_output_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/new_model_manager/model_utils.h index 4b3d7ae7..417b9b89 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/new_model_manager/model_utils.h @@ -107,6 +107,15 @@ class ModelUtils { /// @return Status /// static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); + + private: + /// + /// @ingroup ge + /// @brief Get variable address. 
+ /// @return Status + /// + static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr); }; } // namespace ge diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 821de257..928c893f 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -16,17 +16,10 @@ #include "graph/manager/graph_var_manager.h" -#include - -#include "common/l2_cache_optimize.h" -#include "common/types.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" -#include "ge/ge_api_types.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" #include "graph/manager/trans_var_data_utils.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/type_utils.h" using std::map; @@ -37,7 +30,7 @@ namespace ge { VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {} VarResource::~VarResource() { - var_offset_set_.clear(); + var_offset_map_.clear(); var_addr_mgr_map_.clear(); cur_var_tensor_desc_map_.clear(); var_broad_cast_info_.clear(); @@ -91,8 +84,10 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen std::string var_key = VarKey(var_name, tensor_desc); GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); if (var_addr_mgr_map_.count(var_key) == 0) { - uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + - static_cast(reinterpret_cast(address)); + uint64_t logic_address = static_cast(reinterpret_cast(address)); + if (memory_type != RT_MEMORY_RDMA_HBM) { + logic_address += VarManager::Instance(session_id_)->GetVarMemLogicBase(); + } GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), 
TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); @@ -102,7 +97,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen var_addr_mgr.tensor_desc = tensor_desc; var_addr_mgr.memory_type = memory_type; var_addr_mgr_map_[var_key] = var_addr_mgr; - var_offset_set_.insert(logic_address); + var_offset_map_[logic_address] = memory_type; return SUCCESS; } @@ -211,7 +206,14 @@ ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_na return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); } -bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } +bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; } + +rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { + if (var_offset_map_.count(offset) > 0) { + return var_offset_map_[offset]; + } + return RT_MEMORY_HBM; +} VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) { auto iter = var_to_trans_road_.find(var_name); @@ -252,7 +254,19 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr MemResource::MemResource() : total_size_(0), var_mem_size_(0) {} -Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) { +MemResource *MemResource::BuildMemResourceFromType(rtMemType_t mem_type) { + switch (mem_type) { + case RT_MEMORY_HBM: + return new (std::nothrow) HbmMemResource(); + case RT_MEMORY_RDMA_HBM: + return new (std::nothrow) RdmaMemResource(); + default: + return nullptr; + } +} + +Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, + size_t &mem_offset) { size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; uint64_t real_size = size; total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize(); @@ -282,6 +296,19 @@ Status 
MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin return SUCCESS; } +Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) { + uint8_t *buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size); + if (buffer == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); + return MEMALLOC_FAILED; + } + address = reinterpret_cast(reinterpret_cast(buffer)); + var_mem_size_ += size; + GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", + session_id, var_name.c_str(), 0, buffer, size); + return SUCCESS; +} + uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; } void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; }; @@ -428,7 +455,7 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) { MemResource *mem_resource = nullptr; auto iter = mem_resource_map_.find(memory_type); if (iter == mem_resource_map_.end()) { - mem_resource = new (std::nothrow) MemResource(); + mem_resource = MemResource::BuildMemResourceFromType(memory_type); if (mem_resource == nullptr) { GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); return ge::INTERNAL_ERROR; @@ -465,7 +492,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen MemResource *mem_resource = nullptr; auto it = mem_resource_map_.find(memory_type); if (it == mem_resource_map_.end()) { - mem_resource = new (std::nothrow) MemResource(); + mem_resource = MemResource::BuildMemResourceFromType(memory_type); if (mem_resource == nullptr) { GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); return ge::INTERNAL_ERROR; @@ -629,6 +656,15 @@ bool VarManager::IsVarAddr(const int64_t &offset) { return var_resource_->IsVarAddr(offset); } +rtMemType_t 
VarManager::GetVarMemType(const int64_t &offset) { + std::lock_guard lock(mutex_); + if (var_resource_ == nullptr) { + GELOGW("VarManager has not been init."); + return RT_MEMORY_HBM; + } + return var_resource_->GetVarMemType(offset); +} + ge::Status VarManager::MallocVarMemory(size_t memory_size) { std::lock_guard lock(mutex_); uint8_t *var_mem_base = nullptr; @@ -654,12 +690,18 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { std::lock_guard lock(mutex_); + if (memory_type == RT_MEMORY_RDMA_HBM) { + return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); + } string memory_key = std::to_string(session_id_); return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); } uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { std::lock_guard lock(mutex_); + if (memory_type == RT_MEMORY_RDMA_HBM) { + return logic_addr; + } string mem_key = std::to_string(session_id_); uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); if (mem_base == nullptr) { diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index 9cf0068c..924ddcb7 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -158,13 +158,15 @@ class VarResource { bool IsVarAddr(const int64_t &offset); + rtMemType_t GetVarMemType(const int64_t &offset); + std::unordered_map GetAllVarDesc() const { return cur_var_tensor_desc_map_; } private: std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc); uint64_t session_id_; - std::unordered_set var_offset_set_; + std::unordered_map var_offset_map_; std::unordered_map var_addr_mgr_map_; std::unordered_map cur_var_tensor_desc_map_; std::unordered_map> var_to_trans_road_; @@ -176,19 +178,36 @@ class VarResource { class MemResource { public: MemResource(); - ~MemResource() = default; + virtual 
~MemResource() = default; + static MemResource *BuildMemResourceFromType(rtMemType_t mem_type); - Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset); + virtual Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) = 0; uint64_t GetVarMemSize() const; void UpdateVarMemSize(int64_t mem_size); - private: + protected: uint64_t total_size_; uint64_t var_mem_size_; }; +class HbmMemResource : public MemResource { + public: + HbmMemResource() = default; + ~HbmMemResource() override = default; + + Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override; +}; + +class RdmaMemResource : public MemResource { + public: + RdmaMemResource() = default; + ~RdmaMemResource() override = default; + + Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override; +}; + class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { public: static VarManager *Instance(uint64_t session_id); @@ -275,6 +294,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { bool IsVarAddr(const int64_t &offset); + rtMemType_t GetVarMemType(const int64_t &offset); + uint8_t *GetVarMemoryBase(rtMemType_t memory_type); uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type); diff --git a/ge/graph/manager/rdma_pool_allocator.h b/ge/graph/manager/rdma_pool_allocator.h index 4d8cf71e..0a895a11 100644 --- a/ge/graph/manager/rdma_pool_allocator.h +++ b/ge/graph/manager/rdma_pool_allocator.h @@ -53,6 +53,10 @@ class RdmaPoolAllocator { Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size); + uint8_t *GetRdmaBaseAddr() { return rdma_base_addr_; } + + size_t GetRdmaMemSize() { return rdma_mem_size_; } + private: void MergeBlocks(Block *dst, Block *src); diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 
6c81b21f..1c82eaf3 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -213,6 +213,7 @@ std::string DynamicShapePartitioner::DebugString() const { size_t data = 0; size_t netoutput = 0; size_t is_inputnode = 0; + size_t stage = 0; std::stringstream ss; ss << "All unknown shape nodes:" << std::endl; for (const auto &node : unknown_shape_nodes_) { @@ -229,10 +230,13 @@ std::string DynamicShapePartitioner::DebugString() const { netoutput++; } else if (cluster->IsInputNode()) { is_inputnode++; + } else if (cluster->IsIndependent()) { + stage++; } } ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known - << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl; + << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode + << ", stage:" << stage << std::endl; for (const auto &cluster : unique_clusters_) { ss << " " << cluster->DebugString() << std::endl; } @@ -272,12 +276,15 @@ Status DynamicShapePartitioner::InitClusters() { for (const auto &node : graph->GetDirectNode()) { Cluster::Type type = Cluster::DATA; bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty(); + REQUIRE_NOT_NULL(node->GetOpDesc(), "op_desc is null"); if (node->GetType() == DATA) { type = Cluster::DATA; } else if (is_input) { type = Cluster::INPUT_NODE; } else if (node->GetType() == NETOUTPUT) { type = Cluster::NETOUTPUT; + } else if ((node->GetType() == PARTITIONEDCALL) && (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL))) { + type = Cluster::STAGE; } else if (unknown_shape_nodes_.count(node) > 0) { type = Cluster::UNKNOWN_SHAPE; } else { @@ -360,6 +367,9 @@ static std::string ToString(const std::vector &clusters) { void DynamicShapePartitioner::MergeClustersUnknownShape() { // Merge unknown shape clusters for (const auto &cluster : ordered_cluster_) { + if 
(cluster->IsIndependent()) { + continue; + } for (const auto &in_cluster : cluster->Inputs()) { if (!in_cluster->IsUnknownShape()) { continue; @@ -379,6 +389,9 @@ void DynamicShapePartitioner::MergeClustersUnknownShape() { void DynamicShapePartitioner::MergeClustersKnownShape() { // Merge known shape clusters for (const auto &cluster : ordered_cluster_) { + if (cluster->IsIndependent()) { + continue; + } if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) { auto in_cluster = *(cluster->Inputs().begin()); in_cluster->Merge(cluster); @@ -606,6 +619,7 @@ void Cluster::UpdateRank(size_t rank) { bool Cluster::IsData() const { return type_ == DATA; }; bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; }; bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; }; +bool Cluster::IsIndependent() const { return type_ == STAGE; }; bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; }; bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; }; bool Cluster::IsRefVariable() const { @@ -641,6 +655,9 @@ void Cluster::RemoveOutput(ClusterPtr out) { out->in_clusters_.end()); }; void Cluster::Merge(ClusterPtr other) { + if (other->IsIndependent()) { + return; + } nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end()); other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()), other->in_clusters_.end()); @@ -689,7 +706,9 @@ std::vector Cluster::MergeAllPathFrom(ClusterPtr other) { std::unordered_set forward_reached_clusters; std::unordered_set backward_reached_clusters; std::vector path_clusters; - + if (other->IsIndependent()) { + return path_clusters; + } if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) == other->out_clusters_.end()) { return path_clusters; @@ -772,7 +791,7 @@ Status Cluster::BuildFrame() { } } } - if (IsData()) { + if (IsData() || IsIndependent()) { for (const auto &anchor : 
node->GetAllOutDataAnchors()) { AddFrameOutput(anchor); } @@ -888,7 +907,7 @@ Status Cluster::CombinePartitionFrame() { } Status Cluster::BuildPartitionSubgraph() { - if (IsData() || IsNetOutput()) { + if (IsData() || IsNetOutput() || IsIndependent()) { return SUCCESS; } int64_t parent_node_index = 0; diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index 9772615e..e8408ff9 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -32,7 +32,7 @@ class DynamicShapePartitioner { // DATA:DATA, UNKNOWN_SHAPE:unknowshape, KNOWN_SHAPE:knowshape, NETOUTPUT:NETOUTPUT. class Cluster : public std::enable_shared_from_this { public: - enum Type { DATA, INPUT_NODE, NETOUTPUT, KNOWN_SHAPE, UNKNOWN_SHAPE }; + enum Type { DATA, INPUT_NODE, NETOUTPUT, STAGE, KNOWN_SHAPE, UNKNOWN_SHAPE }; Cluster(size_t rank, Type type, NodePtr node, DynamicShapePartitioner *partitioner) : id_(rank), min_(rank), max_(rank), type_(type), partitioner_(partitioner) { nodes_.push_back(node); @@ -45,6 +45,7 @@ class DynamicShapePartitioner { bool IsData() const; bool IsKnownShape() const; bool IsUnknownShape() const; + bool IsIndependent() const; bool IsNetOutput() const; std::vector> Inputs() const; std::vector> Outputs() const; diff --git a/ge/graph/partition/stage_partition.cc b/ge/graph/partition/stage_partition.cc index 93a06afe..f6e49bbd 100644 --- a/ge/graph/partition/stage_partition.cc +++ b/ge/graph/partition/stage_partition.cc @@ -25,6 +25,10 @@ #include "common/types.h" namespace ge { +namespace { +const std::set kSrcNodeTypes = { DATA, AIPPDATA, ANN_DATA }; +} + Status StagePartitioner::Partition() { GE_CHECK_NOTNULL(root_graph_); if (root_graph_->GetParentGraph() != nullptr) { @@ -37,6 +41,10 @@ Status StagePartitioner::Partition() { if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) { continue; } + if ((kSrcNodeTypes.count(op_desc->GetType()) != 0) && 
node->GetInAllNodes().empty()) { + continue; + } + GELOGD("original node %s for stage %u", node->GetName().c_str(), level); stage_nodes_[level].insert(node); } if (stage_nodes_.empty()) { @@ -54,6 +62,13 @@ Status StagePartitioner::Partition() { return FAILED; } + root_graph_->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool { + uint32_t a_level = UINT32_MAX; + (void)AttrUtils::GetInt(a->GetOpDesc(), ATTR_STAGE_LEVEL, a_level); + uint32_t b_level = UINT32_MAX; + (void)AttrUtils::GetInt(b->GetOpDesc(), ATTR_STAGE_LEVEL, b_level); + return a_level < b_level; + }); if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) { GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, " "maybe stage_level was not set correctly.", root_graph_->GetName().c_str()); @@ -76,20 +91,26 @@ Status StagePartitioner::SplitStageLevel() { auto node = nodes.top(); nodes.pop(); GE_CHECK_NOTNULL(node->GetOpDesc()); - if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) { + uint32_t tmp_level = cur_stage_level; + (void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); + if (tmp_level != cur_stage_level) { continue; } for (const auto &in_node : node->GetInAllNodes()) { if (visited_stage_nodes.count(in_node) != 0) { continue; } + if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { + GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", in_node->GetName().c_str()); + return INTERNAL_ERROR; + } + GELOGD("Mark stage_level node %s, stage_level=%u", in_node->GetName().c_str(), cur_stage_level); + if ((kSrcNodeTypes.count(in_node->GetType()) != 0) && in_node->GetInAllNodes().empty()) { + GELOGD("skip data node %s for stage %u", in_node->GetName().c_str(), cur_stage_level); + continue; + } nodes.push(in_node); } - if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { - GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", 
node->GetName().c_str()); - return INTERNAL_ERROR; - } - GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level); visited_stage_nodes.emplace(node); } for (const auto &node : visited_stage_nodes) { @@ -219,6 +240,11 @@ NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const op_desc->AddSubgraphName("f"); op_desc->SetSubgraphInstanceName(0, graph_name); + if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) { + GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed", op_desc->GetName().c_str()); + return nullptr; + } + NodePtr subgraph_node = root_graph_->AddNode(op_desc); if (subgraph_node == nullptr) { GELOGE(FAILED, "Add node %s failed.", graph_name.c_str()); diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index d1111d52..dc6269ac 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -142,17 +142,18 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node GE_CHECK_NOTNULL(in_node); // Need insert memcpy - // 1. Const->NetOutput in subgraph + // 1. Const->NetOutput in subgraph & parent graph is known // 2. AtomicOp->NetOutput in subgraph // 3. OutputContinuesRequiredOp->NetOutput in subgraph // 4. Data->NetOutput in subgraph but parent_node is not while // 5. 
While->NetOutput in known subgraph std::string op_type; - bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || + bool insert_flag = + (NodeUtils::GetConstOpType(in_node, op_type) && !graph->GetParentGraph()->GetGraphUnknownFlag()) || IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && - (kWhileOpTypes.count(in_node->GetType()) != 0)); + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc index a6e00f4a..7f709f03 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc @@ -32,5 +32,8 @@ REGISTER_OP_CREATOR(Assign, HostOp); REGISTER_OP_CREATOR(RandomUniform, HostOp); REGISTER_OP_CREATOR(Add, HostOp); REGISTER_OP_CREATOR(Mul, HostOp); +REGISTER_OP_CREATOR(ConcatV2, HostOp); +REGISTER_OP_CREATOR(Data, HostOp); +REGISTER_OP_CREATOR(Fill, HostOp); } // namespace host_cpu } // namespace ge diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index e9881224..3673edf0 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -59,6 +59,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr &lis run_flag_ = true; listener_ = listener; future_ = std::async(std::launch::async, [&]() -> Status { + GetThreadLocalContext() = *executor_->GetContext()->ge_context; GetContext().SetSessionId(executor_->GetContext()->session_id); return 
RunInternal(); }); @@ -229,7 +230,11 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy } GE_CHECK_GE(tensor_size, 0); - auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); + AllocationAttr attr; + if (GetContext().GetHostExecFlag()) { + attr.SetMemType(HOST_DDR); + } + auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr); GE_CHECK_NOTNULL(tensor_buffer); args.inputs.emplace_back(std::shared_ptr(tensor_buffer.release())); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d1f61985..7ee0bef7 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -772,7 +772,12 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ var_name.c_str(), hybrid_model_.GetSessionId()); - uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(var_node->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } + uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type); if (dev_mem == nullptr) { GELOGE(INTERNAL_ERROR, "Failed to copy var %s from device, cant not get " diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 94c734ca..5387a176 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -15,23 +15,25 @@ */ #include "hybrid/node_executor/hccl/hccl_node_executor.h" -#include "common/ge/ge_util.h" #include "common/ge/plugin_manager.h" #include "common/math/math_util.h" -#include "framework/common/debug/ge_log.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/util/hcom_util.h" #include "graph/runtime_inference_context.h" -#include "hccl/hcom.h" 
+#include "graph/utils/type_utils.h" +#include "hybrid/executor/hybrid_execution_context.h" +namespace ge { namespace { -const size_t kVarTableDims = 2; -const size_t kVarTableRowCnt = 3; -const size_t kVarTableIdxAddr = 1; -const size_t kVarTableIdxLen = 2; +constexpr size_t kVarTableDims = 2; +constexpr size_t kVarTableRowCnt = 3; +constexpr size_t kVarTableIdxAddr = 1; +constexpr size_t kVarTableIdxLen = 2; +const std::set kRdmaReadTypes = { HCOMREMOTEREAD, HCOMREMOTEREFREAD }; +const std::set kRdmaWriteTypes = { HCOMREMOTEWRITE, HCOMREMOTESCATTERWRITE }; +const std::set kRdmaScatterTypes = { HCOMREMOTEREFREAD, HCOMREMOTESCATTERWRITE }; } // namespace -namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HCCL, HcclNodeExecutor); @@ -142,11 +144,22 @@ Status RdmaNodeTask::Init(TaskContext &context) { GE_CHECK_NOTNULL(peer_node->GetOpDesc()); remote_index_ = {peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx()}; - if (node_item.node->GetType() == HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(node_item.node->GetType()) > 0) { local_index_ = 0; } else { local_index_ = op_desc->GetInputIndexByName("local"); } + int32_t offset_idx = node_item.op_desc->GetInputIndexByName("local_offset"); + if ((offset_idx != -1) && (node_item.op_desc->GetInputDescPtr(offset_idx) != nullptr)) { + skip_flag_ = true; + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()); + offset_index_ = { + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()->GetId(), + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetIdx() }; + } return SUCCESS; } @@ -158,8 +171,13 @@ Status 
RdmaNodeTask::ExtractTensor(TaskContext &context, vectorGetTensor(remote_index_.first, remote_index_.second, remote_tensor)); auto data = reinterpret_cast(remote_tensor.GetData()); if (data == nullptr) { - GELOGE(FAILED, "Tensor data is nullptr."); - return FAILED; + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + GELOGE(FAILED, "Tensor data is nullptr."); + return FAILED; + } } auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { @@ -183,30 +201,63 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(tensor_buffer.release())))); } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) } TensorValue *tv; - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - tv = context.MutableOutput(0); + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { + tv = context.MutableOutput(local_index_); } else { tv = context.MutableInput(local_index_); } GE_CHECK_NOTNULL(tv); - auto local_addr = reinterpret_cast(reinterpret_cast(tv->MutableData())); auto row_num = dims.front(); addr_infos.resize(row_num); - auto device_len = tv->GetSize() / row_num; - if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { - GELOGE(FAILED, "Local embedding length is out of range."); - return FAILED; - } + if (skip_flag_) { + int32_t offset_idx = context.GetNodeItem().op_desc->GetInputIndexByName("local_offset"); + GE_CHECK_NOTNULL(context.GetNodeItem().op_desc->GetInputDescPtr(offset_idx)); + auto data_type = context.GetNodeItem().op_desc->GetInputDesc(offset_idx).GetDataType(); + + Tensor offset_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(offset_index_.first, offset_index_.second, offset_tensor)) + if 
(static_cast(offset_tensor.GetSize() / GetSizeByDataType(data_type)) != row_num) { + GELOGE(PARAM_INVALID, "num of offset and remote addr mismatch, offset size=%zu, remote_addr size=%lld, dtype=%s", + offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); + return PARAM_INVALID; + } - for (auto idx = 0; idx < row_num; ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - addr_infos[idx] = {static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, - device_len}; - local_addr += device_len; + auto addr_offset = reinterpret_cast(offset_tensor.GetData()); + GE_CHECK_NOTNULL(addr_offset); + auto base_addr = reinterpret_cast(tv->MutableData()); + GE_CHECK_NOTNULL(base_addr); + + for (auto idx = 0; idx < row_num; idx++) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast(data[line_idx]), + data[line_idx + kVarTableIdxAddr], + reinterpret_cast(reinterpret_cast(base_addr + addr_offset[idx])), + data[line_idx + kVarTableIdxLen] }; + } + } else { + auto local_addr = reinterpret_cast(reinterpret_cast(tv->MutableData())); + auto device_len = tv->GetSize() / row_num; + if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { + GELOGE(FAILED, "Local embedding length is out of range, expect %lld, but %lld exactly.", + data[kVarTableIdxLen], device_len); + return FAILED; + } + + for (auto idx = 0; idx < row_num; ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, + device_len }; + local_addr += device_len; + } } return SUCCESS; @@ -226,6 +277,10 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do } vector addr_infos; GE_CHK_STATUS_RET(ExtractTensor(context, addr_infos)); + if (addr_infos.empty()) { + done_callback(); + return SUCCESS; + } auto callback = 
[this](HcclResult status) { if (status != HCCL_SUCCESS) { @@ -235,6 +290,11 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do this->cond_.notify_all(); GELOGI("rdma callback success."); }; + + std::string executor_type = context.GetNodeItem().NodeType(); + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + executor_type = context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD ? HCOMREMOTEREAD : HCOMREMOTEWRITE; + } HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); @@ -262,7 +322,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const GE_CHK_STATUS_RET(task.Init(context), "hccl node load hccl so failed."); // allocate output mem, output mem or remote read will be calculated when node execute. - if (context.GetNodeItem().NodeType() != HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) == 0) { GE_CHK_STATUS_RET(context.AllocateOutputs(), "hccl node task allocate output failed."); } @@ -274,7 +334,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const Status HcclNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GELOGI("[%s] HcclNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); - if (node->GetType() == HCOMREMOTEREAD || node->GetType() == HCOMREMOTEWRITE) { + if ((kRdmaReadTypes.count(node->GetType()) > 0) || (kRdmaWriteTypes.count(node->GetType()) > 0)) { task = MakeShared(); } else { task = MakeShared(); diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 07dd848b..873f259f 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -55,9 +55,11 @@ class RdmaNodeTask : 
public NodeTask { private: Status ExtractTensor(TaskContext &context, vector &addr_infos); std::pair remote_index_; + std::pair offset_index_; int32_t local_index_ = 0; std::mutex hccl_mutex_; std::condition_variable cond_; + bool skip_flag_; }; class HcclNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 01fd391d..d54195d6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -29,8 +29,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status AssignKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto ref_tensor = context.MutableInput(kAssignRefInputIndex); GE_CHECK_NOTNULL(ref_tensor); const auto value_tensor = context.GetInput(kAssignValueInputIndex); @@ -50,7 +48,7 @@ Status AssignKernel::Compute(TaskContext& context) { GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc new file mode 100644 index 00000000..e34f601a --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hybrid/node_executor/host_cpu/kernel/data_kernel.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "hybrid/node_executor/host_cpu/kernel_factory.h" + +namespace { +constexpr size_t kDataInputIndex = 0; +constexpr size_t kDataOutputIndex = 0; +} + +namespace ge { +namespace hybrid { +namespace host_cpu { +Status DataKernel::Compute(TaskContext& context) { + auto input = context.MutableInput(kDataInputIndex); + GE_CHECK_NOTNULL(input); + GE_CHK_STATUS_RET(context.SetOutput(kDataOutputIndex, *input), "[%s] Failed to set output.", context.GetNodeName()) + GELOGD("[%s] compute success.", node_->GetName().c_str()); + return SUCCESS; +} + +REGISTER_KERNEL_CREATOR(Data, DataKernel); +} // namespace host_cpu +} // namespace hybrid +} // namespace ge diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h new file mode 100644 index 00000000..ca42d647 --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ +#define GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ + +#include "hybrid/node_executor/host_cpu/kernel/kernel.h" + +namespace ge { +namespace hybrid { +namespace host_cpu { +class DataKernel : public Kernel { + public: + DataKernel(const NodePtr &node) : Kernel(node) {} + ~DataKernel() override = default; + DataKernel &operator=(const DataKernel &op) = delete; + DataKernel(const DataKernel &op) = delete; + + /** + * @brief compute for node_task. + * @return result + */ + Status Compute(TaskContext& context) override; +}; +} // namespace host_cpu +} // namespace hybrid +} // namespace ge + +#endif // GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index ff5a7c6d..b1b4e68c 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -23,7 +23,7 @@ namespace ge { namespace hybrid { namespace host_cpu { Status NoOpKernel::Compute(TaskContext& context) { - GELOGI("[%s] no need to compute.", node_->GetName().c_str()); + GELOGD("[%s] no need to compute.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc index 37b07e37..52d48821 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc 
@@ -30,8 +30,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status RandomUniformKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - int64_t seed = 0; int64_t seed2 = 0; (void)AttrUtils::GetInt(node_->GetOpDesc(), "seed", seed); @@ -66,7 +64,7 @@ Status RandomUniformKernel::Compute(TaskContext& context) { return UNSUPPORTED; } - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index 2a836458..16738c2a 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -23,8 +23,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status VariableKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto tensor = context.GetVariable(node_->GetName()); if (tensor == nullptr) { GELOGE(PARAM_INVALID, "tensor is NULL."); @@ -32,7 +30,7 @@ Status VariableKernel::Compute(TaskContext& context) { } // Constant & Variable Op has and only has one output GE_CHK_STATUS_RET(context.SetOutput(0, *tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 4d4c54d1..2dbb1753 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -437,6 +437,7 @@ REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTESCATTERWRITE, 
"HcomRemoteScatterWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index 62c9c750..f2bd4e28 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -370,7 +370,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREDUCESC FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMSEND; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMRECEIVE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREFREAD; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEWRITE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTESCATTERWRITE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARASSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARISINITIALIZEDOP; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 91a6620d..5979f5cf 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -589,6 +589,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" "graph/load/model_helper_unittest.cc" + "graph/load/model_utils_unittest.cc" ) set(PASS_TEST_FILES diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc b/tests/ut/ge/graph/load/model_utils_unittest.cc new file mode 100644 index 00000000..bd86c71e --- /dev/null +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#define protected public +#define private public +#include "graph/load/new_model_manager/model_utils.h" +#include "graph/manager/graph_var_manager.h" + +using namespace std; + +namespace ge { +class UtestModelUtils : public testing::Test { + protected: + void TearDown() {} +}; + +// test ModelUtils::GetVarAddr +TEST_F(UtestModelUtils, get_var_addr_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + runtime_param.var_size = 16; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(runtime_param.var_base + offset - runtime_param.logic_var_base, var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} + +TEST_F(UtestModelUtils, get_var_addr_rdma_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + 
EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(reinterpret_cast(offset), var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} +} // namespace ge diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 32bd9e6b..c305fb12 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -34,6 +34,7 @@ extern "C" { */ #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device +#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device From 2fc8c77a01f54d4c8f2f57d7eea7314d89541b3b Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Mon, 18 Jan 2021 16:59:24 +0800 Subject: [PATCH 28/41] cache support --- inc/framework/omg/parser/parser_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index f2bd4e28..f3b7f00a 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -238,8 +238,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *COSH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SINH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUAREDDIFFERENCE; 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char - *REQUIREDSPACETOBATCHPADDINGS; // for retinanet scope fusion +// for retinanet scope fusion +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REQUIREDSPACETOBATCHPADDINGS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPOSTPROCESSOR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETBOXES; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINAMULTIANCHORS; From ad0d140f6e4e2d7d018f811643aea04cda26fc6f Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Tue, 19 Jan 2021 11:17:35 +0800 Subject: [PATCH 29/41] dts: profiling task desc info save data error --- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 063c5b4c..75a5f6af 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -3067,7 +3067,6 @@ Status DavinciModel::MallocKnownArgs() { void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, const domi::TaskDef &task_def, size_t task_index) { - task_desc_info_.clear(); bool flag = GetL1FusionEnableOption(); char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); @@ -3134,6 +3133,7 @@ Status DavinciModel::DistributeTask() { GE_CHK_STATUS_RET(task->Distribute()); } + task_desc_info_.clear(); const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task_def = model_task_def->task(task_index); From a892b2bf901e9939e49d8125014dbaa599519902 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 19 Jan 2021 12:35:38 +0800 Subject: [PATCH 30/41] cache support --- .../load/new_model_manager/model_utils.cc | 25 
++++++++++++------- ge/graph/manager/graph_var_manager.cc | 4 +-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index efd8c619..d9a9f3ca 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -379,17 +379,24 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co /// Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, uint8_t *&var_addr) { - if (ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset) == RT_MEMORY_RDMA_HBM) { - if (offset < 0) { - GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); + switch (mem_type) { + case RT_MEMORY_RDMA_HBM: + if (offset < 0) { + GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + return PARAM_INVALID; + } + var_addr = reinterpret_cast(offset); + break; + case RT_MEMORY_HBM: + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + var_addr = model_param.var_base + offset - model_param.logic_var_base; + break; + default: + GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); return PARAM_INVALID; - } - var_addr = reinterpret_cast(offset); - GE_CHECK_NOTNULL(var_addr); - } else { - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); - var_addr = model_param.var_base + offset - model_param.logic_var_base; } + GE_CHECK_NOTNULL(var_addr); return SUCCESS; } diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 928c893f..8a829d47 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -212,7 +212,7 @@ rtMemType_t 
VarResource::GetVarMemType(const int64_t &offset) { if (var_offset_map_.count(offset) > 0) { return var_offset_map_[offset]; } - return RT_MEMORY_HBM; + return RT_MEMORY_RESERVED; } VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) { @@ -660,7 +660,7 @@ rtMemType_t VarManager::GetVarMemType(const int64_t &offset) { std::lock_guard lock(mutex_); if (var_resource_ == nullptr) { GELOGW("VarManager has not been init."); - return RT_MEMORY_HBM; + return RT_MEMORY_RESERVED; } return var_resource_->GetVarMemType(offset); } From bac7bcfc09933b1a5ca41bd837138025023b129e Mon Sep 17 00:00:00 2001 From: lwx897429 Date: Fri, 15 Jan 2021 10:29:25 +0800 Subject: [PATCH 31/41] Optional output does not allocate memory --- ge/graph/build/memory/block_mem_assigner.cc | 7 +++++ .../load/new_model_manager/model_utils.cc | 20 +++++++++----- .../node_executor/aicore/aicore_op_task.cc | 26 ++++++++++++++++++- .../node_executor/aicore/aicore_op_task.h | 1 + ge/hybrid/node_executor/task_context.cc | 8 ++++++ metadef | 2 +- parser | 2 +- 7 files changed, 57 insertions(+), 9 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 76e7efbe..a523ce3f 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -24,6 +24,7 @@ #include "graph/buffer.h" #include "graph/ge_attr_value.h" #include "graph/ge_context.h" +#include "graph/types.h" #include "graph/node.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" @@ -1401,6 +1402,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector if (output_op_desc != nullptr) { GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } + // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { @@ -1408,6 +1410,11 @@ Status 
BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + GE_IF_BOOL_EXEC((ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); + std::string peer_name; uint32_t peer_input_index = 0; bool out_node_set_continuous_input = false; diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index d9a9f3ca..3c141f06 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -20,6 +20,7 @@ #include "common/op/ge_op_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/manager/graph_var_manager.h" +#include "graph/types.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -340,7 +341,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps @@ -424,6 +425,18 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C return v_output_data_addr; } for (size_t i = 0; i < outputs_size; ++i) { + const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); + if (tensor_desc == nullptr) { + GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); + continue; + } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + GELOGD("%s is an optional output, the address don't need to be 
saved.", tensor_desc->GetName().c_str()); + continue; + } GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), uint8_t *variable_addr = nullptr; GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); @@ -431,11 +444,6 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); - if (tensor_desc == nullptr) { - GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); - continue; - } int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 80ea579b..f61caf19 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -20,6 +20,7 @@ #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" #include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/types.h" using optiling::OpRunInfo; @@ -34,6 +35,23 @@ constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); + + GE_CHECK_LE(op_desc.GetOutputsSize(), static_cast(INT_MAX)); + int outputs_size = static_cast(op_desc.GetOutputsSize()); + + for (int i = 0; i < outputs_size; ++i) { + const GeTensorDescPtr tensor_desc = op_desc.MutableOutputDesc(i); + if (tensor_desc == nullptr) { + GELOGW("Op: %s, Index: %d, Tensor Desc is null", 
op_desc.GetName().c_str(), i); + continue; + } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + output_indices_to_skip_.push_back(i); + } + } return SUCCESS; } @@ -221,7 +239,8 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) } Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { - size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces(); + size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces() + - output_indices_to_skip_.size(); if (tiling_buffer_ != nullptr) { ++expected_arg_count; } @@ -244,6 +263,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { for (int i = 0; i < task_context.NumOutputs(); ++i) { const auto output = task_context.GetOutput(i); GE_CHECK_NOTNULL(output); + if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) { + GELOGD("Node:%s output[%d] is an optional, the address don't need to be saved.", + task_context.GetNodeName(), i); + continue; + } arg_base_[index++] = reinterpret_cast(output->GetData()); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index dd15c608..3f350531 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -72,6 +72,7 @@ class AiCoreOpTask { uint32_t args_size_ = 0; uint32_t block_dim_ = 1; bool clear_atomic_ = true; + std::vector output_indices_to_skip_; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 8b7c623f..e89ad874 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ 
-18,6 +18,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/debug/log.h" #include "graph/utils/tensor_utils.h" +#include "graph/types.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/subgraph_executor.h" @@ -213,6 +214,13 @@ Status TaskContext::AllocateOutput(int index, return SUCCESS; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + outputs_start_[index] = TensorValue(); + return SUCCESS; + } + auto it = node_item_->ref_outputs.find(index); if (it != node_item_->ref_outputs.end()) { auto &ref_node = it->second; diff --git a/metadef b/metadef index b00c50c2..88d053a5 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8 +Subproject commit 88d053a5f94c40ff21620cef50b87075d5054292 diff --git a/parser b/parser index f0109a2c..6904ba94 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5 +Subproject commit 6904ba9488658afc30076d299183fc8875045f49 From 22f83073fee7d983aea14d827c0de5bda485f4b6 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Tue, 19 Jan 2021 17:00:15 +0800 Subject: [PATCH 32/41] Delete useless vector &subgraph_ptr_list --- ge/graph/build/graph_builder.cc | 22 +++++++++------------- ge/graph/build/graph_builder.h | 9 ++++----- ge/graph/manager/graph_manager.cc | 5 ++--- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index ed77a7f1..7b09cbc6 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -187,8 +187,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph return SUCCESS; } -Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector 
&subgraph_ptr_list, - GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { +Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { if (comp_graph == nullptr) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); return GE_GRAPH_PARAM_NULLPTR; @@ -203,18 +202,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetGraphUnknownFlag()) { GE_CHK_STATUS_RET( - BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), + BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), "Build for dynamic shape graph failed."); return SUCCESS; } - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); return SUCCESS; } -Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_list, +Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { if (ge::GetContext().GetHostExecFlag()) { GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); @@ -222,7 +221,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v } GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); - Status ret = SecondPartition(comp_graph, subgraph_list); + Status ret = SecondPartition(comp_graph); GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); auto subgraph_map = graph_partitioner_.GetSubGraphMap(); @@ -470,7 +469,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { } Status 
GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, - std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Start to build BuildForDynamicShape for dynamic shape."); @@ -517,7 +515,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } // known shape build flow - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); } ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); @@ -719,7 +717,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) return SUCCESS; } -Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list) { +Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { GE_TIMESTAMP_START(GraphPartition2); auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); if (ret != SUCCESS) { @@ -727,10 +725,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list, - GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); + Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); void SetOptions(const GraphManagerOptions &options); private: @@ -59,12 +58,12 @@ class GraphBuilder { Status UpdateDataInputSize(const ge::NodePtr &node_ptr); Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); - Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list); + Status SecondPartition(ge::ComputeGraphPtr &comp_graph); Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); - Status 
BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, + Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); - Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_list, + Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index b0d412dc..d5ee690c 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -3121,9 +3121,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp graph_name.append(std::to_string(graph_node->GetGraphId())); compute_graph->SetName(graph_name); } - std::vector sub_graph_list; - auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model, - session_id); + + auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id); if (ret != SUCCESS) { GELOGE(ret, "SubGraph build Failed."); return ret; From 06272b2340a4952f1cf51ccbedead70e4f9d7303 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 19 Jan 2021 19:06:20 +0800 Subject: [PATCH 33/41] modify cast --- ge/graph/manager/graph_var_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 8a829d47..2469094c 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -302,7 +302,7 @@ Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = 
%llu", var_name.c_str(), size); return MEMALLOC_FAILED; } - address = reinterpret_cast(reinterpret_cast(buffer)); + address = reinterpret_cast(reinterpret_cast(reinterpret_cast(buffer))); var_mem_size_ += size; GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", session_id, var_name.c_str(), 0, buffer, size); From 74424181814c0a6251bacf7b6cb22aabd1be318e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 19 Jan 2021 19:16:50 +0800 Subject: [PATCH 34/41] modify cast --- ge/graph/manager/graph_var_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 2469094c..e7dce824 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -302,7 +302,7 @@ Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); return MEMALLOC_FAILED; } - address = reinterpret_cast(reinterpret_cast(reinterpret_cast(buffer))); + address = static_cast(reinterpret_cast(buffer)); var_mem_size_ += size; GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", session_id, var_name.c_str(), 0, buffer, size); From bc1f6ca510bc8129481891fe00e44149231cb626 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 19:28:25 +0800 Subject: [PATCH 35/41] UpdateTiling pre-place --- ge/hybrid/executor/node_state.cc | 8 +++++++ ge/hybrid/executor/node_state.h | 5 ++++ ge/hybrid/executor/subgraph_executor.cc | 29 ++++++++++++++++++++---- ge/hybrid/executor/subgraph_executor.h | 2 +- ge/hybrid/node_executor/node_executor.cc | 1 - 5 files changed, 39 insertions(+), 6 deletions(-) diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index 171ddaf3..00921705 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ 
-188,6 +188,14 @@ Status NodeState::WaitForPrepareDone() { return SUCCESS; } +void NodeState::SetTaskContext(std::shared_ptr &task_context) { + task_context_ = task_context; +} + +std::shared_ptr NodeState::GetTaskContext() { + return task_context_; +} + Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled"); diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 02a362b4..c68a19ac 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -29,6 +29,7 @@ namespace hybrid { class NodeTask; struct GraphExecutionContext; class SubgraphContext; +class TaskContext; class ShapeFuture { public: @@ -103,6 +104,9 @@ struct NodeState { Status AwaitInputTensors(GraphExecutionContext &context) const; + void SetTaskContext(std::shared_ptr &task_context); + std::shared_ptr GetTaskContext(); + private: const NodeItem *node_item_ = nullptr; std::shared_ptr kernel_task_ = nullptr; @@ -110,6 +114,7 @@ struct NodeState { OpDescPtr op_desc_; ShapeInferenceState shape_inference_state_; SubgraphContext *subgraph_context_; + std::shared_ptr task_context_ = nullptr; std::mutex mu_; }; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index f7b063c7..8f7334de 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -232,6 +232,15 @@ Status SubgraphExecutor::PrepareNodes() { node_state->SetKernelTask(node_item.kernel_task); } } + auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state->GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context 
= std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContex(shared_task_context); } if (!ready_queue_.Push(p_node_state)) { @@ -267,6 +276,19 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta } else { node_state.SetKernelTask(node_item.kernel_task); } + auto unique_task_context = TaskContext::Create(*node_state.GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state.GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state.SetTaskContex(shared_task_context); + GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); + RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] start"); + GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws + RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] end"); return SUCCESS; } @@ -295,10 +317,9 @@ Status SubgraphExecutor::LaunchTasks() { GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone()); GELOGD("[%s] Start to execute.", node_state->GetName().c_str()); - auto task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(task_context); - task_context->SetForceInferShape(force_infer_shape_); - auto shared_task_context = std::shared_ptr(task_context.release()); + auto shared_task_context = node_state->GetTaskContext(); + GE_CHECK_NOTNULL(shared_task_context); + shared_task_context->SetForceInferShape(force_infer_shape_); HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), "[%s] Execute node failed.", node_state->GetName().c_str()); diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index 
d1949947..4523e2c4 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -75,7 +75,7 @@ class SubgraphExecutor { Status GetOutputs(std::vector &outputs, std::vector &output_desc); private: - static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); + Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); Status Init(const std::vector &inputs, const std::vector &input_desc); diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 02427b91..12e98160 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -38,7 +38,6 @@ const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; } Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws GE_CHK_STATUS_RET_NOLOG(context.AllocateWorkspaces()); GE_CHK_STATUS_RET_NOLOG(task.UpdateArgs(context)); return SUCCESS; From c22fe4378608c493fdee9c48ffbdcdf59c78bc93 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 19:38:37 +0800 Subject: [PATCH 36/41] UpdateTiling pre-place --- ge/hybrid/executor/subgraph_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 8f7334de..6103e6e8 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -240,7 +240,7 @@ Status SubgraphExecutor::PrepareNodes() { return INTERNAL_ERROR; } auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContex(shared_task_context); + node_state->SetTaskContext(shared_task_context); } if 
(!ready_queue_.Push(p_node_state)) { @@ -284,7 +284,7 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta return INTERNAL_ERROR; } auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state.SetTaskContex(shared_task_context); + node_state.SetTaskContext(shared_task_context); GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] start"); GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws From f0d77cbb217f767743dfc00d262d31b5d7a0035f Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 20:31:16 +0800 Subject: [PATCH 37/41] UpdateTiling pre-place --- ge/hybrid/executor/subgraph_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 6103e6e8..c4d866a9 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -286,9 +286,9 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta auto shared_task_context = std::shared_ptr(unique_task_context.release()); node_state.SetTaskContext(shared_task_context); GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); - RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] start"); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] start"); GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws - RECORD_COMPILE_EVENT(ctx, node_item.NodeItem().c_str(), "[UpdateTilingData] end"); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] end"); return SUCCESS; } From c193588e2ff401a2dfea143c02813a2ac565eb26 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Tue, 19 Jan 2021 21:02:07 +0800 Subject: [PATCH 38/41] Rename new_model_manager to model_manager. 
--- ge/CMakeLists.txt | 126 +++++++++--------- ge/common/helper/model_cache_helper.cc | 2 +- ge/common/helper/model_helper.cc | 2 +- ge/common/profiling/profiling_manager.cc | 2 +- ge/executor/CMakeLists.txt | 62 ++++----- ge/executor/ge_executor.cc | 6 +- ge/executor/module.mk | 62 ++++----- ge/ge_inference.mk | 64 ++++----- ge/ge_runner.mk | 64 ++++----- ge/graph/execute/graph_execute.cc | 2 +- ge/graph/load/graph_loader.cc | 4 +- .../aipp_utils.cc | 2 +- .../aipp_utils.h | 0 .../cpu_queue_schedule.cc | 2 +- .../cpu_queue_schedule.h | 4 +- .../data_dumper.cc | 4 +- .../data_dumper.h | 0 .../data_inputer.cc | 2 +- .../data_inputer.h | 0 .../davinci_model.cc | 8 +- .../davinci_model.h | 12 +- .../davinci_model_parser.cc | 2 +- .../davinci_model_parser.h | 0 .../model_manager.cc | 6 +- .../model_manager.h | 0 .../model_utils.cc | 2 +- .../model_utils.h | 2 +- .../task_info/end_graph_task_info.cc | 4 +- .../task_info/end_graph_task_info.h | 2 +- .../task_info/event_record_task_info.cc | 4 +- .../task_info/event_record_task_info.h | 2 +- .../task_info/event_wait_task_info.cc | 4 +- .../task_info/event_wait_task_info.h | 2 +- .../task_info/fusion_start_task_info.cc | 4 +- .../task_info/fusion_start_task_info.h | 2 +- .../task_info/fusion_stop_task_info.cc | 4 +- .../task_info/fusion_stop_task_info.h | 2 +- .../task_info/hccl_task_info.cc | 6 +- .../task_info/hccl_task_info.h | 2 +- .../task_info/kernel_ex_task_info.cc | 6 +- .../task_info/kernel_ex_task_info.h | 2 +- .../task_info/kernel_task_info.cc | 8 +- .../task_info/kernel_task_info.h | 2 +- .../task_info/label_goto_ex_task_info.cc | 4 +- .../task_info/label_goto_ex_task_info.h | 2 +- .../task_info/label_set_task_info.cc | 4 +- .../task_info/label_set_task_info.h | 2 +- .../label_switch_by_index_task_info.cc | 4 +- .../label_switch_by_index_task_info.h | 2 +- .../task_info/memcpy_addr_async_task_info.cc | 4 +- .../task_info/memcpy_addr_async_task_info.h | 2 +- .../task_info/memcpy_async_task_info.cc | 4 +- 
.../task_info/memcpy_async_task_info.h | 2 +- .../task_info/model_exit_task_info.cc | 4 +- .../task_info/model_exit_task_info.h | 2 +- .../task_info/profiler_trace_task_info.cc | 4 +- .../task_info/profiler_trace_task_info.h | 2 +- .../task_info/stream_active_task_info.cc | 4 +- .../task_info/stream_active_task_info.h | 2 +- .../task_info/stream_switch_task_info.cc | 6 +- .../task_info/stream_switch_task_info.h | 2 +- .../task_info/stream_switchn_task_info.cc | 6 +- .../task_info/stream_switchn_task_info.h | 2 +- .../task_info/super_kernel/super_kernel.cc | 0 .../task_info/super_kernel/super_kernel.h | 0 .../super_kernel/super_kernel_factory.cc | 0 .../super_kernel/super_kernel_factory.h | 0 .../task_info/task_info.cc | 2 +- .../task_info/task_info.h | 4 +- .../task_info/task_info_factory.h | 0 .../tbe_handle_store.cc | 0 .../tbe_handle_store.h | 0 .../ts_mem_mall.h | 0 .../zero_copy_offset.cc | 6 +- .../zero_copy_offset.h | 2 +- .../zero_copy_task.cc | 4 +- .../zero_copy_task.h | 0 .../executor/hybrid_model_async_executor.cc | 2 +- .../executor/hybrid_model_async_executor.h | 2 +- ge/hybrid/executor/hybrid_model_executor.h | 2 +- ge/hybrid/hybrid_davinci_model.h | 2 +- ge/hybrid/model/hybrid_model.cc | 2 +- ge/hybrid/model/hybrid_model.h | 4 +- ge/hybrid/model/hybrid_model_builder.cc | 4 +- ge/hybrid/model/hybrid_model_builder.h | 2 +- .../node_executor/aicore/aicore_op_task.cc | 2 +- .../aicpu/aicpu_node_executor.cc | 2 +- .../compiledsubgraph/known_node_executor.cc | 4 +- .../compiledsubgraph/known_node_executor.h | 2 +- ge/init/gelib.cc | 2 +- ge/session/inner_session.cc | 2 +- ge/session/session_manager.cc | 2 +- ge/single_op/single_op.cc | 4 +- ge/single_op/single_op_model.cc | 2 +- ge/single_op/single_op_model.h | 2 +- .../task/aicpu_kernel_task_builder.cc | 2 +- ge/single_op/task/aicpu_task_builder.cc | 4 +- ge/single_op/task/build_task_utils.cc | 2 +- ge/single_op/task/tbe_task_builder.cc | 2 +- tests/ut/ge/CMakeLists.txt | 72 +++++----- 
tests/ut/ge/graph/ge_executor_unittest.cc | 10 +- tests/ut/ge/graph/graph_load_unittest.cc | 4 +- .../ut/ge/graph/load/data_dumper_unittest.cc | 4 +- .../ge/graph/load/davinci_model_unittest.cc | 2 +- .../ge/graph/load/end_graph_task_unittest.cc | 4 +- .../ge/graph/load/hccl_task_info_unittest.cc | 4 +- .../load/kernel_ex_task_info_unittest.cc | 4 +- .../graph/load/kernel_task_info_unittest.cc | 6 +- .../memcpy_addr_async_task_info_unittest.cc | 4 +- .../load/memcpy_async_task_info_unittest.cc | 4 +- .../ut/ge/graph/load/model_utils_unittest.cc | 2 +- ...new_model_manager_data_inputer_unittest.cc | 2 +- ...ew_model_manager_davinci_model_unittest.cc | 32 ++--- ...el_manager_model_manager_aicpu_unittest.cc | 6 +- ...ew_model_manager_model_manager_unittest.cc | 6 +- .../new_model_manager_task_build_unittest.cc | 2 +- tests/ut/ge/graph/load/new_op_test_utils.h | 2 +- .../graph/load/output_net_output_unittest.cc | 4 +- .../graph/load/tbe_handle_store_unittest.cc | 2 +- .../ge/single_op/single_op_model_unittest.cc | 2 +- 120 files changed, 406 insertions(+), 406 deletions(-) rename ge/graph/load/{new_model_manager => model_manager}/aipp_utils.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/aipp_utils.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/cpu_queue_schedule.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/cpu_queue_schedule.h (97%) rename ge/graph/load/{new_model_manager => model_manager}/data_dumper.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/data_dumper.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/data_inputer.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/data_inputer.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/davinci_model.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/davinci_model.h (98%) rename ge/graph/load/{new_model_manager => model_manager}/davinci_model_parser.cc (92%) rename 
ge/graph/load/{new_model_manager => model_manager}/davinci_model_parser.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/model_manager.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/model_manager.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/model_utils.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/model_utils.h (98%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/end_graph_task_info.cc (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/end_graph_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_record_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_record_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_wait_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/event_wait_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_start_task_info.cc (92%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_start_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_stop_task_info.cc (92%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/fusion_stop_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/hccl_task_info.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/hccl_task_info.h (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_ex_task_info.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_ex_task_info.h (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_task_info.cc (99%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/kernel_task_info.h (98%) rename ge/graph/load/{new_model_manager => 
model_manager}/task_info/label_goto_ex_task_info.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_goto_ex_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_set_task_info.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_set_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_switch_by_index_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/label_switch_by_index_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_addr_async_task_info.cc (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_addr_async_task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_async_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/memcpy_async_task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/model_exit_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/model_exit_task_info.h (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/profiler_trace_task_info.cc (93%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/profiler_trace_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_active_task_info.cc (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_active_task_info.h (95%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switch_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switch_task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switchn_task_info.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/stream_switchn_task_info.h (96%) rename 
ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel.cc (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel_factory.cc (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/super_kernel/super_kernel_factory.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/task_info.cc (94%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/task_info.h (96%) rename ge/graph/load/{new_model_manager => model_manager}/task_info/task_info_factory.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/tbe_handle_store.cc (100%) rename ge/graph/load/{new_model_manager => model_manager}/tbe_handle_store.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/ts_mem_mall.h (100%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_offset.cc (98%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_offset.h (98%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_task.cc (97%) rename ge/graph/load/{new_model_manager => model_manager}/zero_copy_task.h (100%) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index edbf837d..888f565c 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -129,38 +129,38 @@ set(TRAIN_SRC_LIST "graph/label/partitioned_call_label_maker.cc" "graph/label/while_label_maker.cc" "graph/load/graph_loader.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - 
"graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/hccl_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + 
"graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/hccl_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" "graph/manager/graph_context.cc" "graph/manager/graph_manager.cc" "graph/manager/graph_manager_utils.cc" @@ -606,37 +606,37 @@ set(INFER_SRC_LIST "graph/manager/util/rt_context_util.cc" "graph/manager/util/variable_accelerate_ctrl.cc" "graph/manager/util/debug.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - 
"graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_utils.cc" + 
"graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "single_op/task/op_task.cc" "single_op/task/build_task_utils.cc" "single_op/task/tbe_task_builder.cc" diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index 0b592e11..7ec8cc0f 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -28,7 +28,7 @@ #include "framework/common/util.h" #include "graph/detail/attributes_holder.h" 
#include "graph/detail/model_serialize_imp.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 1d5a4a9b..92f279be 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "framework/omg/version.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 9ca3aced..32f0ee40 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -21,7 +21,7 @@ #include "framework/common/string_util.h" #include "graph/ge_context.h" #include "runtime/base.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const char *const kTrainingTrace = "training_trace"; diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index d7bca1fa..26e53c7b 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -32,37 +32,37 @@ set(SRC_LIST "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" - "../graph/load/new_model_manager/davinci_model.cc" - "../graph/load/new_model_manager/davinci_model_parser.cc" - "../graph/load/new_model_manager/model_manager.cc" - "../graph/load/new_model_manager/tbe_handle_store.cc" - "../graph/load/new_model_manager/cpu_queue_schedule.cc" - "../graph/load/new_model_manager/model_utils.cc" - "../graph/load/new_model_manager/aipp_utils.cc" - 
"../graph/load/new_model_manager/data_inputer.cc" - "../graph/load/new_model_manager/data_dumper.cc" - "../graph/load/new_model_manager/zero_copy_task.cc" - "../graph/load/new_model_manager/zero_copy_offset.cc" - "../graph/load/new_model_manager/task_info/task_info.cc" - "../graph/load/new_model_manager/task_info/event_record_task_info.cc" - "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_task_info.cc" - "../graph/load/new_model_manager/task_info/label_set_task_info.cc" - "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "../graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "../graph/load/model_manager/davinci_model.cc" + "../graph/load/model_manager/davinci_model_parser.cc" + "../graph/load/model_manager/model_manager.cc" + "../graph/load/model_manager/tbe_handle_store.cc" + "../graph/load/model_manager/cpu_queue_schedule.cc" + "../graph/load/model_manager/model_utils.cc" + 
"../graph/load/model_manager/aipp_utils.cc" + "../graph/load/model_manager/data_inputer.cc" + "../graph/load/model_manager/data_dumper.cc" + "../graph/load/model_manager/zero_copy_task.cc" + "../graph/load/model_manager/zero_copy_offset.cc" + "../graph/load/model_manager/task_info/task_info.cc" + "../graph/load/model_manager/task_info/event_record_task_info.cc" + "../graph/load/model_manager/task_info/event_wait_task_info.cc" + "../graph/load/model_manager/task_info/fusion_start_task_info.cc" + "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "../graph/load/model_manager/task_info/kernel_task_info.cc" + "../graph/load/model_manager/task_info/label_set_task_info.cc" + "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "../graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "../graph/load/model_manager/task_info/stream_active_task_info.cc" + "../graph/load/model_manager/task_info/stream_switch_task_info.cc" + "../graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "../graph/load/model_manager/task_info/end_graph_task_info.cc" + "../graph/load/model_manager/task_info/model_exit_task_info.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "../graph/common/local_context.cc" "../opskernel_manager/ops_kernel_builder_manager.cc" "../single_op/single_op_manager.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 0ea0e66d..b71a8be4 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -29,15 +29,15 @@ #include "framework/common/util.h" #include "graph/execute/graph_execute.h" #include 
"graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "mmpa/mmpa_api.h" #include "single_op/single_op_manager.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" using std::string; diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 7f2c1c53..4966eeb5 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -22,37 +22,37 @@ local_ge_executor_src_files := \ ../graph/manager/util/debug.cc \ ../model/ge_model.cc \ ../model/ge_root_model.cc \ - ../graph/load/new_model_manager/davinci_model.cc \ - ../graph/load/new_model_manager/davinci_model_parser.cc \ - ../graph/load/new_model_manager/model_manager.cc \ - ../graph/load/new_model_manager/tbe_handle_store.cc \ - ../graph/load/new_model_manager/cpu_queue_schedule.cc \ - ../graph/load/new_model_manager/model_utils.cc \ - ../graph/load/new_model_manager/aipp_utils.cc \ - ../graph/load/new_model_manager/data_inputer.cc \ - ../graph/load/new_model_manager/data_dumper.cc \ - ../graph/load/new_model_manager/zero_copy_task.cc \ - ../graph/load/new_model_manager/zero_copy_offset.cc \ - ../graph/load/new_model_manager/task_info/task_info.cc \ - ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ - ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - 
../graph/load/new_model_manager/task_info/kernel_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + ../graph/load/model_manager/davinci_model.cc \ + ../graph/load/model_manager/davinci_model_parser.cc \ + ../graph/load/model_manager/model_manager.cc \ + ../graph/load/model_manager/tbe_handle_store.cc \ + ../graph/load/model_manager/cpu_queue_schedule.cc \ + ../graph/load/model_manager/model_utils.cc \ + ../graph/load/model_manager/aipp_utils.cc \ + ../graph/load/model_manager/data_inputer.cc \ + ../graph/load/model_manager/data_dumper.cc \ + ../graph/load/model_manager/zero_copy_task.cc \ + ../graph/load/model_manager/zero_copy_offset.cc \ + ../graph/load/model_manager/task_info/task_info.cc \ + ../graph/load/model_manager/task_info/event_record_task_info.cc \ + ../graph/load/model_manager/task_info/event_wait_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_start_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + 
../graph/load/model_manager/task_info/kernel_task_info.cc \ + ../graph/load/model_manager/task_info/label_set_task_info.cc \ + ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + ../graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + ../graph/load/model_manager/task_info/stream_active_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switch_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + ../graph/load/model_manager/task_info/end_graph_task_info.cc \ + ../graph/load/model_manager/task_info/model_exit_task_info.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ ../opskernel_manager/ops_kernel_builder_manager.cc \ ../single_op/single_op_manager.cc \ ../single_op/single_op_model.cc \ diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 6f9e60db..a20ff437 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -228,37 +228,37 @@ OME_HOST_SRC_FILES := \ graph/manager/util/rt_context_util.cc \ graph/manager/util/variable_accelerate_ctrl.cc \ graph/manager/util/debug.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ - graph/load/new_model_manager/data_dumper.cc \ - 
graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/task_info/task_info.cc \ + 
graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ single_op/task/op_task.cc \ single_op/task/build_task_utils.cc \ single_op/task/tbe_task_builder.cc \ @@ -270,7 +270,7 @@ OME_HOST_SRC_FILES := \ single_op/single_op_manager.cc \ hybrid/hybrid_davinci_model_stub.cc \ hybrid/node_executor/aicpu/aicpu_ext_info.cc \ - # graph/load/new_model_manager/task_info/hccl_task_info.cc + # graph/load/model_manager/task_info/hccl_task_info.cc OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index af938686..4434dc2b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ graph/label/partitioned_call_label_maker.cc \ graph/label/while_label_maker.cc \ graph/load/graph_loader.cc \ - 
graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/hccl_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc 
\ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/hccl_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ graph/manager/graph_context.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ diff --git 
a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 3c5618e8..79c22a29 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/model_parser/base.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" #include "runtime/mem.h" diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 6272e581..29afc939 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -22,8 +22,8 @@ #include "common/helper/model_helper.h" #include "common/util.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/model_manager/aipp_utils.cc similarity index 98% rename from ge/graph/load/new_model_manager/aipp_utils.cc rename to ge/graph/load/model_manager/aipp_utils.cc index e0e60d2b..8a18c421 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/model_manager/aipp_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/aipp_utils.h" +#include "graph/load/model_manager/aipp_utils.h" #include diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/model_manager/aipp_utils.h similarity index 100% rename from ge/graph/load/new_model_manager/aipp_utils.h rename to ge/graph/load/model_manager/aipp_utils.h diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc similarity index 99% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.cc rename to ge/graph/load/model_manager/cpu_queue_schedule.cc index 430321bd..d9b716ea 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" #include "common/debug/ge_log.h" #include "common/debug/log.h" diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h similarity index 97% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.h rename to ge/graph/load/model_manager/cpu_queue_schedule.h index 8999e975..de4c5327 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/model_manager/cpu_queue_schedule.h @@ -20,8 +20,8 @@ #include #include "common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "runtime/kernel.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc similarity index 99% rename from ge/graph/load/new_model_manager/data_dumper.cc rename to ge/graph/load/model_manager/data_dumper.cc index 
a12a2b2a..947aac1d 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/model_manager/data_dumper.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/data_dumper.h" +#include "graph/load/model_manager/data_dumper.h" #include #include @@ -29,7 +29,7 @@ #include "framework/common/util.h" #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/util/debug.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/model_manager/data_dumper.h similarity index 100% rename from ge/graph/load/new_model_manager/data_dumper.h rename to ge/graph/load/model_manager/data_dumper.h diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/model_manager/data_inputer.cc similarity index 94% rename from ge/graph/load/new_model_manager/data_inputer.cc rename to ge/graph/load/model_manager/data_inputer.cc index 5efc710e..0fe75465 100755 --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/model_manager/data_inputer.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/model_manager/data_inputer.h similarity index 100% rename from ge/graph/load/new_model_manager/data_inputer.h rename to ge/graph/load/model_manager/data_inputer.h diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc similarity index 99% rename from ge/graph/load/new_model_manager/davinci_model.cc rename to ge/graph/load/model_manager/davinci_model.cc index 75a5f6af..2430ae3d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include #include @@ -36,9 +36,9 @@ #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/graph.h" -#include "graph/load/new_model_manager/cpu_queue_schedule.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h similarity index 98% rename from ge/graph/load/new_model_manager/davinci_model.h rename to ge/graph/load/model_manager/davinci_model.h index f02015a8..53db77a7 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -32,12 +32,12 @@ #include "common/types.h" #include "framework/common/util.h" #include 
"graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/aipp_utils.h" -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/data_inputer.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/aipp_utils.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/data_inputer.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "graph/model.h" #include "graph/node.h" #include "graph/op_desc.h" diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/model_manager/davinci_model_parser.cc similarity index 92% rename from ge/graph/load/new_model_manager/davinci_model_parser.cc rename to ge/graph/load/model_manager/davinci_model_parser.cc index 76526de2..c6f48b84 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/model_manager/davinci_model_parser.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" namespace ge { DavinciModelParser::DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/model_manager/davinci_model_parser.h similarity index 100% rename from ge/graph/load/new_model_manager/davinci_model_parser.h rename to ge/graph/load/model_manager/davinci_model_parser.h diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc similarity index 99% rename from ge/graph/load/new_model_manager/model_manager.cc rename to ge/graph/load/model_manager/model_manager.cc index edc60e50..7cf869ac 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include @@ -28,8 +28,8 @@ #include "framework/common/util.h" #include "graph/common/ge_call_wrapper.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "graph/utils/attr_utils.h" diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h similarity index 100% rename from ge/graph/load/new_model_manager/model_manager.h rename to ge/graph/load/model_manager/model_manager.h diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc similarity index 99% rename from ge/graph/load/new_model_manager/model_utils.cc rename to ge/graph/load/model_manager/model_utils.cc index 3c141f06..410e9364 
100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include #include "common/debug/log.h" #include "common/op/ge_op_utils.h" diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/model_manager/model_utils.h similarity index 98% rename from ge/graph/load/new_model_manager/model_utils.h rename to ge/graph/load/model_manager/model_utils.h index 417b9b89..26f8d700 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/model_manager/model_utils.h @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/types.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" #include "graph/utils/tensor_adapter.h" diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc rename to ge/graph/load/model_manager/task_info/end_graph_task_info.cc index b8b02f59..c306c650 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kDumpFlag = 2; diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/model_manager/task_info/end_graph_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.h rename to ge/graph/load/model_manager/task_info/end_graph_task_info.h index 614544f9..efce19b2 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EndGraphTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/model_manager/task_info/event_record_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.cc rename to ge/graph/load/model_manager/task_info/event_record_task_info.cc index 11589258..f736c386 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/model_manager/task_info/event_record_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.h rename to ge/graph/load/model_manager/task_info/event_record_task_info.h index d3f5961e..a79f1d3b 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventRecordTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc rename to ge/graph/load/model_manager/task_info/event_wait_task_info.cc index 5701179b..34058502 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/model_manager/task_info/event_wait_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.h rename to ge/graph/load/model_manager/task_info/event_wait_task_info.h index a92252d7..bd8acab1 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventWaitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.cc index 32c79647..6feea9e4 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.h index b1897533..284a5e0f 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStartTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc index dd4edfd0..22d1589c 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.h index 880ca487..994498d5 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStopTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/model_manager/task_info/hccl_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.cc rename to ge/graph/load/model_manager/task_info/hccl_task_info.cc index 7b18a9a3..2d0ad560 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" #include #include "common/opskernel/ops_kernel_info_store.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace ge { std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/model_manager/task_info/hccl_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.h rename to ge/graph/load/model_manager/task_info/hccl_task_info.h index 777f5bbf..3df155ad 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.h @@ -23,7 +23,7 @@ #include #include "common/opskernel/ge_task_info.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/manager/util/hcom_util.h" namespace ge { class HcclTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 98d9cb78..c34a4e9a 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include @@ -24,8 +24,8 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" #include "graph/attr_value.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index f6873c6c..265316ce 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc similarity index 99% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_task_info.cc index 83bf2779..27fe8eb0 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" #include #include #include @@ -25,9 +25,9 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/l2_cache_optimize.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/kernel.h" #include "super_kernel/super_kernel.h" #include "super_kernel/super_kernel_factory.h" diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_task_info.h index cea25320..7cabf259 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h @@ -22,7 +22,7 @@ #include #include -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { class KernelTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc index 393c0b31..1921c85d 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc @@ -14,9 +14,9 @@ * limitations under the 
License. */ -#include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h" +#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h index f83cd1d9..25310368 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelGotoExTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/model_manager/task_info/label_set_task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.cc rename to ge/graph/load/model_manager/task_info/label_set_task_info.cc index 5fa96a96..45cb586a 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_set_task_info.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/model_manager/task_info/label_set_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.h rename to ge/graph/load/model_manager/task_info/label_set_task_info.h index bb02ccf0..36e41f1b 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_set_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelSetTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc index ae7865a4..c2997678 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h" +#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { constexpr uint8_t kLabelSwitchIndexNum = 1; diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h index 538b2d68..00ca0844 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelSwitchByIndexTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc index b95705f0..a1f58e42 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kAlignBytes = 64; diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h index c7645b9f..4631c67c 100644 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h +++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class MemcpyAddrAsyncTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc index fa320d81..22f9267d 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.h index 43b5ba13..728305ff 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc rename to ge/graph/load/model_manager/task_info/model_exit_task_info.cc index ff8057aa..eb200e3f 100644 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc +++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/model_exit_task_info.h" +#include "graph/load/model_manager/task_info/model_exit_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h b/ge/graph/load/model_manager/task_info/model_exit_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.h rename to ge/graph/load/model_manager/task_info/model_exit_task_info.h index c219fcc8..1e4a3923 100644 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h +++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class ModelExitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc index 533c459a..b8fd1828 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc +++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.h index 8989096d..b57ebfae 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h +++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class ProfilerTraceTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_active_task_info.cc index 33ebea3b..ec807777 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" #include #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/model_manager/task_info/stream_active_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.h rename to ge/graph/load/model_manager/task_info/stream_active_task_info.h index c6b263b4..dfbf48d1 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class StreamActiveTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.cc index 616ba85f..f129950a 100644 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" #include #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.h index a72d7de2..0e75e183 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class StreamSwitchTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc index 27adbbe4..35eb23e3 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h" +#include "graph/load/model_manager/task_info/stream_switchn_task_info.h" #include #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace { const uint8_t kStreamSwitchnInputNum = 1; diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.h index 3d65a086..6e6ca190 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h rename to 
ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/task_info.cc rename to ge/graph/load/model_manager/task_info/task_info.cc index 674d477f..e521f95c 100755 --- a/ge/graph/load/new_model_manager/task_info/task_info.cc +++ b/ge/graph/load/model_manager/task_info/task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/task_info.h rename to ge/graph/load/model_manager/task_info/task_info.h index 26f22564..99ec3c4e 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/model_manager/task_info/task_info.h @@ -22,8 +22,8 @@ #include "cce/customize.h" #include "framework/common/taskdown_common.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/ts_mem_mall.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/ts_mem_mall.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" #include "proto/task.pb.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/model_manager/task_info/task_info_factory.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/task_info_factory.h rename to ge/graph/load/model_manager/task_info/task_info_factory.h diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/model_manager/tbe_handle_store.cc similarity index 100% rename from ge/graph/load/new_model_manager/tbe_handle_store.cc rename to ge/graph/load/model_manager/tbe_handle_store.cc diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/model_manager/tbe_handle_store.h similarity index 100% rename from ge/graph/load/new_model_manager/tbe_handle_store.h rename to ge/graph/load/model_manager/tbe_handle_store.h diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h similarity index 100% rename from ge/graph/load/new_model_manager/ts_mem_mall.h rename to 
ge/graph/load/model_manager/ts_mem_mall.h diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc similarity index 98% rename from ge/graph/load/new_model_manager/zero_copy_offset.cc rename to ge/graph/load/model_manager/zero_copy_offset.cc index f27d862d..3f8555bb 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/model_manager/zero_copy_offset.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/zero_copy_task.h" namespace ge { namespace { diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h similarity index 98% rename from ge/graph/load/new_model_manager/zero_copy_offset.h rename to ge/graph/load/model_manager/zero_copy_offset.h index 66fcd887..fc63fced 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/model_manager/zero_copy_offset.h @@ -25,7 +25,7 @@ #include "external/ge/ge_api_error_codes.h" #include "framework/common/ge_types.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" #include "runtime/mem.h" diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/model_manager/zero_copy_task.cc similarity index 97% rename from ge/graph/load/new_model_manager/zero_copy_task.cc rename to ge/graph/load/model_manager/zero_copy_task.cc index b938f14b..367de87a 100755 --- 
a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/model_manager/zero_copy_task.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "common/ge_compiler_options.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/model_manager/zero_copy_task.h similarity index 100% rename from ge/graph/load/new_model_manager/zero_copy_task.h rename to ge/graph/load/model_manager/zero_copy_task.h diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 3673edf0..b7c6c33d 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -15,7 +15,7 @@ */ #include "hybrid/executor/hybrid_model_async_executor.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "graph/ge_context.h" diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 21d2d033..a69cc45f 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -21,7 +21,7 @@ #include #include "external/ge/ge_api_error_codes.h" #include "external/ge/ge_api_types.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "hybrid/executor/hybrid_model_executor.h" #include "runtime/stream.h" diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 6299d4ff..6b2e52b4 100644 --- 
a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -17,7 +17,7 @@ #ifndef GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ #define GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ #include "common/thread_pool.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/rt_callback_manager.h" #include "hybrid/executor/subgraph_executor.h" diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 5349390c..369c732a 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -19,7 +19,7 @@ #include #include "external/ge/ge_api_error_codes.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "model/ge_root_model.h" namespace ge { diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 91b6a549..7e5d8fe5 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -17,7 +17,7 @@ #include "hybrid_model.h" #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index e521b776..72495cad 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -21,8 +21,8 @@ #include #include #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/data_inputer.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/data_inputer.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/node.h" #include "hybrid/common/tensor_value.h" #include 
"hybrid/model/node_item.h" diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 7ee0bef7..861cd30a 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -20,8 +20,8 @@ #include "graph/ge_context.h" #include "graph/build/memory/var_mem_assign_util.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index 55a19b6c..045bf3ef 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -21,7 +21,7 @@ #include #include #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/node.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/model/node_item.h" diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index f61caf19..f1bd6466 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -19,7 +19,7 @@ #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "graph/types.h" using optiling::OpRunInfo; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 
2a7cbc67..109939d9 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -18,7 +18,7 @@ #include "framework/common/taskdown_common.h" #include "common/formats/formats.h" #include "aicpu/common/aicpu_task_struct.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/utils/node_utils.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 7f2c6288..2bca3e06 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -21,8 +21,8 @@ #include "common/ge/ge_util.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" #include "hybrid/executor/hybrid_execution_context.h" namespace ge { diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 2dde993b..6e9740ad 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -19,7 +19,7 @@ #include "hybrid/node_executor/node_executor.h" #include "hybrid/model/hybrid_model.h" #include "graph/op_desc.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { namespace hybrid { diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index b81632bd..1a97b6f8 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -37,7 +37,7 @@ #include 
"graph/common/ge_call_wrapper.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index c4f8a53b..5a67f7cd 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -29,7 +29,7 @@ #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" #include "graph/common/local_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 5d5a299a..3c531747 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -20,7 +20,7 @@ #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/util/rt_context_util.h" using std::map; diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 081ce13b..2fa7182b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -22,11 +22,11 @@ #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" #include "single_op/task/build_task_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { 
namespace { diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 2a1a14e6..220adde8 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 6d0109fe..6637271c 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -24,7 +24,7 @@ #include #include "common/helper/model_helper.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "single_op/single_op.h" #include "single_op/stream_resource.h" diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 2a5f968f..6580ea31 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -16,7 +16,7 @@ #include "single_op/task/aicpu_kernel_task_builder.h" #include "framework/common/taskdown_common.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "build_task_utils.h" namespace ge { diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 1bfbcb3c..90ddc696 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -19,8 +19,8 @@ #include "single_op/task/build_task_utils.h" #include "runtime/mem.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" 
+#include "graph/load/model_manager/model_manager.h" namespace ge { AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 071e514b..9e4d55e1 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -17,7 +17,7 @@ #include "single_op/task/build_task_utils.h" #include "runtime/rt.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/type_utils.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 594352aa..9ba30b8e 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -20,7 +20,7 @@ #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "runtime/rt.h" #include "single_op/task/build_task_utils.h" diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 5979f5cf..dafb97e0 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -132,7 +132,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" "${GE_CODE_DIR}/ge/session/session_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" "${GE_CODE_DIR}/ge/session/inner_session.cc" @@ -140,15 +140,15 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" 
"${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model_stub.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/base.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" "${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" @@ -254,13 +254,13 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_offset.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_task.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/aipp_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_offset.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" "${GE_CODE_DIR}/ge/omm/csa_interact.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" + 
"${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" "${GE_CODE_DIR}/ge/common/kernel_store.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/auth/file_saver.cc" @@ -386,32 +386,32 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/common/model_parser/base.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/util.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model_parser.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - 
"${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_record_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_wait_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/hccl_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_set_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc" + 
"${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_active_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 3d04fd0c..3ef8a750 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -33,11 +33,11 @@ #include "common/properties_manager.h" #include "common/types.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "ge/common/dump/dump_properties.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" diff --git a/tests/ut/ge/graph/graph_load_unittest.cc b/tests/ut/ge/graph/graph_load_unittest.cc index af9d5a37..54972af7 100644 --- 
a/tests/ut/ge/graph/graph_load_unittest.cc +++ b/tests/ut/ge/graph/graph_load_unittest.cc @@ -24,7 +24,7 @@ #include "common/helper/model_helper.h" #include "common/op/ge_op_utils.h" #include "common/types.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/op_desc.h" #include "graph/types.h" #include "graph/utils/attr_utils.h" @@ -35,7 +35,7 @@ #include "graph/load/graph_loader.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_manager_utils.h" #include "model/ge_model.h" #undef private diff --git a/tests/ut/ge/graph/load/data_dumper_unittest.cc b/tests/ut/ge/graph/load/data_dumper_unittest.cc index e53b76f4..1866f4eb 100644 --- a/tests/ut/ge/graph/load/data_dumper_unittest.cc +++ b/tests/ut/ge/graph/load/data_dumper_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 0c03c934..35413a6b 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -20,7 +20,7 @@ #define protected public #include "graph/utils/graph_utils.h" #include "common/profiling/profiling_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" using namespace std; diff --git a/tests/ut/ge/graph/load/end_graph_task_unittest.cc b/tests/ut/ge/graph/load/end_graph_task_unittest.cc index 29e7a53a..a66aaaff 100644 --- 
a/tests/ut/ge/graph/load/end_graph_task_unittest.cc +++ b/tests/ut/ge/graph/load/end_graph_task_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc index 5c056007..6a2468ee 100644 --- a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { class UtestHcclTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc index 443d2975..53436820 100644 --- a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "cce/aicpu_engine_struct.h" namespace ge { diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index fe886b49..a3a27a7b 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ 
b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { extern OpDescPtr CreateOpDesc(string name, string type); diff --git a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc index 9348d49e..1652841d 100644 --- a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" namespace ge { class UtestMemcpyAddrAsyncTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc index 8769ec39..afc04130 100644 --- a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" namespace ge { diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc 
b/tests/ut/ge/graph/load/model_utils_unittest.cc index bd86c71e..ac886cea 100644 --- a/tests/ut/ge/graph/load/model_utils_unittest.cc +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -17,7 +17,7 @@ #include #define protected public #define private public -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" using namespace std; diff --git a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc index 56e673f7..43c2ad15 100644 --- a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc @@ -17,7 +17,7 @@ #include -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc index 00069930..38a250ad 100644 --- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc @@ -24,29 +24,29 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include #include "runtime/dev.h" #include "runtime/kernel.h" #include "cce/fwk_adpt_struct.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" -#include 
"graph/load/new_model_manager/task_info/profiler_trace_task_info.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc 
b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc index 43e094b5..a68fb307 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -30,9 +30,9 @@ #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" //#include "new_op_test_utils.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc index 1c6e5a10..8750610a 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc @@ -25,13 +25,13 @@ #define private public #define protected public -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "new_op_test_utils.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc index 620fac09..f10ccd7f 100644 --- a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc +++ 
b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc @@ -30,7 +30,7 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include diff --git a/tests/ut/ge/graph/load/new_op_test_utils.h b/tests/ut/ge/graph/load/new_op_test_utils.h index 4cbc78ac..984cbfb4 100644 --- a/tests/ut/ge/graph/load/new_op_test_utils.h +++ b/tests/ut/ge/graph/load/new_op_test_utils.h @@ -40,7 +40,7 @@ #define private public #include "graph/compute_graph.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/node.h" #include "graph/op_desc.h" #include "graph/utils/attr_utils.h" diff --git a/tests/ut/ge/graph/load/output_net_output_unittest.cc b/tests/ut/ge/graph/load/output_net_output_unittest.cc index ecd28fe3..97246dad 100644 --- a/tests/ut/ge/graph/load/output_net_output_unittest.cc +++ b/tests/ut/ge/graph/load/output_net_output_unittest.cc @@ -23,8 +23,8 @@ #define private public #include "common/debug/memory_dumper.h" #include "common/op/ge_op_utils.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "new_op_test_utils.h" #include "proto/om.pb.h" diff --git a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc index a98e14c6..82ffb388 100644 --- a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc +++ b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc @@ -18,7 +18,7 @@ #define protected public #define private public -#include 
"graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "runtime/kernel.h" #undef protected #undef private diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index b6b97d89..ab909e11 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -18,7 +18,7 @@ #include //#include "cce/taskdown_common.hpp" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "runtime/rt.h" From 53a1717ba1c13731f5e46e6ab8684a7b8051ba61 Mon Sep 17 00:00:00 2001 From: zhangxiaokun9 Date: Tue, 19 Jan 2021 21:38:32 +0800 Subject: [PATCH 39/41] Ignore model manager for UT --- build.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/build.sh b/build.sh index 5222ab5c..561a7efc 100644 --- a/build.sh +++ b/build.sh @@ -235,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then # fi # if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then - echo "Generating coverage statistics, please wait..." - cd ${BASEPATH} - rm -rf ${BASEPATH}/cov - mkdir ${BASEPATH}/cov - lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info - cd ${BASEPATH}/cov - genhtml coverage.info + echo "Generating coverage statistics, please wait..." 
+ cd ${BASEPATH} + rm -rf ${BASEPATH}/cov + mkdir ${BASEPATH}/cov + lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info + lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '*/model_manager/*' -o cov/coverage.info + cd ${BASEPATH}/cov + genhtml coverage.info fi # generate output package in tar form, including ut/st libraries/executables From 912338363e99a846b121f6e2bbd4d04d81e46a32 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 19 Jan 2021 22:10:05 +0800 Subject: [PATCH 40/41] UpdateTiling pre-place --- ge/hybrid/executor/subgraph_executor.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index c4d866a9..f8f122b1 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -231,16 +231,16 @@ Status SubgraphExecutor::PrepareNodes() { } else { node_state->SetKernelTask(node_item.kernel_task); } + auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state->GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); } - auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(unique_task_context); - const auto &task = node_state->GetKernelTask(); - if (task == nullptr) { - GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str()); - return INTERNAL_ERROR; - } - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - 
node_state->SetTaskContext(shared_task_context); } if (!ready_queue_.Push(p_node_state)) { From fe3fc12aed85e11d692006d1e7e6d46bb7c05306 Mon Sep 17 00:00:00 2001 From: zhangxiaokun9 Date: Wed, 20 Jan 2021 09:15:42 +0800 Subject: [PATCH 41/41] Recover 'Remove files matching' for UT lcov --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 561a7efc..f2fafd48 100644 --- a/build.sh +++ b/build.sh @@ -240,7 +240,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then rm -rf ${BASEPATH}/cov mkdir ${BASEPATH}/cov lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '*/model_manager/*' -o cov/coverage.info + lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info cd ${BASEPATH}/cov genhtml coverage.info fi