diff --git a/build.sh b/build.sh index a3a6f6af..f2fafd48 100644 --- a/build.sh +++ b/build.sh @@ -134,11 +134,7 @@ build_graphengine() mk_dir "${BUILD_PATH}" cd "${BUILD_PATH}" - if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then - CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" - else - CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}" - fi + CMAKE_ARGS="-DBUILD_PATH=$BUILD_PATH" if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_COV=ON" @@ -156,7 +152,13 @@ build_graphengine() if [[ "X$ENABLE_GITEE" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON" fi - CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" + + if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" + else + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}" + fi + echo "${CMAKE_ARGS}" cmake ${CMAKE_ARGS} .. if [ $? -ne 0 ] @@ -233,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then # fi # if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then - echo "Generating coverage statistics, please wait..." - cd ${BASEPATH} - rm -rf ${BASEPATH}/cov - mkdir ${BASEPATH}/cov - lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info - cd ${BASEPATH}/cov - genhtml coverage.info + echo "Generating coverage statistics, please wait..." + cd ${BASEPATH} + rm -rf ${BASEPATH}/cov + mkdir ${BASEPATH}/cov + lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info + lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info + cd ${BASEPATH}/cov + genhtml coverage.info fi # generate output package in tar form, including ut/st libraries/executables diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 58b6a999..888f565c 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -35,6 +35,7 @@ protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST}) if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_proto_common.a ############ add_library(ge_proto_common STATIC + ${PROTO_HEADER_HDRS} ${PROTO_SRCS} ) @@ -55,6 +56,7 @@ target_link_libraries(ge_proto_common PRIVATE ############ libge_proto_client.a ############ add_library(ge_proto_client STATIC + ${PROTO_HEADER_HDRS} ${PROTO_CLIENT_SRCS} ) @@ -127,38 +129,38 @@ set(TRAIN_SRC_LIST "graph/label/partitioned_call_label_maker.cc" "graph/label/while_label_maker.cc" "graph/load/graph_loader.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/hccl_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/hccl_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" "graph/manager/graph_context.cc" "graph/manager/graph_manager.cc" "graph/manager/graph_manager_utils.cc" @@ -200,6 +202,7 @@ set(TRAIN_SRC_LIST "graph/passes/compile_nodes_pass.cc" "graph/passes/constant_folding_pass.cc" "graph/passes/constant_fuse_same_pass.cc" + "graph/passes/fuse_data_nodes_with_common_input_pass.cc" "graph/passes/remove_same_const_pass.cc" "graph/passes/useless_control_out_remove_pass.cc" "graph/passes/control_trigger_pass.cc" @@ -372,6 +375,7 @@ set(TRAIN_SRC_LIST "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "hybrid/node_executor/controlop/control_op_executor.cc" "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "hybrid/node_executor/hccl/hccl_node_executor.cc" @@ -482,6 +486,7 @@ set(INFER_SRC_LIST "graph/passes/net_output_pass.cc" "graph/passes/replace_transshape_pass.cc" "graph/passes/constant_fuse_same_pass.cc" + "graph/passes/fuse_data_nodes_with_common_input_pass.cc" "graph/passes/print_op_pass.cc" "graph/passes/no_use_reshape_remove_pass.cc" "graph/passes/iterator_op_pass.cc" @@ -601,37 +606,37 @@ set(INFER_SRC_LIST "graph/manager/util/rt_context_util.cc" "graph/manager/util/variable_accelerate_ctrl.cc" "graph/manager/util/debug.cc" - "graph/load/new_model_manager/model_manager.cc" - "graph/load/new_model_manager/data_inputer.cc" - "graph/load/new_model_manager/davinci_model.cc" - "graph/load/new_model_manager/davinci_model_parser.cc" - "graph/load/new_model_manager/model_utils.cc" - "graph/load/new_model_manager/aipp_utils.cc" - "graph/load/new_model_manager/tbe_handle_store.cc" - "graph/load/new_model_manager/cpu_queue_schedule.cc" - "graph/load/new_model_manager/zero_copy_task.cc" - "graph/load/new_model_manager/zero_copy_offset.cc" - "graph/load/new_model_manager/data_dumper.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/model_manager/model_manager.cc" + "graph/load/model_manager/data_inputer.cc" + "graph/load/model_manager/davinci_model.cc" + "graph/load/model_manager/davinci_model_parser.cc" + "graph/load/model_manager/model_utils.cc" + "graph/load/model_manager/aipp_utils.cc" + "graph/load/model_manager/tbe_handle_store.cc" + "graph/load/model_manager/cpu_queue_schedule.cc" + "graph/load/model_manager/zero_copy_task.cc" + "graph/load/model_manager/zero_copy_offset.cc" + "graph/load/model_manager/data_dumper.cc" + "graph/load/model_manager/task_info/task_info.cc" + "graph/load/model_manager/task_info/event_record_task_info.cc" + "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/fusion_start_task_info.cc" + "graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/model_manager/task_info/kernel_task_info.cc" + "graph/load/model_manager/task_info/label_set_task_info.cc" + "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/model_manager/task_info/stream_active_task_info.cc" + "graph/load/model_manager/task_info/stream_switch_task_info.cc" + "graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/model_manager/task_info/end_graph_task_info.cc" + "graph/load/model_manager/task_info/model_exit_task_info.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "single_op/task/op_task.cc" "single_op/task/build_task_utils.cc" "single_op/task/tbe_task_builder.cc" diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index aad85654..0172628c 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -187,6 +187,8 @@ target_compile_options(ge_common PRIVATE -fvisibility=hidden -O2 -Werror + -Wno-deprecated-declarations + -fno-common ) target_include_directories(ge_common PRIVATE diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index 0b592e11..7ec8cc0f 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -28,7 +28,7 @@ #include "framework/common/util.h" #include "graph/detail/attributes_holder.h" #include "graph/detail/model_serialize_imp.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 1d5a4a9b..92f279be 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "framework/omg/version.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 1fc4dba6..32f0ee40 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -21,7 +21,7 @@ #include "framework/common/string_util.h" #include "graph/ge_context.h" #include "runtime/base.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const char *const kTrainingTrace = "training_trace"; @@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin uint32_t stream_id = task.stream_id; std::string shape_type = task.shape_type; int64_t cur_iter_num = task.cur_iter_num; + uint32_t task_type = task.task_type; data = model_name.append(" ") .append(op_name).append(" ") .append(std::to_string(block_dim)).append(" ") @@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(std::to_string(stream_id)).append(" ") .append(std::to_string(model_id)).append(" ") .append(shape_type).append(" ") - .append(std::to_string(cur_iter_num)).append("\n"); + .append(std::to_string(cur_iter_num)).append(" ") + .append(std::to_string(task_type)).append("\n"); ReporterData reporter_data{}; reporter_data.deviceId = device_id; diff --git a/ge/common/types.cc b/ge/common/types.cc index 268e7caa..90ff9fe4 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 755bdf97..26e53c7b 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -32,37 +32,37 @@ set(SRC_LIST "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" - "../graph/load/new_model_manager/davinci_model.cc" - "../graph/load/new_model_manager/davinci_model_parser.cc" - "../graph/load/new_model_manager/model_manager.cc" - "../graph/load/new_model_manager/tbe_handle_store.cc" - "../graph/load/new_model_manager/cpu_queue_schedule.cc" - "../graph/load/new_model_manager/model_utils.cc" - "../graph/load/new_model_manager/aipp_utils.cc" - "../graph/load/new_model_manager/data_inputer.cc" - "../graph/load/new_model_manager/data_dumper.cc" - "../graph/load/new_model_manager/zero_copy_task.cc" - "../graph/load/new_model_manager/zero_copy_offset.cc" - "../graph/load/new_model_manager/task_info/task_info.cc" - "../graph/load/new_model_manager/task_info/event_record_task_info.cc" - "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_task_info.cc" - "../graph/load/new_model_manager/task_info/label_set_task_info.cc" - "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "../graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "../graph/load/model_manager/davinci_model.cc" + "../graph/load/model_manager/davinci_model_parser.cc" + "../graph/load/model_manager/model_manager.cc" + "../graph/load/model_manager/tbe_handle_store.cc" + "../graph/load/model_manager/cpu_queue_schedule.cc" + "../graph/load/model_manager/model_utils.cc" + "../graph/load/model_manager/aipp_utils.cc" + "../graph/load/model_manager/data_inputer.cc" + "../graph/load/model_manager/data_dumper.cc" + "../graph/load/model_manager/zero_copy_task.cc" + "../graph/load/model_manager/zero_copy_offset.cc" + "../graph/load/model_manager/task_info/task_info.cc" + "../graph/load/model_manager/task_info/event_record_task_info.cc" + "../graph/load/model_manager/task_info/event_wait_task_info.cc" + "../graph/load/model_manager/task_info/fusion_start_task_info.cc" + "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "../graph/load/model_manager/task_info/kernel_task_info.cc" + "../graph/load/model_manager/task_info/label_set_task_info.cc" + "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" + "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "../graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "../graph/load/model_manager/task_info/stream_active_task_info.cc" + "../graph/load/model_manager/task_info/stream_switch_task_info.cc" + "../graph/load/model_manager/task_info/stream_switchn_task_info.cc" + "../graph/load/model_manager/task_info/end_graph_task_info.cc" + "../graph/load/model_manager/task_info/model_exit_task_info.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" + "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "../graph/common/local_context.cc" "../opskernel_manager/ops_kernel_builder_manager.cc" "../single_op/single_op_manager.cc" @@ -104,6 +104,7 @@ set(SRC_LIST "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" "../hybrid/node_executor/controlop/control_op_executor.cc" "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "../hybrid/node_executor/rts/rts_node_executor.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 0ea0e66d..b71a8be4 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -29,15 +29,15 @@ #include "framework/common/util.h" #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/model.h" #include "graph/utils/graph_utils.h" #include "mmpa/mmpa_api.h" #include "single_op/single_op_manager.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" using std::string; diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 87abdade..4966eeb5 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -22,37 +22,37 @@ local_ge_executor_src_files := \ ../graph/manager/util/debug.cc \ ../model/ge_model.cc \ ../model/ge_root_model.cc \ - ../graph/load/new_model_manager/davinci_model.cc \ - ../graph/load/new_model_manager/davinci_model_parser.cc \ - ../graph/load/new_model_manager/model_manager.cc \ - ../graph/load/new_model_manager/tbe_handle_store.cc \ - ../graph/load/new_model_manager/cpu_queue_schedule.cc \ - ../graph/load/new_model_manager/model_utils.cc \ - ../graph/load/new_model_manager/aipp_utils.cc \ - ../graph/load/new_model_manager/data_inputer.cc \ - ../graph/load/new_model_manager/data_dumper.cc \ - ../graph/load/new_model_manager/zero_copy_task.cc \ - ../graph/load/new_model_manager/zero_copy_offset.cc \ - ../graph/load/new_model_manager/task_info/task_info.cc \ - ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ - ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/kernel_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + ../graph/load/model_manager/davinci_model.cc \ + ../graph/load/model_manager/davinci_model_parser.cc \ + ../graph/load/model_manager/model_manager.cc \ + ../graph/load/model_manager/tbe_handle_store.cc \ + ../graph/load/model_manager/cpu_queue_schedule.cc \ + ../graph/load/model_manager/model_utils.cc \ + ../graph/load/model_manager/aipp_utils.cc \ + ../graph/load/model_manager/data_inputer.cc \ + ../graph/load/model_manager/data_dumper.cc \ + ../graph/load/model_manager/zero_copy_task.cc \ + ../graph/load/model_manager/zero_copy_offset.cc \ + ../graph/load/model_manager/task_info/task_info.cc \ + ../graph/load/model_manager/task_info/event_record_task_info.cc \ + ../graph/load/model_manager/task_info/event_wait_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_start_task_info.cc \ + ../graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + ../graph/load/model_manager/task_info/kernel_task_info.cc \ + ../graph/load/model_manager/task_info/label_set_task_info.cc \ + ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + ../graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + ../graph/load/model_manager/task_info/stream_active_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switch_task_info.cc \ + ../graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + ../graph/load/model_manager/task_info/end_graph_task_info.cc \ + ../graph/load/model_manager/task_info/model_exit_task_info.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ ../opskernel_manager/ops_kernel_builder_manager.cc \ ../single_op/single_op_manager.cc \ ../single_op/single_op_model.cc \ @@ -95,6 +95,7 @@ local_ge_executor_src_files := \ ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ ../hybrid/node_executor/controlop/control_op_executor.cc \ ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ ../hybrid/node_executor/rts/rts_node_executor.cc \ diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 74d09404..a20ff437 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -103,6 +103,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/net_output_pass.cc \ graph/passes/replace_transshape_pass.cc \ graph/passes/constant_fuse_same_pass.cc \ + graph/passes/fuse_data_nodes_with_common_input_pass.cc \ graph/passes/print_op_pass.cc \ graph/passes/no_use_reshape_remove_pass.cc \ graph/passes/iterator_op_pass.cc \ @@ -227,37 +228,37 @@ OME_HOST_SRC_FILES := \ graph/manager/util/rt_context_util.cc \ graph/manager/util/variable_accelerate_ctrl.cc \ graph/manager/util/debug.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ single_op/task/op_task.cc \ single_op/task/build_task_utils.cc \ single_op/task/tbe_task_builder.cc \ @@ -269,7 +270,7 @@ OME_HOST_SRC_FILES := \ single_op/single_op_manager.cc \ hybrid/hybrid_davinci_model_stub.cc \ hybrid/node_executor/aicpu/aicpu_ext_info.cc \ - # graph/load/new_model_manager/task_info/hccl_task_info.cc + # graph/load/model_manager/task_info/hccl_task_info.cc OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 5a99dc8c..4434dc2b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ graph/label/partitioned_call_label_maker.cc \ graph/label/while_label_maker.cc \ graph/load/graph_loader.cc \ - graph/load/new_model_manager/cpu_queue_schedule.cc \ - graph/load/new_model_manager/data_dumper.cc \ - graph/load/new_model_manager/data_inputer.cc \ - graph/load/new_model_manager/davinci_model.cc \ - graph/load/new_model_manager/davinci_model_parser.cc \ - graph/load/new_model_manager/model_manager.cc \ - graph/load/new_model_manager/model_utils.cc \ - graph/load/new_model_manager/aipp_utils.cc \ - graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ - graph/load/new_model_manager/task_info/event_record_task_info.cc \ - graph/load/new_model_manager/task_info/event_wait_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ - graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ - graph/load/new_model_manager/task_info/hccl_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ - graph/load/new_model_manager/task_info/kernel_task_info.cc \ - graph/load/new_model_manager/task_info/label_set_task_info.cc \ - graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ - graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ - graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ - graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ - graph/load/new_model_manager/task_info/stream_active_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ - graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ - graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ - graph/load/new_model_manager/task_info/task_info.cc \ - graph/load/new_model_manager/tbe_handle_store.cc \ - graph/load/new_model_manager/zero_copy_task.cc \ - graph/load/new_model_manager/zero_copy_offset.cc \ + graph/load/model_manager/cpu_queue_schedule.cc \ + graph/load/model_manager/data_dumper.cc \ + graph/load/model_manager/data_inputer.cc \ + graph/load/model_manager/davinci_model.cc \ + graph/load/model_manager/davinci_model_parser.cc \ + graph/load/model_manager/model_manager.cc \ + graph/load/model_manager/model_utils.cc \ + graph/load/model_manager/aipp_utils.cc \ + graph/load/model_manager/task_info/end_graph_task_info.cc \ + graph/load/model_manager/task_info/model_exit_task_info.cc \ + graph/load/model_manager/task_info/event_record_task_info.cc \ + graph/load/model_manager/task_info/event_wait_task_info.cc \ + graph/load/model_manager/task_info/fusion_start_task_info.cc \ + graph/load/model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/model_manager/task_info/hccl_task_info.cc \ + graph/load/model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/model_manager/task_info/kernel_task_info.cc \ + graph/load/model_manager/task_info/label_set_task_info.cc \ + graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/model_manager/task_info/stream_active_task_info.cc \ + graph/load/model_manager/task_info/stream_switch_task_info.cc \ + graph/load/model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/model_manager/task_info/task_info.cc \ + graph/load/model_manager/tbe_handle_store.cc \ + graph/load/model_manager/zero_copy_task.cc \ + graph/load/model_manager/zero_copy_offset.cc \ graph/manager/graph_context.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ @@ -127,6 +127,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/compile_nodes_pass.cc \ graph/passes/constant_folding_pass.cc \ graph/passes/constant_fuse_same_pass.cc \ + graph/passes/fuse_data_nodes_with_common_input_pass.cc \ graph/passes/remove_same_const_pass.cc \ graph/passes/useless_control_out_remove_pass.cc \ graph/passes/control_trigger_pass.cc \ @@ -299,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ hybrid/node_executor/controlop/control_op_executor.cc \ hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ hybrid/node_executor/hccl/hccl_node_executor.cc \ diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index ded8fd39..56b5ab41 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -23,6 +23,8 @@ add_library(ge_runtime SHARED ${GE_SRC_LIST}) target_compile_options(ge_runtime PRIVATE -Werror -O2 + -Wno-deprecated-declarations + -fno-common ) target_compile_definitions(ge_runtime PRIVATE diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index ed77a7f1..7b09cbc6 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -187,8 +187,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph return SUCCESS; } -Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, - GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { +Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { if (comp_graph == nullptr) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); return GE_GRAPH_PARAM_NULLPTR; @@ -203,18 +202,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetGraphUnknownFlag()) { GE_CHK_STATUS_RET( - BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), + BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), "Build for dynamic shape graph failed."); return SUCCESS; } - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); return SUCCESS; } -Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_list, +Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { if (ge::GetContext().GetHostExecFlag()) { GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); @@ -222,7 +221,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v } GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); - Status ret = SecondPartition(comp_graph, subgraph_list); + Status ret = SecondPartition(comp_graph); GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); auto subgraph_map = graph_partitioner_.GetSubGraphMap(); @@ -470,7 +469,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { } Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, - std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Start to build BuildForDynamicShape for dynamic shape."); @@ -517,7 +515,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } // known shape build flow - GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), + GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), "Build for known shape graph failed."); } ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); @@ -719,7 +717,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) return SUCCESS; } -Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list) { +Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { GE_TIMESTAMP_START(GraphPartition2); auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); if (ret != SUCCESS) { @@ -727,10 +725,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list, - GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); + Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); void SetOptions(const GraphManagerOptions &options); private: @@ -59,12 +58,12 @@ class GraphBuilder { Status UpdateDataInputSize(const ge::NodePtr &node_ptr); Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); - Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list); + Status SecondPartition(ge::ComputeGraphPtr &comp_graph); Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); - Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, + Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); - Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_list, + Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 76e7efbe..a523ce3f 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -24,6 +24,7 @@ #include "graph/buffer.h" #include "graph/ge_attr_value.h" #include "graph/ge_context.h" +#include "graph/types.h" #include "graph/node.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" @@ -1401,6 +1402,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector if (output_op_desc != nullptr) { GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } + // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { @@ -1408,6 +1410,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + GE_IF_BOOL_EXEC((ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); + std::string peer_name; uint32_t peer_input_index = 0; bool out_node_set_continuous_input = false; diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 639bfaa0..dfc633af 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -60,9 +60,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr return FAILED); ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); GE_CHECK_NOTNULL(tensor_desc); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) @@ -70,7 +75,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr } uint8_t *dev_ptr = nullptr; - rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 63112ea8..88ffda02 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const { return false; } +// Iteraotor loop : +// StreamSwitch -> StreamActive +// FpBp loop: +// StreamSwitch -> AssignAdd -> StreamActive +NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { + for (auto pre_node : active_node->GetInControlNodes()) { + if (pre_node->GetType() == STREAMSWITCH) { + return pre_node; + } + for (auto pre_pre_node : pre_node->GetInControlNodes()) { + if (pre_pre_node->GetType() == STREAMSWITCH) { + return pre_pre_node; + } + } + } + return nullptr; +} + Status StreamAllocator::SetActiveStreamsForLoop() { vector loop_active_streams; for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { @@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { bool is_loop_active = false; if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { vector activated_label_list; + + NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); + if (pre_switch_node == nullptr) { + GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str()); + return FAILED; + } + if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || activated_label_list.empty()) { GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), @@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { // it may cause some stream actived by iterator next step when this stream still alive. // If above situation happen, active message will lose, cause process block in next iteration. // In order to avoid this abnormal happen, - // add event between each last node and iterator active node in target active stream + // add event between each last node and iterator switch node GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); for (auto iter : stream_id_to_last_node) { if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { @@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { continue; } AddSendEventId(iter.second, event_num_); - AddRecvEventId(node, event_num_); + AddRecvEventId(pre_switch_node, event_num_); event_num_++; } diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 3c5618e8..79c22a29 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/model_parser/base.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" #include "runtime/mem.h" diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 6272e581..29afc939 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -22,8 +22,8 @@ #include "common/helper/model_helper.h" #include "common/util.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" #include "runtime/dev.h" diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/model_manager/aipp_utils.cc similarity index 98% rename from ge/graph/load/new_model_manager/aipp_utils.cc rename to ge/graph/load/model_manager/aipp_utils.cc index e0e60d2b..8a18c421 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/model_manager/aipp_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/aipp_utils.h" +#include "graph/load/model_manager/aipp_utils.h" #include diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/model_manager/aipp_utils.h similarity index 100% rename from ge/graph/load/new_model_manager/aipp_utils.h rename to ge/graph/load/model_manager/aipp_utils.h diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc similarity index 99% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.cc rename to ge/graph/load/model_manager/cpu_queue_schedule.cc index 430321bd..d9b716ea 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" #include "common/debug/ge_log.h" #include "common/debug/log.h" diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h similarity index 97% rename from ge/graph/load/new_model_manager/cpu_queue_schedule.h rename to ge/graph/load/model_manager/cpu_queue_schedule.h index 8999e975..de4c5327 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/model_manager/cpu_queue_schedule.h @@ -20,8 +20,8 @@ #include #include "common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "runtime/kernel.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc similarity index 99% rename from ge/graph/load/new_model_manager/data_dumper.cc rename to ge/graph/load/model_manager/data_dumper.cc index a12a2b2a..947aac1d 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/model_manager/data_dumper.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/data_dumper.h" +#include "graph/load/model_manager/data_dumper.h" #include #include @@ -29,7 +29,7 @@ #include "framework/common/util.h" #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/util/debug.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/model_manager/data_dumper.h similarity index 100% rename from ge/graph/load/new_model_manager/data_dumper.h rename to ge/graph/load/model_manager/data_dumper.h diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/model_manager/data_inputer.cc similarity index 94% rename from ge/graph/load/new_model_manager/data_inputer.cc rename to ge/graph/load/model_manager/data_inputer.cc index 5efc710e..0fe75465 100755 --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/model_manager/data_inputer.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/model_manager/data_inputer.h similarity index 100% rename from ge/graph/load/new_model_manager/data_inputer.h rename to ge/graph/load/model_manager/data_inputer.h diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc similarity index 97% rename from ge/graph/load/new_model_manager/davinci_model.cc rename to ge/graph/load/model_manager/davinci_model.cc index 49abe17c..2430ae3d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include #include @@ -36,9 +36,9 @@ #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/graph.h" -#include "graph/load/new_model_manager/cpu_queue_schedule.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/cpu_queue_schedule.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -520,6 +520,8 @@ Status DavinciModel::DoTaskSink() { GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); + GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); + GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); @@ -716,24 +718,10 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed"); GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); - auto all_dump_model = GetDumpProperties().GetAllDumpModel(); - bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); - bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); - bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || - findByOmName || findByModelName; - if (dump_l1fusion_op) { - // malloc 2M for dump l1fusion op - GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); - - // send l1fusion dump addr to rts - GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); - } - /// In zero copy model, if a aicpu operator is connected to the first or last layer, before model execution, /// the aicpu opertor needs to destroy history record, and update operator memory address. /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); - (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); string fp_ceiling_mode; if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { @@ -2079,6 +2067,8 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { GELOGD("Output node size: %zu", output_op_list.size()); + vector out_node_name; + (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); for (const auto &op_desc : output_op_list) { uint32_t out_size = static_cast(op_desc->GetInputsSize()); for (uint32_t index = 0; index < out_size; index++) { @@ -2092,11 +2082,11 @@ Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, "construct output_name failed."); // forward compatbility, if old om has no out_node_name, need to return output follow origin way - if (out_size == out_node_name_.size()) { + if (out_size == out_node_name.size()) { // neweast plan, the index will add to name during generate model. - bool contains_colon = out_node_name_[index].find(":") != std::string::npos; + bool contains_colon = out_node_name[index].find(":") != std::string::npos; output_name = - contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); + contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); } else { output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); @@ -3075,6 +3065,64 @@ Status DavinciModel::MallocKnownArgs() { return SUCCESS; } +void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, + const domi::TaskDef &task_def, size_t task_index) { + bool flag = GetL1FusionEnableOption(); + char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; + INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); + int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; + if (env_flag != 0) { + flag = true; + } + + TaskDescInfo task_desc_info; + if (!om_name_.empty()) { + task_desc_info.model_name = om_name_; + } else { + task_desc_info.model_name = name_; + } + task_desc_info.op_name = op->GetName(); + task_desc_info.block_dim = task_def.kernel().block_dim(); + task_desc_info.task_id = task->GetTaskID(); + task_desc_info.stream_id = task->GetStreamId(); + task_desc_info.shape_type = "static"; + task_desc_info.cur_iter_num = 0; + // task type + task_desc_info.task_type = kTaskTypeInvalid; + auto model_task_type = static_cast(task_def.type()); + if (model_task_type == RT_MODEL_TASK_KERNEL) { + const domi::KernelDef &kernel_def = task_def.kernel(); + const auto &context = kernel_def.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { + task_desc_info.task_type = kTaskTypeAicore; + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Other kernel type: %u", context.kernel_type()); + } + } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { + task_desc_info.task_type = kTaskTypeAicpu; + } else { + GELOGD("Skip task type: %d", static_cast(model_task_type)); + } + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + if (flag) { + if (task->GetSktTaskID() != 0xFFFFFFFF) { + TaskDescInfo task_desc_info; + string op_name = "super_kernel_" + to_string(task_index); + task_desc_info.op_name = op_name; + task_desc_info.task_id = task->GetSktTaskID(); + profiler_report_op_info_[task_desc_info.op_name] = + std::pair(task_desc_info.task_id, task_desc_info.stream_id); + task_desc_info_.emplace_back(task_desc_info); + } + } + return; +} + Status DavinciModel::DistributeTask() { GELOGI("do Distribute."); for (auto &task : cpu_task_list_) { @@ -3086,18 +3134,11 @@ Status DavinciModel::DistributeTask() { } task_desc_info_.clear(); - bool flag = GetL1FusionEnableOption(); - char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; - INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); - int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; - if (env_flag != 0) { - flag = true; - } - const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); + GE_CHECK_NOTNULL(task); GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); // for data dump auto op_index = std::max(task_def.kernel().context().op_index(), @@ -3117,33 +3158,9 @@ Status DavinciModel::DistributeTask() { GE_IF_BOOL_EXEC(no_need_profiling, continue); SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); - // Load task info for profiling - TaskDescInfo task_desc_info; - if (!om_name_.empty()) { - task_desc_info.model_name = om_name_; - } else { - task_desc_info.model_name = name_; - } - task_desc_info.op_name = op->GetName(); - task_desc_info.block_dim = task_def.kernel().block_dim(); - task_desc_info.task_id = task->GetTaskID(); - task_desc_info.stream_id = task->GetStreamId(); - task_desc_info.shape_type = "static"; - task_desc_info.cur_iter_num = 0; - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - if (flag) { - if (task->GetSktTaskID() != 0xFFFFFFFF) { - TaskDescInfo task_desc_info; - string op_name = "super_kernel_" + to_string(task_index); - task_desc_info.op_name = op_name; - task_desc_info.task_id = task->GetSktTaskID(); - profiler_report_op_info_[task_desc_info.op_name] = - std::pair(task_desc_info.task_id, task_desc_info.stream_id); - task_desc_info_.emplace_back(task_desc_info); - } - } + + // save task info for profiling + SaveProfilingTaskDescInfo(op, task, task_def, task_index); } // launch dump kernel to aicpu GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); @@ -3951,7 +3968,6 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map &variable_by_name); + Status InitL1DataDumperArgs(); + Status InitModelProfile(); Status SinkModelProfile(); @@ -881,7 +886,6 @@ class DavinciModel { GeModelPtr ge_model_; // release after DavinciModel::Init bool need_destroy_aicpu_kernel_{false}; - vector out_node_name_; map op_list_; // release after DavinciModel::Init diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/model_manager/davinci_model_parser.cc similarity index 92% rename from ge/graph/load/new_model_manager/davinci_model_parser.cc rename to ge/graph/load/model_manager/davinci_model_parser.cc index 76526de2..c6f48b84 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/model_manager/davinci_model_parser.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" namespace ge { DavinciModelParser::DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/model_manager/davinci_model_parser.h similarity index 100% rename from ge/graph/load/new_model_manager/davinci_model_parser.h rename to ge/graph/load/model_manager/davinci_model_parser.h diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc similarity index 98% rename from ge/graph/load/new_model_manager/model_manager.cc rename to ge/graph/load/model_manager/model_manager.cc index 840ecae1..8be8b60f 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -14,10 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include +#include "mmpa/mmpa_api.h" #include "aicpu/aicpu_schedule/aicpu_op_type_list.h" #include "common/dump/dump_manager.h" #include "common/l2_cache_optimize.h" @@ -27,8 +28,8 @@ #include "framework/common/util.h" #include "graph/common/ge_call_wrapper.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "graph/utils/attr_utils.h" @@ -53,7 +54,6 @@ const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; const char *const kDeleteCustOp = "deleteCustOp"; const int kTimeSpecNano = 1000000000; const int kTimeSpecMiro = 1000000; -const int kSessionMaxBias = 100; const int kOpNameMaxSize = 100; struct CustAicpuSoBuf { uint64_t kernelSoBuf; @@ -1024,6 +1024,12 @@ Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippTyp } Status ModelManager::GenSessionId(uint64_t &session_id) { + const uint64_t kSessionTimeMask = 0xffffffffffff0000; + const uint64_t kSessionPidMask = 0x000000000000ff00; + const uint64_t kSessionBiasMask = 0x00000000000000ff; + + const uint64_t kMaskPerOffset = 8; + std::lock_guard lock(session_id_create_mutex_); mmTimeval tv; @@ -1031,12 +1037,14 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { GELOGE(INTERNAL_ERROR, "Failed to get current time."); return INTERNAL_ERROR; } - session_id = static_cast(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us + uint64_t timestamp = static_cast(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us + + static uint32_t pid = mmGetPid(); session_id_bias_++; - // max bais 100. - session_id_bias_ = session_id_bias_ % kSessionMaxBias; - session_id = session_id * kSessionMaxBias + session_id_bias_; + + session_id = ((timestamp< - #include "common/debug/log.h" #include "common/op/ge_op_utils.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" -#include "runtime/base.h" -#include "runtime/kernel.h" - -#include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" +#include "graph/types.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -342,13 +335,13 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); v_input_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps @@ -380,6 +373,34 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co return v_input_data_addr; } +/// +/// @ingroup ge +/// @brief Get variable address. +/// @return Status +/// +Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr) { + rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); + switch (mem_type) { + case RT_MEMORY_RDMA_HBM: + if (offset < 0) { + GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast(offset)); + return PARAM_INVALID; + } + var_addr = reinterpret_cast(offset); + break; + case RT_MEMORY_HBM: + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + var_addr = model_param.var_base + offset - model_param.logic_var_base; + break; + default: + GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); + return PARAM_INVALID; + } + GE_CHECK_NOTNULL(var_addr); + return SUCCESS; +} + /// /// @ingroup ge /// @brief Get output data address. @@ -404,19 +425,26 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C return v_output_data_addr; } for (size_t i = 0; i < outputs_size; ++i) { - GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; - v_output_data_addr.push_back(variable_addr); - GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); - continue); const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); if (tensor_desc == nullptr) { GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); continue; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str()); + continue; + } + GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), + uint8_t *variable_addr = nullptr; + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); + v_output_data_addr.push_back(variable_addr); + GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); + continue); + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/model_manager/model_utils.h similarity index 90% rename from ge/graph/load/new_model_manager/model_utils.h rename to ge/graph/load/model_manager/model_utils.h index 4b3d7ae7..26f8d700 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/model_manager/model_utils.h @@ -21,7 +21,7 @@ #include "common/ge_inner_error_codes.h" #include "common/types.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" #include "graph/utils/tensor_adapter.h" @@ -107,6 +107,15 @@ class ModelUtils { /// @return Status /// static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); + + private: + /// + /// @ingroup ge + /// @brief Get variable address. + /// @return Status + /// + static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, + uint8_t *&var_addr); }; } // namespace ge diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc rename to ge/graph/load/model_manager/task_info/end_graph_task_info.cc index b8b02f59..c306c650 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kDumpFlag = 2; diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/model_manager/task_info/end_graph_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/end_graph_task_info.h rename to ge/graph/load/model_manager/task_info/end_graph_task_info.h index 614544f9..efce19b2 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/model_manager/task_info/end_graph_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EndGraphTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/model_manager/task_info/event_record_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.cc rename to ge/graph/load/model_manager/task_info/event_record_task_info.cc index 11589258..f736c386 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/model_manager/task_info/event_record_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_record_task_info.h rename to ge/graph/load/model_manager/task_info/event_record_task_info.h index d3f5961e..a79f1d3b 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_record_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventRecordTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc rename to ge/graph/load/model_manager/task_info/event_wait_task_info.cc index 5701179b..34058502 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/model_manager/task_info/event_wait_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/event_wait_task_info.h rename to ge/graph/load/model_manager/task_info/event_wait_task_info.h index a92252d7..bd8acab1 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/model_manager/task_info/event_wait_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class EventWaitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.cc index 32c79647..6feea9e4 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_start_task_info.h index b1897533..284a5e0f 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_start_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStartTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc similarity index 92% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc index dd4edfd0..22d1589c 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h rename to ge/graph/load/model_manager/task_info/fusion_stop_task_info.h index 880ca487..994498d5 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/model_manager/task_info/fusion_stop_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class FusionStopTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/model_manager/task_info/hccl_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.cc rename to ge/graph/load/model_manager/task_info/hccl_task_info.cc index 7b18a9a3..2d0ad560 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" #include #include "common/opskernel/ops_kernel_info_store.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace ge { std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/model_manager/task_info/hccl_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/hccl_task_info.h rename to ge/graph/load/model_manager/task_info/hccl_task_info.h index 777f5bbf..3df155ad 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/model_manager/task_info/hccl_task_info.h @@ -23,7 +23,7 @@ #include #include "common/opskernel/ge_task_info.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/manager/util/hcom_util.h" namespace ge { class HcclTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 98d9cb78..c34a4e9a 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include @@ -24,8 +24,8 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" #include "graph/attr_value.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h similarity index 97% rename from ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index f6873c6c..265316ce 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc similarity index 99% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.cc rename to ge/graph/load/model_manager/task_info/kernel_task_info.cc index 83bf2779..27fe8eb0 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" #include #include #include @@ -25,9 +25,9 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/l2_cache_optimize.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/kernel.h" #include "super_kernel/super_kernel.h" #include "super_kernel/super_kernel_factory.h" diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h similarity index 98% rename from ge/graph/load/new_model_manager/task_info/kernel_task_info.h rename to ge/graph/load/model_manager/task_info/kernel_task_info.h index cea25320..7cabf259 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h @@ -22,7 +22,7 @@ #include #include -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { class KernelTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc index 393c0b31..1921c85d 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h" +#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h rename to ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h index f83cd1d9..25310368 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelGotoExTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/model_manager/task_info/label_set_task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.cc rename to ge/graph/load/model_manager/task_info/label_set_task_info.cc index 5fa96a96..45cb586a 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_set_task_info.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/model_manager/task_info/label_set_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_set_task_info.h rename to ge/graph/load/model_manager/task_info/label_set_task_info.h index bb02ccf0..36e41f1b 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_set_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelSetTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc index ae7865a4..c2997678 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h" +#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { constexpr uint8_t kLabelSwitchIndexNum = 1; diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h rename to ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h index 538b2d68..00ca0844 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h +++ b/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class LabelSwitchByIndexTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc index b95705f0..a1f58e42 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace { const uint32_t kAlignBytes = 64; diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h rename to ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h index c7645b9f..4631c67c 100644 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h +++ b/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class MemcpyAddrAsyncTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc index fa320d81..22f9267d 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h rename to ge/graph/load/model_manager/task_info/memcpy_async_task_info.h index 43b5ba13..728305ff 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc rename to ge/graph/load/model_manager/task_info/model_exit_task_info.cc index ff8057aa..eb200e3f 100644 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc +++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/model_exit_task_info.h" +#include "graph/load/model_manager/task_info/model_exit_task_info.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h b/ge/graph/load/model_manager/task_info/model_exit_task_info.h similarity index 94% rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.h rename to ge/graph/load/model_manager/task_info/model_exit_task_info.h index c219fcc8..1e4a3923 100644 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h +++ b/ge/graph/load/model_manager/task_info/model_exit_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class ModelExitTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc similarity index 93% rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc index 533c459a..b8fd1828 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc +++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" namespace ge { Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h rename to ge/graph/load/model_manager/task_info/profiler_trace_task_info.h index 8989096d..b57ebfae 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h +++ b/ge/graph/load/model_manager/task_info/profiler_trace_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class ProfilerTraceTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc similarity index 95% rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_active_task_info.cc index 33ebea3b..ec807777 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" #include #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/model_manager/task_info/stream_active_task_info.h similarity index 95% rename from ge/graph/load/new_model_manager/task_info/stream_active_task_info.h rename to ge/graph/load/model_manager/task_info/stream_active_task_info.h index c6b263b4..dfbf48d1 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_active_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class StreamActiveTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.cc index 616ba85f..f129950a 100644 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" #include #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/debug/ge_attr_define.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h rename to ge/graph/load/model_manager/task_info/stream_switch_task_info.h index a72d7de2..0e75e183 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_switch_task_info.h @@ -16,7 +16,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" namespace ge { class StreamSwitchTaskInfo : public TaskInfo { diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc similarity index 97% rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc index 27adbbe4..35eb23e3 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc +++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h" +#include "graph/load/model_manager/task_info/stream_switchn_task_info.h" #include #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" namespace { const uint8_t kStreamSwitchnInputNum = 1; diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h rename to ge/graph/load/model_manager/task_info/stream_switchn_task_info.h index 3d65a086..6e6ca190 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h +++ b/ge/graph/load/model_manager/task_info/stream_switchn_task_info.h @@ -17,7 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/op_desc.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h rename to ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc similarity index 94% rename from ge/graph/load/new_model_manager/task_info/task_info.cc rename to ge/graph/load/model_manager/task_info/task_info.cc index 674d477f..e521f95c 100755 --- a/ge/graph/load/new_model_manager/task_info/task_info.cc +++ b/ge/graph/load/model_manager/task_info/task_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h similarity index 96% rename from ge/graph/load/new_model_manager/task_info/task_info.h rename to ge/graph/load/model_manager/task_info/task_info.h index 26f22564..99ec3c4e 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/model_manager/task_info/task_info.h @@ -22,8 +22,8 @@ #include "cce/customize.h" #include "framework/common/taskdown_common.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/ts_mem_mall.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/ts_mem_mall.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" #include "proto/task.pb.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/model_manager/task_info/task_info_factory.h similarity index 100% rename from ge/graph/load/new_model_manager/task_info/task_info_factory.h rename to ge/graph/load/model_manager/task_info/task_info_factory.h diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/model_manager/tbe_handle_store.cc similarity index 100% rename from ge/graph/load/new_model_manager/tbe_handle_store.cc rename to ge/graph/load/model_manager/tbe_handle_store.cc diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/model_manager/tbe_handle_store.h similarity index 100% rename from ge/graph/load/new_model_manager/tbe_handle_store.h rename to ge/graph/load/model_manager/tbe_handle_store.h diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h similarity index 100% rename from ge/graph/load/new_model_manager/ts_mem_mall.h rename to ge/graph/load/model_manager/ts_mem_mall.h diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc similarity index 98% rename from ge/graph/load/new_model_manager/zero_copy_offset.cc rename to ge/graph/load/model_manager/zero_copy_offset.cc index f27d862d..3f8555bb 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/model_manager/zero_copy_offset.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/zero_copy_offset.h" +#include "graph/load/model_manager/zero_copy_offset.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/zero_copy_task.h" namespace ge { namespace { diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h similarity index 95% rename from ge/graph/load/new_model_manager/zero_copy_offset.h rename to ge/graph/load/model_manager/zero_copy_offset.h index 8ead742d..fc63fced 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/model_manager/zero_copy_offset.h @@ -25,7 +25,7 @@ #include "external/ge/ge_api_error_codes.h" #include "framework/common/ge_types.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" #include "runtime/mem.h" @@ -65,7 +65,7 @@ class ZeroCopyOffset { // data_size of Data/Netoutput int64_t GetDataSize() const { return data_size_; } // value of *outside_addrs_ from davinci_model - std::vector>> &GetOutsideAddrs() { return outside_addrs_; } + const std::vector>> &GetOutsideAddrs() { return outside_addrs_; } // name of op std::string GetOpName() const { return op_name_; } diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/model_manager/zero_copy_task.cc similarity index 97% rename from ge/graph/load/new_model_manager/zero_copy_task.cc rename to ge/graph/load/model_manager/zero_copy_task.cc index b938f14b..367de87a 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/model_manager/zero_copy_task.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/load/model_manager/zero_copy_task.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "common/ge_compiler_options.h" namespace ge { diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/model_manager/zero_copy_task.h similarity index 100% rename from ge/graph/load/new_model_manager/zero_copy_task.h rename to ge/graph/load/model_manager/zero_copy_task.h diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index cc99d2c5..d5ee690c 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -53,6 +53,7 @@ #include "graph/passes/dimension_adjust_pass.h" #include "graph/passes/dimension_compute_pass.h" #include "graph/passes/flow_ctrl_pass.h" +#include "graph/passes/fuse_data_nodes_with_common_input_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/input_output_connection_identify_pass.h" #include "graph/passes/iterator_op_pass.h" @@ -2104,6 +2105,24 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); GE_CHK_STATUS_RET( after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); + /* + * Do CSE before FuseDataNodesWithCommonInputPass to resolve the scene in bertlarge as following: + * const + * / | \ + * cast1 cast2 cast3 + * \ | / + * case + * the node `const` is the fused const node after ConstantFuseSamePass + * the nodes `cast1`, `cast2` and 'cast3' will be fused by CSE. + * in order to eliminate hard code in FuseDataNodesWithCommonInputPass, + * we do CSE before FuseDataNodesWithCommonInputPass + * But it is a temp solution, this CSE will be deleted after change pass from graph pass to node pass + */ + GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CSEBeforeFuseDataNodesWithCommonInputPass", + new (std::nothrow) CommonSubexpressionEliminationPass)); + // FuseDataNodesWithCommonInputPass: fuse same data with common input in same graph + GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::FuseDataNodesWithCommonInputPass", + new (std::nothrow) FuseDataNodesWithCommonInputPass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", new (std::nothrow) CommonSubexpressionEliminationPass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::PermutePass", new (std::nothrow) PermutePass)) @@ -2226,12 +2245,12 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GELOGE(ret, "Run passes when OptimizeStage1_3 failed, ret:%u.", ret); return ret; } - NamesToPass identity_remove_pass; - GE_TIMESTAMP_START(identity_remove_pass); + NamesToPass node_pass; + GE_TIMESTAMP_START(node_pass); IdentityPass identity_force_pass(false); // after SwitchToStreamSwitchPass - identity_remove_pass.emplace_back("IdentityPass", &identity_force_pass); - ret = GEPass(compute_graph).Run(identity_remove_pass); - GE_TIMESTAMP_END(identity_remove_pass, "GraphPrepare::IdentityRemovePass"); + node_pass.emplace_back("IdentityPass", &identity_force_pass); + ret = GEPass(compute_graph).Run(node_pass); + GE_TIMESTAMP_END(node_pass, "GraphPrepare::node_pass"); if (ret != SUCCESS) { GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret); return ret; @@ -3102,9 +3121,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp graph_name.append(std::to_string(graph_node->GetGraphId())); compute_graph->SetName(graph_name); } - std::vector sub_graph_list; - auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model, - session_id); + + auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id); if (ret != SUCCESS) { GELOGE(ret, "SubGraph build Failed."); return ret; diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 821de257..e7dce824 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -16,17 +16,10 @@ #include "graph/manager/graph_var_manager.h" -#include - -#include "common/l2_cache_optimize.h" -#include "common/types.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" -#include "ge/ge_api_types.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" #include "graph/manager/trans_var_data_utils.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/type_utils.h" using std::map; @@ -37,7 +30,7 @@ namespace ge { VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {} VarResource::~VarResource() { - var_offset_set_.clear(); + var_offset_map_.clear(); var_addr_mgr_map_.clear(); cur_var_tensor_desc_map_.clear(); var_broad_cast_info_.clear(); @@ -91,8 +84,10 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen std::string var_key = VarKey(var_name, tensor_desc); GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); if (var_addr_mgr_map_.count(var_key) == 0) { - uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + - static_cast(reinterpret_cast(address)); + uint64_t logic_address = static_cast(reinterpret_cast(address)); + if (memory_type != RT_MEMORY_RDMA_HBM) { + logic_address += VarManager::Instance(session_id_)->GetVarMemLogicBase(); + } GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); @@ -102,7 +97,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen var_addr_mgr.tensor_desc = tensor_desc; var_addr_mgr.memory_type = memory_type; var_addr_mgr_map_[var_key] = var_addr_mgr; - var_offset_set_.insert(logic_address); + var_offset_map_[logic_address] = memory_type; return SUCCESS; } @@ -211,7 +206,14 @@ ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_na return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); } -bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } +bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; } + +rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { + if (var_offset_map_.count(offset) > 0) { + return var_offset_map_[offset]; + } + return RT_MEMORY_RESERVED; +} VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) { auto iter = var_to_trans_road_.find(var_name); @@ -252,7 +254,19 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr MemResource::MemResource() : total_size_(0), var_mem_size_(0) {} -Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) { +MemResource *MemResource::BuildMemResourceFromType(rtMemType_t mem_type) { + switch (mem_type) { + case RT_MEMORY_HBM: + return new (std::nothrow) HbmMemResource(); + case RT_MEMORY_RDMA_HBM: + return new (std::nothrow) RdmaMemResource(); + default: + return nullptr; + } +} + +Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, + size_t &mem_offset) { size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; uint64_t real_size = size; total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize(); @@ -282,6 +296,19 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin return SUCCESS; } +Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) { + uint8_t *buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size); + if (buffer == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); + return MEMALLOC_FAILED; + } + address = static_cast(reinterpret_cast(buffer)); + var_mem_size_ += size; + GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", + session_id, var_name.c_str(), 0, buffer, size); + return SUCCESS; +} + uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; } void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; }; @@ -428,7 +455,7 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) { MemResource *mem_resource = nullptr; auto iter = mem_resource_map_.find(memory_type); if (iter == mem_resource_map_.end()) { - mem_resource = new (std::nothrow) MemResource(); + mem_resource = MemResource::BuildMemResourceFromType(memory_type); if (mem_resource == nullptr) { GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); return ge::INTERNAL_ERROR; @@ -465,7 +492,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen MemResource *mem_resource = nullptr; auto it = mem_resource_map_.find(memory_type); if (it == mem_resource_map_.end()) { - mem_resource = new (std::nothrow) MemResource(); + mem_resource = MemResource::BuildMemResourceFromType(memory_type); if (mem_resource == nullptr) { GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); return ge::INTERNAL_ERROR; @@ -629,6 +656,15 @@ bool VarManager::IsVarAddr(const int64_t &offset) { return var_resource_->IsVarAddr(offset); } +rtMemType_t VarManager::GetVarMemType(const int64_t &offset) { + std::lock_guard lock(mutex_); + if (var_resource_ == nullptr) { + GELOGW("VarManager has not been init."); + return RT_MEMORY_RESERVED; + } + return var_resource_->GetVarMemType(offset); +} + ge::Status VarManager::MallocVarMemory(size_t memory_size) { std::lock_guard lock(mutex_); uint8_t *var_mem_base = nullptr; @@ -654,12 +690,18 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { std::lock_guard lock(mutex_); + if (memory_type == RT_MEMORY_RDMA_HBM) { + return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); + } string memory_key = std::to_string(session_id_); return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); } uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { std::lock_guard lock(mutex_); + if (memory_type == RT_MEMORY_RDMA_HBM) { + return logic_addr; + } string mem_key = std::to_string(session_id_); uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); if (mem_base == nullptr) { diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index 9cf0068c..924ddcb7 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -158,13 +158,15 @@ class VarResource { bool IsVarAddr(const int64_t &offset); + rtMemType_t GetVarMemType(const int64_t &offset); + std::unordered_map GetAllVarDesc() const { return cur_var_tensor_desc_map_; } private: std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc); uint64_t session_id_; - std::unordered_set var_offset_set_; + std::unordered_map var_offset_map_; std::unordered_map var_addr_mgr_map_; std::unordered_map cur_var_tensor_desc_map_; std::unordered_map> var_to_trans_road_; @@ -176,19 +178,36 @@ class VarResource { class MemResource { public: MemResource(); - ~MemResource() = default; + virtual ~MemResource() = default; + static MemResource *BuildMemResourceFromType(rtMemType_t mem_type); - Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset); + virtual Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) = 0; uint64_t GetVarMemSize() const; void UpdateVarMemSize(int64_t mem_size); - private: + protected: uint64_t total_size_; uint64_t var_mem_size_; }; +class HbmMemResource : public MemResource { + public: + HbmMemResource() = default; + ~HbmMemResource() override = default; + + Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override; +}; + +class RdmaMemResource : public MemResource { + public: + RdmaMemResource() = default; + ~RdmaMemResource() override = default; + + Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override; +}; + class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { public: static VarManager *Instance(uint64_t session_id); @@ -275,6 +294,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { bool IsVarAddr(const int64_t &offset); + rtMemType_t GetVarMemType(const int64_t &offset); + uint8_t *GetVarMemoryBase(rtMemType_t memory_type); uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type); diff --git a/ge/graph/manager/rdma_pool_allocator.h b/ge/graph/manager/rdma_pool_allocator.h index 4d8cf71e..0a895a11 100644 --- a/ge/graph/manager/rdma_pool_allocator.h +++ b/ge/graph/manager/rdma_pool_allocator.h @@ -53,6 +53,10 @@ class RdmaPoolAllocator { Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size); + uint8_t *GetRdmaBaseAddr() { return rdma_base_addr_; } + + size_t GetRdmaMemSize() { return rdma_mem_size_; } + private: void MergeBlocks(Block *dst, Block *src); diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 95f13b6f..1c82eaf3 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -44,18 +44,46 @@ #define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) #define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) -bool IsExperimental() { - const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr); - return kIsExperimental; -} - namespace ge { using Cluster = DynamicShapePartitioner::Cluster; using ClusterPtr = std::shared_ptr; +static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { + for (const auto &node : root_graph->GetAllNodes()) { + GE_CHECK_NOTNULL(node->GetOpDesc()); + for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) { + auto type = input_desc.GetDataType(); + if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { + if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { + return false; + } else { + GEEVENT("In dynamic shape scene, model contains data type:" + "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " + "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); + break; + } + } + } + for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDesc()) { + auto type = output_desc.GetDataType(); + if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { + if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { + return false; + } else { + GEEVENT("In dynamic shape scene, model contains data type:" + "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " + "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); + break; + } + } + } + } + return true; +} + Status DynamicShapePartitioner::Partition() { REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); - if (!IsExperimental()) { + if (!IsInExperimentalMode(root_graph_)) { GELOGD("Skip dynamic shape partition as not in experimental mode."); REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), "Failed set dynamic shape partitioned flag on root graph."); @@ -185,6 +213,7 @@ std::string DynamicShapePartitioner::DebugString() const { size_t data = 0; size_t netoutput = 0; size_t is_inputnode = 0; + size_t stage = 0; std::stringstream ss; ss << "All unknown shape nodes:" << std::endl; for (const auto &node : unknown_shape_nodes_) { @@ -201,10 +230,13 @@ std::string DynamicShapePartitioner::DebugString() const { netoutput++; } else if (cluster->IsInputNode()) { is_inputnode++; + } else if (cluster->IsIndependent()) { + stage++; } } ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known - << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl; + << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode + << ", stage:" << stage << std::endl; for (const auto &cluster : unique_clusters_) { ss << " " << cluster->DebugString() << std::endl; } @@ -244,12 +276,15 @@ Status DynamicShapePartitioner::InitClusters() { for (const auto &node : graph->GetDirectNode()) { Cluster::Type type = Cluster::DATA; bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty(); + REQUIRE_NOT_NULL(node->GetOpDesc(), "op_desc is null"); if (node->GetType() == DATA) { type = Cluster::DATA; } else if (is_input) { type = Cluster::INPUT_NODE; } else if (node->GetType() == NETOUTPUT) { type = Cluster::NETOUTPUT; + } else if ((node->GetType() == PARTITIONEDCALL) && (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL))) { + type = Cluster::STAGE; } else if (unknown_shape_nodes_.count(node) > 0) { type = Cluster::UNKNOWN_SHAPE; } else { @@ -332,6 +367,9 @@ static std::string ToString(const std::vector &clusters) { void DynamicShapePartitioner::MergeClustersUnknownShape() { // Merge unknown shape clusters for (const auto &cluster : ordered_cluster_) { + if (cluster->IsIndependent()) { + continue; + } for (const auto &in_cluster : cluster->Inputs()) { if (!in_cluster->IsUnknownShape()) { continue; @@ -351,6 +389,9 @@ void DynamicShapePartitioner::MergeClustersUnknownShape() { void DynamicShapePartitioner::MergeClustersKnownShape() { // Merge known shape clusters for (const auto &cluster : ordered_cluster_) { + if (cluster->IsIndependent()) { + continue; + } if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) { auto in_cluster = *(cluster->Inputs().begin()); in_cluster->Merge(cluster); @@ -578,6 +619,7 @@ void Cluster::UpdateRank(size_t rank) { bool Cluster::IsData() const { return type_ == DATA; }; bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; }; bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; }; +bool Cluster::IsIndependent() const { return type_ == STAGE; }; bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; }; bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; }; bool Cluster::IsRefVariable() const { @@ -613,6 +655,9 @@ void Cluster::RemoveOutput(ClusterPtr out) { out->in_clusters_.end()); }; void Cluster::Merge(ClusterPtr other) { + if (other->IsIndependent()) { + return; + } nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end()); other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()), other->in_clusters_.end()); @@ -661,7 +706,9 @@ std::vector Cluster::MergeAllPathFrom(ClusterPtr other) { std::unordered_set forward_reached_clusters; std::unordered_set backward_reached_clusters; std::vector path_clusters; - + if (other->IsIndependent()) { + return path_clusters; + } if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) == other->out_clusters_.end()) { return path_clusters; @@ -744,7 +791,7 @@ Status Cluster::BuildFrame() { } } } - if (IsData()) { + if (IsData() || IsIndependent()) { for (const auto &anchor : node->GetAllOutDataAnchors()) { AddFrameOutput(anchor); } @@ -860,7 +907,7 @@ Status Cluster::CombinePartitionFrame() { } Status Cluster::BuildPartitionSubgraph() { - if (IsData() || IsNetOutput()) { + if (IsData() || IsNetOutput() || IsIndependent()) { return SUCCESS; } int64_t parent_node_index = 0; diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index 9772615e..e8408ff9 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -32,7 +32,7 @@ class DynamicShapePartitioner { // DATA:DATA, UNKNOWN_SHAPE:unknowshape, KNOWN_SHAPE:knowshape, NETOUTPUT:NETOUTPUT. class Cluster : public std::enable_shared_from_this { public: - enum Type { DATA, INPUT_NODE, NETOUTPUT, KNOWN_SHAPE, UNKNOWN_SHAPE }; + enum Type { DATA, INPUT_NODE, NETOUTPUT, STAGE, KNOWN_SHAPE, UNKNOWN_SHAPE }; Cluster(size_t rank, Type type, NodePtr node, DynamicShapePartitioner *partitioner) : id_(rank), min_(rank), max_(rank), type_(type), partitioner_(partitioner) { nodes_.push_back(node); @@ -45,6 +45,7 @@ class DynamicShapePartitioner { bool IsData() const; bool IsKnownShape() const; bool IsUnknownShape() const; + bool IsIndependent() const; bool IsNetOutput() const; std::vector> Inputs() const; std::vector> Outputs() const; diff --git a/ge/graph/partition/stage_partition.cc b/ge/graph/partition/stage_partition.cc index 93a06afe..f6e49bbd 100644 --- a/ge/graph/partition/stage_partition.cc +++ b/ge/graph/partition/stage_partition.cc @@ -25,6 +25,10 @@ #include "common/types.h" namespace ge { +namespace { +const std::set kSrcNodeTypes = { DATA, AIPPDATA, ANN_DATA }; +} + Status StagePartitioner::Partition() { GE_CHECK_NOTNULL(root_graph_); if (root_graph_->GetParentGraph() != nullptr) { @@ -37,6 +41,10 @@ Status StagePartitioner::Partition() { if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) { continue; } + if ((kSrcNodeTypes.count(op_desc->GetType()) != 0) && node->GetInAllNodes().empty()) { + continue; + } + GELOGD("original node %s for stage %u", node->GetName().c_str(), level); stage_nodes_[level].insert(node); } if (stage_nodes_.empty()) { @@ -54,6 +62,13 @@ Status StagePartitioner::Partition() { return FAILED; } + root_graph_->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool { + uint32_t a_level = UINT32_MAX; + (void)AttrUtils::GetInt(a->GetOpDesc(), ATTR_STAGE_LEVEL, a_level); + uint32_t b_level = UINT32_MAX; + (void)AttrUtils::GetInt(b->GetOpDesc(), ATTR_STAGE_LEVEL, b_level); + return a_level < b_level; + }); if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) { GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, " "maybe stage_level was not set correctly.", root_graph_->GetName().c_str()); @@ -76,20 +91,26 @@ Status StagePartitioner::SplitStageLevel() { auto node = nodes.top(); nodes.pop(); GE_CHECK_NOTNULL(node->GetOpDesc()); - if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) { + uint32_t tmp_level = cur_stage_level; + (void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); + if (tmp_level != cur_stage_level) { continue; } for (const auto &in_node : node->GetInAllNodes()) { if (visited_stage_nodes.count(in_node) != 0) { continue; } + if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { + GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", in_node->GetName().c_str()); + return INTERNAL_ERROR; + } + GELOGD("Mark stage_level node %s, stage_level=%u", in_node->GetName().c_str(), cur_stage_level); + if ((kSrcNodeTypes.count(in_node->GetType()) != 0) && in_node->GetInAllNodes().empty()) { + GELOGD("skip data node %s for stage %u", in_node->GetName().c_str(), cur_stage_level); + continue; + } nodes.push(in_node); } - if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { - GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str()); - return INTERNAL_ERROR; - } - GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level); visited_stage_nodes.emplace(node); } for (const auto &node : visited_stage_nodes) { @@ -219,6 +240,11 @@ NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const op_desc->AddSubgraphName("f"); op_desc->SetSubgraphInstanceName(0, graph_name); + if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) { + GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed", op_desc->GetName().c_str()); + return nullptr; + } + NodePtr subgraph_node = root_graph_->AddNode(op_desc); if (subgraph_node == nullptr) { GELOGE(FAILED, "Add node %s failed.", graph_name.c_str()); diff --git a/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc new file mode 100644 index 00000000..ab8fc39b --- /dev/null +++ b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc @@ -0,0 +1,119 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/fuse_data_nodes_with_common_input_pass.h" + +#include +#include +#include +#include +#include +#include "common/ge_inner_error_codes.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/type_utils.h" +#include "graph/utils/node_utils.h" + +using std::map; +using std::vector; +using std::set; +using std::string; + +namespace ge { +Status FuseDataNodesWithCommonInputPass::Run(ge::ComputeGraphPtr graph) { + if (graph == nullptr) { + GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); + return GE_GRAPH_PARAM_NULLPTR; + } + GELOGD("FuseDataNodesWithCommonInputPass in."); + // key: subgraph, value:--key: peer out anchor to parent node, --value: parent indexes to parent node + map>> subgraphs_to_need_fuse_nodes_info; + if (InitNeedFuseNodesInfo(graph, subgraphs_to_need_fuse_nodes_info) != SUCCESS) { + GELOGE(FAILED, "InitNeedFuseNodesInfo failed."); + return FAILED; + } + return FuseDataNodes(subgraphs_to_need_fuse_nodes_info); +} + +Status FuseDataNodesWithCommonInputPass::InitNeedFuseNodesInfo(ComputeGraphPtr &graph, + map>> &subgraphs_to_need_fuse_nodes_info) { + for (const auto &subgraph : graph->GetAllSubgraphs()) { + GE_CHECK_NOTNULL(subgraph); + auto parent_node = subgraph->GetParentNode(); + GE_CHECK_NOTNULL(parent_node); + if (parent_node->GetType() == CASE || parent_node->GetType() == IF) { + auto &peer_out_anchors_to_parent_indexes = subgraphs_to_need_fuse_nodes_info[subgraph]; + for (const auto &in_data_anchor : parent_node->GetAllInDataAnchors()) { + GE_CHECK_NOTNULL(in_data_anchor); + OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + uint32_t parent_index = static_cast(in_data_anchor->GetIdx()); + GE_CHECK_NOTNULL(peer_out_anchor); + peer_out_anchors_to_parent_indexes[peer_out_anchor].insert(parent_index); + GELOGD("Peer node %s is the %d input of parent node %s in %s.", + peer_out_anchor->GetOwnerNode()->GetName().c_str(), parent_index, parent_node->GetName().c_str(), + subgraph->GetName().c_str()); + } + } + } + return SUCCESS; +} + +Status FuseDataNodesWithCommonInputPass::FuseDataNodes( + const map>> &subgraphs_to_need_fuse_nodes_info) { + for (const auto &subgraph_to_need_fuse_nodes_info : subgraphs_to_need_fuse_nodes_info) { + auto subgraph = subgraph_to_need_fuse_nodes_info.first; + for (const auto &peer_out_anchors_to_parent_indexes : subgraph_to_need_fuse_nodes_info.second) { + if (peer_out_anchors_to_parent_indexes.second.size() <= 1) { + continue; + } + // key: out anchor, value: data nodes with common input will be fused + map> peer_out_anchors_to_need_fuse_nodes; + for (const auto &node : subgraph->GetDirectNode()) { + if (node->GetType() != DATA) { + continue; + } + GE_CHECK_NOTNULL(node->GetOpDesc()); + uint32_t parent_index = 0; + if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + if (peer_out_anchors_to_parent_indexes.second.count(parent_index) > 0) { + peer_out_anchors_to_need_fuse_nodes[peer_out_anchors_to_parent_indexes.first].emplace_back(node); + } + } + } + for (const auto &peer_out_anchor_to_need_fuse_nodes : peer_out_anchors_to_need_fuse_nodes) { + auto need_fuse_data_nodes = peer_out_anchor_to_need_fuse_nodes.second; + auto first_node = need_fuse_data_nodes.at(0); + for (size_t i = 1; i < need_fuse_data_nodes.size(); ++i) { + auto node = need_fuse_data_nodes.at(i); + GELOGI("Replace redundant data node %s by %s exist in graph: %s.", node->GetName().c_str(), + first_node->GetName().c_str(), subgraph->GetName().c_str()); + // the data node which can be fused has none input(both data and control in) + if (GraphUtils::MoveOutCtrlEdges(node, first_node) != SUCCESS) { + return FAILED; + } + if (GraphUtils::ReplaceNodeDataAnchors(first_node, node, {}, {0}) != SUCCESS) { + return FAILED; + } + if (GraphUtils::RemoveNodeWithoutRelink(subgraph, node) != SUCCESS) { + GELOGE(FAILED, "[%s] RemoveNodeWithoutRelink failed.", node->GetName().c_str()); + return FAILED; + } + } + } + } + } + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/passes/fuse_data_nodes_with_common_input_pass.h b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.h new file mode 100755 index 00000000..9ff6ab89 --- /dev/null +++ b/ge/graph/passes/fuse_data_nodes_with_common_input_pass.h @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ +#define GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ + +#include +#include +#include +#include "graph/types.h" +#include "inc/graph_pass.h" + +namespace ge { +class FuseDataNodesWithCommonInputPass : public GraphPass { + public: + Status Run(ge::ComputeGraphPtr graph) override; + + private: + Status InitNeedFuseNodesInfo(ComputeGraphPtr &graph, + map>> &subgraphs_to_need_fuse_nodes_info); + Status FuseDataNodes( + const map>> &subgraphs_to_need_fuse_nodes_info); +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index f131942c..d2effd44 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -145,6 +145,7 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra return GE_GRAPH_EMPTY_SUBGRAPH; } + set ctrl_only_const_nodes; auto &data_nodes = all_data_nodes[subgraph]; auto &const_nodes = all_const_nodes[subgraph]; for (auto &node : subgraph->GetDirectNode()) { @@ -178,15 +179,30 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx())); } + if (peer_name_list.empty()) { + GELOGI("%s, Const: %s, no data output", subgraph->GetName().c_str(), node->GetName().c_str()); + const auto in_all_nodes = node->GetInAllNodes(); + if (in_all_nodes.empty() || std::all_of(in_all_nodes.begin(), in_all_nodes.end(), + [](const NodePtr &n) { return n->GetType() == DATA; })) { + ctrl_only_const_nodes.insert(node); + } + continue; + } + string key_of_const; for (const string &name : peer_name_list) { key_of_const += (key_of_const.empty() ? name : "_" + name); } const_nodes[key_of_const] = node; - GELOGD("%s, Key: %s, Const: %s", subgraph->GetName().c_str(), key_of_const.c_str(), node->GetName().c_str()); + GELOGD("%s, Const: %s, Key: %s", subgraph->GetName().c_str(), node->GetName().c_str(), key_of_const.c_str()); } } + + for (auto &node : ctrl_only_const_nodes) { + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, node), + "Remove node without relink failed, node: %s", node->GetName().c_str()); + } } return SUCCESS; @@ -352,7 +368,8 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra const auto owner_node = out_anchor->GetOwnerNode(); GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), const_node->GetName().c_str()); if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty() && owner_node != data_node) { - graph->RemoveNode(owner_node); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node), + "Remove node without relink failed, node: %s", owner_node->GetName().c_str()); } } @@ -414,7 +431,8 @@ Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra const auto owner_node = out_anchor->GetOwnerNode(); GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str()); if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty()) { - graph->RemoveNode(owner_node); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node), + "Remove node without relink failed, node: %s", owner_node->GetName().c_str()); } } GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(const_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed"); @@ -442,7 +460,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph const map> &all_data_nodes, const string &node_key, uint32_t parent_index) { if (node_key.empty() || parent_index == kInvalidParent) { - GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str()); + GELOGE(FAILED, "Graph: %s, node key: %s, parent index: %u invalid", + graph->GetName().c_str(), node_key.c_str(), parent_index); return FAILED; } @@ -472,7 +491,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph return FAILED; } - GE_CHK_GRAPH_STATUS_RET(subgraph->RemoveNode(move_node), "Remove node failed"); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, move_node), + "Remove node without relink failed, node: %s", move_node->GetName().c_str()); GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str()); } diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index d1111d52..dc6269ac 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -142,17 +142,18 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node GE_CHECK_NOTNULL(in_node); // Need insert memcpy - // 1. Const->NetOutput in subgraph + // 1. Const->NetOutput in subgraph & parent graph is known // 2. AtomicOp->NetOutput in subgraph // 3. OutputContinuesRequiredOp->NetOutput in subgraph // 4. Data->NetOutput in subgraph but parent_node is not while // 5. While->NetOutput in known subgraph std::string op_type; - bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || + bool insert_flag = + (NodeUtils::GetConstOpType(in_node, op_type) && !graph->GetParentGraph()->GetGraphUnknownFlag()) || IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && - (kWhileOpTypes.count(in_node->GetType()) != 0)); + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc index a6e00f4a..7f709f03 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc @@ -32,5 +32,8 @@ REGISTER_OP_CREATOR(Assign, HostOp); REGISTER_OP_CREATOR(RandomUniform, HostOp); REGISTER_OP_CREATOR(Add, HostOp); REGISTER_OP_CREATOR(Mul, HostOp); +REGISTER_OP_CREATOR(ConcatV2, HostOp); +REGISTER_OP_CREATOR(Data, HostOp); +REGISTER_OP_CREATOR(Fill, HostOp); } // namespace host_cpu } // namespace ge diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index e9881224..b7c6c33d 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -15,7 +15,7 @@ */ #include "hybrid/executor/hybrid_model_async_executor.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "graph/ge_context.h" @@ -59,6 +59,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr &lis run_flag_ = true; listener_ = listener; future_ = std::async(std::launch::async, [&]() -> Status { + GetThreadLocalContext() = *executor_->GetContext()->ge_context; GetContext().SetSessionId(executor_->GetContext()->session_id); return RunInternal(); }); @@ -229,7 +230,11 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy } GE_CHECK_GE(tensor_size, 0); - auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); + AllocationAttr attr; + if (GetContext().GetHostExecFlag()) { + attr.SetMemType(HOST_DDR); + } + auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr); GE_CHECK_NOTNULL(tensor_buffer); args.inputs.emplace_back(std::shared_ptr(tensor_buffer.release())); diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 21d2d033..a69cc45f 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -21,7 +21,7 @@ #include #include "external/ge/ge_api_error_codes.h" #include "external/ge/ge_api_types.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "hybrid/executor/hybrid_model_executor.h" #include "runtime/stream.h" diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 6299d4ff..6b2e52b4 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -17,7 +17,7 @@ #ifndef GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ #define GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ #include "common/thread_pool.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/rt_callback_manager.h" #include "hybrid/executor/subgraph_executor.h" diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index 171ddaf3..00921705 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -188,6 +188,14 @@ Status NodeState::WaitForPrepareDone() { return SUCCESS; } +void NodeState::SetTaskContext(std::shared_ptr &task_context) { + task_context_ = task_context; +} + +std::shared_ptr NodeState::GetTaskContext() { + return task_context_; +} + Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled"); diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 02a362b4..c68a19ac 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -29,6 +29,7 @@ namespace hybrid { class NodeTask; struct GraphExecutionContext; class SubgraphContext; +class TaskContext; class ShapeFuture { public: @@ -103,6 +104,9 @@ struct NodeState { Status AwaitInputTensors(GraphExecutionContext &context) const; + void SetTaskContext(std::shared_ptr &task_context); + std::shared_ptr GetTaskContext(); + private: const NodeItem *node_item_ = nullptr; std::shared_ptr kernel_task_ = nullptr; @@ -110,6 +114,7 @@ struct NodeState { OpDescPtr op_desc_; ShapeInferenceState shape_inference_state_; SubgraphContext *subgraph_context_; + std::shared_ptr task_context_ = nullptr; std::mutex mu_; }; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index f7b063c7..f8f122b1 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -231,6 +231,15 @@ Status SubgraphExecutor::PrepareNodes() { } else { node_state->SetKernelTask(node_item.kernel_task); } + auto unique_task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state->GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state->SetTaskContext(shared_task_context); } } @@ -267,6 +276,19 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta } else { node_state.SetKernelTask(node_item.kernel_task); } + auto unique_task_context = TaskContext::Create(*node_state.GetNodeItem(), context_, subgraph_context_.get()); + GE_CHECK_NOTNULL(unique_task_context); + const auto &task = node_state.GetKernelTask(); + if (task == nullptr) { + GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str()); + return INTERNAL_ERROR; + } + auto shared_task_context = std::shared_ptr(unique_task_context.release()); + node_state.SetTaskContext(shared_task_context); + GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] start"); + GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] end"); return SUCCESS; } @@ -295,10 +317,9 @@ Status SubgraphExecutor::LaunchTasks() { GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone()); GELOGD("[%s] Start to execute.", node_state->GetName().c_str()); - auto task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(task_context); - task_context->SetForceInferShape(force_infer_shape_); - auto shared_task_context = std::shared_ptr(task_context.release()); + auto shared_task_context = node_state->GetTaskContext(); + GE_CHECK_NOTNULL(shared_task_context); + shared_task_context->SetForceInferShape(force_infer_shape_); HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), "[%s] Execute node failed.", node_state->GetName().c_str()); diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index d1949947..4523e2c4 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -75,7 +75,7 @@ class SubgraphExecutor { Status GetOutputs(std::vector &outputs, std::vector &output_desc); private: - static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); + Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); Status Init(const std::vector &inputs, const std::vector &input_desc); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index b5de2a70..5e9d3607 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * } GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); - auto op_desc = node->GetOpDesc(); - std::string op_name = op_desc->GetName(); - std::string dynamic_model_name = model->GetModelName(); - uint32_t task_id = context_->GetTaskId(); - uint32_t stream_id = context_->GetStreamId(); - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = dynamic_model_name; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = 0; - auto task_defs = model->GetTaskDefs(node); - if (task_defs != nullptr && (*task_defs).size() > 0) { - const auto &task_def = (*task_defs)[0]; - tmp_task_desc_info.block_dim = task_def.kernel().block_dim(); - } - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; - tmp_task_desc_info.shape_type = "dynamic"; - tmp_task_desc_info.cur_iter_num = graph_context_->iteration; - GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", - node->GetName().c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); + task_desc_info = context_->GetProfilingTaskDescInfo(); + context_->ClearProfilingTaskDescInfo(); + return SUCCESS; } @@ -247,7 +229,6 @@ Status NodeDoneCallback::ProfilingReport() { GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); std::vector task_desc_info; - TaskDescInfo tmp_task_desc_info; auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); if (profiling_ret != RT_ERROR_NONE) { GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 56ae3ea3..46ee6bd6 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -68,7 +68,6 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); { - std::lock_guard lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "Invoke InferShapeAndType failed."); diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 5349390c..369c732a 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -19,7 +19,7 @@ #include #include "external/ge/ge_api_error_codes.h" -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "model/ge_root_model.h" namespace ge { diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 91b6a549..7e5d8fe5 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -17,7 +17,7 @@ #include "hybrid_model.h" #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index e521b776..72495cad 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -21,8 +21,8 @@ #include #include #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/data_inputer.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/data_inputer.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/node.h" #include "hybrid/common/tensor_value.h" #include "hybrid/model/node_item.h" diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d1f61985..861cd30a 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -20,8 +20,8 @@ #include "graph/ge_context.h" #include "graph/build/memory/var_mem_assign_util.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -772,7 +772,12 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ var_name.c_str(), hybrid_model_.GetSessionId()); - uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM); + rtMemType_t memory_type = RT_MEMORY_HBM; + uint32_t mem_type = 0; + if (AttrUtils::GetInt(var_node->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { + memory_type = RT_MEMORY_RDMA_HBM; + } + uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type); if (dev_mem == nullptr) { GELOGE(INTERNAL_ERROR, "Failed to copy var %s from device, cant not get " diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index 55a19b6c..045bf3ef 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -21,7 +21,7 @@ #include #include #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/task_info.h" #include "graph/node.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/model/node_item.h" diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 2abc5b03..a8736154 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -182,16 +182,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function } RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); - uint32_t task_id = 0; - uint32_t stream_id = 0; - rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Get task_id and stream_id failed."); - return rt_ret; - } - context.SetTaskId(task_id); - context.SetStreamId(stream_id); - GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); + // save profiling data + (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 80ea579b..f1bd6466 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -19,7 +19,8 @@ #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" +#include "graph/types.h" using optiling::OpRunInfo; @@ -34,6 +35,23 @@ constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); + + GE_CHECK_LE(op_desc.GetOutputsSize(), static_cast(INT_MAX)); + int outputs_size = static_cast(op_desc.GetOutputsSize()); + + for (int i = 0; i < outputs_size; ++i) { + const GeTensorDescPtr tensor_desc = op_desc.MutableOutputDesc(i); + if (tensor_desc == nullptr) { + GELOGW("Op: %s, Index: %d, Tensor Desc is null", op_desc.GetName().c_str(), i); + continue; + } + + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + output_indices_to_skip_.push_back(i); + } + } return SUCCESS; } @@ -221,7 +239,8 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) } Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { - size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces(); + size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces() + - output_indices_to_skip_.size(); if (tiling_buffer_ != nullptr) { ++expected_arg_count; } @@ -244,6 +263,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { for (int i = 0; i < task_context.NumOutputs(); ++i) { const auto output = task_context.GetOutput(i); GE_CHECK_NOTNULL(output); + if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) { + GELOGD("Node:%s output[%d] is an optional, the address don't need to be saved.", + task_context.GetNodeName(), i); + continue; + } arg_base_[index++] = reinterpret_cast(output->GetData()); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 5818f384..3f350531 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -48,6 +48,8 @@ class AiCoreOpTask { bool GetClearAtomic() const {return clear_atomic_;} + uint32_t GetBlockDim() const {return block_dim_;} + protected: Status UpdateTilingInfo(TaskContext &context); virtual std::string GetKeyForOpParamSize() const; @@ -70,6 +72,7 @@ class AiCoreOpTask { uint32_t args_size_ = 0; uint32_t block_dim_ = 1; bool clear_atomic_ = true; + std::vector output_indices_to_skip_; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 63ce65e9..109939d9 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -18,7 +18,7 @@ #include "framework/common/taskdown_common.h" #include "common/formats/formats.h" #include "aicpu/common/aicpu_task_struct.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/utils/node_utils.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" @@ -190,16 +190,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function kRdmaReadTypes = { HCOMREMOTEREAD, HCOMREMOTEREFREAD }; +const std::set kRdmaWriteTypes = { HCOMREMOTEWRITE, HCOMREMOTESCATTERWRITE }; +const std::set kRdmaScatterTypes = { HCOMREMOTEREFREAD, HCOMREMOTESCATTERWRITE }; } // namespace -namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HCCL, HcclNodeExecutor); @@ -142,11 +144,22 @@ Status RdmaNodeTask::Init(TaskContext &context) { GE_CHECK_NOTNULL(peer_node->GetOpDesc()); remote_index_ = {peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx()}; - if (node_item.node->GetType() == HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(node_item.node->GetType()) > 0) { local_index_ = 0; } else { local_index_ = op_desc->GetInputIndexByName("local"); } + int32_t offset_idx = node_item.op_desc->GetInputIndexByName("local_offset"); + if ((offset_idx != -1) && (node_item.op_desc->GetInputDescPtr(offset_idx) != nullptr)) { + skip_flag_ = true; + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()); + GE_CHECK_NOTNULL(node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()); + offset_index_ = { + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc()->GetId(), + node_item.node->GetInDataAnchor(offset_idx)->GetPeerOutAnchor()->GetIdx() }; + } return SUCCESS; } @@ -158,8 +171,13 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vectorGetTensor(remote_index_.first, remote_index_.second, remote_tensor)); auto data = reinterpret_cast(remote_tensor.GetData()); if (data == nullptr) { - GELOGE(FAILED, "Tensor data is nullptr."); - return FAILED; + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + GELOGE(FAILED, "Tensor data is nullptr."); + return FAILED; + } } auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { @@ -183,30 +201,63 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(tensor_buffer.release())))); } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) } TensorValue *tv; - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - tv = context.MutableOutput(0); + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { + tv = context.MutableOutput(local_index_); } else { tv = context.MutableInput(local_index_); } GE_CHECK_NOTNULL(tv); - auto local_addr = reinterpret_cast(reinterpret_cast(tv->MutableData())); auto row_num = dims.front(); addr_infos.resize(row_num); - auto device_len = tv->GetSize() / row_num; - if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { - GELOGE(FAILED, "Local embedding length is out of range."); - return FAILED; - } + if (skip_flag_) { + int32_t offset_idx = context.GetNodeItem().op_desc->GetInputIndexByName("local_offset"); + GE_CHECK_NOTNULL(context.GetNodeItem().op_desc->GetInputDescPtr(offset_idx)); + auto data_type = context.GetNodeItem().op_desc->GetInputDesc(offset_idx).GetDataType(); + + Tensor offset_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(offset_index_.first, offset_index_.second, offset_tensor)) + if (static_cast(offset_tensor.GetSize() / GetSizeByDataType(data_type)) != row_num) { + GELOGE(PARAM_INVALID, "num of offset and remote addr mismatch, offset size=%zu, remote_addr size=%lld, dtype=%s", + offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); + return PARAM_INVALID; + } - for (auto idx = 0; idx < row_num; ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - addr_infos[idx] = {static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, - device_len}; - local_addr += device_len; + auto addr_offset = reinterpret_cast(offset_tensor.GetData()); + GE_CHECK_NOTNULL(addr_offset); + auto base_addr = reinterpret_cast(tv->MutableData()); + GE_CHECK_NOTNULL(base_addr); + + for (auto idx = 0; idx < row_num; idx++) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast(data[line_idx]), + data[line_idx + kVarTableIdxAddr], + reinterpret_cast(reinterpret_cast(base_addr + addr_offset[idx])), + data[line_idx + kVarTableIdxLen] }; + } + } else { + auto local_addr = reinterpret_cast(reinterpret_cast(tv->MutableData())); + auto device_len = tv->GetSize() / row_num; + if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { + GELOGE(FAILED, "Local embedding length is out of range, expect %lld, but %lld exactly.", + data[kVarTableIdxLen], device_len); + return FAILED; + } + + for (auto idx = 0; idx < row_num; ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt) + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = { static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, + device_len }; + local_addr += device_len; + } } return SUCCESS; @@ -226,6 +277,10 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do } vector addr_infos; GE_CHK_STATUS_RET(ExtractTensor(context, addr_infos)); + if (addr_infos.empty()) { + done_callback(); + return SUCCESS; + } auto callback = [this](HcclResult status) { if (status != HCCL_SUCCESS) { @@ -235,6 +290,11 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do this->cond_.notify_all(); GELOGI("rdma callback success."); }; + + std::string executor_type = context.GetNodeItem().NodeType(); + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + executor_type = context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD ? HCOMREMOTEREAD : HCOMREMOTEWRITE; + } HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); @@ -262,7 +322,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const GE_CHK_STATUS_RET(task.Init(context), "hccl node load hccl so failed."); // allocate output mem, output mem or remote read will be calculated when node execute. - if (context.GetNodeItem().NodeType() != HCOMREMOTEREAD) { + if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) == 0) { GE_CHK_STATUS_RET(context.AllocateOutputs(), "hccl node task allocate output failed."); } @@ -274,7 +334,7 @@ Status HcclNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const Status HcclNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GELOGI("[%s] HcclNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); - if (node->GetType() == HCOMREMOTEREAD || node->GetType() == HCOMREMOTEWRITE) { + if ((kRdmaReadTypes.count(node->GetType()) > 0) || (kRdmaWriteTypes.count(node->GetType()) > 0)) { task = MakeShared(); } else { task = MakeShared(); diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 07dd848b..873f259f 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -55,9 +55,11 @@ class RdmaNodeTask : public NodeTask { private: Status ExtractTensor(TaskContext &context, vector &addr_infos); std::pair remote_index_; + std::pair offset_index_; int32_t local_index_ = 0; std::mutex hccl_mutex_; std::condition_variable cond_; + bool skip_flag_; }; class HcclNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 01fd391d..d54195d6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -29,8 +29,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status AssignKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto ref_tensor = context.MutableInput(kAssignRefInputIndex); GE_CHECK_NOTNULL(ref_tensor); const auto value_tensor = context.GetInput(kAssignValueInputIndex); @@ -50,7 +48,7 @@ Status AssignKernel::Compute(TaskContext& context) { GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc new file mode 100644 index 00000000..e34f601a --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hybrid/node_executor/host_cpu/kernel/data_kernel.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "hybrid/node_executor/host_cpu/kernel_factory.h" + +namespace { +constexpr size_t kDataInputIndex = 0; +constexpr size_t kDataOutputIndex = 0; +} + +namespace ge { +namespace hybrid { +namespace host_cpu { +Status DataKernel::Compute(TaskContext& context) { + auto input = context.MutableInput(kDataInputIndex); + GE_CHECK_NOTNULL(input); + GE_CHK_STATUS_RET(context.SetOutput(kDataOutputIndex, *input), "[%s] Failed to set output.", context.GetNodeName()) + GELOGD("[%s] compute success.", node_->GetName().c_str()); + return SUCCESS; +} + +REGISTER_KERNEL_CREATOR(Data, DataKernel); +} // namespace host_cpu +} // namespace hybrid +} // namespace ge diff --git a/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h new file mode 100644 index 00000000..ca42d647 --- /dev/null +++ b/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ +#define GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ + +#include "hybrid/node_executor/host_cpu/kernel/kernel.h" + +namespace ge { +namespace hybrid { +namespace host_cpu { +class DataKernel : public Kernel { + public: + DataKernel(const NodePtr &node) : Kernel(node) {} + ~DataKernel() override = default; + DataKernel &operator=(const DataKernel &op) = delete; + DataKernel(const DataKernel &op) = delete; + + /** + * @brief compute for node_task. + * @return result + */ + Status Compute(TaskContext& context) override; +}; +} // namespace host_cpu +} // namespace hybrid +} // namespace ge + +#endif // GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index ff5a7c6d..b1b4e68c 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -23,7 +23,7 @@ namespace ge { namespace hybrid { namespace host_cpu { Status NoOpKernel::Compute(TaskContext& context) { - GELOGI("[%s] no need to compute.", node_->GetName().c_str()); + GELOGD("[%s] no need to compute.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc index 37b07e37..52d48821 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc @@ -30,8 +30,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status RandomUniformKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - int64_t seed = 0; int64_t seed2 = 0; (void)AttrUtils::GetInt(node_->GetOpDesc(), "seed", seed); @@ -66,7 +64,7 @@ Status RandomUniformKernel::Compute(TaskContext& context) { return UNSUPPORTED; } - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index 2a836458..16738c2a 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -23,8 +23,6 @@ namespace ge { namespace hybrid { namespace host_cpu { Status VariableKernel::Compute(TaskContext& context) { - GELOGI("[%s] compute begin.", node_->GetName().c_str()); - auto tensor = context.GetVariable(node_->GetName()); if (tensor == nullptr) { GELOGE(PARAM_INVALID, "tensor is NULL."); @@ -32,7 +30,7 @@ Status VariableKernel::Compute(TaskContext& context) { } // Constant & Variable Op has and only has one output GE_CHK_STATUS_RET(context.SetOutput(0, *tensor), "[%s] Failed to set output.", context.GetNodeName()); - GELOGI("[%s] compute success.", node_->GetName().c_str()); + GELOGD("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 02427b91..12e98160 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -38,7 +38,6 @@ const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; } Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws GE_CHK_STATUS_RET_NOLOG(context.AllocateWorkspaces()); GE_CHK_STATUS_RET_NOLOG(task.UpdateArgs(context)); return SUCCESS; diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 6488fbbe..e89ad874 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -18,9 +18,11 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/debug/log.h" #include "graph/utils/tensor_utils.h" +#include "graph/types.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/subgraph_executor.h" +#include "common/profiling/profiling_manager.h" namespace ge { namespace hybrid { @@ -212,6 +214,13 @@ Status TaskContext::AllocateOutput(int index, return SUCCESS; } + int32_t calc_type = 0; + bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); + if (ret && (calc_type == static_cast(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { + outputs_start_[index] = TensorValue(); + return SUCCESS; + } + auto it = node_item_->ref_outputs.find(index); if (it != node_item_->ref_outputs.end()) { auto &ref_node = it->second; @@ -498,5 +507,42 @@ bool TaskContext::NeedCallback() { Status TaskContext::Synchronize() { return execution_context_->Synchronize(GetStream()); } + +Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + const NodeItem &node_item = GetNodeItem(); + auto op_desc = node_item.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + uint32_t task_id = 0; + uint32_t stream_id = 0; + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Get task_id and stream_id failed."); + return rt_ret; + } + GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); + + const GraphExecutionContext * graph_context = GetExecutionContext(); + GE_CHECK_NOTNULL(graph_context); + const HybridModel *model = graph_context->model; + GE_CHECK_NOTNULL(model); + + std::string op_name = op_desc->GetName(); + std::string dynamic_model_name = model->GetModelName(); + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = dynamic_model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_type = task_type; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + tmp_task_desc_info.shape_type = "dynamic"; + tmp_task_desc_info.cur_iter_num = iteration_ + 1; + task_desc_info.emplace_back(tmp_task_desc_info); + } + + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 6a4bcb8c..9a668f8c 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -22,6 +22,7 @@ #include #include "common/properties_manager.h" #include "external/ge/ge_api_error_codes.h" +#include "framework/common/ge_types.h" #include "hybrid/common/tensor_value.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/executor/rt_callback_manager.h" @@ -108,6 +109,10 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; + const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } + Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); + void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + private: TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, @@ -127,6 +132,7 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; + std::vector task_desc_info; }; } // namespace hybrid } // namespace ge diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index b81632bd..1a97b6f8 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -37,7 +37,7 @@ #include "graph/common/ge_call_wrapper.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" diff --git a/ge/offline/main.cc b/ge/offline/main.cc index dc299ed7..363f9cda 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -292,11 +292,14 @@ class GFlagUtils { " --enable_small_channel Set enable small channel. 0(default): disable; 1: enable\n" " --enable_compress_weight Enable compress weight. true: enable; false(default): disable\n" " --compress_weight_conf Config file to compress weight\n" - " --buffer_optimize Set buffer optimize. \"l2_optimize\" (default). Set \"off_optimize\" to close\n" + " --buffer_optimize Set buffer optimize. Support \"l2_optimize\" (default), " + "\"l1_optimize\", \"off_optimize\"\n" " --mdl_bank_path Set the path of the custom repository generated after model tuning.\n" "\n[Operator Tuning]\n" " --precision_mode precision mode, support force_fp16(default), allow_mix_precision, " "allow_fp32_to_fp16, must_keep_origin_dtype.\n" + " --keep_dtype Retains the precision of certain operators in inference " + "scenarios by using a configuration file.\n" " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" " --op_bank_path Set the path of the custom repository generated after operator tuning with Auto Tune.\n" " --op_select_implmode Set op select implmode. Support high_precision, high_performance. " diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index c4f8a53b..5a67f7cd 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -29,7 +29,7 @@ #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" #include "graph/common/local_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 5d5a299a..3c531747 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -20,7 +20,7 @@ #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/util/rt_context_util.h" using std::map; diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 1f3fc5c5..2fa7182b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -22,11 +22,11 @@ #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" #include "single_op/task/build_task_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { namespace { @@ -70,6 +70,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = shape_type; tmp_task_desc_info.cur_iter_num = 0; + tmp_task_desc_info.task_type = op_task->GetTaskType(); GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); task_desc_info.emplace_back(tmp_task_desc_info); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 2a1a14e6..220adde8 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -23,7 +23,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 6d0109fe..6637271c 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -24,7 +24,7 @@ #include #include "common/helper/model_helper.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "single_op/single_op.h" #include "single_op/stream_resource.h" diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 34f1ba7b..6580ea31 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -16,7 +16,7 @@ #include "single_op/task/aicpu_kernel_task_builder.h" #include "framework/common/taskdown_common.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "build_task_utils.h" namespace ge { @@ -109,6 +109,10 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons aicpu_param_head->extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); } + task.op_type_ = op_desc_->GetName(); + task.kernel_id_ = kernel_id; + auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); + GELOGI("[TASK_INFO] %lu/%s %s", kernel_id, task.op_type_.c_str(), debug_info.c_str()); return SUCCESS; } } // namespace ge \ No newline at end of file diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 5fd4879e..90ddc696 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -19,8 +19,8 @@ #include "single_op/task/build_task_utils.h" #include "runtime/mem.h" #include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_utils.h" +#include "graph/load/model_manager/model_manager.h" namespace ge { AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) @@ -123,7 +123,7 @@ namespace ge { task.kernel_id_ = kernel_id; auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); - GELOGI("[TASK_INFO] %s/%s %s", std::to_string(kernel_id).c_str(), task.op_type_.c_str(), debug_info.c_str()); + GELOGI("[TASK_INFO] %lu/%s %s", kernel_id, task.op_type_.c_str(), debug_info.c_str()); return SUCCESS; } } // namespace ge diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 071e514b..9e4d55e1 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -17,7 +17,7 @@ #include "single_op/task/build_task_utils.h" #include "runtime/rt.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/type_utils.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 51c3e845..1772ca88 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector &input_desc, return UNSUPPORTED; } +uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } + TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { (void)rtMemFreeManaged(sm_desc_); @@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } const std::string &TbeOpTask::GetStubName() const { return stub_name_; } +uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } + Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); @@ -567,7 +571,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); return RT_FAILED; } - GELOGI("[TASK_INFO] %s/%s", std::to_string(kernel_id_).c_str(), op_type_.c_str()); + GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); auto status = OpenDump(stream); if (status != SUCCESS) { @@ -802,6 +806,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { return DoUpdateArgTable(param, false); } +uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } + void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { arg_base = reinterpret_cast(io_addr_host_.data()); arg_count = io_addr_host_.size(); @@ -840,6 +846,7 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { GELOGE(ret, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); return ret; } + GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); GELOGD("Invoke rtCpuKernelLaunch succeeded"); auto status = OpenDump(stream); if (status != SUCCESS) { diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index bf78557c..78e1f6f0 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -52,6 +52,7 @@ class OpTask { std::vector &output_desc, std::vector &output_buffers, rtStream_t stream); + virtual uint32_t GetTaskType() const; protected: Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); @@ -85,6 +86,7 @@ class TbeOpTask : public OpTask { size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); + uint32_t GetTaskType() const override; private: friend class SingleOpModel; @@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask { ~AiCpuBaseTask() override; UnknowShapeOpType GetUnknownType() const { return unknown_type_; } Status UpdateArgTable(const SingleOpModelParam ¶m) override; + uint32_t GetTaskType() const override; + protected: Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); Status SetInputConst(); @@ -227,6 +231,8 @@ private: size_t io_addr_num_ = 0; bool is_custom_ = false; uint32_t dump_flag_ = RT_KERNEL_DEFAULT; + std::string op_type_; + uint64_t kernel_id_ = 0; }; } // namespace ge diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 594352aa..9ba30b8e 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -20,7 +20,7 @@ #include #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "runtime/rt.h" #include "single_op/task/build_task_utils.h" diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index f7e6d679..9ca77f1c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -41,12 +41,7 @@ enum FrameworkType { }; const std::map kFwkTypeToStr = { - {"0", "Caffe"}, - {"1", "MindSpore"}, - {"3", "TensorFlow"}, - {"4", "Android_NN"}, - {"5", "Onnx"} -}; + {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; enum OpEngineType { ENGINE_SYS = 0, // default engine @@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; +// profiling data +const uint32_t kTaskTypeAicore = 0; +const uint32_t kTaskTypeAicpu = 1; +const uint32_t kTaskTypeInvalid = 0xFFFF; + // Data cache, including data address and length struct DataBuffer { public: @@ -256,6 +256,7 @@ struct TaskDescInfo { uint32_t stream_id; std::string shape_type; int64_t cur_iter_num; + uint32_t task_type; }; // Profiling info of graph diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 4d4c54d1..2dbb1753 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -437,6 +437,7 @@ REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index 62c9c750..f3b7f00a 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -238,8 +238,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *COSH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SINH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUAREDDIFFERENCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char - *REQUIREDSPACETOBATCHPADDINGS; // for retinanet scope fusion +// for retinanet scope fusion +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REQUIREDSPACETOBATCHPADDINGS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPOSTPROCESSOR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETBOXES; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINAMULTIANCHORS; @@ -370,7 +370,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREDUCESC FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMSEND; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMRECEIVE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREFREAD; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEWRITE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTESCATTERWRITE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARASSIGN; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARISINITIALIZEDOP; diff --git a/metadef b/metadef index dc6cceb6..88d053a5 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 +Subproject commit 88d053a5f94c40ff21620cef50b87075d5054292 diff --git a/parser b/parser index 4e72aae4..6904ba94 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 +Subproject commit 6904ba9488658afc30076d299183fc8875045f49 diff --git a/tests/depends/mmpa/src/mmpa_stub.cc b/tests/depends/mmpa/src/mmpa_stub.cc index 17a0c8e4..de09c52c 100644 --- a/tests/depends/mmpa/src/mmpa_stub.cc +++ b/tests/depends/mmpa/src/mmpa_stub.cc @@ -272,3 +272,8 @@ VOID *mmDlsym(VOID *handle, const CHAR *funcName) { return NULL; } + +INT32 mmGetPid() +{ + return (INT32)getpid(); +} diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 18614c16..abff433c 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -132,7 +132,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" "${GE_CODE_DIR}/ge/session/session_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" "${GE_CODE_DIR}/ge/session/inner_session.cc" @@ -140,15 +140,15 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" "${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model_stub.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/base.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" "${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" @@ -178,6 +178,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/net_output_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/replace_transshape_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/constant_fuse_same_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/print_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/no_use_reshape_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/iterator_op_pass.cc" @@ -253,13 +254,13 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_offset.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/zero_copy_task.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/aipp_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_offset.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" "${GE_CODE_DIR}/ge/omm/csa_interact.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" "${GE_CODE_DIR}/ge/common/kernel_store.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/auth/file_saver.cc" @@ -385,32 +386,32 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/common/model_parser/base.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/util.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_dumper.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/data_inputer.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/davinci_model_parser.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/tbe_handle_store.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "${GE_CODE_DIR}/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_record_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_wait_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_start_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/hccl_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_set_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_active_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_switch_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" @@ -588,6 +589,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" "graph/load/model_helper_unittest.cc" + "graph/load/model_utils_unittest.cc" ) set(PASS_TEST_FILES @@ -616,6 +618,7 @@ set(PASS_TEST_FILES "graph/passes/trans_op_depth_fusion_pass_unittest.cc" "graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc" "graph/passes/constant_folding_pass_unittest.cc" + "graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc" "graph/passes/stop_gradient_pass_unittest.cc" "graph/passes/prevent_gradient_pass_unittest.cc" "graph/passes/identity_pass_unittest.cc" diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 3d04fd0c..3ef8a750 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -33,11 +33,11 @@ #include "common/properties_manager.h" #include "common/types.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "ge/common/dump/dump_properties.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" diff --git a/tests/ut/ge/graph/graph_load_unittest.cc b/tests/ut/ge/graph/graph_load_unittest.cc index af9d5a37..54972af7 100644 --- a/tests/ut/ge/graph/graph_load_unittest.cc +++ b/tests/ut/ge/graph/graph_load_unittest.cc @@ -24,7 +24,7 @@ #include "common/helper/model_helper.h" #include "common/op/ge_op_utils.h" #include "common/types.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/op_desc.h" #include "graph/types.h" #include "graph/utils/attr_utils.h" @@ -35,7 +35,7 @@ #include "graph/load/graph_loader.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_manager_utils.h" #include "model/ge_model.h" #undef private diff --git a/tests/ut/ge/graph/load/data_dumper_unittest.cc b/tests/ut/ge/graph/load/data_dumper_unittest.cc index e53b76f4..1866f4eb 100644 --- a/tests/ut/ge/graph/load/data_dumper_unittest.cc +++ b/tests/ut/ge/graph/load/data_dumper_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/data_dumper.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/data_dumper.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 0c03c934..35413a6b 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -20,7 +20,7 @@ #define protected public #include "graph/utils/graph_utils.h" #include "common/profiling/profiling_manager.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" using namespace std; diff --git a/tests/ut/ge/graph/load/end_graph_task_unittest.cc b/tests/ut/ge/graph/load/end_graph_task_unittest.cc index 29e7a53a..a66aaaff 100644 --- a/tests/ut/ge/graph/load/end_graph_task_unittest.cc +++ b/tests/ut/ge/graph/load/end_graph_task_unittest.cc @@ -18,8 +18,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/task_info/end_graph_task_info.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/end_graph_task_info.h" +#include "graph/load/model_manager/davinci_model.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc index 5c056007..6a2468ee 100644 --- a/tests/ut/ge/graph/load/hccl_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/hccl_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { class UtestHcclTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc index 443d2975..53436820 100644 --- a/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_ex_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" #include "cce/aicpu_engine_struct.h" namespace ge { diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index fe886b49..a3a27a7b 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -19,9 +19,9 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" namespace ge { extern OpDescPtr CreateOpDesc(string name, string type); diff --git a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc index 9348d49e..1652841d 100644 --- a/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_addr_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" namespace ge { class UtestMemcpyAddrAsyncTaskInfo : public testing::Test { diff --git a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc index 8769ec39..afc04130 100644 --- a/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/memcpy_async_task_info_unittest.cc @@ -19,8 +19,8 @@ #define private public #define protected public -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" namespace ge { diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc b/tests/ut/ge/graph/load/model_utils_unittest.cc new file mode 100644 index 00000000..ac886cea --- /dev/null +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#define protected public +#define private public +#include "graph/load/model_manager/model_utils.h" +#include "graph/manager/graph_var_manager.h" + +using namespace std; + +namespace ge { +class UtestModelUtils : public testing::Test { + protected: + void TearDown() {} +}; + +// test ModelUtils::GetVarAddr +TEST_F(UtestModelUtils, get_var_addr_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + runtime_param.var_size = 16; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(runtime_param.var_base + offset - runtime_param.logic_var_base, var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} + +TEST_F(UtestModelUtils, get_var_addr_rdma_hbm) { + uint8_t test = 2; + uint8_t *pf = &test; + RuntimeParam runtime_param; + runtime_param.session_id = 0; + runtime_param.logic_var_base = 0; + runtime_param.var_base = pf; + + int64_t offset = 8; + EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); + EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); + VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; + std::shared_ptr op_desc = std::make_shared("test", "test"); + uint8_t *var_addr = nullptr; + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(reinterpret_cast(offset), var_addr); + VarManager::Instance(runtime_param.session_id)->Destory(); +} +} // namespace ge diff --git a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc index 56e673f7..43c2ad15 100644 --- a/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_data_inputer_unittest.cc @@ -17,7 +17,7 @@ #include -#include "graph/load/new_model_manager/data_inputer.h" +#include "graph/load/model_manager/data_inputer.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc index 00069930..38a250ad 100644 --- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc @@ -24,29 +24,29 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include #include "runtime/dev.h" #include "runtime/kernel.h" #include "cce/fwk_adpt_struct.h" -#include "graph/load/new_model_manager/task_info/task_info_factory.h" -#include "graph/load/new_model_manager/task_info/task_info.h" -#include "graph/load/new_model_manager/task_info/stream_active_task_info.h" -#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" -#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" -#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" -#include "graph/load/new_model_manager/task_info/label_set_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" -#include "graph/load/new_model_manager/task_info/kernel_task_info.h" -#include "graph/load/new_model_manager/task_info/hccl_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" -#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" -#include "graph/load/new_model_manager/task_info/event_record_task_info.h" -#include "graph/load/new_model_manager/task_info/event_wait_task_info.h" +#include "graph/load/model_manager/task_info/task_info_factory.h" +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/load/model_manager/task_info/stream_active_task_info.h" +#include "graph/load/model_manager/task_info/stream_switch_task_info.h" +#include "graph/load/model_manager/task_info/profiler_trace_task_info.h" +#include "graph/load/model_manager/task_info/memcpy_async_task_info.h" +#include "graph/load/model_manager/task_info/label_set_task_info.h" +#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" +#include "graph/load/model_manager/task_info/kernel_task_info.h" +#include "graph/load/model_manager/task_info/hccl_task_info.h" +#include "graph/load/model_manager/task_info/fusion_start_task_info.h" +#include "graph/load/model_manager/task_info/fusion_stop_task_info.h" +#include "graph/load/model_manager/task_info/event_record_task_info.h" +#include "graph/load/model_manager/task_info/event_wait_task_info.h" #include "graph/manager/graph_var_manager.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #undef private #undef protected diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc index 666d40f4..a68fb307 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -16,6 +16,8 @@ #include #include +#include +#include #include "common/debug/log.h" #include "common/l2_cache_optimize.h" @@ -28,9 +30,9 @@ #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/davinci_model_parser.h" -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model_parser.h" +#include "graph/load/model_manager/model_manager.h" //#include "new_op_test_utils.h" #undef private #undef protected @@ -75,4 +77,26 @@ TEST_F(UtestModelManagerModelManagerAicpu, DestroyAicpuKernel) { // EXPECT_EQ(ge::FAILED, mm.LoadModelOffline(model_id, data, nullptr, nullptr)); } +// test GenSessionId +TEST_F(UtestModelManagerModelManagerAicpu, gen_session_id) { + ModelManager manager; + uint64_t session_id; + manager.GenSessionId(session_id); + + struct timeval tv; + gettimeofday(&tv, nullptr); + uint64_t timestamp = static_cast(tv.tv_sec * 1000000); + + const uint64_t kSessionTimeMask = 0xfffffff000000000; // 不比us + const uint64_t kSessionPidMask = 0x000000000000ff00; + const uint64_t kSessionBiasMask = 0x00000000000000ff; + + uint32_t pid = getpid(); + + EXPECT_EQ(1, kSessionBiasMask & session_id); + EXPECT_EQ(pid<<8 & kSessionPidMask, kSessionPidMask & session_id); + //EXPECT_EQ(timestamp<<16 & kSessionTimeMask, kSessionTimeMask & session_id); +} + + } // namespace ge diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc index 29b6ae50..534a3859 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_unittest.cc @@ -21,7 +21,7 @@ #include "graph/utils/graph_utils.h" #define private public #define protected public -#include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/model_manager/model_manager.h" #include "common/helper/om_file_helper.h" #include "common/op/ge_op_utils.h" diff --git a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc index 620fac09..f10ccd7f 100644 --- a/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_task_build_unittest.cc @@ -30,7 +30,7 @@ #include "graph/compute_graph.h" #include "graph/utils/graph_utils.h" #include "graph/model_serialize.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "common/properties_manager.h" #include "common/op/ge_op_utils.h" #include diff --git a/tests/ut/ge/graph/load/new_op_test_utils.h b/tests/ut/ge/graph/load/new_op_test_utils.h index 4cbc78ac..984cbfb4 100644 --- a/tests/ut/ge/graph/load/new_op_test_utils.h +++ b/tests/ut/ge/graph/load/new_op_test_utils.h @@ -40,7 +40,7 @@ #define private public #include "graph/compute_graph.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/model_manager/davinci_model.h" #include "graph/node.h" #include "graph/op_desc.h" #include "graph/utils/attr_utils.h" diff --git a/tests/ut/ge/graph/load/output_net_output_unittest.cc b/tests/ut/ge/graph/load/output_net_output_unittest.cc index ecd28fe3..97246dad 100644 --- a/tests/ut/ge/graph/load/output_net_output_unittest.cc +++ b/tests/ut/ge/graph/load/output_net_output_unittest.cc @@ -23,8 +23,8 @@ #define private public #include "common/debug/memory_dumper.h" #include "common/op/ge_op_utils.h" -#include "graph/load/new_model_manager/davinci_model.h" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" #include "new_op_test_utils.h" #include "proto/om.pb.h" diff --git a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc index a98e14c6..82ffb388 100644 --- a/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc +++ b/tests/ut/ge/graph/load/tbe_handle_store_unittest.cc @@ -18,7 +18,7 @@ #define protected public #define private public -#include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/model_manager/tbe_handle_store.h" #include "runtime/kernel.h" #undef protected #undef private diff --git a/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc new file mode 100644 index 00000000..8c3469c8 --- /dev/null +++ b/tests/ut/ge/graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc @@ -0,0 +1,182 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/fuse_data_nodes_with_common_input_pass.h" + +#include +#include +#include +#include + +#include "inc/pass_manager.h" +#include "common/ge_inner_error_codes.h" +#include "graph_builder_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/type_utils.h" +#include "graph/utils/node_utils.h" + +namespace ge { + +class UtestFuseDataNodesWithCommonInputPass : public testing::Test { +protected: + void SetUp() {} + void TearDown() {} + +public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } +}; + +/// graph with subgraph +/// const +/// | | | +/// case +/// | +/// netoutput +/// ... +/// data0 data1 data2 +TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { + PassManager pass_manager; + pass_manager.AddPass("FuseDataNodesWithCommonInputPass", new (std::nothrow) FuseDataNodesWithCommonInputPass); + ComputeGraphPtr parent_graph = std::make_shared("parent_graph"); + auto parent_const = MakeNode(parent_graph, 0, 1, "parent_const", "Const"); + auto parent_case = MakeNode(parent_graph, 3, 1, "parent_case", "Case"); + auto parent_output = MakeNode(parent_graph, 1, 0, "parent_output", "NetOutput"); + + GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + + parent_const->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(1, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(2, tensor_desc); + parent_case->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_case->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_case->GetInDataAnchor(1)); + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_case->GetInDataAnchor(2)); + GraphUtils::AddEdge(parent_case->GetOutDataAnchor(0), parent_output->GetInDataAnchor(0)); + + auto case_node = parent_graph->FindNode("parent_case"); + EXPECT_NE(case_node, nullptr); + size_t input_data_node_num = case_node->GetInDataNodes().size(); + EXPECT_EQ(input_data_node_num, 3); + + ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); + auto data0 = MakeNode(sub_graph, 1, 1, "data0", "Data"); + data0->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data0->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + auto data1 = MakeNode(sub_graph, 1, 1, "data1", "Data"); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + auto data2 = MakeNode(sub_graph, 1, 1, "data2", "Data"); + data2->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data2->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + (void)AttrUtils::SetInt(data0->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 0); + (void)AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1); + (void)AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 2); + + sub_graph->SetParentNode(parent_case); + sub_graph->SetParentGraph(parent_graph); + parent_graph->AddSubgraph(sub_graph->GetName(), sub_graph); + size_t sub_graph_num = parent_graph->GetAllSubgraphs().size(); + EXPECT_EQ(sub_graph_num, 1); + + auto data1_node = sub_graph->FindNode("data1"); + EXPECT_NE(data1_node, nullptr); + auto data2_node = sub_graph->FindNode("data2"); + EXPECT_NE(data2_node, nullptr); + + EXPECT_EQ(pass_manager.Run(parent_graph), SUCCESS); + + // after pass, data1 and data2 are fused to data0 + data1_node = sub_graph->FindNode("data1"); + EXPECT_EQ(data1_node, nullptr); + data2_node = sub_graph->FindNode("data2"); + EXPECT_EQ(data2_node, nullptr); +} + +/// graph with subgraph +/// const +/// / \ +/// cast1 cast1 +/// \ / +/// case +/// | +/// netoutput +/// ... +/// data1 data2 +/// \ / +/// add +TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph2) { + PassManager pass_manager; + pass_manager.AddPass("FuseDataNodesWithCommonInputPass", new (std::nothrow) FuseDataNodesWithCommonInputPass); + ComputeGraphPtr parent_graph = std::make_shared("parent_graph"); + auto parent_const = MakeNode(parent_graph, 0, 1, "parent_const", "Const"); + auto parent_cast1 = MakeNode(parent_graph, 1, 1, "parent_cast1", "Cast"); + auto parent_case = MakeNode(parent_graph, 2, 1, "parent_case", "Case"); + auto parent_output = MakeNode(parent_graph, 1, 0, "parent_output", "NetOutput"); + + GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + + parent_const->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + parent_cast1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + parent_cast1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + parent_case->GetOpDesc()->UpdateInputDesc(1, tensor_desc); + parent_case->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_cast1->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_cast1->GetOutDataAnchor(0), parent_case->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_const->GetOutDataAnchor(0), parent_cast1->GetInDataAnchor(0)); + GraphUtils::AddEdge(parent_cast1->GetOutDataAnchor(0), parent_case->GetInDataAnchor(1)); + GraphUtils::AddEdge(parent_case->GetOutDataAnchor(0), parent_output->GetInDataAnchor(0)); + + ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); + auto data0 = MakeNode(sub_graph, 1, 1, "data0", "Data"); + data0->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data0->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + auto data1 = MakeNode(sub_graph, 1, 1, "data1", "Data"); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + (void)AttrUtils::SetInt(data0->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 0); + (void)AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1); + + sub_graph->SetParentNode(parent_case); + sub_graph->SetParentGraph(parent_graph); + parent_graph->AddSubgraph(sub_graph->GetName(), sub_graph); + + size_t sub_graph_num = parent_graph->GetAllSubgraphs().size(); + EXPECT_EQ(sub_graph_num, 1); + auto data1_node = sub_graph->FindNode("data1"); + EXPECT_NE(data1_node, nullptr); + + EXPECT_EQ(pass_manager.Run(parent_graph), SUCCESS); + + // after pass, data1 is fused to data0 + data1_node = sub_graph->FindNode("data1"); + EXPECT_EQ(data1_node, nullptr); +} +} // namespace ge diff --git a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc index b1cd6d4d..1b75a613 100644 --- a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc @@ -194,6 +194,9 @@ TEST_F(UtestMultiBatchClonePass, graph_with_subgraph) { auto func_node = MakeNode(owner, 3, 1, "test_if", "If"); graph->SetParentNode(func_node); graph->SetParentGraph(owner); + owner->AddSubgraph(graph->GetName(), graph); + size_t sub_graph_num = owner->GetAllSubgraphs().size(); + EXPECT_EQ(sub_graph_num, 1); EXPECT_EQ(pass_manager.Run(graph), SUCCESS); } diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index b6b97d89..ab909e11 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -18,7 +18,7 @@ #include //#include "cce/taskdown_common.hpp" -#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "runtime/rt.h" diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 32bd9e6b..c305fb12 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -34,6 +34,7 @@ extern "C" { */ #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device +#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device